(Apologies for the resend; I used the old list address last time.)
After applying
https://review.gerrithub.io/c/ffilz/nfs-ganesha/+/441566 our long-running
test shows continually increasing memory usage. Eventually ganesha.nfsd consumes all
memory in the box and the OOM killer fires. Looking at a core, it appears that the mdcache
LRU contains far more entries than the high water mark would normally allow.
We have the following configuration:
CacheInode {
Dir_Chunk = 500000;
Entries_HWMark = 500000;
}
After running for a while, we observe the following in a core (obtained at runtime using
gcore):
(gdb) print lru_state
$8 = {entries_hiwat = 500000, entries_used = 2437134, chunks_hiwat = 100000, chunks_used =
2002, fds_system_imposed = 400000, fds_hard_limit = 396000, fds_hiwat = 360000, fds_lowat
= 200000, futility = 0, per_lane_work = 50, biggest_window = 160000, prev_fd_count = 160,
prev_time = 1548692973, fd_state = 0}
So we have 2.4M entries with a high water mark of 500K. The difference appears to account
for the unexpected memory usage.
This seems to be new behavior after applying the above patch, although it's hard to be
certain: in earlier runs we crashed (core dump) on entries being freed twice before we
could get as far as the high memory usage.
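Some more info from the core. Note that the per-lane queue sizes below (17 lanes) add up
exactly to entries_used: 2,421,416 in L1 + 2,500 in L2 + 13,218 in cleanup = 2,437,134,
so nearly all of the excess entries are sitting in the L1 queues: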
(gdb) print LRU
$2 = {{L1 = {q = {next = 0xe4788210, prev = 0x23a701f0}, id = LRU_ENTRY_L1, size =
147689}, L2 = {q = {
next = 0x23a8b7b0, prev = 0xe478ef80}, id = LRU_ENTRY_L2, size = 150}, cleanup =
{q = {next = 0xcca1ff70,
prev = 0x139e2bf0}, id = LRU_ENTRY_CLEANUP, size = 748}, mtx = {__data = {__lock =
0, __count = 0,
__owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next
= 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x1dd98360, prev
= 0x1100cc30}, id = LRU_ENTRY_L1,
size = 143935}, L2 = {q = {next = 0xfa204e0, prev = 0x1e26a0a0}, id = LRU_ENTRY_L2,
size = 145}, cleanup = {
q = {next = 0xb26f8720, prev = 0x18f1810}, id = LRU_ENTRY_CLEANUP, size = 811}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x4903dc0, prev
= 0x96cb4320}, id = LRU_ENTRY_L1,
size = 141362}, L2 = {q = {next = 0x2eea11f0, prev = 0x48fd050}, id = LRU_ENTRY_L2,
size = 147}, cleanup = {
q = {next = 0xdc74e5b0, prev = 0xca6f990}, id = LRU_ENTRY_CLEANUP, size = 817}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x79dcf850, prev
= 0x7d9445d0}, id = LRU_ENTRY_L1,
size = 122171}, L2 = {q = {next = 0x7e823b30, prev = 0x79dcfc90}, id = LRU_ENTRY_L2,
size = 146}, cleanup = {
q = {next = 0x3245ddb0, prev = 0x1121e510}, id = LRU_ENTRY_CLEANUP, size = 761}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x1a94af70, prev
= 0xec32de0}, id = LRU_ENTRY_L1,
size = 142951}, L2 = {q = {next = 0xec25300, prev = 0x1a944200}, id = LRU_ENTRY_L2,
size = 150}, cleanup = {
q = {next = 0xcf8eada0, prev = 0xba9afd0}, id = LRU_ENTRY_CLEANUP, size = 759}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x49b54c90, prev
= 0x1896b90}, id = LRU_ENTRY_L1,
size = 133691}, L2 = {q = {next = 0x413dbca0, prev = 0x66d15ce0}, id = LRU_ENTRY_L2,
size = 142}, cleanup = {
q = {next = 0xe249dcb0, prev = 0x135e9ae0}, id = LRU_ENTRY_CLEANUP, size = 804}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x25eba620, prev
= 0x96783780}, id = LRU_ENTRY_L1,
---Type <return> to continue, or q <return> to quit---
size = 146051}, L2 = {q = {next = 0x1d179420, prev = 0x25eb38b0}, id = LRU_ENTRY_L2,
size = 150}, cleanup = {
q = {next = 0xab572d30, prev = 0x11a0d550}, id = LRU_ENTRY_CLEANUP, size = 765}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x15bc6d50, prev
= 0x3c292070}, id = LRU_ENTRY_L1,
size = 135767}, L2 = {q = {next = 0x5d1fad10, prev = 0x15bbffe0}, id = LRU_ENTRY_L2,
size = 148}, cleanup = {
q = {next = 0x5b5c1490, prev = 0x88016f0}, id = LRU_ENTRY_CLEANUP, size = 720}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x67772f0, prev
= 0x964eb1b0}, id = LRU_ENTRY_L1,
size = 140729}, L2 = {q = {next = 0x30d68c80, prev = 0x6770580}, id = LRU_ENTRY_L2,
size = 147}, cleanup = {
q = {next = 0xc4710dd0, prev = 0x874dc10}, id = LRU_ENTRY_CLEANUP, size = 791}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x5d1c210, prev
= 0x302682d0}, id = LRU_ENTRY_L1,
size = 140670}, L2 = {q = {next = 0x3026f040, prev = 0x5d154a0}, id = LRU_ENTRY_L2,
size = 147}, cleanup = {
q = {next = 0xcb5487f0, prev = 0xc679490}, id = LRU_ENTRY_CLEANUP, size = 794}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xe2791600, prev
= 0x96252be0}, id = LRU_ENTRY_L1,
size = 148189}, L2 = {q = {next = 0xec06a870, prev = 0xe2798370}, id = LRU_ENTRY_L2,
size = 145}, cleanup = {
q = {next = 0xa4dec380, prev = 0xb29dc50}, id = LRU_ENTRY_CLEANUP, size = 746}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xe5dff590, prev
= 0x23d43b90}, id = LRU_ENTRY_L1,
size = 147297}, L2 = {q = {next = 0x23de7e10, prev = 0xe5e06300}, id = LRU_ENTRY_L2,
size = 141}, cleanup = {
q = {next = 0xa6b8d390, prev = 0x10229cb0}, id = LRU_ENTRY_CLEANUP, size = 744}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x1cc790a0, prev
= 0x4ba049f0}, id = LRU_ENTRY_L1,
size = 143750}, L2 = {q = {next = 0xf39d9d0, prev = 0x1cc72330}, id = LRU_ENTRY_L2,
size = 147}, cleanup = {
---Type <return> to continue, or q <return> to quit---
q = {next = 0xd37ff620, prev = 0x13b47010}, id = LRU_ENTRY_CLEANUP, size = 780}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xeaf4fd00, prev
= 0x25ab91d0}, id = LRU_ENTRY_L1,
size = 146505}, L2 = {q = {next = 0x25aa4980, prev = 0xeaf56a70}, id = LRU_ENTRY_L2,
size = 150}, cleanup = {
q = {next = 0x23c6c110, prev = 0x11e02670}, id = LRU_ENTRY_CLEANUP, size = 788}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xe3dea010, prev
= 0x2831da50}, id = LRU_ENTRY_L1,
size = 148717}, L2 = {q = {next = 0x283247c0, prev = 0xe3df0d80}, id = LRU_ENTRY_L2,
size = 149}, cleanup = {
q = {next = 0xb84809e0, prev = 0x87cee00}, id = LRU_ENTRY_CLEANUP, size = 850}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xce3ced10, prev
= 0xd5f2db20}, id = LRU_ENTRY_L1,
size = 153934}, L2 = {q = {next = 0xd5f12560, prev = 0xce3d5a80}, id = LRU_ENTRY_L2,
size = 149}, cleanup = {
q = {next = 0xca840ac0, prev = 0xda6b170}, id = LRU_ENTRY_CLEANUP, size = 779}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x2d292f90, prev
= 0x17851300}, id = LRU_ENTRY_L1,
size = 138008}, L2 = {q = {next = 0x1786c8c0, prev = 0x2d2854b0}, id = LRU_ENTRY_L2,
size = 147}, cleanup = {
q = {next = 0xc7c1e980, prev = 0xb2a3820}, id = LRU_ENTRY_CLEANUP, size = 761}, mtx
= {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev
= 0x0, __next = 0x0}},
__size = '\000' <repeats 39 times>, __align = 0}, iter = {active =
false, glist = 0x0, glistn = 0x0},
__pad0 = '\000' <repeats 63 times>}}
(gdb)
(gdb) print *(struct mdcache_lru__ *)0xe4788210
$3 = {q = {next = 0xe47814a0, prev = 0x7e0620 <LRU>}, qid = LRU_ENTRY_L1, refcnt =
1, flags = 0, lane = 0, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xe47814a0
$5 = {q = {next = 0xe477a730, prev = 0xe4788210}, qid = LRU_ENTRY_L1, refcnt = 1, flags =
0, lane = 0, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xe477a730
$6 = {q = {next = 0xe47739c0, prev = 0xe47814a0}, qid = LRU_ENTRY_L1, refcnt = 1, flags =
0, lane = 0, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xe47739c0
$7 = {q = {next = 0xe476cc50, prev = 0xe477a730}, qid = LRU_ENTRY_L1, refcnt = 1, flags =
0, lane = 0, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xb84809e0
$8 = {q = {next = 0xaa559720, prev = 0x7e12a0 <LRU+3200>}, qid = LRU_ENTRY_CLEANUP,
refcnt = 3, flags = 3,
lane = 14, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xaa559720
$9 = {q = {next = 0xd9754930, prev = 0xb84809e0}, qid = LRU_ENTRY_CLEANUP, refcnt = 3,
flags = 3, lane = 14, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xd9754930
$10 = {q = {next = 0xa94e7f40, prev = 0xaa559720}, qid = LRU_ENTRY_CLEANUP, refcnt = 2,
flags = 3, lane = 14, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xa94e7f40
$11 = {q = {next = 0x185aa7d0, prev = 0xd9754930}, qid = LRU_ENTRY_CLEANUP, refcnt = 3,
flags = 3, lane = 14, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0x185aa7d0
$12 = {q = {next = 0xcb912a30, prev = 0xa94e7f40}, qid = LRU_ENTRY_CLEANUP, refcnt = 1,
flags = 3, lane = 14, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xcb912a30
$13 = {q = {next = 0xc7727370, prev = 0x185aa7d0}, qid = LRU_ENTRY_CLEANUP, refcnt = 2,
flags = 3, lane = 14, cf = 0}
(gdb)
$14 = {q = {next = 0xc7727370, prev = 0x185aa7d0}, qid = LRU_ENTRY_CLEANUP, refcnt = 2,
flags = 3, lane = 14, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xf39d9d0
$15 = {q = {next = 0xf3a4740, prev = 0x7e10c0 <LRU+2720>}, qid = LRU_ENTRY_L2,
refcnt = 1, flags = 0, lane = 12,
cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xf3a4740
$16 = {q = {next = 0xf3ab4b0, prev = 0xf39d9d0}, qid = LRU_ENTRY_L2, refcnt = 1, flags =
0, lane = 12, cf = 0}
(gdb) print *(struct mdcache_lru__ *)0xf3ab4b0
$17 = {q = {next = 0xf3b2220, prev = 0xf3a4740}, qid = LRU_ENTRY_L2, refcnt = 1, flags =
0, lane = 12, cf = 0}