(Apologies, used the old list address last time)

 

After applying https://review.gerrithub.io/c/ffilz/nfs-ganesha/+/441566, our long-running test shows continually increasing memory usage. Eventually ganesha.nfsd consumes all memory on the box and we OOM. Looking at a core, it appears that the mdcache LRU contains far more entries than the high water mark would normally allow.

 

We have…

 

CacheInode {

    Dir_Chunk = 500000;

    Entries_HWMark = 500000;

}

 

…and after we run for a while, we observe from a core (obtained at runtime using gcore)...

 

(gdb) print lru_state

$8 = {entries_hiwat = 500000, entries_used = 2437134, chunks_hiwat = 100000, chunks_used = 2002, fds_system_imposed = 400000, fds_hard_limit = 396000, fds_hiwat = 360000, fds_lowat = 200000, futility = 0, per_lane_work = 50, biggest_window = 160000, prev_fd_count = 160, prev_time = 1548692973, fd_state = 0}

 

So we have 2.4M entries with a high water mark of 500K. The difference appears to account for the unexpected memory usage.

 

This seems to be new behavior after applying the above patch, although it’s hard to be certain: before the patch, our runs crashed (dumping core) because entries were being freed twice, so they never ran long enough for us to see whether memory usage would have grown in the same way.

 

Some more info from the core:

 

(gdb) print LRU

$2 = {{L1 = {q = {next = 0xe4788210, prev = 0x23a701f0}, id = LRU_ENTRY_L1, size = 147689}, L2 = {q = {

        next = 0x23a8b7b0, prev = 0xe478ef80}, id = LRU_ENTRY_L2, size = 150}, cleanup = {q = {next = 0xcca1ff70, 

        prev = 0x139e2bf0}, id = LRU_ENTRY_CLEANUP, size = 748}, mtx = {__data = {__lock = 0, __count = 0, 

        __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x1dd98360, prev = 0x1100cc30}, id = LRU_ENTRY_L1, 

      size = 143935}, L2 = {q = {next = 0xfa204e0, prev = 0x1e26a0a0}, id = LRU_ENTRY_L2, size = 145}, cleanup = {

      q = {next = 0xb26f8720, prev = 0x18f1810}, id = LRU_ENTRY_CLEANUP, size = 811}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x4903dc0, prev = 0x96cb4320}, id = LRU_ENTRY_L1, 

      size = 141362}, L2 = {q = {next = 0x2eea11f0, prev = 0x48fd050}, id = LRU_ENTRY_L2, size = 147}, cleanup = {

      q = {next = 0xdc74e5b0, prev = 0xca6f990}, id = LRU_ENTRY_CLEANUP, size = 817}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x79dcf850, prev = 0x7d9445d0}, id = LRU_ENTRY_L1, 

      size = 122171}, L2 = {q = {next = 0x7e823b30, prev = 0x79dcfc90}, id = LRU_ENTRY_L2, size = 146}, cleanup = {

      q = {next = 0x3245ddb0, prev = 0x1121e510}, id = LRU_ENTRY_CLEANUP, size = 761}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x1a94af70, prev = 0xec32de0}, id = LRU_ENTRY_L1, 

      size = 142951}, L2 = {q = {next = 0xec25300, prev = 0x1a944200}, id = LRU_ENTRY_L2, size = 150}, cleanup = {

      q = {next = 0xcf8eada0, prev = 0xba9afd0}, id = LRU_ENTRY_CLEANUP, size = 759}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x49b54c90, prev = 0x1896b90}, id = LRU_ENTRY_L1, 

      size = 133691}, L2 = {q = {next = 0x413dbca0, prev = 0x66d15ce0}, id = LRU_ENTRY_L2, size = 142}, cleanup = {

      q = {next = 0xe249dcb0, prev = 0x135e9ae0}, id = LRU_ENTRY_CLEANUP, size = 804}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x25eba620, prev = 0x96783780}, id = LRU_ENTRY_L1, 

---Type <return> to continue, or q <return> to quit---

      size = 146051}, L2 = {q = {next = 0x1d179420, prev = 0x25eb38b0}, id = LRU_ENTRY_L2, size = 150}, cleanup = {

      q = {next = 0xab572d30, prev = 0x11a0d550}, id = LRU_ENTRY_CLEANUP, size = 765}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x15bc6d50, prev = 0x3c292070}, id = LRU_ENTRY_L1, 

      size = 135767}, L2 = {q = {next = 0x5d1fad10, prev = 0x15bbffe0}, id = LRU_ENTRY_L2, size = 148}, cleanup = {

      q = {next = 0x5b5c1490, prev = 0x88016f0}, id = LRU_ENTRY_CLEANUP, size = 720}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x67772f0, prev = 0x964eb1b0}, id = LRU_ENTRY_L1, 

      size = 140729}, L2 = {q = {next = 0x30d68c80, prev = 0x6770580}, id = LRU_ENTRY_L2, size = 147}, cleanup = {

      q = {next = 0xc4710dd0, prev = 0x874dc10}, id = LRU_ENTRY_CLEANUP, size = 791}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x5d1c210, prev = 0x302682d0}, id = LRU_ENTRY_L1, 

      size = 140670}, L2 = {q = {next = 0x3026f040, prev = 0x5d154a0}, id = LRU_ENTRY_L2, size = 147}, cleanup = {

      q = {next = 0xcb5487f0, prev = 0xc679490}, id = LRU_ENTRY_CLEANUP, size = 794}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xe2791600, prev = 0x96252be0}, id = LRU_ENTRY_L1, 

      size = 148189}, L2 = {q = {next = 0xec06a870, prev = 0xe2798370}, id = LRU_ENTRY_L2, size = 145}, cleanup = {

      q = {next = 0xa4dec380, prev = 0xb29dc50}, id = LRU_ENTRY_CLEANUP, size = 746}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xe5dff590, prev = 0x23d43b90}, id = LRU_ENTRY_L1, 

      size = 147297}, L2 = {q = {next = 0x23de7e10, prev = 0xe5e06300}, id = LRU_ENTRY_L2, size = 141}, cleanup = {

      q = {next = 0xa6b8d390, prev = 0x10229cb0}, id = LRU_ENTRY_CLEANUP, size = 744}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x1cc790a0, prev = 0x4ba049f0}, id = LRU_ENTRY_L1, 

      size = 143750}, L2 = {q = {next = 0xf39d9d0, prev = 0x1cc72330}, id = LRU_ENTRY_L2, size = 147}, cleanup = {

---Type <return> to continue, or q <return> to quit---

      q = {next = 0xd37ff620, prev = 0x13b47010}, id = LRU_ENTRY_CLEANUP, size = 780}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xeaf4fd00, prev = 0x25ab91d0}, id = LRU_ENTRY_L1, 

      size = 146505}, L2 = {q = {next = 0x25aa4980, prev = 0xeaf56a70}, id = LRU_ENTRY_L2, size = 150}, cleanup = {

      q = {next = 0x23c6c110, prev = 0x11e02670}, id = LRU_ENTRY_CLEANUP, size = 788}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xe3dea010, prev = 0x2831da50}, id = LRU_ENTRY_L1, 

      size = 148717}, L2 = {q = {next = 0x283247c0, prev = 0xe3df0d80}, id = LRU_ENTRY_L2, size = 149}, cleanup = {

      q = {next = 0xb84809e0, prev = 0x87cee00}, id = LRU_ENTRY_CLEANUP, size = 850}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0xce3ced10, prev = 0xd5f2db20}, id = LRU_ENTRY_L1, 

      size = 153934}, L2 = {q = {next = 0xd5f12560, prev = 0xce3d5a80}, id = LRU_ENTRY_L2, size = 149}, cleanup = {

      q = {next = 0xca840ac0, prev = 0xda6b170}, id = LRU_ENTRY_CLEANUP, size = 779}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}, {L1 = {q = {next = 0x2d292f90, prev = 0x17851300}, id = LRU_ENTRY_L1, 

      size = 138008}, L2 = {q = {next = 0x1786c8c0, prev = 0x2d2854b0}, id = LRU_ENTRY_L2, size = 147}, cleanup = {

      q = {next = 0xc7c1e980, prev = 0xb2a3820}, id = LRU_ENTRY_CLEANUP, size = 761}, mtx = {__data = {__lock = 0, 

        __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}}, 

      __size = '\000' <repeats 39 times>, __align = 0}, iter = {active = false, glist = 0x0, glistn = 0x0}, 

    __pad0 = '\000' <repeats 63 times>}}

(gdb) 

 

(gdb) print *(struct mdcache_lru__ *)0xe4788210

$3 = {q = {next = 0xe47814a0, prev = 0x7e0620 <LRU>}, qid = LRU_ENTRY_L1, refcnt = 1, flags = 0, lane = 0, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xe47814a0

$5 = {q = {next = 0xe477a730, prev = 0xe4788210}, qid = LRU_ENTRY_L1, refcnt = 1, flags = 0, lane = 0, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xe477a730

$6 = {q = {next = 0xe47739c0, prev = 0xe47814a0}, qid = LRU_ENTRY_L1, refcnt = 1, flags = 0, lane = 0, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xe47739c0

$7 = {q = {next = 0xe476cc50, prev = 0xe477a730}, qid = LRU_ENTRY_L1, refcnt = 1, flags = 0, lane = 0, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xb84809e0

$8 = {q = {next = 0xaa559720, prev = 0x7e12a0 <LRU+3200>}, qid = LRU_ENTRY_CLEANUP, refcnt = 3, flags = 3, 

  lane = 14, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xaa559720

$9 = {q = {next = 0xd9754930, prev = 0xb84809e0}, qid = LRU_ENTRY_CLEANUP, refcnt = 3, flags = 3, lane = 14, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xd9754930

$10 = {q = {next = 0xa94e7f40, prev = 0xaa559720}, qid = LRU_ENTRY_CLEANUP, refcnt = 2, flags = 3, lane = 14, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xa94e7f40

$11 = {q = {next = 0x185aa7d0, prev = 0xd9754930}, qid = LRU_ENTRY_CLEANUP, refcnt = 3, flags = 3, lane = 14, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0x185aa7d0

$12 = {q = {next = 0xcb912a30, prev = 0xa94e7f40}, qid = LRU_ENTRY_CLEANUP, refcnt = 1, flags = 3, lane = 14, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xcb912a30

$13 = {q = {next = 0xc7727370, prev = 0x185aa7d0}, qid = LRU_ENTRY_CLEANUP, refcnt = 2, flags = 3, lane = 14, cf = 0}

(gdb) 

$14 = {q = {next = 0xc7727370, prev = 0x185aa7d0}, qid = LRU_ENTRY_CLEANUP, refcnt = 2, flags = 3, lane = 14, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xf39d9d0

$15 = {q = {next = 0xf3a4740, prev = 0x7e10c0 <LRU+2720>}, qid = LRU_ENTRY_L2, refcnt = 1, flags = 0, lane = 12, 

  cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xf3a4740

$16 = {q = {next = 0xf3ab4b0, prev = 0xf39d9d0}, qid = LRU_ENTRY_L2, refcnt = 1, flags = 0, lane = 12, cf = 0}

(gdb) print *(struct mdcache_lru__ *)0xf3ab4b0

$17 = {q = {next = 0xf3b2220, prev = 0xf3a4740}, qid = LRU_ENTRY_L2, refcnt = 1, flags = 0, lane = 12, cf = 0}