Hi,
 
We are using nfs-ganesha 2.7.3 and see a crash in _mdcache_lru_ref() (called via mdcache_get()) when mdcache_readdir_chunked() passes it an 'entry' pointer that refers to inaccessible memory. The crash was seen around the same time that GPFS started asserting due to a memory leak.
 
Please check the following crash. Are there any commits in the upstream code that may help us avoid a crash in this code area? Thank you.
 
(gdb) bt
#0  0x00003fffb2829684 in .raise () from /lib64/libpthread.so.0
#1  0x0000000010056c78 in crash_handler (signo=11, info=0x3fff6488bf38, ctx=0x3fff6488b1c0)
    at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/MainNFSD/nfs_init.c:244
#2  <signal handler called>
#3  0x00000000101b29f8 in _mdcache_lru_ref (entry=0xc0000000007b24a1, flags=0, func=0x10235e18 <__func__.23505> "mdcache_get", line=175)
    at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:1889
#4  0x00000000101c4864 in mdcache_get (entry=0xc0000000007b24a1)
    at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.h:175
#5  0x00000000101d26d8 in mdcache_readdir_chunked (directory=0x3fff4c0dde70, whence=0, dir_state=0x3fff6488c6d8, cb=@0x1025b708: 0x10041390 <populate_dirent>,
    attrmask=122830, eod_met=0x3fff6488c820) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:3000
#6  0x00000000101bad28 in mdcache_readdir (dir_hdl=0x3fff4c0ddea8, whence=0x3fff6488c790, dir_state=0x3fff6488c6d8, cb=@0x1025b708: 0x10041390 <populate_dirent>,
    attrmask=122830, eod_met=0x3fff6488c820) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:559
#7  0x0000000010041f9c in fsal_readdir (directory=0x3fff4c0ddea8, cookie=0, nbfound=0x3fff6488c830, eod_met=0x3fff6488c820, attrmask=122830,
    cb=@0x1025f6e8: 0x100a69ec <nfs4_readdir_callback>, opaque=0x3fff6488c838) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/FSAL/fsal_helper.c:1160
#8  0x00000000100a8528 in nfs4_op_readdir (op=0x3ffd5021a300, data=0x3fff6488caf8, resp=0x3ffd5021a7a0)
    at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/Protocols/NFS/nfs4_op_readdir.c:664
#9  0x0000000010082a94 in nfs4_Compound (arg=0x3ffd50203e78, req=0x3ffd50203770, res=0x3ffd5019cc40)
    at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/Protocols/NFS/nfs4_Compound.c:942
#10 0x000000001007e630 in nfs_rpc_process_request (reqdata=0x3ffd50203770) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/MainNFSD/nfs_worker_thread.c:1328
#11 0x000000001007f004 in nfs_rpc_valid_NFS (req=0x3ffd50203770) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/MainNFSD/nfs_worker_thread.c:1538
#12 0x00003fffb2d1b420 in svc_vc_decode (req=0x3ffd50203770) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/libntirpc/src/svc_vc.c:829
#13 0x000000001006a520 in nfs_rpc_decode_request (xprt=0x3ffc68272f80, xdrs=0x3ffd501edd40)
    at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/MainNFSD/nfs_rpc_dispatcher_thread.c:1345
#14 0x00003fffb2d1b290 in svc_vc_recv (xprt=0x3ffc68272f80) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/libntirpc/src/svc_vc.c:802
#15 0x00003fffb2d15b58 in svc_rqst_xprt_task (wpe=0x3ffc68273198) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/libntirpc/src/svc_rqst.c:769
#16 0x00003fffb2d161cc in svc_rqst_epoll_events (sr_rec=0x10008c87e20, n_events=1)
    at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/libntirpc/src/svc_rqst.c:941
#17 0x00003fffb2d16568 in svc_rqst_epoll_loop (sr_rec=0x10008c87e20) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/libntirpc/src/svc_rqst.c:1014
#18 0x00003fffb2d1669c in svc_rqst_run_task (wpe=0x10008c87e20) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/libntirpc/src/svc_rqst.c:1050
#19 0x00003fffb2d24124 in work_pool_thread (arg=0x3ffc9c005a90) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/libntirpc/src/work_pool.c:181
#20 0x00003fffb281c93c in .start_thread () from /lib64/libpthread.so.0
#21 0x00003fffb26c7a3c in .__clone () from /lib64/libc.so.6
(gdb) f 3
#3  0x00000000101b29f8 in _mdcache_lru_ref (entry=0xc0000000007b24a1, flags=0, func=0x10235e18 <__func__.23505> "mdcache_get", line=175)
    at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:1889
1889            struct lru_q_lane *qlane = &LRU[lru->lane];
(gdb) p *lru
Cannot access memory at address 0xc0000000007b26c9
(gdb) p *entry
Cannot access memory at address 0xc0000000007b24a1
(gdb) f 5
#5  0x00000000101d26d8 in mdcache_readdir_chunked (directory=0x3fff4c0dde70, whence=0, dir_state=0x3fff6488c6d8, cb=@0x1025b708: 0x10041390 <populate_dirent>,
    attrmask=122830, eod_met=0x3fff6488c820) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.09lookupfix/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:3000
3000                            mdcache_get(entry);
(gdb) l
2995                    status.major = ERR_FSAL_NO_ERROR;
2996                    /* We have the content_lock for at least read. */
2997                    if (dirent->entry) {
2998                            /* Take a ref for our use */
2999                            entry = dirent->entry;
3000                            mdcache_get(entry);
3001                    } else {
3002                            /* Not cached, get actual entry using the dirent ckey */
3003                            status = mdcache_find_keyed_reason(&dirent->ckey,
3004                                                               &entry,
(gdb)

Thanks,
Madhu Thorat.