Hi All,
 
When using 2.7.6 we noticed a crash in the avltree_remove() path while a directory chunk was being cleaned. At the time of the crash, entries_used was greater than entries_hiwat, and mdcache_lru_get() had obtained an entry for reuse from lru_try_reap_entry(); that entry belonged to a directory. The crash happened when mdcache_lru_clean() -> mdc_clean_entry() -> mdcache_dirent_invalidate_all() was called to clean the directory's chunks, and the "node" argument passed to get_first() had an unexpected address (0x7f740000; see the gdb output below).
 
We were running tests with the below configuration:
CacheInode
{
        Entries_HWMark = 1000;
        Chunks_HWMark = 1000;
        LRU_Run_Interval = 90;
        FD_HWMark_Percent = 60;
        FD_LWMark_Percent = 0;
        FD_Limit_Percent = 90;
        Dir_Chunk = 32000;
}

At the time of the crash:
entries_hiwat = 1000, entries_used = 25238, chunks_hiwat = 1000, chunks_used = 366
 
We are using 2.7.6 along with some recent patches for the MDCACHE code.
 
Has anybody seen this kind of crash? Are there any patches that may help to fix it? We have the core dump, so we can provide more information from it. However, we have not been able to reproduce the crash in our test attempts.
 
Backtrace for reference:
(gdb) bt
#0  0x00007f743993d23b in raise () from /lib64/libpthread.so.0
#1  0x0000000000443aa5 in crash_handler (signo=11, info=0x7f73ac6b82b0, ctx=0x7f73ac6b8180) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/MainNFSD/nfs_init.c:244
#2  <signal handler called>
#3  0x00000000004f2f8b in get_first (node=0x7f740000) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/avl/avl.c:110
#4  0x00000000004f38b6 in avltree_remove (node=0x7f74102f1058, tree=0x7f73e09808a8) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/avl/avl.c:407
#5  0x0000000000561778 in mdcache_avl_remove (parent=0x7f73e09804f0, dirent=0x7f74102f1040)
    at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_avl.c:220
#6  0x0000000000556b63 in mdcache_clean_dirent_chunk (chunk=0x7f7410044100)
    at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:556
#7  0x0000000000546905 in lru_clean_chunk (chunk=0x7f7410044100) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:2061
#8  0x0000000000546a98 in _mdcache_lru_unref_chunk (chunk=0x7f7410044100, func=0x5bdf60 <__func__.23718> "mdcache_clean_dirent_chunks", line=597)
    at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:2090
#9  0x0000000000556bfb in mdcache_clean_dirent_chunks (entry=0x7f73e09804f0)
    at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:596
#10 0x0000000000556cdf in mdcache_dirent_invalidate_all (entry=0x7f73e09804f0)
    at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:621
#11 0x000000000055594f in mdc_clean_entry (entry=0x7f73e09804f0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:302
#12 0x00000000005417ef in mdcache_lru_clean (entry=0x7f73e09804f0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:599
#13 0x0000000000545bdc in mdcache_lru_get (sub_handle=0x7f73b0367a10) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:1841
#14 0x00000000005550e4 in _mdcache_alloc_handle (export=0xe5a0d0, sub_handle=0x7f73b0367a10, fs=0xe68970, reason=MDC_REASON_DEFAULT,
    func=0x5bdfa0 <__func__.23745> "mdcache_new_entry", line=709) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:174
#15 0x0000000000556eff in mdcache_new_entry (export=0xe5a0d0, sub_handle=0x7f73b0367a10, attrs_in=0x7f73ac6b9460, attrs_out=0x0, new_directory=false, entry=0x7f73ac6b93d8,
    state=0x0, reason=MDC_REASON_DEFAULT) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:708
#16 0x000000000054b3a5 in mdcache_alloc_and_check_handle (export=0xe5a0d0, sub_handle=0x7f73b0367a10, new_obj=0x7f73ac6b9570, new_directory=false, attrs_in=0x7f73ac6b9460,
    attrs_out=0x0, tag=0x5bcd2c "lookup ", parent=0x7f7408a5f090, name=0x7f74183bfda0 'a' <repeats 200 times>..., invalidate=0x7f73ac6b945f, state=0x0)
    at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:100
#17 0x000000000055915c in mdc_lookup_uncached (mdc_parent=0x7f7408a5f090, name=0x7f74183bfda0 'a' <repeats 200 times>..., new_entry=0x7f73ac6b9718, attrs_out=0x0)
    at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:1410
#18 0x000000000055fb27 in mdcache_readdir_chunked (directory=0x7f7408a5f090, whence=872598721, dir_state=0x7f73ac6b98f0, cb=0x434b1c <populate_dirent>, attrmask=122830,
    eod_met=0x7f73ac6ba00b) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:3211
#19 0x000000000054d0c0 in mdcache_readdir (dir_hdl=0x7f7408a5f0c8, whence=0x7f73ac6b98d0, dir_state=0x7f73ac6b98f0, cb=0x434b1c <populate_dirent>, attrmask=122830,
    eod_met=0x7f73ac6ba00b) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:559
#20 0x0000000000435443 in fsal_readdir (directory=0x7f7408a5f0c8, cookie=872598721, nbfound=0x7f73ac6ba00c, eod_met=0x7f73ac6ba00b, attrmask=122830,
    cb=0x499e11 <nfs3_readdirplus_callback>, opaque=0x7f73ac6b9fc0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/fsal_helper.c:1160
#21 0x0000000000499c50 in nfs3_readdirplus (arg=0x7f73b018b2d8, req=0x7f73b018abd0, res=0x7f73b01742b0)
    at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/Protocols/NFS/nfs3_readdirplus.c:310
#22 0x000000000045ea8b in nfs_rpc_process_request (reqdata=0x7f73b018abd0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/MainNFSD/nfs_worker_thread.c:1331
#23 0x000000000045f257 in nfs_rpc_valid_NFS (req=0x7f73b018abd0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/MainNFSD/nfs_worker_thread.c:1553
#24 0x00007f743b39333b in svc_vc_decode (req=0x7f73b018abd0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/libntirpc/src/svc_vc.c:829
#25 0x0000000000451bf4 in nfs_rpc_decode_request (xprt=0x7f7428000c10, xdrs=0x7f73b0123980)
    at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/MainNFSD/nfs_rpc_dispatcher_thread.c:1345
#26 0x00007f743b39324c in svc_vc_recv (xprt=0x7f7428000c10) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/libntirpc/src/svc_vc.c:802
#27 0x00007f743b38f939 in svc_rqst_xprt_task (wpe=0x7f7428000e68) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/libntirpc/src/svc_rqst.c:769
#28 0x00007f743b398c5a in work_pool_thread (arg=0x3d2fc00) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/libntirpc/src/work_pool.c:181
#29 0x00007f7439935dc5 in start_thread () from /lib64/libpthread.so.0
#30 0x00007f743924373d in clone () from /lib64/libc.so.6
 
(gdb) frame 3
#3  0x00000000004f2f8b in get_first (node=0x7f740000) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/avl/avl.c:110
110             while (node->left)
(gdb) p node->left
Cannot access memory at address 0x7f740000

(gdb) p node
$2 = (struct avltree_node *) 0x7f740000

 
 
(gdb) frame 4
#4  0x00000000004f38b6 in avltree_remove (node=0x7f74102f1058, tree=0x7f73e09808a8) at /usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/avl/avl.c:407
407                     next = get_first(right);
(gdb) p right
$3 = (struct avltree_node *) 0x7f73cc1bab88

 
(gdb) p lru_state
$4 = {entries_hiwat = 1000, entries_used = 25238, chunks_hiwat = 1000, chunks_used = 366, fds_system_imposed = 1048576, fds_hard_limit = 943718, fds_hiwat = 629145,
  fds_lowat = 0, futility = 0, per_lane_work = 50, biggest_window = 419430, prev_fd_count = 4, prev_time = 1582628134, fd_state = 0}

 

Thanks,
Madhu Thorat.