Do you have this commit:
11e0e375e40658267cbf449afacaa53a136f7097
MDCACHE - Fix race between lru functions for the chunk and the parent
of the chunk getting freed and reused.
Daniel
On 2/28/20 9:36 AM, Madhu P Punjabi wrote:
Hi All,
When using 2.7.6 we noticed a crash in avltree_remove(..) while a
directory chunk was being cleaned. At the time of the crash,
"entries_used > entries_hiwat", and mdcache_lru_get() got an entry for
re-use from lru_try_reap_entry(), where the entry was related to a directory.
The crash happened when mdcache_lru_clean() -> mdc_clean_entry() ->
mdcache_dirent_invalidate_all() was called to clean the directory chunk
and "node" had an unexpected address.
We were running tests with the below configuration:
CacheInode
{
Entries_HWMark = 1000;
Chunks_HWMark = 1000;
LRU_Run_Interval = 90;
FD_HWMark_Percent = 60;
FD_LWMark_Percent = 0;
FD_Limit_Percent = 90;
Dir_Chunk = 32000;
}
At the time of crash:
entries_hiwat = 1000, entries_used = 25238, chunks_hiwat = 1000,
chunks_used = 366
We are using 2.7.6 along with some recent patches for MDCACHE code.
Has anybody seen this kind of crash? Are there any patches that may help
to fix it? We have the coredump, so we can provide more information from
it. However, we have not been able to recreate the crash in our test attempts.
Backtrace for reference:
/(gdb) bt
#0 0x00007f743993d23b in raise () from /lib64/libpthread.so.0
#1 0x0000000000443aa5 in crash_handler (signo=11, info=0x7f73ac6b82b0,
ctx=0x7f73ac6b8180) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/MainNFSD/nfs_init.c:244
#2 <signal handler called>
#3 0x00000000004f2f8b in *get_first* (node=0x7f740000) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/avl/avl.c:110
#4 0x00000000004f38b6 in *avltree_remove* (node=0x7f74102f1058,
tree=0x7f73e09808a8) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/avl/avl.c:407
#5 0x0000000000561778 in mdcache_avl_remove (parent=0x7f73e09804f0,
dirent=0x7f74102f1040)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_avl.c:220
#6 0x0000000000556b63 in *mdcache_clean_dirent_chunk*
(chunk=0x7f7410044100)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:556
#7 0x0000000000546905 in lru_clean_chunk (chunk=0x7f7410044100) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:2061
#8 0x0000000000546a98 in _mdcache_lru_unref_chunk
(chunk=0x7f7410044100, func=0x5bdf60 <__func__.23718>
"mdcache_clean_dirent_chunks", line=597)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:2090
#9 0x0000000000556bfb in mdcache_clean_dirent_chunks (entry=0x7f73e09804f0)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:596
#10 0x0000000000556cdf in mdcache_dirent_invalidate_all
(entry=0x7f73e09804f0)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:621
#11 0x000000000055594f in mdc_clean_entry (entry=0x7f73e09804f0) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:302
#12 0x00000000005417ef in *mdcache_lru_clean* (entry=0x7f73e09804f0) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:599
#13 0x0000000000545bdc in *mdcache_lru_get* (sub_handle=0x7f73b0367a10)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:1841
#14 0x00000000005550e4 in _mdcache_alloc_handle (export=0xe5a0d0,
sub_handle=0x7f73b0367a10, fs=0xe68970, reason=MDC_REASON_DEFAULT,
func=0x5bdfa0 <__func__.23745> "mdcache_new_entry", line=709) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:174
#15 0x0000000000556eff in *mdcache_new_entry* (export=0xe5a0d0,
sub_handle=0x7f73b0367a10, attrs_in=0x7f73ac6b9460, attrs_out=0x0,
new_directory=false, entry=0x7f73ac6b93d8,
state=0x0, reason=MDC_REASON_DEFAULT) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:708
#16 0x000000000054b3a5 in *mdcache_alloc_and_check_handle*
(export=0xe5a0d0, sub_handle=0x7f73b0367a10, new_obj=0x7f73ac6b9570,
new_directory=false, attrs_in=0x7f73ac6b9460,
attrs_out=0x0, tag=0x5bcd2c "lookup ", parent=0x7f7408a5f090,
name=0x7f74183bfda0 'a' <repeats 200 times>...,
invalidate=0x7f73ac6b945f, state=0x0)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:100
#17 0x000000000055915c in mdc_lookup_uncached
(mdc_parent=0x7f7408a5f090, name=0x7f74183bfda0 'a' <repeats 200
times>..., new_entry=0x7f73ac6b9718, attrs_out=0x0)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:1410
#18 0x000000000055fb27 in *mdcache_readdir_chunked*
(directory=0x7f7408a5f090, whence=872598721, dir_state=0x7f73ac6b98f0,
cb=0x434b1c <populate_dirent>, attrmask=122830,
eod_met=0x7f73ac6ba00b) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:3211
#19 0x000000000054d0c0 in mdcache_readdir (dir_hdl=0x7f7408a5f0c8,
whence=0x7f73ac6b98d0, dir_state=0x7f73ac6b98f0, cb=0x434b1c
<populate_dirent>, attrmask=122830,
eod_met=0x7f73ac6ba00b) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:559
#20 0x0000000000435443 in fsal_readdir (directory=0x7f7408a5f0c8,
cookie=872598721, nbfound=0x7f73ac6ba00c, eod_met=0x7f73ac6ba00b,
attrmask=122830,
cb=0x499e11 <nfs3_readdirplus_callback>, opaque=0x7f73ac6b9fc0) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/FSAL/fsal_helper.c:1160
#21 0x0000000000499c50 in nfs3_readdirplus (arg=0x7f73b018b2d8,
req=0x7f73b018abd0, res=0x7f73b01742b0)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/Protocols/NFS/nfs3_readdirplus.c:310
#22 0x000000000045ea8b in nfs_rpc_process_request
(reqdata=0x7f73b018abd0) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/MainNFSD/nfs_worker_thread.c:1331
#23 0x000000000045f257 in nfs_rpc_valid_NFS (req=0x7f73b018abd0) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/MainNFSD/nfs_worker_thread.c:1553
#24 0x00007f743b39333b in svc_vc_decode (req=0x7f73b018abd0) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/libntirpc/src/svc_vc.c:829
#25 0x0000000000451bf4 in nfs_rpc_decode_request (xprt=0x7f7428000c10,
xdrs=0x7f73b0123980)
at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/MainNFSD/nfs_rpc_dispatcher_thread.c:1345
#26 0x00007f743b39324c in svc_vc_recv (xprt=0x7f7428000c10) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/libntirpc/src/svc_vc.c:802
#27 0x00007f743b38f939 in svc_rqst_xprt_task (wpe=0x7f7428000e68) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/libntirpc/src/svc_rqst.c:769
#28 0x00007f743b398c5a in work_pool_thread (arg=0x3d2fc00) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/libntirpc/src/work_pool.c:181
#29 0x00007f7439935dc5 in start_thread () from /lib64/libpthread.so.0
#30 0x00007f743924373d in clone () from /lib64/libc.so.6/
/(gdb) frame 3
#3 0x00000000004f2f8b in get_first (node=0x7f740000) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/avl/avl.c:110
110 while (node->left)
*(gdb) p node->left
Cannot access memory at address 0x7f740000*
(gdb) p node
$2 = (struct avltree_node *) 0x7f740000/
/(gdb) frame 4
#4 0x00000000004f38b6 in avltree_remove (node=0x7f74102f1058,
tree=0x7f73e09808a8) at
/usr/src/debug/nfs-ganesha-2.7.5-ibm054.06/avl/avl.c:407
407 next = get_first(right);
(gdb) p right
$3 = (struct avltree_node *) 0x7f73cc1bab88/
/(gdb) p lru_state
$4 = {*entries_hiwat = 1000, entries_used = 25238, chunks_hiwat = 1000,
chunks_used = 366*, fds_system_imposed = 1048576, fds_hard_limit =
943718, fds_hiwat = 629145,
fds_lowat = 0, futility = 0, per_lane_work = 50, biggest_window =
419430, prev_fd_count = 4, prev_time = 1582628134, fd_state = 0}/
Thanks,
Madhu Thorat.