Hi All,
A customer reported a crash in state_nfs4_state_wipe(), and the same crash has been seen twice. It happens in the second loop, which deletes any SHARE states, after the first loop has finished deleting the LOCK states.
Has anybody seen this kind of crash? We are using Ganesha 2.7.6 and do not have any patches newer than June 2019. Are there any recent patches that might help fix this crash?
I am not able to recreate this crash. Does anyone have suggestions for a test that might help reproduce it?
A question about the state_nfs4_state_wipe() code: in the first loop, is it possible to come across a state whose state_type is STATE_TYPE_NONE? If so, that state will be removed in the first loop; is that fine?
644 void state_nfs4_state_wipe(struct state_hdl *ostate)
645 {
....
652 glist_for_each_safe(glist, glistn, &ostate->file.list_of_states) {
653 state = glist_entry(glist, state_t, state_list);
654 if (state->state_type > STATE_TYPE_LAYOUT)
655 continue;
656 /* Skip STATE_TYPE_SHARE
657 * It must be deleted after all the related LOCK states
658 */
659 if (state->state_type == STATE_TYPE_SHARE)
660 continue;
661 state_del_locked(state); <-- is there a possibility of removing a state with type as STATE_TYPE_NONE here ?
662 }
663
664 /* Loop over again to delete any STATE_TYPE_SHARE */
665 glist_for_each_safe(glist, glistn, &ostate->file.list_of_states) { <-- crash happening here
666 state = glist_entry(glist, state_t, state_list);
....
672 }
Backtrace:
(gdb) bt
#0 0x00007f15bb3594bb in raise () from /lib64/libpthread.so.0
#1 0x0000000000443839 in crash_handler (signo=11, info=0x7f11dfaf7ef0, ctx=0x7f11dfaf7dc0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/MainNFSD/nfs_init.c:244
#2 <signal handler called>
#3 0x00000000004d719c in state_nfs4_state_wipe (ostate=0x7f125c27d2f0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/SAL/nfs4_state.c:652
#4 0x00000000004cc3e8 in state_wipe_file (obj=0x7f125c27d088) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/SAL/state_misc.c:1309
#5 0x0000000000542c59 in _mdcache_lru_unref (entry=0x7f125c27d050, flags=0, func=0x5bf4ef <__func__.22954> "mdcache_put", line=196)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:1979
#6 0x0000000000547c1d in mdcache_put (entry=0x7f125c27d050) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.h:196
#7 0x000000000054bcbd in mdcache_put_ref (obj_hdl=0x7f125c27d088) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:1285
#8 0x00000000004605bc in set_current_entry (data=0x7f11dfaf8df0, obj=0x0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/include/nfs_proto_data.h:363
#9 0x0000000000461f18 in compound_data_Free (data=0x7f11dfaf8df0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/Protocols/NFS/nfs4_Compound.c:1197
#10 0x0000000000461cbb in nfs4_Compound (arg=0x7f13e83b4fc8, req=0x7f13e83b48c0, res=0x7f13e80e4730)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/Protocols/NFS/nfs4_Compound.c:1109
#11 0x000000000045e3c6 in nfs_rpc_process_request (reqdata=0x7f13e83b48c0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/MainNFSD/nfs_worker_thread.c:1325
#12 0x000000000045eb0d in nfs_rpc_valid_NFS (req=0x7f13e83b48c0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/MainNFSD/nfs_worker_thread.c:1535
#13 0x00007f15bcdb433b in svc_vc_decode (req=0x7f13e83b48c0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/libntirpc/src/svc_vc.c:829
#14 0x00000000004515bc in nfs_rpc_decode_request (xprt=0x7f1588000c20, xdrs=0x7f13e83a5900)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/MainNFSD/nfs_rpc_dispatcher_thread.c:1345
#15 0x00007f15bcdb424c in svc_vc_recv (xprt=0x7f1588000c20) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/libntirpc/src/svc_vc.c:802
#16 0x00007f15bcdb0939 in svc_rqst_xprt_task (wpe=0x7f1588000e78) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/libntirpc/src/svc_rqst.c:769
#17 0x00007f15bcdb0d96 in svc_rqst_epoll_events (sr_rec=0x11da630, n_events=2) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/libntirpc/src/svc_rqst.c:941
#18 0x00007f15bcdb102b in svc_rqst_epoll_loop (sr_rec=0x11da630) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/libntirpc/src/svc_rqst.c:1014
#19 0x00007f15bcdb10de in svc_rqst_run_task (wpe=0x11da630) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/libntirpc/src/svc_rqst.c:1050
#20 0x00007f15bcdb9c5a in work_pool_thread (arg=0x7f1398045cd0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/libntirpc/src/work_pool.c:181
#21 0x00007f15bb351e65 in start_thread () from /lib64/libpthread.so.0
#22 0x00007f15bac5888d in clone () from /lib64/libc.so.6
(gdb) frame 3
#3 0x00000000004d719c in state_nfs4_state_wipe (ostate=0x7f125c27d2f0) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/SAL/nfs4_state.c:652
652 glist_for_each_safe(glist, glistn, &ostate->file.list_of_states) {
(gdb) l
647 continue;
648 state_del_locked(state);
649 }
650
651 /* Loop over again to delete any STATE_TYPE_SHARE */
652 glist_for_each_safe(glist, glistn, &ostate->file.list_of_states) {
653 state = glist_entry(glist, state_t, state_list);
654 if (state->state_type > STATE_TYPE_LAYOUT)
655 continue;
656 state_del_locked(state);
(gdb) p glist
$1 = (struct glist_head *) 0x0
(gdb) p glistn
$2 = (struct glist_head *) 0x0
(gdb) p &ostate->file.list_of_states
$3 = (struct glist_head *) 0x7f125c27d338
(gdb) p ostate->file.list_of_states
$4 = {next = 0x7f15702fa390, prev = 0x7f140824d670}
(gdb) p ostate->file.list_of_states.next
$5 = (struct glist_head *) 0x7f15702fa390
(gdb) p ostate->file.list_of_states.next.next
$6 = (struct glist_head *) 0x7f118829ee10
(gdb) p ostate->file.list_of_states.next.next.next
$7 = (struct glist_head *) 0x7f140824d670
(gdb) p ostate->file.list_of_states.next.next.next.next
$8 = (struct glist_head *) 0x7f125c27d338
(gdb) p state
$4 = (state_t *) 0x7f13e4335070
(gdb) p *state
$3 = {state_list = {next = 0x0, prev = 0x7b1}, state_owner_list = {next = 0x7f13e43253c0, prev = 0x7f13e40077d0}, state_export_list = {next = 0x0, prev = 0x0}, state_mutex = {
__data = {__lock = -467752592, __count = 32531, __owner = -468407056, __nusers = 32531, __kind = -995576587, __spins = 1994413512, __list = {
__prev = 0x7f15bcfd4cd0 <svc_auth_none>, __next = 0x0}},
__size = "p\251\036\344\023\177\000\000\360\254\024\344\023\177\000\000\365\264\250\304\310U\340v\320L\375\274\025\177\000\000\000\000\000\000\000\000\000",
__align = 139723408320880}, state_export = 0x0, state_owner = 0x1e2f32003, state_obj = 0x2, state_exp = 0x0, state_data = {share = {share_lockstates = {next = 0x0,
prev = 0x45c14b <xdr_COMPOUND4res>}, share_access = 3826560240, share_deny = 32531, share_access_prev = 0, share_deny_prev = 0}, nlm_share = {share_perclient = {
next = 0x0, prev = 0x45c14b <xdr_COMPOUND4res>}, share_access = 3826560240, share_deny = 32531, share_access_counts = {0, 0, 0, 0}, share_deny_counts = {0, 0, 0, 0}},
lock = {state_locklist = {next = 0x0, prev = 0x45c14b <xdr_COMPOUND4res>}, state_sharelist = {next = 0x7f13e414acf0, prev = 0x0}, openstate = 0x0}, deleg = {
sd_type = OPEN_DELEGATE_NONE, sd_state = (unknown: 0), sd_clfile_stats = {cfd_rs_time = 4571467, cfd_r_time = 139723407666416}}, layout = {state_segments = {next = 0x0,
prev = 0x45c14b <xdr_COMPOUND4res>}, state_layout_type = 3826560240, granting = 32531, state_return_on_close = false}, fid = {state_locklist = {next = 0x0,
prev = 0x45c14b <xdr_COMPOUND4res>}, share_access = 3826560240, share_deny = 32531}, io_advise = 0}, state_type = STATE_TYPE_NONE, state_seqid = 0, state_refcount = 0,
stateid_other = '\000' <repeats 11 times>, state_refer = {session = '\000' <repeats 15 times>, sequence = 0, slot = 0}}
(gdb) frame 4
#4 0x00000000004cc3e8 in state_wipe_file (obj=0x7f125c27d088) at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/SAL/state_misc.c:1309
1309 state_nfs4_state_wipe(obj->state_hdl);
(gdb) p *obj
$1 = {handles = {next = 0x7f12a41ca528, prev = 0x7f11c8425c48}, fs = 0x1207620, fsal = 0x800e20 <MDCACHE>, obj_ops = 0x800f78 <MDCACHE+344>, obj_lock = {__data = {__lock = 0,
__nr_readers = 0, __readers_wakeup = 0, __writer_wakeup = 0, __nr_readers_queued = 0, __nr_writers_queued = 0, __writer = 0, __shared = 0, __pad1 = 0, __pad2 = 0,
__flags = 0}, __size = '\000' <repeats 55 times>, __align = 0}, type = REGULAR_FILE, fsid = {major = 10817653282814806327, minor = 431}, fileid = 4861680837,
state_hdl = 0x7f125c27d2f0}
(gdb) p *(obj->state_hdl)
$9 = {state_lock = {__data = {__lock = 0, __nr_readers = 0, __readers_wakeup = 0, __writer_wakeup = 25, __nr_readers_queued = 0, __nr_writers_queued = 0, __writer = 3261,
__shared = 0, __pad1 = 0, __pad2 = 0, __flags = 0}, __size = '\000' <repeats 12 times>, "\031", '\000' <repeats 11 times>, "\275\f", '\000' <repeats 29 times>,
__align = 0}, no_cleanup = false, {file = {obj = 0x7f125c27d088, list_of_states = {next = 0x7f15702fa390, prev = 0x7f140824d670}, layoutrecall_list = {
next = 0x7f125c27d348, prev = 0x7f125c27d348}, lock_list = {next = 0x7f125c27d358, prev = 0x7f125c27d358}, nlm_share_list = {next = 0x7f125c27d368,
prev = 0x7f125c27d368}, write_delegated = false, fdeleg_stats = {fds_curr_delegations = 0, fds_deleg_type = OPEN_DELEGATE_NONE, fds_delegation_count = 0,
fds_recall_count = 0, fds_avg_hold = 0, fds_last_delegation = 0, fds_last_recall = 0, fds_num_opens = 0, fds_first_open = 0}, anon_ops = 0}, dir = {
junction_export = 0x7f125c27d088, export_roots = {next = 0x7f15702fa390, prev = 0x7f140824d670}, exp_root_refcount = 1546113864}}}
(gdb) frame 5
#5 0x0000000000542c59 in _mdcache_lru_unref (entry=0x7f125c27d050, flags=0, func=0x5bf4ef <__func__.22954> "mdcache_put", line=196)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm053.00/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_lru.c:1979
1979 state_wipe_file(&entry->obj_handle);
(gdb) p *entry
$2 = {attr_lock = {__data = {__lock = 0, __nr_readers = 0, __readers_wakeup = 19, __writer_wakeup = 0, __nr_readers_queued = 0, __nr_writers_queued = 0, __writer = 0,
__shared = 0, __pad1 = 0, __pad2 = 0, __flags = 0}, __size = "\000\000\000\000\000\000\000\000\023", '\000' <repeats 46 times>, __align = 0}, obj_handle = {handles = {
next = 0x7f12a41ca528, prev = 0x7f11c8425c48}, fs = 0x1207620, fsal = 0x800e20 <MDCACHE>, obj_ops = 0x800f78 <MDCACHE+344>, obj_lock = {__data = {__lock = 0,
__nr_readers = 0, __readers_wakeup = 0, __writer_wakeup = 0, __nr_readers_queued = 0, __nr_writers_queued = 0, __writer = 0, __shared = 0, __pad1 = 0, __pad2 = 0,
__flags = 0}, __size = '\000' <repeats 55 times>, __align = 0}, type = REGULAR_FILE, fsid = {major = 10817653282814806327, minor = 431}, fileid = 4861680837,
state_hdl = 0x7f125c27d2f0}, sub_handle = 0x7f125c1fdd60, attrs = {request_mask = 9223372036872986590, valid_mask = 385006, supported = 26599422, type = REGULAR_FILE,
filesize = 0, fsid = {major = 10817653282814806327, minor = 431}, acl = 0x7f15a0003340, fileid = 4861680837, mode = 420, numlinks = 0, owner = 25913, group = 5292,
rawdev = {major = 0, minor = 0}, atime = {tv_sec = 1573542827, tv_nsec = 616394000}, creation = {tv_sec = 0, tv_nsec = 0}, ctime = {tv_sec = 1573542827,
tv_nsec = 622900000}, mtime = {tv_sec = 1573542827, tv_nsec = 616394000}, chgtime = {tv_sec = 1573542827, tv_nsec = 622900000}, spaceused = 0, change = 2196442827,
generation = 0, expire_time_attr = 60, fs_locations = 0x0, sec_label = {slai_lfs = {lfs_lfs = 0, lfs_pi = 0}, slai_data = {slai_data_len = 0, slai_data_val = 0x0}}},
fh_hk = {node_k = {left = 0x0, right = 0x0, parent = 139719985754106}, key = {hk = 6903532094004211886, fsal = 0x7f15b48b6540 <GPFS>, kv = {addr = 0x7f125c16cd10, len = 40}},
inavl = false}, mde_flags = 3, attr_time = 1573542827, acl_time = 1573542827, fs_locations_time = 0, lru = {q = {next = 0x7f12a018a948, prev = 0x817c20 <LRU+39040>},
qid = LRU_ENTRY_CLEANUP, refcnt = 4, flags = 3, lane = 174, cf = 0}, export_list = {next = 0x7f125c251530, prev = 0x7f125c251530}, first_export_id = 9, content_lock = {
__data = {__lock = 0, __nr_readers = 0, __readers_wakeup = 0, __writer_wakeup = 0, __nr_readers_queued = 0, __nr_writers_queued = 0, __writer = 0, __shared = 0, __pad1 = 0,
__pad2 = 0, __flags = 0}, __size = '\000' <repeats 55 times>, __align = 0}, fsobj = {hdl = {state_lock = {__data = {__lock = 0, __nr_readers = 0, __readers_wakeup = 0,
__writer_wakeup = 25, __nr_readers_queued = 0, __nr_writers_queued = 0, __writer = 3261, __shared = 0, __pad1 = 0, __pad2 = 0, __flags = 0},
__size = '\000' <repeats 12 times>, "\031", '\000' <repeats 11 times>, "\275\f", '\000' <repeats 29 times>, __align = 0}, no_cleanup = false, {file = {
obj = 0x7f125c27d088, list_of_states = {next = 0x7f15702fa390, prev = 0x7f140824d670}, layoutrecall_list = {next = 0x7f125c27d348, prev = 0x7f125c27d348},
lock_list = {next = 0x7f125c27d358, prev = 0x7f125c27d358}, nlm_share_list = {next = 0x7f125c27d368, prev = 0x7f125c27d368}, write_delegated = false, fdeleg_stats = {
fds_curr_delegations = 0, fds_deleg_type = OPEN_DELEGATE_NONE, fds_delegation_count = 0, fds_recall_count = 0, fds_avg_hold = 0, fds_last_delegation = 0,
fds_last_recall = 0, fds_num_opens = 0, fds_first_open = 0}, anon_ops = 0}, dir = {junction_export = 0x7f125c27d088, export_roots = {next = 0x7f15702fa390,
prev = 0x7f140824d670}, exp_root_refcount = 1546113864}}}, fsdir = {chunks = {next = 0x0, prev = 0x1900000000}, detached = {next = 0x0, prev = 0xcbd}, spin = 0,
detached_count = 0, dhdl = {state_lock = {__data = {__lock = 0, __nr_readers = 0, __readers_wakeup = 0, __writer_wakeup = 0, __nr_readers_queued = 0,
__nr_writers_queued = 0, __writer = 1546113160, __shared = 32530, __pad1 = 139730053211024, __pad2 = 139724012705392, __flags = 1546113864},
__size = '\000' <repeats 24 times>, "\210\320'\\\022\177\000\000\220\243/p\025\177\000\000p\326$\b\024\177\000\000H\323'\\\022\177\000", __align = 0},
no_cleanup = 72, {file = {obj = 0x7f125c27d358, list_of_states = {next = 0x7f125c27d358, prev = 0x7f125c27d368}, layoutrecall_list = {next = 0x7f125c27d368,
prev = 0x0}, lock_list = {next = 0x0, prev = 0x0}, nlm_share_list = {next = 0x0, prev = 0x0}, write_delegated = false, fdeleg_stats = {fds_curr_delegations = 0,
fds_deleg_type = OPEN_DELEGATE_NONE, fds_delegation_count = 0, fds_recall_count = 0, fds_avg_hold = 0, fds_last_delegation = 0, fds_last_recall = 0,
fds_num_opens = 0, fds_first_open = 0}, anon_ops = 0}, dir = {junction_export = 0x7f125c27d358, export_roots = {next = 0x7f125c27d358, prev = 0x7f125c27d368},
exp_root_refcount = 1546113896}}}, parent = {addr = 0x0, len = 0}, parent_time = 0, first_ck = 0, avl = {t = {root = 0x0, cmp_fn = 0x0, height = 0, first = 0x0,
last = 0x0, size = 0}, ck = {root = 0x0, cmp_fn = 0x0, height = 0, first = 0x0, last = 0x0, size = 0}, sorted = {root = 0x0, cmp_fn = 0x0, height = 0, first = 0x0,
last = 0x0, size = 0}, collisions = 0}}}}