We are seeing a crash in svc_release_it().
We are using ganesha V2.7.3 and have taken some recent patches for ntirpc, including the patch to avoid leaking FDs ("Mark ANYFD clients as local clients").
We are seeing the following crash repeatedly; please check the backtrace below. Is it possible that the same 'xprt' is being destroyed twice in clnt_vc_destroy()? Could this crash be related to that?
(gdb) bt
#0 0x00003fffaed59614 in __lseek_nocancel () from /lib64/libpthread.so.0
#1 0x0000000010056bcc in crash_handler (signo=11, info=0x3ffd54ffd8e8, ctx=0x3ffd54ffcb70) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.05/MainNFSD/nfs_init.c:243
#2 <signal handler called>
#3 0x00003fffaf2429c0 in svc_release_it (xprt=0x3ffc9c0cb630, flags=0, tag=0x3fffaf256180 <__func__.8741> "svc_ioq_write", line=233)
at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.05/libntirpc/ntirpc/rpc/svc.h:433
#4 0x00003fffaf243288 in svc_ioq_write (xprt=0x3ffc9c0cb630, xioq=0x3ffc9c0b8a30, ifph=0x10017c2c340) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.05/libntirpc/src/svc_ioq.c:233
#5 0x00003fffaf243444 in svc_ioq_write_callback (wpe=0x3ffc9c0b8a98) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.05/libntirpc/src/svc_ioq.c:257
#6 0x00003fffaf24411c in work_pool_thread (arg=0x3ffdf011e5d0) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.05/libntirpc/src/work_pool.c:181
#7 0x00003fffaed4c95c in .__make_stacks_executable () from /lib64/libpthread.so.0
#8 0x0000000000000000 in ?? ()
(gdb) f 3
#3 0x00003fffaf2429c0 in svc_release_it (xprt=0x3ffc9c0cb630, flags=0, tag=0x3fffaf256180 <__func__.8741> "svc_ioq_write", line=233)
at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.05/libntirpc/ntirpc/rpc/svc.h:433
433 (*(xprt)->xp_ops->xp_destroy)(xprt, flags, tag, line);
(gdb) p *xprt
$1 = {xp_ops = 0x3ffc9c0f8970, xp_dispatch = {process_cb = 0x3ffc9c000078, rendezvous_cb = 0x3ffc9c000078}, xp_parent = 0x0, xp_tp = 0x0, xp_netid = 0x3ffc9c03cd60 "", xp_p1 = 0x0,
xp_p2 = 0x0, xp_p3 = 0x0, xp_u1 = 0x0, xp_u2 = 0x0, xp_local = {nb = {maxlen = 128, len = 28, buf = 0x3ffc9c0cb690}, ss = {ss_family = 10,
__ss_padding = "º)", '\000' <repeats 28 times>, "ÿÿÿÿ", '\000' <repeats 23 times>, "\001\000\000\000\000ÿÿÿÿÿÿÿÿ", '\000' <repeats 47 times>, __ss_align = 0}}, xp_remote = {
nb = {maxlen = 0, len = 0, buf = 0x0}, ss = {ss_family = 0, __ss_padding = '\000' <repeats 117 times>, __ss_align = 0}}, xp_lock = {__data = {__lock = 0, __count = 0,
__owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}, xp_fd = 0,
xp_ifindex = 0, xp_si_type = 0, xp_type = 0, xp_refcnt = 0, xp_flags = 64}
(gdb) p *(xprt)->xp_ops->xp_destroy
Cannot access memory at address 0x0
(gdb) info locals
refs = 0
xp_flags = 0
__func__ = "svc_release_it"
(gdb) l -
423 /* enforce once-only semantic, trace others */
424 xp_flags = atomic_postset_uint16_t_bits(&xprt->xp_flags,
425 SVC_XPRT_FLAG_RELEASING);
426 if (xp_flags & SVC_XPRT_FLAG_RELEASING) {
427 XPRT_TRACE(xprt, "WARNING! already destroying!", tag, line);
428 return;
429 }
430
431 /* Releasing last reference */
432 (*(xprt)->xp_ops->xp_destroy)(xprt, flags, tag, line);
(gdb) f 4
#4 0x00003fffaf243288 in svc_ioq_write (xprt=0x3ffc9c0cb630, xioq=0x3ffc9c0b8a30, ifph=0x10017c2c340) at /usr/src/debug/nfs-ganesha-2.7.3-ibm050.05/libntirpc/src/svc_ioq.c:233
233 SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE);