Hi Daniel
Thanks for looking into this. I hadn't installed ceph-debuginfo; here's the
bt with line numbers:
#0  operator uint64_t (this=0x10) at
/usr/src/debug/ceph-14.2.2/src/include/object.h:123
#1  Client::fill_statx (this=this@entry=0x274b980, in=0x0, mask=mask@entry=341,
stx=stx@entry=0x7fccdbefa210) at
/usr/src/debug/ceph-14.2.2/src/client/Client.cc:7336
#2  0x00007fce4ea1d4ca in fill_statx (stx=0x7fccdbefa210, mask=341, in=...,
this=0x274b980) at /usr/src/debug/ceph-14.2.2/src/client/Client.h:898
#3  Client::_readdir_cache_cb (this=this@entry=0x274b980, dirp=dirp@entry=0x7fcb7d0e7860,
    cb=cb@entry=0x7fce4e9d0950 <_readdir_single_dirent_cb(void*, dirent*,
ceph_statx*, off_t, Inode*)>, p=p@entry=0x7fccdbefa6a0, caps=caps@entry=341,
    getref=getref@entry=true) at
/usr/src/debug/ceph-14.2.2/src/client/Client.cc:7999
#4  0x00007fce4ea1e865 in Client::readdir_r_cb (this=0x274b980,
d=0x7fcb7d0e7860,
    cb=cb@entry=0x7fce4e9d0950 <_readdir_single_dirent_cb(void*, dirent*,
ceph_statx*, off_t, Inode*)>, p=p@entry=0x7fccdbefa6a0, want=want@entry=1775,
    flags=flags@entry=0, getref=true) at
/usr/src/debug/ceph-14.2.2/src/client/Client.cc:8138
#5  0x00007fce4ea1f3dd in Client::readdirplus_r (this=<optimized out>,
d=<optimized out>, de=de@entry=0x7fccdbefa8c0, stx=stx@entry=0x7fccdbefa730,
want=want@entry=1775,
    flags=flags@entry=0, out=0x7fccdbefa720) at
/usr/src/debug/ceph-14.2.2/src/client/Client.cc:8307
#6  0x00007fce4e9c92d8 in ceph_readdirplus_r (cmount=<optimized out>,
dirp=<optimized out>, de=de@entry=0x7fccdbefa8c0, stx=stx@entry=0x7fccdbefa730,
    want=want@entry=1775, flags=flags@entry=0, out=out@entry=0x7fccdbefa720)
at /usr/src/debug/ceph-14.2.2/src/libcephfs.cc:629
#7  0x00007fce4ece7b0e in fsal_ceph_readdirplus (dir=<optimized out>,
cred=<optimized out>, out=0x7fccdbefa720, flags=0, want=1775,
stx=0x7fccdbefa730, de=0x7fccdbefa8c0,
    dirp=<optimized out>, cmount=<optimized out>) at
/usr/src/debug/nfs-ganesha-2.7.3/FSAL/FSAL_CEPH/statx_compat.h:314
#8  ceph_fsal_readdir (dir_pub=<optimized out>, whence=<optimized out>,
dir_state=0x7fccdbefaa30, cb=0x522640 <mdc_readdir_uncached_cb>,
attrmask=122830,
    eof=0x7fccdbefac0b) at
/usr/src/debug/nfs-ganesha-2.7.3/FSAL/FSAL_CEPH/handle.c:211
#9  0x00000000005256e1 in mdcache_readdir_uncached
(directory=directory@entry=0x7fcaa8bb84a0, whence=<optimized out>,
dir_state=<optimized out>, cb=<optimized out>,
    attrmask=<optimized out>, eod_met=<optimized out>) at
/usr/src/debug/nfs-ganesha-2.7.3/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:1654
#10 0x0000000000517a88 in mdcache_readdir (dir_hdl=0x7fcaa8bb84d8,
whence=0x7fccdbefab18, dir_state=0x7fccdbefab30, cb=0x432db0
<populate_dirent>, attrmask=122830,
    eod_met=0x7fccdbefac0b) at
/usr/src/debug/nfs-ganesha-2.7.3/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:551
#11 0x000000000043434a in fsal_readdir
(directory=directory@entry=0x7fcaa8bb84d8,
cookie=cookie@entry=0, nbfound=nbfound@entry=0x7fccdbefac0c,
    eod_met=eod_met@entry=0x7fccdbefac0b, attrmask=122830, cb=cb@entry=0x46f600
<nfs4_readdir_callback>, opaque=opaque@entry=0x7fccdbefac20)
    at /usr/src/debug/nfs-ganesha-2.7.3/FSAL/fsal_helper.c:1164
#12 0x00000000004705b9 in nfs4_op_readdir (op=0x7fcb7fed1f80,
data=0x7fccdbefaea0, resp=0x7fcb7d106c40)
    at /usr/src/debug/nfs-ganesha-2.7.3/Protocols/NFS/nfs4_op_readdir.c:664
#13 0x000000000045d120 in nfs4_Compound (arg=<optimized out>,
req=<optimized out>, res=0x7fcb7e001000)
    at /usr/src/debug/nfs-ganesha-2.7.3/Protocols/NFS/nfs4_Compound.c:942
#14 0x00000000004512cd in nfs_rpc_process_request (reqdata=0x7fcb7e1d1950)
at /usr/src/debug/nfs-ganesha-2.7.3/MainNFSD/nfs_worker_thread.c:1328
#15 0x0000000000450766 in nfs_rpc_decode_request (xprt=0x7fcaf17fb0e0,
xdrs=0x7fcb7e1ddb90) at
/usr/src/debug/nfs-ganesha-2.7.3/MainNFSD/nfs_rpc_dispatcher_thread.c:1345
#16 0x00007fce6165707d in svc_rqst_xprt_task (wpe=0x7fcaf17fb2f8) at
/usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:769
#17 0x00007fce6165759a in svc_rqst_epoll_events (n_events=<optimized out>,
sr_rec=0x56a24c0) at
/usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:941
#18 svc_rqst_epoll_loop (sr_rec=<optimized out>) at
/usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:1014
#19 svc_rqst_run_task (wpe=0x56a24c0) at
/usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:1050
#20 0x00007fce6165f123 in work_pool_thread (arg=0x7fcd381c77b0) at
/usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/work_pool.c:181
#21 0x00007fce5fc17dd5 in start_thread (arg=0x7fccdbefe700) at
pthread_create.c:307
#22 0x00007fce5ed8eead in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:111
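
For what it's worth, frame #1 shows fill_statx being entered with in=0x0, and
frame #0 shows a uint64_t conversion operator invoked with this=0x10, so this
looks like a member access through a null Inode pointer (0x10 presumably being
the member's offset within Inode). A minimal sketch of the failure mode,
assuming the operator at object.h:123 belongs to a small wrapper type along
the lines of snapid_t; the types and offsets here are illustrative, not the
real Inode layout:

#include <cstdint>

struct snapid_t {
  uint64_t val;
  // Frame #0: a conversion operator like this ends up running with a
  // near-null 'this' when called through a null enclosing object.
  operator uint64_t() const { return val; }
};

struct Inode {
  uint64_t a;       // offset 0x0 (illustrative)
  uint64_t b;       // offset 0x8 (illustrative)
  snapid_t snapid;  // offset 0x10: &in->snapid == 0x10 when in == nullptr
};

uint64_t read_snapid(Inode *in) {
  // With in == nullptr (frame #1: in=0x0), this calls
  // snapid_t::operator uint64_t() with this == 0x10, matching frame #0.
  return in->snapid;
}

So the question is presumably why _readdir_cache_cb (frame #3) handed
fill_statx a null Inode for that directory entry.
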
On Mon, Oct 7, 2019 at 3:40 PM Daniel Gryniewicz <dang@redhat.com> wrote:
 Client::fill_statx() is a fairly large function, so it's hard to know
 what's causing the crash.  Can you get line numbers from your backtrace?
 Daniel
 On 10/7/19 9:59 AM, David C wrote:
 > Hi All
 >
 > Further to my previous messages, I upgraded
 > to libcephfs2-14.2.2-0.el7.x86_64 as suggested and things certainly seem
 > a lot more stable. I have had some crashes though; could someone assist
 > in debugging this latest crash, please?
 >
 > (gdb) bt
 > #0  0x00007fce4e9fc1bb in Client::fill_statx(Inode*, unsigned int,
 > ceph_statx*) () from /lib64/libcephfs.so.2
 > #1  0x00007fce4ea1d4ca in Client::_readdir_cache_cb(dir_result_t*, int
 > (*)(void*, dirent*, ceph_statx*, long, Inode*), void*, int, bool) ()
 > from /lib64/libcephfs.so.2
 > #2  0x00007fce4ea1e865 in Client::readdir_r_cb(dir_result_t*, int
 > (*)(void*, dirent*, ceph_statx*, long, Inode*), void*, unsigned int,
 > unsigned int, bool) () from /lib64/libcephfs.so.2
 > #3  0x00007fce4ea1f3dd in Client::readdirplus_r(dir_result_t*, dirent*,
 > ceph_statx*, unsigned int, unsigned int, Inode**) () from
 > /lib64/libcephfs.so.2
 > #4  0x00007fce4ece7b0e in fsal_ceph_readdirplus (dir=<optimized out>,
 > cred=<optimized out>, out=0x7fccdbefa720, flags=0, want=1775,
 > stx=0x7fccdbefa730, de=0x7fccdbefa8c0, dirp=<optimized out>,
 > cmount=<optimized out>)
 >      at /usr/src/debug/nfs-ganesha-2.7.3/FSAL/FSAL_CEPH/statx_compat.h:314
 > #5  ceph_fsal_readdir (dir_pub=<optimized out>, whence=<optimized out>,
 > dir_state=0x7fccdbefaa30, cb=0x522640 <mdc_readdir_uncached_cb>,
 > attrmask=122830, eof=0x7fccdbefac0b) at
 > /usr/src/debug/nfs-ganesha-2.7.3/FSAL/FSAL_CEPH/handle.c:211
 > #6  0x00000000005256e1 in mdcache_readdir_uncached
 > (directory=directory@entry=0x7fcaa8bb84a0, whence=<optimized out>,
 > dir_state=<optimized out>, cb=<optimized out>, attrmask=<optimized out>,
 > eod_met=<optimized out>)
 >      at /usr/src/debug/nfs-ganesha-2.7.3/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:1654
 > #7  0x0000000000517a88 in mdcache_readdir (dir_hdl=0x7fcaa8bb84d8,
 > whence=0x7fccdbefab18, dir_state=0x7fccdbefab30, cb=0x432db0
 > <populate_dirent>, attrmask=122830, eod_met=0x7fccdbefac0b) at
 > /usr/src/debug/nfs-ganesha-2.7.3/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:551
 > #8  0x000000000043434a in fsal_readdir
 > (directory=directory@entry=0x7fcaa8bb84d8, cookie=cookie@entry=0,
 > nbfound=nbfound@entry=0x7fccdbefac0c,
 > eod_met=eod_met@entry=0x7fccdbefac0b, attrmask=122830,
 > cb=cb@entry=0x46f600 <nfs4_readdir_callback>,
 > opaque=opaque@entry=0x7fccdbefac20)
 >      at /usr/src/debug/nfs-ganesha-2.7.3/FSAL/fsal_helper.c:1164
 > #9  0x00000000004705b9 in nfs4_op_readdir (op=0x7fcb7fed1f80,
 > data=0x7fccdbefaea0, resp=0x7fcb7d106c40) at
 > /usr/src/debug/nfs-ganesha-2.7.3/Protocols/NFS/nfs4_op_readdir.c:664
 > #10 0x000000000045d120 in nfs4_Compound (arg=<optimized out>,
 > req=<optimized out>, res=0x7fcb7e001000) at
 > /usr/src/debug/nfs-ganesha-2.7.3/Protocols/NFS/nfs4_Compound.c:942
 > #11 0x00000000004512cd in nfs_rpc_process_request
 > (reqdata=0x7fcb7e1d1950) at
 > /usr/src/debug/nfs-ganesha-2.7.3/MainNFSD/nfs_worker_thread.c:1328
 > #12 0x0000000000450766 in nfs_rpc_decode_request (xprt=0x7fcaf17fb0e0,
 > xdrs=0x7fcb7e1ddb90) at
 > /usr/src/debug/nfs-ganesha-2.7.3/MainNFSD/nfs_rpc_dispatcher_thread.c:1345
 > #13 0x00007fce6165707d in svc_rqst_xprt_task (wpe=0x7fcaf17fb2f8) at
 > /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:769
 > #14 0x00007fce6165759a in svc_rqst_epoll_events (n_events=<optimized
 > out>, sr_rec=0x56a24c0) at
 > /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:941
 > #15 svc_rqst_epoll_loop (sr_rec=<optimized out>) at
 > /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:1014
 > #16 svc_rqst_run_task (wpe=0x56a24c0) at
 > /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:1050
 > #17 0x00007fce6165f123 in work_pool_thread (arg=0x7fcd381c77b0) at
 > /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/work_pool.c:181
 > #18 0x00007fce5fc17dd5 in start_thread () from /lib64/libpthread.so.0
 > #19 0x00007fce5ed8eead in clone () from /lib64/libc.so.6
 >
 > Package versions:
 >
 > nfs-ganesha-vfs-2.7.3-0.1.el7.x86_64
 > nfs-ganesha-debuginfo-2.7.3-0.1.el7.x86_64
 > nfs-ganesha-ceph-2.7.3-0.1.el7.x86_64
 > nfs-ganesha-2.7.3-0.1.el7.x86_64
 > libcephfs2-14.2.2-0.el7.x86_64
 > librados2-14.2.2-0.el7.x86_64
 >
 > Ganesha export:
 >
 > EXPORT
 > {
 >      Export_ID=100;
 >      Protocols = 4;
 >      Transports = TCP;
 >      Path = /;
 >      Pseudo = /ceph/;
 >      Access_Type = RW;
 >      Attr_Expiration_Time = 0;
 >      Disable_ACL = FALSE;
 >      Manage_Gids = TRUE;
 >      Filesystem_Id = 100.1;
 >      FSAL {
 >          Name = CEPH;
 >      }
 > }
 >
 > Ceph.conf:
 >
 > [client]
 >          mon host = --removed--
 >          client_oc_size = 6291456000 #6GB
 >          client_acl_type=posix_acl
 >          client_quota = true
 >          client_quota_df = true
 >
 > Client mount options:
 >
 > rw,relatime,vers=4.1,rsize=1048576,wsize=1048576,namlen=255,soft,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=removed,local_lock=none,addr=removed)
 >
 > On Fri, Jul 19, 2019 at 5:47 PM David C <dcsysengineer@gmail.com> wrote:
 >
 >     Thanks, Jeff. I'll give 14.2.2 a go when it's released.
 >
 >     On Wed, 17 Jul 2019, 22:29 Jeff Layton <jlayton@poochiereds.net> wrote:
 >
 >         Ahh, I just noticed you were running nautilus on the client
 >         side. This
 >         patch went into v14.2.2, so once you update to that you should
 >         be good
 >         to go.
 >
 >         -- Jeff
 >
 >         On Wed, 2019-07-17 at 17:10 -0400, Jeff Layton wrote:
 >          > This is almost certainly the same bug that is fixed here:
 >          >
 >          > https://github.com/ceph/ceph/pull/28324
 >          >
 >          > It should get backported soon-ish but I'm not sure which
 >          > luminous release it'll show up in.
 >          >
 >          > Cheers,
 >          > Jeff
 >          >
 >          > On Wed, 2019-07-17 at 10:36 +0100, David C wrote:
 >          > > Thanks for taking a look at this, Daniel. Below is the only
 >          > > interesting bit from the Ceph MDS log at the time of the crash,
 >          > > but I suspect the slow requests are a result of the Ganesha
 >          > > crash rather than the cause of it. Copying the Ceph list in case
 >          > > anyone has any ideas.
 >          > >
 >          > > 2019-07-15 15:06:54.624007 7f5fda5bb700  0 log_channel(cluster) log [WRN] : 6 slow requests, 5 included below; oldest blocked for > 34.588509 secs
 >          > > 2019-07-15 15:06:54.624017 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 33.113514 seconds old, received at 2019-07-15 15:06:21.510423: client_request(client.16140784:5571174 setattr mtime=2019-07-15 14:59:45.642408 #0x10009079cfb 2019-07-15 14:59:45.642408 caller_uid=1161, caller_gid=1131{}) currently failed to xlock, waiting
 >          > > 2019-07-15 15:06:54.624020 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 34.588509 seconds old, received at 2019-07-15 15:06:20.035428: client_request(client.16129440:1067288 create #0x1000907442e/filePathEditorRegistryPrefs.melDXAtss 2019-07-15 14:59:53.694087 caller_uid=1161, caller_gid=1131{1131,4121,2330,2683,4115,2322,2779,2979,1503,3511,2783,2707,2942,2980,2258,2829,1238,1237,2793,1235,1249,2097,1154,2982,2983,3860,4101,1208,3638,3641,3644,3640,3643,3639,3642,3822,3945,4045,3521,3522,3520,3523,}) currently failed to wrlock, waiting
 >          > > 2019-07-15 15:06:54.624025 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 34.583918 seconds old, received at 2019-07-15 15:06:20.040019: client_request(client.16140784:5570551 getattr pAsLsXsFs #0x1000907443b 2019-07-15 14:59:44.171408 caller_uid=1161, caller_gid=1131{}) currently failed to rdlock, waiting
 >          > > 2019-07-15 15:06:54.624028 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 34.580632 seconds old, received at 2019-07-15 15:06:20.043305: client_request(client.16129440:1067293 unlink #0x1000907442e/filePathEditorRegistryPrefs.melcdzxxc 2019-07-15 14:59:53.701964 caller_uid=1161, caller_gid=1131{1131,4121,2330,2683,4115,2322,2779,2979,1503,3511,2783,2707,2942,2980,2258,2829,1238,1237,2793,1235,1249,2097,1154,2982,2983,3860,4101,1208,3638,3641,3644,3640,3643,3639,3642,3822,3945,4045,3521,3522,3520,3523,}) currently failed to wrlock, waiting
 >          > > 2019-07-15 15:06:54.624032 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 34.538332 seconds old, received at 2019-07-15 15:06:20.085605: client_request(client.16129440:1067308 create #0x1000907442e/filePathEditorRegistryPrefs.melHHljMk 2019-07-15 14:59:53.744266 caller_uid=1161, caller_gid=1131{1131,4121,2330,2683,4115,2322,2779,2979,1503,3511,2783,2707,2942,2980,2258,2829,1238,1237,2793,1235,1249,2097,1154,2982,2983,3860,4101,1208,3638,3641,3644,3640,3643,3639,3642,3822,3945,4045,3521,3522,3520,3523,}) currently failed to wrlock, waiting
 >          > > 2019-07-15 15:06:55.014073 7f5fdcdc0700  1 mds.mds01 Updating MDS map to version 68166 from mon.2
 >          > > 2019-07-15 15:06:59.624041 7f5fda5bb700  0 log_channel(cluster) log [WRN] : 7 slow requests, 2 included below; oldest blocked for > 39.588571 secs
 >          > > 2019-07-15 15:06:59.624048 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 30.495843 seconds old, received at 2019-07-15 15:06:29.128156: client_request(client.16129440:1072227 create #0x1000907442e/filePathEditorRegistryPrefs.mel58AQSv 2019-07-15 15:00:02.786754 caller_uid=1161, caller_gid=1131{1131,4121,2330,2683,4115,2322,2779,2979,1503,3511,2783,2707,2942,2980,2258,2829,1238,1237,2793,1235,1249,2097,1154,2982,2983,3860,4101,1208,3638,3641,3644,3640,3643,3639,3642,3822,3945,4045,3521,3522,3520,3523,}) currently failed to wrlock, waiting
 >          > > 2019-07-15 15:06:59.624053 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 39.432848 seconds old, received at 2019-07-15 15:06:20.191151: client_request(client.16140784:5570649 mknod #0x1000907442e/filePathEditorRegistryPrefs.mel3HZLNE 2019-07-15 14:59:44.322408 caller_uid=1161, caller_gid=1131{}) currently failed to wrlock, waiting
 >          > > 2019-07-15 15:07:03.014108 7f5fdcdc0700  1 mds.mds01 Updating MDS map to version 68167 from mon.2
 >          > > 2019-07-15 15:07:04.624096 7f5fda5bb700  0 log_channel(cluster) log [WRN] : 8 slow requests, 1 included below; oldest blocked for > 44.588632 secs
 >          > > 2019-07-15 15:07:04.624103 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 34.904077 seconds old, received at 2019-07-15 15:06:29.719983: client_request(client.16129440:1072228 getattr pAsLsXsFs #0x1000907443b 2019-07-15 15:00:03.378512 caller_uid=1161, caller_gid=1131{1131,4121,2330,2683,4115,2322,2779,2979,1503,3511,2783,2707,2942,2980,2258,2829,1238,1237,2793,1235,1249,2097,1154,2982,2983,3860,4101,1208,3638,3641,3644,3640,3643,3639,3642,3822,3945,4045,3521,3522,3520,3523,}) currently failed to rdlock, waiting
 >          > > 2019-07-15 15:07:07.013972 7f5fdcdc0700  1 mds.mds01 Updating MDS map to version 68168 from mon.2
 >          > > 2019-07-15 15:07:09.624166 7f5fda5bb700  0 log_channel(cluster) log [WRN] : 10 slow requests, 2 included below; oldest blocked for > 49.588693 secs
 >          > > 2019-07-15 15:07:09.624173 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 32.689838 seconds old, received at 2019-07-15 15:06:36.934283: client_request(client.16129440:1072271 getattr pAsLsXsFs #0x1000907443b 2019-07-15 15:00:10.592734 caller_uid=1161, caller_gid=1131{1131,4121,2330,2683,4115,2322,2779,2979,1503,3511,2783,2707,2942,2980,2258,2829,1238,1237,2793,1235,1249,2097,1154,2982,2983,3860,4101,1208,3638,3641,3644,3640,3643,3639,3642,3822,3945,4045,3521,3522,3520,3523,}) currently failed to rdlock, waiting
 >          > > 2019-07-15 15:07:09.624177 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 34.962719 seconds old, received at 2019-07-15 15:06:34.661402: client_request(client.16129440:1072256 getattr pAsLsXsFs #0x1000907443b 2019-07-15 15:00:08.319912 caller_uid=1161, caller_gid=1131{1131,4121,2330,2683,4115,2322,2779,2979,1503,3511,2783,2707,2942,2980,2258,2829,1238,1237,2793,1235,1249,2097,1154,2982,2983,3860,4101,1208,3638,3641,3644,3640,3643,3639,3642,3822,3945,4045,3521,3522,3520,3523,}) currently failed to rdlock, waiting
 >          > > 2019-07-15 15:07:11.519928 7f5fdcdc0700  1 mds.mds01 Updating MDS map to version 68169 from mon.2
 >          > > 2019-07-15 15:07:19.624272 7f5fda5bb700  0 log_channel(cluster) log [WRN] : 11 slow requests, 1 included below; oldest blocked for > 59.588812 secs
 >          > > 2019-07-15 15:07:19.624278 7f5fda5bb700  0 log_channel(cluster) log [WRN] : slow request 32.164260 seconds old, received at 2019-07-15 15:06:47.459980: client_request(client.16129440:1072326 getattr pAsLsXsFs #0x1000907443b 2019-07-15 15:00:21.118372 caller_uid=1161, caller_gid=1131{1131,4121,2330,2683,4115,2322,2779,2979,1503,3511,2783,2707,2942,2980,2258,2829,1238,1237,2793,1235,1249,2097,1154,2982,2983,3860,4101,1208,3638,3641,3644,3640,3643,3639,3642,3822,3945,4045,3521,3522,3520,3523,}) currently failed to rdlock, waiting
 >          > >
 >          > >
 >          > > On Tue, Jul 16, 2019 at 1:18 PM Daniel Gryniewicz <dang@redhat.com> wrote:
 >          > > > This is not one I've seen before, and a quick look at the code
 >          > > > looks strange.  The only assert in that bit is asserting the
 >          > > > parent is a directory, but the parent directory is not something
 >          > > > that was passed in by Ganesha, but rather something that was
 >          > > > looked up internally in libcephfs.  This is beyond my expertise,
 >          > > > at this point.  Maybe some ceph logs would help?
 >          > > >
 >          > > > Daniel
 >          > > >
 >          > > > On 7/15/19 10:54 AM, David C wrote:
 >          > > > > This list has been deprecated. Please subscribe to the new
 >          > > > > devel list at lists.nfs-ganesha.org <http://lists.nfs-ganesha.org>.
 >          > > > >
 >          > > > >
 >          > > > > Hi All
 >          > > > >
 >          > > > > I'm running 2.7.3 using the CEPH FSAL to export CephFS
 >          > > > > (Luminous); it ran well for a few days and crashed. I have a
 >          > > > > coredump; could someone assist me in debugging this, please?
 >          > > > >
 >          > > > > (gdb) bt
 >          > > > > #0  0x00007f04dcab6207 in raise () from /lib64/libc.so.6
 >          > > > > #1  0x00007f04dcab78f8 in abort () from /lib64/libc.so.6
 >          > > > > #2  0x00007f04d2a9d6c5 in ceph::__ceph_assert_fail(char const*, char const*, int, char const*) () from /usr/lib64/ceph/libceph-common.so.0
 >          > > > > #3  0x00007f04d2a9d844 in ceph::__ceph_assert_fail(ceph::assert_data const&) () from /usr/lib64/ceph/libceph-common.so.0
 >          > > > > #4  0x00007f04cc807f04 in Client::_lookup_name(Inode*, Inode*, UserPerm const&) () from /lib64/libcephfs.so.2
 >          > > > > #5  0x00007f04cc81c41f in Client::ll_lookup_inode(inodeno_t, UserPerm const&, Inode**) () from /lib64/libcephfs.so.2
 >          > > > > #6  0x00007f04ccadbf0e in create_handle (export_pub=0x1baff10, desc=<optimized out>, pub_handle=0x7f0470fd4718, attrs_out=0x7f0470fd4740) at /usr/src/debug/nfs-ganesha-2.7.3/FSAL/FSAL_CEPH/export.c:256
 >          > > > > #7  0x0000000000523895 in mdcache_locate_host (fh_desc=0x7f0470fd4920, export=export@entry=0x1bafbf0, entry=entry@entry=0x7f0470fd48b8, attrs_out=attrs_out@entry=0x0) at /usr/src/debug/nfs-ganesha-2.7.3/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:1011
 >          > > > > #8  0x000000000051d278 in mdcache_create_handle (exp_hdl=0x1bafbf0, fh_desc=<optimized out>, handle=0x7f0470fd4900, attrs_out=0x0) at /usr/src/debug/nfs-ganesha-2.7.3/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:1578
 >          > > > > #9  0x000000000046d404 in nfs4_mds_putfh (data=data@entry=0x7f0470fd4ea0) at /usr/src/debug/nfs-ganesha-2.7.3/Protocols/NFS/nfs4_op_putfh.c:211
 >          > > > > #10 0x000000000046d8e8 in nfs4_op_putfh (op=0x7f03effaf1d0, data=0x7f0470fd4ea0, resp=0x7f03ec1de1f0) at /usr/src/debug/nfs-ganesha-2.7.3/Protocols/NFS/nfs4_op_putfh.c:281
 >          > > > > #11 0x000000000045d120 in nfs4_Compound (arg=<optimized out>, req=<optimized out>, res=0x7f03ec1de9d0) at /usr/src/debug/nfs-ganesha-2.7.3/Protocols/NFS/nfs4_Compound.c:942
 >          > > > > #12 0x00000000004512cd in nfs_rpc_process_request (reqdata=0x7f03ee5ed4b0) at /usr/src/debug/nfs-ganesha-2.7.3/MainNFSD/nfs_worker_thread.c:1328
 >          > > > > #13 0x0000000000450766 in nfs_rpc_decode_request (xprt=0x7f02180c2320, xdrs=0x7f03ec568ab0) at /usr/src/debug/nfs-ganesha-2.7.3/MainNFSD/nfs_rpc_dispatcher_thread.c:1345
 >          > > > > #14 0x00007f04df45d07d in svc_rqst_xprt_task (wpe=0x7f02180c2538) at /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:769
 >          > > > > #15 0x00007f04df45d59a in svc_rqst_epoll_events (n_events=<optimized out>, sr_rec=0x4bb53e0) at /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:941
 >          > > > > #16 svc_rqst_epoll_loop (sr_rec=<optimized out>) at /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:1014
 >          > > > > #17 svc_rqst_run_task (wpe=0x4bb53e0) at /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/svc_rqst.c:1050
 >          > > > > #18 0x00007f04df465123 in work_pool_thread (arg=0x7f044c0008c0) at /usr/src/debug/nfs-ganesha-2.7.3/libntirpc/src/work_pool.c:181
 >          > > > > #19 0x00007f04dda05dd5 in start_thread () from /lib64/libpthread.so.0
 >          > > > > #20 0x00007f04dcb7dead in clone () from /lib64/libc.so.6
 >          > > > >
 >          > > > > Package versions:
 >          > > > >
 >          > > > > nfs-ganesha-2.7.3-0.1.el7.x86_64
 >          > > > > nfs-ganesha-ceph-2.7.3-0.1.el7.x86_64
 >          > > > > libcephfs2-14.2.1-0.el7.x86_64
 >          > > > > librados2-14.2.1-0.el7.x86_64
 >          > > > >
 >          > > > > I notice in my Ceph log I have a bunch of slow requests
 >          > > > > around the time it went down; I'm not sure if it's a symptom
 >          > > > > of Ganesha segfaulting or if it was a contributing factor.
 >          > > > >
 >          > > > > Thanks,
 >          > > > > David
 >          > > > >
 >          > > > >
 >         --
 >         Jeff Layton <jlayton@poochiereds.net>
 >