Hello,

 

We are seeing the following crash with NFS Ganesha 2.7.1. The crash occurs further down the stack, in our FSAL module’s lookup method, when it tries to use the “name” argument, which is an invalid pointer (0xa0).

 

We have the following three patches applied on top of NFS Ganesha 2.7.1:

https://github.com/nfs-ganesha/nfs-ganesha/commit/654dd706d22663c6ae6029e0c8c5814fe0d6ff6a

 

https://github.com/nfs-ganesha/nfs-ganesha/commit/5dc6a70ed42275a4f6772b9802e79f23dc25fa73

 

The third is the most recent patch, which avoids returning dead hash entries:

https://github.com/nfs-ganesha/nfs-ganesha/commit/25320e6544f6c5a045f20c51446f57c9dc036412

 

 

The workload:

Concurrent access from multiple threads. One thread continuously (in a loop) runs Python's os.walk (i.e., readdir) over the entire filesystem, roughly 5M files in total. Five more threads each write a few thousand files. When the writes complete, a single thread verifies the written content and then deletes it, after which the writes start again.

 

This is the same workload that causes our OOM issue:

https://lists.nfs-ganesha.org/archives/list/devel@lists.nfs-ganesha.org/thread/A6BSM65DZKYRJY7QJL5ECGRPLTRCA2F2/

 

 

#5  0x00007f6acd6a1dca in foo_lookup (parent=0x6fe2e420, name=0xa0 <Address 0xa0 out of bounds>, 

    handle=0x7f6abf3255a8, attrs_out=0x7f6abf3254a0) at /opt/src/src/handle.c:364

#6  0x000000000053a7a6 in mdc_lookup_uncached (mdc_parent=0x291a4ba0, name=0xa0 <Address 0xa0 out of bounds>, 

    new_entry=0x7f6abf325728, attrs_out=0x0) at /src/src/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:1293

#7  0x0000000000541344 in mdcache_readdir_chunked (directory=0x291a4ba0, whence=0, dir_state=0x7f6abf325900, 

    cb=0x43217c <populate_dirent>, attrmask=122830, eod_met=0x7f6abf325ffb)

    at /src/src/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_helpers.c:3065

#8  0x000000000052e8c3 in mdcache_readdir (dir_hdl=0x291a4bd8, whence=0x7f6abf3258e0, dir_state=0x7f6abf325900, 

    cb=0x43217c <populate_dirent>, attrmask=122830, eod_met=0x7f6abf325ffb)

    at /src/src/FSAL/Stackable_FSALs/FSAL_MDCACHE/mdcache_handle.c:559

#9  0x0000000000432a76 in fsal_readdir (directory=0x291a4bd8, cookie=0, nbfound=0x7f6abf325ffc, 

    eod_met=0x7f6abf325ffb, attrmask=122830, cb=0x492018 <nfs3_readdirplus_callback>, opaque=0x7f6abf325fb0)

    at /src/src/FSAL/fsal_helper.c:1158

#10 0x0000000000491e71 in nfs3_readdirplus (arg=0x10704818, req=0x10704110, res=0x3103f090)

    at /src/src/Protocols/NFS/nfs3_readdirplus.c:310

#11 0x00000000004574d1 in nfs_rpc_process_request (reqdata=0x10704110) at /src/src/MainNFSD/nfs_worker_thread.c:1329

#12 0x0000000000457c90 in nfs_rpc_valid_NFS (req=0x10704110) at /src/src/MainNFSD/nfs_worker_thread.c:1549

#13 0x00007f6ad115ce75 in svc_vc_decode (req=0x10704110) at /src/src/libntirpc/src/svc_vc.c:825

---Type <return> to continue, or q <return> to quit---

#14 0x000000000044a688 in nfs_rpc_decode_request (xprt=0x18390200, xdrs=0x174943c0)

    at /src/src/MainNFSD/nfs_rpc_dispatcher_thread.c:1341

#15 0x00007f6ad115cd86 in svc_vc_recv (xprt=0x18390200) at /src/src/libntirpc/src/svc_vc.c:798

#16 0x00007f6ad11594d3 in svc_rqst_xprt_task (wpe=0x18390418) at /src/src/libntirpc/src/svc_rqst.c:767

#17 0x00007f6ad115994d in svc_rqst_epoll_events (sr_rec=0x2779260, n_events=1)

    at /src/src/libntirpc/src/svc_rqst.c:939

#18 0x00007f6ad1159be2 in svc_rqst_epoll_loop (sr_rec=0x2779260) at /src/src/libntirpc/src/svc_rqst.c:1012

#19 0x00007f6ad1159c95 in svc_rqst_run_task (wpe=0x2779260) at /src/src/libntirpc/src/svc_rqst.c:1048

#20 0x00007f6ad11625f6 in work_pool_thread (arg=0x3b5d170) at /src/src/libntirpc/src/work_pool.c:181

#21 0x00007f6ad0169de5 in start_thread () from /lib64/libpthread.so.0

#22 0x00007f6acfa71bad in clone () from /lib64/libc.so.6

(gdb) select-frame 7

(gdb) info locals

status = {major = ERR_FSAL_INVAL, minor = 0}

cb_result = DIR_CONTINUE

entry = 0x0

attrs = {request_mask = 122830, valid_mask = 1433550, supported = 1433582, type = REGULAR_FILE, filesize = 1024, 

  fsid = {major = 0, minor = 0}, acl = 0x0, fileid = 47680710, mode = 438, numlinks = 1, owner = 65534, 

  group = 65534, rawdev = {major = 0, minor = 0}, atime = {tv_sec = 1548784955, tv_nsec = 582000000}, creation = {

    tv_sec = 0, tv_nsec = 0}, ctime = {tv_sec = 1548784955, tv_nsec = 582000000}, mtime = {tv_sec = 1548784955, 

    tv_nsec = 582000000}, chgtime = {tv_sec = 1548784955, tv_nsec = 582000000}, spaceused = 1024, 

  change = 1548784955582, generation = 0, expire_time_attr = 60, fs_locations = 0x0}

dirent = 0x5296230

has_write = true

set_first_ck = false

next_ck = 2419507

look_ck = 2419507

chunk = 0x156d7240

first_pass = true

eod = false

reload_chunk = false

__func__ = "mdcache_readdir_chunked"

__PRETTY_FUNCTION__ = "mdcache_readdir_chunked"

(gdb) 

status = {major = ERR_FSAL_INVAL, minor = 0}

cb_result = DIR_CONTINUE

entry = 0x0

attrs = {request_mask = 122830, valid_mask = 1433550, supported = 1433582, type = REGULAR_FILE, filesize = 1024, 

  fsid = {major = 0, minor = 0}, acl = 0x0, fileid = 47680710, mode = 438, numlinks = 1, owner = 65534, 

  group = 65534, rawdev = {major = 0, minor = 0}, atime = {tv_sec = 1548784955, tv_nsec = 582000000}, creation = {

    tv_sec = 0, tv_nsec = 0}, ctime = {tv_sec = 1548784955, tv_nsec = 582000000}, mtime = {tv_sec = 1548784955, 

    tv_nsec = 582000000}, chgtime = {tv_sec = 1548784955, tv_nsec = 582000000}, spaceused = 1024, 

  change = 1548784955582, generation = 0, expire_time_attr = 60, fs_locations = 0x0}

dirent = 0x5296230

has_write = true

set_first_ck = false

next_ck = 2419507

look_ck = 2419507

chunk = 0x156d7240

first_pass = true

eod = false

reload_chunk = false

__func__ = "mdcache_readdir_chunked"

__PRETTY_FUNCTION__ = "mdcache_readdir_chunked"

(gdb) print *dirent

$1 = {chunk_list = {next = 0x0, prev = 0xa1}, chunk = 0x3018770, node_name = {

    left = 0x7f6acfd39848 <main_arena+232>, right = 0x0, parent = 0}, node_ck = {left = 0x5296258, right = 0x0, 

    parent = 0}, node_sorted = {left = 0xffffffff, right = 0x0, parent = 0}, ck = 0, eod = false, namehash = 8192, 

  ckey = {hk = 1056768, fsal = 0x0, kv = {addr = 0x0, len = 134650068}}, flags = 0, 

  name = 0xa0 <Address 0xa0 out of bounds>, name_buffer = 0x52962d8 " "}

(gdb) print dirent.name

$3 = 0xa0 <Address 0xa0 out of bounds>

(gdb) print *0x52962d8

$4 = 32

(gdb) print dirent.name_buffer

$5 = 0x52962d8 " "

(gdb) print *dirent.name_buffer

$6 = 32 ' '

(gdb) print *chunk

$7 = {chunks = {next = 0x291a4e28, prev = 0x291a4e28}, dirents = {next = 0x2ed8c060, prev = 0x48ff2870}, 

  parent = 0x291a4ba0, chunk_lru = {q = {next = 0x7e1c00 <CHUNK_LRU+1792>, prev = 0x4d6f5c88}, qid = LRU_ENTRY_L1, 

    refcnt = 0, flags = 0, lane = 8, cf = 0}, reload_ck = 0, next_ck = 0, num_entries = 2500}