Hi all,
We have seen multiple crashes (with and without ASAN). Posting the stack
traces for reference...
Crash 1:
(gdb) bt
#0 0x00007f6d41f3523b in raise () from /lib64/libpthread.so.0
#1 0x000000000047a9f0 in crash_handler (signo=6, info=0x7f6d1db0a030,
ctx=0x7f6d1db09f00)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm056.02_asan/MainNFSD/nfs_init.c:244
#2 <signal handler called>
#3 0x00007f6d417791d7 in raise () from /lib64/libc.so.6
#4 0x00007f6d4177a8c8 in abort () from /lib64/libc.so.6
#5 0x00007f6d43cb8e6e in __sanitizer::Abort() () from /lib64/libasan.so.5
#6 0x00007f6d43cc1558 in __sanitizer::Die() () from /lib64/libasan.so.5
#7 0x00007f6d43ca4b15 in __asan::ReportGenericError(unsigned long,
unsigned long, unsigned long, unsigned long, bool, unsigned long, unsigned
int, bool) () from /lib64/libasan.so.5
#8 0x00007f6d43ca5567 in __asan_report_load8 () from /lib64/libasan.so.5
#9 0x00000000005c63ca in process_blocked_lock_upcall
(block_data=0x60c0007feac0)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm056.02_asan/SAL/state_lock.c:1809
#10 0x00000000005ba962 in state_blocked_lock_caller (ctx=0x616000a6fe80)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm056.02_asan/SAL/state_async.c:81
#11 0x0000000000688479 in fridgethr_start_routine (arg=0x616000a6fe80)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm056.02_asan/support/fridgethr.c:554
#12 0x00007f6d41f2ddc5 in start_thread () from /lib64/libpthread.so.0
#13 0x00007f6d4183b73d in clone () from /lib64/libc.so.6
(gdb) f 9
#9 0x00000000005c63ca in process_blocked_lock_upcall
(block_data=0x60c0007feac0)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm056.02_asan/SAL/state_lock.c:1809
1809 PTHREAD_RWLOCK_unlock(&lock_entry->sle_obj->state_hdl->
state_lock);
(gdb) print *block_data
$1 = {sbd_list = {next = 0x20000011, prev = 0x0}, sbd_grant_type =
STATE_GRANT_POLL,
sbd_block_type = STATE_BLOCK_POLL, sbd_granted_callback = 0x57afd2
<nlm_granted_callback>,
sbd_blocked_cookie = 0x0, sbd_lock_entry = 0x6110007a1240, sbd_prot =
{sbd_nlm = {
sbd_nlm_fh = {n_len = 56, n_bytes = 0x60c0007feb00 "C"},
sbd_nlm_fh_buf = "C\000\000\004\060\060\000\n\000\002\000(\000\300
\250z\254^%j\365\002\000\000\002\000\000\000\000\251\344\000\000\000\000
\000\000\000\000\000\000=k@y\273\344", '\000' <repeats 16 times>},
sbd_v4 =
0x38}}
(gdb) print *(state_lock_entry_t *)0x6110007a1240
$2 = {sle_list = {next = 0x7400001a, prev = 0x0}, sle_owner_locks = {next =
0x0, prev = 0x0},
sle_client_locks = {next = 0x0, prev = 0x0}, sle_state_locks = {next =
0x0, prev = 0x0},
sle_export_locks = {next = 0x0, prev = 0x0}, sle_export = 0x615000000348,
sle_obj = 0x6190013ce5b8, sle_block_data = 0x60c0007feac0, sle_owner =
0x0,
sle_state = 0x61200159cc40, sle_blocked = STATE_CANCELED, sle_ref_count =
0, sle_lock = { <<< cancelled
lock_sle_type = FSAL_POSIX_LOCK, lock_type = FSAL_LOCK_W, lock_start =
527338800,
lock_length = 517175, lock_reclaim = false}, sle_mutex = {__data =
{__lock = 0, __count = 0,
__owner = 0, __nusers = 0, __kind = -1, __spins = 0, __elision = 0,
__list = {
__prev = 0x0, __next = 0x0}},
__size = '\000' <repeats 16 times>, "\377\377\377\377",
'\000' <repeats
19 times>,
__align = 0}}
(gdb)
The problem is seen when doing the unlock through lock_entry (state_lock.c:1809,
frame 9 above); ASAN reports heap-use-after-free.
try_to_grant_lock() finds the cancelled block entry and invokes
remove_from_locklist(), which calls lock_entry_dec_ref(), causing the free.
************************************************************************************************************************************************************************************
Crash 2:
(gdb) bt
#0 0x00007f212862923b in raise () from /lib64/libpthread.so.0
#1 0x000000000047a9f0 in crash_handler (signo=6, info=0x7f210bf6d070,
ctx=0x7f210bf6cf40)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm056.02_asan/MainNFSD/nfs_init.c:244
#2 <signal handler called>
#3 0x00007f2127e6d1d7 in raise () from /lib64/libc.so.6
#4 0x00007f2127e6e8c8 in abort () from /lib64/libc.so.6
#5 0x00007f212a3ace6e in __sanitizer::Abort() () from /lib64/libasan.so.5
#6 0x00007f212a3b5558 in __sanitizer::Die() () from /lib64/libasan.so.5
#7 0x00007f212a398b15 in __asan::ReportGenericError(unsigned long,
unsigned long, unsigned long, unsigned long, bool, unsigned long, unsigned
int, bool) () from /lib64/libasan.so.5
#8 0x00007f212a399567 in __asan_report_load8 () from /lib64/libasan.so.5
#9 0x00000000005ba881 in state_blocked_lock_caller (ctx=0x616000530480)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm056.02_asan/SAL/state_async.c:72
#10 0x0000000000688479 in fridgethr_start_routine (arg=0x616000530480)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm056.02_asan/support/fridgethr.c:554
#11 0x00007f2128621dc5 in start_thread () from /lib64/libpthread.so.0
#12 0x00007f2127f2f73d in clone () from /lib64/libc.so.6
(gdb)
(gdb) f 9
#9 0x00000000005ba881 in state_blocked_lock_caller (ctx=0x616000530480)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm056.02_asan/SAL/state_async.c:72
72 export = block->sbd_lock_entry->sle_export;
(gdb) print *block
$1 = {sbd_list = {next = 0x72800009, prev = 0x613000a8e128}, sbd_grant_type
= STATE_GRANT_NONE,
sbd_block_type = STATE_BLOCK_NONE, sbd_granted_callback = 0x0,
sbd_blocked_cookie = 0x0,
sbd_lock_entry = 0x0, sbd_prot = {sbd_nlm = {sbd_nlm_fh = {n_len = 0,
n_bytes = 0x0}, <<<<<<lock_entry is freed, we did
state_add_grant_cookie just before
sbd_nlm_fh_buf = '\000' <repeats 16 times>,
"\002\000\000\000\000\000
\000\000\000\341a\004Pb\000\000\000\341a\004Pb\000\000\334\341a\004Pb\000
\000\000\001b\004Pb\000\000\000\000\000\000\000\000\000"}, sbd_v4 = 0x0}}
(gdb)
***********************************************************************************************************************************************************************************
Crash 3:
(gdb) bt
#0 0x00007fd4324824fb in raise (sig=11)
at ../nptl/sysdeps/unix/sysv/linux/pt-raise.c:36
#1 0x0000000000443ae5 in crash_handler (signo=11, info=0x7fd00b8049f0,
ctx=0x7fd00b8048c0)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm055.00/MainNFSD/nfs_init.c:244
#2 <signal handler called>
#3 0x00000000004a3bf1 in nlm_granted_callback (obj=0x7fd36c002ba8,
lock_entry=0x7fd304102d80)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm055.00/Protocols/NLM/nlm_util.c:609
#4 0x00000000004c217a in try_to_grant_lock (lock_entry=0x7fd304102d80)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm055.00/SAL/state_lock.c:1758
#5 0x00000000004c252e in process_blocked_lock_upcall
(block_data=0x7fd304102320)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm055.00/SAL/state_lock.c:1807
#6 0x00000000004bd43f in state_blocked_lock_caller (ctx=0x7fd0400028d0)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm055.00/SAL/state_async.c:81
#7 0x0000000000514b92 in fridgethr_start_routine (arg=0x7fd0400028d0)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm055.00/support/fridgethr.c:554
#8 0x00007fd43247aea5 in start_thread (arg=0x7fd00b806700) at
pthread_create.c:307
#9 0x00007fd431d818cd in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
(gdb) f 3
#3 0x00000000004a3bf1 in nlm_granted_callback (obj=0x7fd36c002ba8,
lock_entry=0x7fd304102d80)
at /usr/src/debug/nfs-ganesha-2.7.5-ibm055.00/Protocols/NLM/nlm_util.c:609
609 state_nlm_client_t *nlm_grant_client = nlm_grant_owner->
so_client;
The nlm_grant_owner comes from the lock entry's sle_owner, which is NULL.
Thanks and regards,
Trishali.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Trishali Nayar
IBM Systems
ETZ, Pune.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~