Hi All,
Recently a customer reported below crash in avltree_remove function:
(gdb) where
#0 0x000000001017e590 in get_balance (node=0x0) at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/avl/avl.c:84
#1 0x000000001017f8f0 in avltree_remove (node=0x3ffb2073fea0,
tree=0x3ff1907b5210) at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/avl/avl.c:552
#2 0x0000000010167c2c in cache_inode_release_dirents
(entry=0x3ff1907b5070, which=CACHE_INODE_AVL_NAMES)
at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/cache_inode/cache_inode_misc.c:804
#3 0x0000000010167bd0 in cache_inode_release_dirents
(entry=0x3ff1907b5070, which=CACHE_INODE_AVL_BOTH)
at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/cache_inode/cache_inode_misc.c:785
#4 0x00000000101716d4 in cache_inode_lru_clean (entry=0x3ff1907b5070) at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/cache_inode/cache_inode_lru.c:417
#5 0x0000000010175984 in cache_inode_lru_get (entry=0x3fff4398cf18) at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/cache_inode/cache_inode_lru.c:1207
#6 0x0000000010165754 in cache_inode_new_entry (new_obj=0x3ffbc8d478d0,
flags=0, entry=0x3fff4398d0f0)
at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/cache_inode/cache_inode_misc.c:281
#7 0x0000000010161818 in cache_inode_get (fsdata=0x3fff4398d0f8,
entry=0x3fff4398d0f0) at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/cache_inode/cache_inode_get.c:256
#8 0x000000001019a5b8 in nfs3_FhandleToCache (fh3=0x3ff9cc000e70,
status=0x3ff1d42ea310, rc=0x3fff4398d1d0)
at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/support/nfs_filehandle_mgmt.c:163
#9 0x000000001007f1e4 in nfs3_getattr (arg=0x3ff9cc000e70,
req=0x3ff9cc000cb0, res=0x3ff1d42ea310) at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/Protocols/NFS/nfs3_getattr.c:81
#10 0x000000001005f748 in nfs_rpc_execute (reqdata=0x3ff9cc000c80) at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/MainNFSD/nfs_worker_thread.c:1288
#11 0x0000000010060498 in worker_run (ctx=0x1000e5a6b40) at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/MainNFSD/nfs_worker_thread.c:1550
#12 0x00000000101ae5ec in fridgethr_start_routine (arg=0x1000e5a6b40) at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/support/fridgethr.c:561
#13 0x00003fff9bd7c2bc in .start_thread () from /lib64/libpthread.so.0
#14 0x00003fff9bb9b304 in .__clone () from /lib64/libc.so.6
(gdb) frame 2
#2 0x0000000010167c2c in cache_inode_release_dirents
(entry=0x3ff1907b5070, which=CACHE_INODE_AVL_NAMES)
at
/usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/cache_inode/cache_inode_misc.c:804
804 avltree_remove(dirent_node, tree);
(gdb) p *dirent
$1 = {node_hk = {left = 0x0, right = 0x0, parent = 70347813813922}, hk = {k
= 6039456505800813999, p = 0}, ckey = {
hk = 16814622367593888045, fsal = 0x3fff996b25f8 <GPFS>, kv = {addr =
0x3ffd3aaecce0, len = 40}}, flags = 0,
name = 0x3ffd39a9da8c "J2"}
(gdb) p *tree
$2 = {root = 0x3ffb2073fea0, cmp_fn = @0x10288238: 0x1016e834
<avl_dirent_hk_cmpf>, height = 1, first = 0x3ffb2073fea0,
last = 0x3ff70fab2120, size = 2}
(gdb) p *dirent_node
$3 = {left = 0x0, right = 0x0, parent = 70347813813922}
(gdb) frame 1
#1 0x000000001017f8f0 in avltree_remove (node=0x3ffb2073fea0,
tree=0x3ff1907b5210)
at /usr/src/debug/nfs-ganesha-2.3.2-ibm59-0.1.1-Source/avl/avl.c:552
552 switch (get_balance(right)) {
(gdb) p *left
Cannot access memory at address 0x0
(gdb) p *right
Cannot access memory at address 0x0
(gdb) p *next
Cannot access memory at address 0x0
(gdb) p *tree
$4 = {root = 0x3ffb2073fea0, cmp_fn = @0x10288238: 0x1016e834
<avl_dirent_hk_cmpf>, height = 1, first = 0x3ffb2073fea0,
last = 0x3ff70fab2120, size = 2}
(gdb) p *parent
$5 = {left = 0x3ffb2073fea0, right = 0x0, parent = 70319593422913}
(gdb) p tree->root
$6 = (struct avltree_node *) 0x3ffb2073fea0
(gdb) p *tree->root
$7 = {left = 0x0, right = 0x0, parent = 70330352345380}
(gdb) p balance
$8 = 2
(gdb) p *node
$9 = {left = 0x0, right = 0x0, parent = 70330352345380}
When I checked the relevant code. I doubt on below code block:
if (is_left) {
is_left = parent && parent->left == node;
balance = inc_balance(node);
if (balance == 0) /* case 1 */
continue;
if (balance == 1) /* case 2 */
return;
*right = node->right; /* case 3 */*
switch (get_balance(right)) {
case 0: /* case 3.1 */
....
....
}
The block is for left side of the node/subtree (is_left is true), and we
are actually trying to refer right side of the node/tree (which may be
NULL, actually crash is due to NULL in the next line)
Does it mean that the block should be like :
* if (! is_left) {*
is_left = parent && parent->left == node;
balance = inc_balance(node);
if (balance == 0) /* case 1 */
continue;
if (balance == 1) /* case 2 */
return;
right = node->right; /* case 3 */
switch (get_balance(right)) {
case 0: /* case 3.1 */
....
....
}
Let me know if my understanding is incorrect.
--
with regards,
Sachin Punadikar