Memory leak nfs-ganesha
by akarchagin2012@gmail.com
Good morning, everybody. Can anybody help me with a memory leak in nfs-ganesha?
After 4-7 hours of copying files from a local disk to the NFS share, the OOM killer fires because nfs-ganesha has consumed all of the host's memory.
I've changed all the parameters in the MDCACHE block of my ganesha config file, but nothing changes - the memory leak is still there. Please help!
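To show how fast the process actually grows during the copy, a simple sampler like the one below can be left running; the interval and log path are arbitrary placeholders.

while true; do
    # append a timestamped RSS sample (in kB) for ganesha.nfsd
    echo "$(date '+%F %T') $(ps -o rss= -C ganesha.nfsd)" >> /tmp/ganesha-rss.log
    sleep 60
done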
My host:
CPU - 8
RAM - 16 GB
My OS:
cat /etc/os-release
NAME="CentOS Stream"
VERSION="8"
Kernel:
4.18.0-383.el8.x86_64
nfs-ganesha version:
nfs-ganesha-4.0-1.el8s.x86_64
My config is:
cat /etc/ganesha/ganesha.conf
NFS_CORE_PARAM {
    # possible to mount with NFSv3 to NFSv4 Pseudo path
    mount_path_pseudo = true;
    # NFS protocol
    Protocols = "3","4"; # NFS protocols supported
    # Use a non-privileged port for RQuota
    Rquota_Port = 875;
}
MDCACHE {
    Entries_HWMark = 500;
    Chunks_HWMark = 500;
    Entries_Release_Size = 500;
    FD_HWMark_Percent = 10;
    FD_LWMark_Percent = 5;
    FD_Limit_Percent = 2;
}
NFSv4 {
    Lease_Lifetime = 30;
    Grace_Period = 40;
}
EXPORT_DEFAULTS {
    # default access mode
    Access_Type = RW;
}
LOG {
    Default_log_level = WARN;
    COMPONENTS {
        ALL = WARN;
    }
    FACILITY {
        name = FILE;
        destination = "/var/log/ganesha/ganesha.log";
        max_level = WARN;
        enable = active;
    }
}
%include "/etc/ganesha/conf.d/ERPAS2_01.conf"
cat /etc/ganesha/conf.d/ERPAS2_01.conf
EXPORT {
    Export_Id = 1001;                    # Unique identifier for each EXPORT (share)
    Path = "/mnt/gluster/nfs/ERPAS2_01"; # Export path of our NFS share
    FSAL {
        name = GLUSTER;         # Backing type is Gluster
        hostname = "localhost"; # Hostname of Gluster server
        volume = "ERPAS2_01";   # The name of our Gluster volume
    }
    Squash = No_root_squash;  # Control NFS root squashing
    Disable_ACL = FALSE;      # Enable NFSv4 ACLs
    Pseudo = "/ERPAS2_01";    # NFSv4 pseudo path for our NFS share
    Transports = "UDP","TCP"; # Transport protocols supported
    SecType = "sys";          # NFS Security flavors supported
}
The error is:
[99062.180392] ip invoked oom-killer: gfp_mask=0x6200ca(GFP_HIGHUSER_MOVABLE), order=0, oom_score_adj=0
[99062.180399] CPU: 4 PID: 1104454 Comm: ip Not tainted 4.18.0-383.el8.x86_64 #1
[99062.180402] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 12/12/2018
[99062.180404] Call Trace:
[99062.180407] dump_stack+0x41/0x60
[99062.180414] dump_header+0x4a/0x1db
[99062.180420] oom_kill_process.cold.32+0xb/0x10
[99062.180424] out_of_memory+0x1bd/0x4e0
[99062.180427] __alloc_pages_slowpath+0xbdc/0xcc0
[99062.180434] __alloc_pages_nodemask+0x2db/0x310
[99062.180437] alloc_pages_vma+0x74/0x1d0
[99062.180440] do_fault+0x6f/0x3c0
[99062.180444] __handle_mm_fault+0x4a3/0x7e0
[99062.180447] handle_mm_fault+0xc1/0x1e0
[99062.180449] do_user_addr_fault+0x1b5/0x440
[99062.180454] do_page_fault+0x37/0x130
[99062.180457] ? page_fault+0x8/0x30
[99062.180461] page_fault+0x1e/0x30
[99062.180464] RIP: 0033:0x7f152c145f23
[99062.180466] Code: 0f 7f 44 17 f0 f3 0f 7f 07 c3 48 83 fa 40 77 16 f3 0f 7f 07 f3 0f 7f 47 10 f3 0f 7f 44 17 f0 f3 0f 7f 44 17 e0 c3 48 8d 4f 40 <f3> 0f 7f 07 48 83 e1 c0 f3 0f 7f 44 17 f0 f3 0f 7f 47 10 f3 0f 7f
[99062.180469] RSP: 002b:00007ffcc4cc6e98 EFLAGS: 00010206
[99062.180471] RAX: 00007f152b0e0160 RBX: 00007f152c3634f0 RCX: 00007f152b0e01a0
[99062.180472] RDX: 00000000000000c8 RSI: 0000000000000000 RDI: 00007f152b0e0160
[99062.180474] RBP: 00007ffcc4cc7190 R08: 00007f152b0e0160 R09: 0000000000082000
[99062.180475] R10: 0000000000000002 R11: 00007f152b0e0228 R12: 00007ffcc4cc6ed0
[99062.180476] R13: 00007ffcc4cc7278 R14: 00007ffcc4cc7230 R15: 000000000000fd00
[99062.180479] Mem-Info:
[99062.180480] active_anon:3490970 inactive_anon:388300 isolated_anon:0
active_file:294 inactive_file:499 isolated_file:0
unevictable:51586 dirty:2 writeback:0
slab_reclaimable:40628 slab_unreclaimable:42036
mapped:14000 shmem:8598 pagetables:13124 bounce:0
free:33881 free_pcp:314 free_cma:0
[99062.180484] Node 0 active_anon:13963880kB inactive_anon:1553200kB active_file:1176kB inactive_file:1996kB unevictable:206344kB isolated(anon):0kB isolated(file):0kB mapped:56000kB dirty:8kB writeback:0kB shmem:34392kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 438272kB writeback_tmp:0kB kernel_stack:11280kB pagetables:52496kB all_unreclaimable? no
[99062.180489] Node 0 DMA free:13296kB min:60kB low:72kB high:84kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15992kB managed:15360kB mlocked:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
[99062.180493] lowmem_reserve[]: 0 2960 15964 15964 15964
[99062.180496] Node 0 DMA32 free:65668kB min:12520kB low:15648kB high:18776kB active_anon:2800952kB inactive_anon:122084kB active_file:612kB inactive_file:1352kB unevictable:3072kB writepending:0kB present:3129216kB managed:3063680kB mlocked:3072kB bounce:0kB free_pcp:1348kB local_pcp:436kB free_cma:0kB
[99062.180501] lowmem_reserve[]: 0 0 13003 13003 13003
[99062.180504] Node 0 Normal free:56560kB min:57044kB low:70792kB high:84540kB active_anon:11162928kB inactive_anon:1431116kB active_file:684kB inactive_file:0kB unevictable:203272kB writepending:0kB present:13631488kB managed:13323868kB mlocked:203272kB bounce:0kB free_pcp:60kB local_pcp:28kB free_cma:0kB
[99062.180525] lowmem_reserve[]: 0 0 0 0 0
[99062.180528] Node 0 DMA: 0*4kB 0*8kB 1*16kB (U) 1*32kB (U) 1*64kB (U) 1*128kB (U) 1*256kB (U) 1*512kB (U) 0*1024kB 2*2048kB (UM) 2*4096kB (M) = 13296kB
[99062.180540] Node 0 DMA32: 64*4kB (UMEH) 309*8kB (UMEH) 288*16kB (UMEH) 277*32kB (UMEH) 210*64kB (UMEH) 123*128kB (UMEH) 48*256kB (UME) 14*512kB (UMEH) 1*1024kB (H) 0*2048kB 0*4096kB = 65864kB
[99062.180554] Node 0 Normal: 1267*4kB (UME) 1715*8kB (UMEH) 1186*16kB (UMEH) 362*32kB (UMEH) 95*64kB (UMH) 2*128kB (H) 2*256kB (H) 2*512kB (H) 0*1024kB 0*2048kB 0*4096kB = 57220kB
[99062.180566] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=1048576kB
[99062.180567] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
[99062.180569] 22686 total pagecache pages
[99062.180570] 8450 pages in swap cache
[99062.180571] Swap cache stats: add 1539241, delete 1530735, find 450286/879391
[99062.180572] Free swap = 0kB
[99062.180573] Total swap = 1048572kB
[99062.180573] 4194174 pages RAM
[99062.180574] 0 pages HighMem/MovableOnly
[99062.180575] 93447 pages reserved
[99062.180575] 0 pages hwpoisoned
[99062.180576] Tasks state (memory values in pages):
[99062.180577] [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
[99062.180584] [ 1247] 0 1247 36819 583 278528 356 0 systemd-journal
[99062.180587] [ 1292] 0 1292 28913 583 229376 540 -1000 systemd-udevd
[99062.180591] [ 1562] 0 1562 34363 6702 212992 0 -1000 dmeventd
[99062.180595] [ 1663] 32 1663 16806 836 180224 191 0 rpcbind
[99062.180597] [ 1665] 0 1665 16996 61 135168 128 -1000 auditd
[99062.180599] [ 1687] 0 1687 31249 633 151552 128 0 irqbalance
[99062.180602] [ 1688] 0 1688 21554 586 192512 423 0 VGAuthService
[99062.180604] [ 1689] 0 1689 108649 190 327680 333 0 vmtoolsd
[99062.180606] [ 1693] 0 1693 48584 827 380928 438 0 sssd
[99062.180608] [ 1694] 81 1694 19260 645 167936 89 -900 dbus-daemon
[99062.180610] [ 1698] 0 1698 74837 4194 491520 3293 0 firewalld
[99062.180612] [ 1702] 995 1702 37792 643 188416 131 0 chronyd
[99062.180614] [ 1723] 0 1723 50567 437 405504 556 0 sssd_be
[99062.180616] [ 1724] 0 1724 81574 1582 532480 447 0 sssd_be
[99062.180617] [ 1731] 0 1731 48705 897 425984 263 0 sssd_nss
[99062.180619] [ 1732] 0 1732 43458 1038 376832 312 0 sssd_pam
[99062.180621] [ 1733] 0 1733 41696 779 368640 314 0 sssd_ssh
[99062.180623] [ 1745] 0 1745 22633 848 200704 164 0 systemd-logind
[99062.180625] [ 1792] 0 1792 97932 785 372736 435 0 NetworkManager
[99062.180626] [ 1810] 0 1810 59491 5083 471040 8707 0 puppet
[99062.180628] [ 1812] 0 1812 12849 331 139264 115 0 oddjobd
[99062.180630] [ 1813] 0 1813 10448 330 106496 106 0 rhsmcertd
[99062.180632] [ 1815] 0 1815 23096 810 196608 195 -1000 sshd
[99062.180634] [ 1819] 0 1819 42373 306 196608 449 0 gssproxy
[99062.180636] [ 1839] 0 1839 13953 168 126976 141 0 rpc.gssd
[99062.180638] [ 1844] 0 1844 9240 478 98304 159 0 crond
[99062.180640] [ 1849] 0 1849 3416 304 65536 30 0 agetty
[99062.180642] [ 1917] 998 1917 509185 431 389120 2164 0 polkitd
[99062.180644] [ 2075] 0 2075 167591 49448 622592 0 0 corosync
[99062.180646] [ 2077] 0 2077 500773 8644 765952 22760 0 pcsd
[99062.180650] [ 2079] 0 2079 182806 426 1015808 753 0 rsyslogd
[99062.180652] [ 2138] 0 2138 33747 1103 286720 250 0 pacemakerd
[99062.180653] [ 2139] 0 2139 28235 2551 241664 554 0 corosync-qdevic
[99062.180655] [ 2140] 0 2140 15745 285 163840 105 0 corosync-qdevic
[99062.180657] [ 2142] 189 2142 39236 1390 327680 2046 0 pacemaker-based
[99062.180659] [ 2143] 0 2143 38434 960 327680 983 0 pacemaker-fence
[99062.180661] [ 2144] 0 2144 29008 1099 241664 185 0 pacemaker-execd
[99062.180662] [ 2145] 189 2145 36482 971 311296 812 0 pacemaker-attrd
[99062.180664] [ 2146] 189 2146 33508 909 274432 524 0 pacemaker-sched
[99062.180666] [ 2147] 189 2147 42562 1608 352256 614 0 pacemaker-contr
[99062.180667] [ 2355] 0 2355 53990 3288 434176 2991 0 pcsd
[99062.180669] [ 2463] 994 2463 15215 307 114688 85 0 zabbix_agentd
[99062.180671] [ 2464] 994 2464 15215 346 114688 71 0 zabbix_agentd
[99062.180673] [ 2465] 994 2465 15215 5 114688 86 0 zabbix_agentd
[99062.180675] [ 2466] 994 2466 15215 5 114688 86 0 zabbix_agentd
[99062.180677] [ 2467] 994 2467 15215 5 114688 86 0 zabbix_agentd
[99062.180679] [ 2468] 994 2468 17719 226 118784 62 0 zabbix_agentd
[99062.180681] [ 2519] 0 2519 51925 2682 311296 3107 0 glusterd
[99062.180683] [ 2529] 0 2529 73517 2718 401408 2010 0 glusterfsd
[99062.180684] [ 2548] 0 2548 134829 54153 917504 12465 0 glusterfsd
[99062.180687] [ 2566] 0 2566 73516 3168 409600 2109 0 glusterfsd
[99062.180688] [ 2599] 0 2599 74030 2680 401408 1962 0 glusterfsd
[99062.180690] [ 2618] 0 2618 158724 2394 544768 8404 0 glusterfs
[99062.180692] [ 2907] 29 2907 17963 358 180224 6461 0 rpc.statd
[99062.180694] [ 140707] 0 140707 73516 2153 409600 2116 0 glusterfsd
[99062.180696] [ 259891] 994 259891 475267 4331 524288 1006 0 zabbix_agent2
[99062.180699] [ 565190] 0 565190 4665647 3754190 32448512 165004 0 ganesha.nfsd
[99062.180701] [ 998412] 1000 998412 22410 1222 208896 0 0 systemd
[99062.180703] [ 998416] 1000 998416 81160 188 327680 763 0 (sd-pam)
[99062.180705] [1007136] 0 1007136 44579 915 331776 115 0 sshd
[99062.180708] [1007138] 1000 1007138 44662 501 319488 122 0 sshd
[99062.180710] [1007139] 1000 1007139 6262 568 81920 265 0 bash
[99062.180711] [1007501] 1000 1007501 35836 796 274432 2 0 sudo
[99062.180714] [1007502] 0 1007502 6828 896 81920 0 0 bash
[99062.180716] [1084448] 28 1084448 124401 86 188416 37 0 nscd
[99062.180719] [1103885] 0 1103885 133802 14911 528384 1451 0 puppet
[99062.180721] [1104142] 0 1104142 9321 519 102400 0 0 vi
[99062.180722] [1104188] 994 1104188 35836 778 282624 0 0 sudo
[99062.180724] [1104189] 0 1104189 17150 2620 176128 0 0 python3
[99062.180726] [1104230] 0 1104230 35394 1898 229376 0 0 gluster
[99062.180727] [1104236] 0 1104236 58344 2528 352256 0 0 glfsheal
[99062.180729] [1104429] 0 1104429 49186 5027 393216 0 0 firewall-cmd
[99062.180731] [1104437] 0 1104437 6794 653 86016 0 0 IPaddr2
[99062.180733] [1104450] 0 1104450 6794 376 73728 0 0 IPaddr2
[99062.180734] [1104453] 0 1104453 6794 507 73728 0 0 IPaddr2
[99062.180737] [1104454] 0 1104454 5166 141 73728 0 0 ip
[99062.180738] [1104455] 0 1104455 266 1 45056 0 0 awk
[99062.180740] oom-kill:constraint=CONSTRAINT_NONE,nodemask=(null),cpuset=/,mems_allowed=0,global_oom,task_memcg=/system.slice/nfs-ganesha.service,task=ganesha.nfsd,pid=565190,uid=0
[99062.180949] Out of memory: Killed process 565190 (ganesha.nfsd) total-vm:18662588kB, anon-rss:15016760kB, file-rss:0kB, shmem-rss:0kB, UID:0 pgtables:31688kB oom_score_adj:0
[99063.227775] oom_reaper: reaped process 565190 (ganesha.nfsd), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
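The ganesha.nfsd entry in the task list above shows roughly 15 GB of anonymous RSS at the moment of the kill. A possible stopgap (not a fix) is to cap the unit so a runaway ganesha.nfsd is killed inside its own cgroup rather than starving the rest of the host; MemoryMax= applies on the unified cgroup v2 hierarchy, while the legacy cgroup v1 hierarchy that CentOS 8 uses by default takes MemoryLimit= instead. The 12G value below is only an example.

# cap nfs-ganesha's memory at runtime (value is an example;
# use MemoryLimit= on a cgroup v1 host)
systemctl set-property nfs-ganesha.service MemoryMax=12G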
Minimum memory requirements
by michael.mattsson@gmail.com
Hi,
I have a use case where we need to run multiple instances of Ganesha on Kubernetes. To understand our performance envelope and boundaries and plan the deployment, I need to know the lowest amount of memory Ganesha requires, in theory, to operate sufficiently without being reaped by the OOM killer. Setting the limit extremely low, in the hundreds of megabytes, kills Ganesha almost instantly once pressure is applied.
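For reference, the kind of limit I mean is an ordinary Kubernetes memory request/limit on the pod, e.g. the following, where the deployment name and values are just placeholders:

# set a memory request and limit on a hypothetical ganesha deployment
kubectl set resources deployment/ganesha --requests=memory=256Mi --limits=memory=512Mi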
I'd appreciate any pointers or configuration options to consider.
Regards
Michael