[sr-dev] [kamailio/kamailio] dead lock and 100% CPU ussage on shutdown (#2795)

sergey-safarov notifications at github.com
Fri Jul 2 11:15:07 CEST 2021


### Description

When Kamailio child core generated, then Kamailio goes to shutdown.
I observed a case when the Kamailio process shutdown process a deadlocked and not able to shutdown for a long time.

#### Debugging Data
```
(gdb) bt
#0  0x0000ffff8645b648 in sched_yield () from /lib64/libc.so.6
#1  0x0000000000527820 in timer_del_safe (tl=0xffff841969f8) at core/timer.c:718
#2  0x0000ffff80121630 in unlink_timers (t=0xffff84196738) at timer.h:363
#3  0x0000ffff80122488 in free_cell_helper (dead_cell=0xffff84196738, silent=1, fname=0xffff8022be70 "h_table.c", fline=466) at h_table.c:150
#4  0x0000ffff80124bac in free_hash_table () at h_table.c:466
#5  0x0000ffff801da5c0 in tm_shutdown () at t_funcs.c:88
#6  0x00000000005c8e08 in destroy_modules () at core/sr_module.c:830
#7  0x000000000041df3c in cleanup (show_status=1) at main.c:575
#8  0x000000000041ffd4 in shutdown_children (sig=15, show_status=1) at main.c:718
#9  0x0000000000423a90 in handle_sigs () at main.c:816
#10 0x0000000000433018 in main_loop () at main.c:1903
#11 0x000000000043de90 in main (argc=11, argv=0xfffffac1fb18) at main.c:3053
(gdb) f 1
#1  0x0000000000527820 in timer_del_safe (tl=0xffff841969f8) at core/timer.c:718
718					sched_yield(); /* wait for it to complete */
(gdb) list
713							tl->add_line, tl->del_calls, tl->del_func,
714							tl->del_file, tl->del_line, tl->init, tl->expires_no);
715	#endif
716						return -2; /* do nothing */
717					}
718					sched_yield(); /* wait for it to complete */
719					goto again;
720				}
721				if ((tl->next!=0)&&(tl->prev!=0)){
722					_timer_rm_list(tl); /* detach */
(gdb) bt full
#0  0x0000ffff8645b648 in sched_yield () from /lib64/libc.so.6
No symbol table info available.
#1  0x0000000000527820 in timer_del_safe (tl=0xffff841969f8) at core/timer.c:718
        ret = -1
        __func__ = "timer_del_safe"
#2  0x0000ffff80121630 in unlink_timers (t=0xffff84196738) at timer.h:363
        i = 0
#3  0x0000ffff80122488 in free_cell_helper (dead_cell=0xffff84196738, silent=1, fname=0xffff8022be70 "h_table.c", fline=466) at h_table.c:150
        b = 0x0
        i = 65535
        rpl = 0x0
        tt = 0x0
        foo = 0xffff80089b00 <__func__.11765>
        cbs = 0x0
        cbs_tmp = 0xffff83817f88
        __func__ = "free_cell_helper"
#4  0x0000ffff80124bac in free_hash_table () at h_table.c:466
        p_cell = 0xffff84196738
        tmp_cell = 0xffff80606630
        i = 33128
        __func__ = "free_hash_table"
#5  0x0000ffff801da5c0 in tm_shutdown () at t_funcs.c:88
        __func__ = "tm_shutdown"
#6  0x00000000005c8e08 in destroy_modules () at core/sr_module.c:830
        t = 0xffff84c44808
        foo = 0xffff84c44038
        __func__ = "destroy_modules"
#7  0x000000000041df3c in cleanup (show_status=1) at main.c:575
        memlog = 435649383
        __func__ = "cleanup"
#8  0x000000000041ffd4 in shutdown_children (sig=15, show_status=1) at main.c:718
        __func__ = "shutdown_children"
#9  0x0000000000423a90 in handle_sigs () at main.c:816
        chld = 0
        chld_status = 139
        any_chld_stopped = 1
        memlog = 4403228
        __func__ = "handle_sigs"
#10 0x0000000000433018 in main_loop () at main.c:1903
        i = 8
        pid = 175104
        si = 0x0
        si_desc = "udp receiver child=7 sock=[2600:1f18:578:5703::c6]:5080\000\220\352Ą\377\377\000\000@\365\301\372\377\377\000\000\360\004)\205\377\377\000\000`\365\301\372\377\377\000\000p\365\301\372\377\377\000\000p\365\301\372\377\377\000\000@\365\301\372\377\377\000\000\320\377\377\377\200\377\377\377\000l\377\362g{\367\031"
        nrprocs = 8
        woneinit = 1
        __func__ = "main_loop"
#11 0x000000000043de90 in main (argc=11, argv=0xfffffac1fb18) at main.c:3053
        cfg_stream = 0x177f02d0
        c = -1
        r = 0
        tmp = 0xfffffac1fee1 ""
        tmp_len = 65535
        port = 1
        proto = 0
        ahost = 0x0
        aport = 0
        options = 0x8ae128 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:"
        ret = -1
        seed = 4190370820
        rfd = 4
        debug_save = 0
        debug_flag = 0
        dont_fork_cnt = 2
        n_lst = 0x0
        p = 0xffff863c0da8 <__libc_start_main+168> ""
        st = {st_dev = 25, st_ino = 18103, st_mode = 16832, st_nlink = 2, st_uid = 991, st_gid = 986, st_rdev = 0, __pad1 = 0, st_size = 60, st_blksize = 65536, __pad2 = 0, st_blocks = 0, st_atim = {tv_sec = 1624774599, tv_nsec = 29999981}, st_mtim = {tv_sec = 1625138047, tv_nsec = 607511242}, st_ctim = {tv_sec = 1625138047, tv_nsec = 607511242}, __glibc_reserved = {0, 0}}
        tbuf = "\000\000\000\000\000\000\000\000\000\000n\206\377\377\000\000\000\000\000\000\000\000\000\000\330\375q\206\377\377", '\000' <repeats 58 times>, "x\376q\206\377\377\000\000h\376q\206\377\377\000\000\b\376q\206\377\377\000\000(\376q\206\377\377\000\000\070\376q\206\377\377\000\000\250\376q\206\377\377\000\000\270\376q\206\377\377\000\000\310\376q\206\377\377\000\000H\376q\206\377\377\000\000X\376q\206\377\377", '\000' <repeats 18 times>, "\330\375q\206\377\377", '\000' <repeats 42 times>...
        option_index = 12
        long_options = {{name = 0x8b04e8 "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x8ab520 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x8b04f0 "alias", has_arg = 1, flag = 0x0, val = 1024}, {name = 0x8b04f8 "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x8b0500 "substdef", has_arg = 1, flag = 0x0, val = 1026}, {name = 0x8b0510 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x8b0520 "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x8b0530 "loadmodule", has_arg = 1, flag = 0x0, val = 1029}, {name = 0x8b0540 "modparam", has_arg = 1, flag = 0x0, val = 1030}, {name = 0x8b0550 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, {name = 0x8b0560 "debug", has_arg = 1, flag = 0x0, val = 1032}, {name = 0x8b0568 "cfg-print", has_arg = 0, flag = 0x0, val = 1033}, {name = 0x8b0578 "atexit", has_arg = 1, flag = 0x0, val = 1034}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}}
        __func__ = "main"
(gdb) info locals
ret = -1
__func__ = "timer_del_safe"
```


### Additional Information

  * **Kamailio Version** - output of `kamailio -v`

```
[root at bcf-e-3 ~]# kamailio -v
version: kamailio 5.6.0-dev0 (aarch64/linux) f03900
flags: USE_TCP, USE_TLS, USE_SCTP, TLS_HOOKS, USE_RAW_SOCKS, DISABLE_NAGLE, USE_MCAST, DNS_IP_HACK, SHM_MMAP, PKG_MALLOC, Q_MALLOC, F_MALLOC, TLSF_MALLOC, DBG_SR_MEMORY, USE_FUTEX, FAST_LOCK-ADAPTIVE_WAIT-NOSMP, USE_DNS_CACHE, USE_DNS_FAILOVER, USE_NAPTR, USE_DST_BLOCKLIST, HAVE_RESOLV_RES, TLS_PTHREAD_MUTEX_SHARED
ADAPTIVE_WAIT_LOOPS 1024, MAX_RECV_BUFFER_SIZE 262144, MAX_URI_SIZE 1024, BUF_SIZE 65535, DEFAULT PKG_SIZE 8MB
poll method support: poll, epoll_lt, epoll_et, sigio_rt, select.
id: f03900 
compiled on 17:17:36 Jun 26 2021 with gcc 8.4.1
```
Used master branch with customization.

* **Operating System**:

```
[root at bcf-e-3 ~]# uname -a
Linux bcf-e-3.la.cal911.net 4.18.0-305.3.1.el8.aarch64 #1 SMP Tue Jun 1 16:22:50 UTC 2021 aarch64 aarch64 aarch64 GNU/Linux
[root at bcf-e-3 ~]# cat /etc/os-release 
NAME="CentOS Linux"
VERSION="8"
ID="centos"
ID_LIKE="rhel fedora"
VERSION_ID="8"
PLATFORM_ID="platform:el8"
PRETTY_NAME="CentOS Linux 8"
ANSI_COLOR="0;31"
CPE_NAME="cpe:/o:centos:centos:8"
HOME_URL="https://centos.org/"
BUG_REPORT_URL="https://bugs.centos.org/"
CENTOS_MANTISBT_PROJECT="CentOS-8"
CENTOS_MANTISBT_PROJECT_VERSION="8"
```


-- 
You are receiving this because you are subscribed to this thread.
Reply to this email directly or view it on GitHub:
https://github.com/kamailio/kamailio/issues/2795
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.kamailio.org/pipermail/sr-dev/attachments/20210702/476760f1/attachment-0001.htm>


More information about the sr-dev mailing list