### Description The Kamailio service occasionally crashes in the `sock_cb` callback function of the `http_async_client` module.
#### Reproduction The crash happens occasionally; no specific conditions that trigger it have been identified.
#### Debugging Data ``` Program terminated with signal SIGSEGV, Segmentation fault.
(gdb) bt full #0 0x0000ffff9f4205e4 in event_del_ () from /lib64/libevent-2.1.so.6 No symbol table info available. #1 0x0000ffff9e7c9b24 in sock_cb (e=0xffffa2027ba8, s=9, what=4, cbp=0xffffa1c0fe58, sockp=0xffffa202d258) at http_multi.c:166 g = 0xffffa1c0fe58 cell = 0xffffa202d258 whatstr = {0xffff9e7eb508 "none", 0xffff9e7eb510 "IN", 0xffff9e7eb518 "OUT", 0xffff9e7eb520 "INOUT", 0xffff9e7eb528 "REMOVE"} __func__ = "sock_cb" #2 0x0000ffff9e73ffa8 in Curl_multi_closed () from /lib64/libcurl.so.4 No symbol table info available. #3 0x0000ffff9e73d43c in Curl_closesocket () from /lib64/libcurl.so.4 No symbol table info available. #4 0x0000ffff9e72c8f8 in conn_free.part () from /lib64/libcurl.so.4 No symbol table info available. #5 0x0000ffff9e72d544 in Curl_disconnect () from /lib64/libcurl.so.4 No symbol table info available. #6 0x0000ffff9e740948 in multi_done () from /lib64/libcurl.so.4 No symbol table info available. #7 0x0000ffff9e742b10 in curl_multi_remove_handle () from /lib64/libcurl.so.4 No symbol table info available. #8 0x0000ffff9e7c7f2c in event_cb (fd=59, kind=1, userp=0xffffa2027ba8) at http_multi.c:123 error = 0xffff9e7eac70 "TIMEOUT" g = 0xffffa1c0fe58 rc = 1968268110 easy = 0xffffa2027ba8 cell = 0xffffa202d258 __func__ = "event_cb" action = 0 #9 0x0000ffff9f423628 in event_process_active_single_queue () from /lib64/libevent-2.1.so.6 No symbol table info available. #10 0x0000ffff9f423ee8 in event_base_loop () from /lib64/libevent-2.1.so.6 No symbol table info available. 
#11 0x0000ffff9e7b7160 in async_http_run_worker (worker=0xffffa1606ed8) at async_http.c:95 ret = 65535 __func__ = "async_http_run_worker" #12 0x0000ffff9e7dc364 in child_init (rank=0) at http_async_client_mod.c:380 pid = 0 i = 0 __func__ = "child_init" #13 0x0000000000607fa8 in init_mod_child (m=0xffffb136c2f0, rank=0) at core/sr_module.c:899 --Type <RET> for more, q to quit, c to continue without paging-- ret = 0 __func__ = "init_mod_child" #14 0x0000000000607b38 in init_mod_child (m=0xffffb136d018, rank=0) at core/sr_module.c:892 ret = 0 __func__ = "init_mod_child" #15 0x0000000000607b38 in init_mod_child (m=0xffffb136ead0, rank=0) at core/sr_module.c:892 ret = 0 __func__ = "init_mod_child" #16 0x0000000000607b38 in init_mod_child (m=0xffffb13a98f8, rank=0) at core/sr_module.c:892 ret = 0 __func__ = "init_mod_child" #17 0x0000000000607b38 in init_mod_child (m=0xffffb140ab28, rank=0) at core/sr_module.c:892 ret = 0 __func__ = "init_mod_child" #18 0x0000000000607b38 in init_mod_child (m=0xffffb145ebf8, rank=0) at core/sr_module.c:892 ret = 1 __func__ = "init_mod_child" #19 0x00000000006089c4 in init_child (rank=0) at core/sr_module.c:953 ret = -783429656 type = 0x923dc0 "PROC_MAIN" __func__ = "init_child" #20 0x0000000000433030 in main_loop () at main.c:1843 i = 8 pid = 2823204 si = 0x0 si_desc = "udp receiver child=7 sock=[2605:84c0:48:205:1::6]:5080\000\000\220\036ٰ\377\377\000\000 \321M\321\377\377\000\000h\220\066\261\377\377\000\000\320\377\377\377\200\377\377\377\020\321M\321\377\377\000\000 \321M\321\377\377\000\000 \321M\321\377\377\000\000\360\320M\321\377\377\000\000\320\377\377\377\200\377\377\377" nrprocs = 8 woneinit = 1 __func__ = "main_loop" #21 0x000000000043f4e8 in main (argc=11, argv=0xffffd14dd6a8) at main.c:3086 cfg_stream = 0xc2502a0 c = -1 r = 0 tmp = 0xffffd14dfe34 "" tmp_len = 0 port = 0 proto = 65535 ahost = 0x0 --Type <RET> for more, q to quit, c to continue without paging-- aport = 0 options = 0x8e7c08 
":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:" ret = -1 seed = 203142350 rfd = 4 debug_save = 0 debug_flag = 0 dont_fork_cnt = 2 n_lst = 0xffffffff p = 0xffffb3d34348 <__libc_start_main+160> "" st = {st_dev = 25, st_ino = 18459, st_mode = 16877, st_nlink = 3, st_uid = 0, st_gid = 987, st_rdev = 0, __pad1 = 0, st_size = 80, st_blksize = 65536, __pad2 = 0, st_blocks = 0, st_atim = {tv_sec = 1707863213, tv_nsec = 9999979}, st_mtim = {tv_sec = 1729635590, tv_nsec = 702238079}, st_ctim = { tv_sec = 1729635598, tv_nsec = 32030431}, __glibc_reserved = {0, 0}} tbuf = '\000' <repeats 56 times>, "x\376\b\264\377\377\000\000h\376\b\264\377\377\000\000\b\376\b\264\377\377\000\000(\376\b\264\377\377\000\000\070\376\b\264\377\377\000\000\250\376\b\264\377\377\000\000\270\376\b\264\377\377\000\000\310\376\b\264\377\377\000\000H\376\b\264\377\377\000\000X\376\b\264\377\377", '\000' <repeats 18 times>, "\330\375\b\264\377\377", '\000' <repeats 42 times>... option_index = 12 long_options = {{name = 0x8e9fe0 "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x8e4e48 "version", has_arg = 0, flag = 0x0, val = 118}, { name = 0x8e9fe8 "alias", has_arg = 1, flag = 0x0, val = 1024}, {name = 0x8e9ff0 "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x8e9ff8 "substdef", has_arg = 1, flag = 0x0, val = 1026}, {name = 0x8ea008 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x8ea018 "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x8ea028 "loadmodule", has_arg = 1, flag = 0x0, val = 1029}, {name = 0x8ea038 "modparam", has_arg = 1, flag = 0x0, val = 1030}, {name = 0x8ea048 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, {name = 0x8ea058 "debug", has_arg = 1, flag = 0x0, val = 1032}, { name = 0x8ea060 "cfg-print", has_arg = 0, flag = 0x0, val = 1033}, {name = 0x8ea070 "atexit", has_arg = 1, flag = 0x0, val = 1034}, { name = 0x8ea078 "all-errors", has_arg = 0, flag = 0x0, val = 1035}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}} __func__ = 
"main" (gdb) f 8 #8 0x0000ffff9e7c7f2c in event_cb (fd=59, kind=1, userp=0xffffa2027ba8) at http_multi.c:123 123 curl_multi_remove_handle(g->multi, easy); (gdb) f 1 #1 0x0000ffff9e7c9b24 in sock_cb (e=0xffffa2027ba8, s=9, what=4, cbp=0xffffa1c0fe58, sockp=0xffffa202d258) at http_multi.c:166 166 event_del(cell->ev); (gdb) p cell $1 = (struct http_m_cell *) 0xffffa202d258 (gdb) p cell->ev $2 = (struct event *) 0x92fbf0 (gdb) p *cell->ev $3 = {type = 1701998435, name = {s = 0x615f7273752f6572 <error: Cannot access memory at address 0x615f7273752f6572>, len = 1663987830}, params = {hooks = {contact = { expires = 0x7273752f65726f63, q = 0x632e7076615f, methods = 0x5b203e65726f633c, received = 0x7273752f65726f63, instance = 0x393a632e7076615f, reg_id = 0x203a5d32, ob = 0x6f6e20646c756f63, flags = 0x61636f6c6c612074}, uri = {transport = 0x7273752f65726f63, lr = 0x632e7076615f, r2 = 0x5b203e65726f633c, maddr = 0x7273752f65726f63, ttl = 0x393a632e7076615f, dstip = 0x203a5d32, dstport = 0x6f6e20646c756f63, ftag = 0x61636f6c6c612074, ob = 0x6572616873206574}, event_dialog = {call_id = 0x7273752f65726f63, from_tag = 0x632e7076615f, to_tag = 0x5b203e65726f633c, include_session_description = 0x7273752f65726f63, sla = 0x393a632e7076615f, ma = 0x203a5d32}}, list = 0x79726f6d656d2064}} ``` As it's seen in the stack backtrace the `cell` object is already released at the moment when `event_del` with this object is called.
#### Log Messages -
#### SIP Traffic -
### Possible Solutions -
### Additional Information ``` [centos@esrp-1b ~]$ kamailio -v version: kamailio 5.7.2 (aarch64/linux) 968dbe flags: USE_TCP, USE_TLS, USE_SCTP, TLS_HOOKS, USE_RAW_SOCKS, DISABLE_NAGLE, USE_MCAST, DNS_IP_HACK, SHM_MMAP, PKG_MALLOC, MEM_JOIN_FREE, Q_MALLOC, F_MALLOC, TLSF_MALLOC, DBG_SR_MEMORY, USE_FUTEX, FAST_LOCK-ADAPTIVE_WAIT-NOSMP, USE_DNS_CACHE, USE_DNS_FAILOVER, USE_NAPTR, USE_DST_BLOCKLIST, HAVE_RESOLV_RES, TLS_PTHREAD_MUTEX_SHARED ADAPTIVE_WAIT_LOOPS 1024, MAX_RECV_BUFFER_SIZE 262144, MAX_URI_SIZE 1024, BUF_SIZE 65535, DEFAULT PKG_SIZE 8MB poll method support: poll, epoll_lt, epoll_et, sigio_rt, select. id: 968dbe compiled on 19:25:05 Nov 10 2023 with gcc 8.5.0 ```
* **Operating System**: ``` [centos@esrp-1b ~]$ cat /etc/centos-release CentOS Stream release 8 [centos@esrp-1b ~]$ uname -a Linux esrp-1b.fl.base911.com 4.18.0-536.el8.aarch64 #1 SMP Thu Jan 18 15:21:54 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux ```
Are you using the OpenSSL 3.x library on this distribution, and are you using HTTPS? If so, you should upgrade to at least the latest bugfix release of the 5.7 branch, e.g. 5.7.6. There have been important bugfixes for this module. Even if this is not the case, an upgrade is probably a good idea.
The same crash happens on Kamailio 5.8.2: ``` [centos@esrp-0a ~]$ cat /etc/centos-release CentOS Stream release 8
[centos@esrp-0a ~]$ uname -a Linux esrp-0a.fl.nga911.com 4.18.0-536.el8.aarch64 #1 SMP Thu Jan 18 15:21:54 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux
[centos@esrp-0a coredump]# sudo rpm -qa | grep openssl openssl-devel-1.1.1k-12.el8.aarch64 openssl-libs-1.1.1k-12.el8.aarch64 openssl-1.1.1k-12.el8.aarch64
[centos@esrp-0a ~]$ kamailio -v version: kamailio 5.8.2 (aarch64/linux) f0f03c-dirty flags: USE_TCP, USE_TLS, USE_SCTP, TLS_HOOKS, USE_RAW_SOCKS, DISABLE_NAGLE, USE_MCAST, DNS_IP_HACK, SHM_MMAP, PKG_MALLOC, MEM_JOIN_FREE, Q_MALLOC, F_MALLOC, TLSF_MALLOC, DBG_SR_MEMORY, USE_FUTEX, FAST_LOCK-ADAPTIVE_WAIT-NOSMP, USE_DNS_CACHE, USE_DNS_FAILOVER, USE_NAPTR, USE_DST_BLOCKLIST, HAVE_RESOLV_RES, TLS_PTHREAD_MUTEX_SHARED ADAPTIVE_WAIT_LOOPS 1024, MAX_RECV_BUFFER_SIZE 262144, MAX_SEND_BUFFER_SIZE 262144, MAX_URI_SIZE 1024, BUF_SIZE 65535, DEFAULT PKG_SIZE 8MB poll method support: poll, epoll_lt, epoll_et, sigio_rt, select. id: f0f03c -dirty compiled on 18:18:25 Jul 17 2024 with gcc 8.5.0 ```
This is stack backtrace for this crash on kamailio 5.8.2, it looks the same ``` (gdb) bt full #0 0x0000ffff9e7f05d8 in event_del_ () from /lib64/libevent-2.1.so.6 No symbol table info available. #1 0x0000ffff9db99c14 in sock_cb (e=0xffffa11cdb80, s=9, what=4, cbp=0xffffa106c4b0, sockp=0xffffa11fb400) at http_multi.c:170 g = 0xffffa106c4b0 cell = 0xffffa11fb400 whatstr = {0xffff9dbbb718 "none", 0xffff9dbbb720 "IN", 0xffff9dbbb728 "OUT", 0xffff9dbbb730 "INOUT", 0xffff9dbbb738 "REMOVE"} __func__ = "sock_cb" #2 0x0000ffff9db0ffa8 in Curl_multi_closed () from /lib64/libcurl.so.4 No symbol table info available. #3 0x0000ffff9db0d43c in Curl_closesocket () from /lib64/libcurl.so.4 No symbol table info available. #4 0x0000ffff9dafc8f8 in conn_free.part () from /lib64/libcurl.so.4 No symbol table info available. #5 0x0000ffff9dafd544 in Curl_disconnect () from /lib64/libcurl.so.4 No symbol table info available. #6 0x0000ffff9db10948 in multi_done () from /lib64/libcurl.so.4 No symbol table info available. #7 0x0000ffff9db12b10 in curl_multi_remove_handle () from /lib64/libcurl.so.4 No symbol table info available. #8 0x0000ffff9db9801c in event_cb (fd=59, kind=1, userp=0xffffa11cdb80) at http_multi.c:124 error = 0xffff9dbbae80 "TIMEOUT" g = 0xffffa106c4b0 rc = 2040172201 easy = 0xffffa11cdb80 cell = 0xffffa11fb400 __func__ = "event_cb" action = 0 #9 0x0000ffff9e7f3628 in event_process_active_single_queue () from /lib64/libevent-2.1.so.6 No symbol table info available. #10 0x0000ffff9e7f3ee8 in event_base_loop () from /lib64/libevent-2.1.so.6 No symbol table info available. 
#11 0x0000ffff9db87250 in async_http_run_worker (worker=0xffffa09e0f40) at async_http.c:95 ret = 65535 __func__ = "async_http_run_worker" #12 0x0000ffff9dbac578 in child_init (rank=0) at http_async_client_mod.c:388 pid = 0 i = 0 __func__ = "child_init" #13 0x00000000005fd458 in init_mod_child (m=0xffffb086bd80, rank=0) at core/sr_module.c:920 ret = 0 __func__ = "init_mod_child" #14 0x00000000005fcfe8 in init_mod_child (m=0xffffb086cba0, rank=0) at core/sr_module.c:912 ret = 0 __func__ = "init_mod_child" #15 0x00000000005fcfe8 in init_mod_child (m=0xffffb086e740, rank=0) at core/sr_module.c:912 ret = 0 __func__ = "init_mod_child" #16 0x00000000005fcfe8 in init_mod_child (m=0xffffb08aaf90, rank=0) at core/sr_module.c:912 ret = 0 __func__ = "init_mod_child" #17 0x00000000005fcfe8 in init_mod_child (m=0xffffb090dbe0, rank=0) at core/sr_module.c:912 ret = 0 __func__ = "init_mod_child" #18 0x00000000005fcfe8 in init_mod_child (m=0xffffb09634e0, rank=0) at core/sr_module.c:912 ret = 1 __func__ = "init_mod_child" #19 0x00000000005fde74 in init_child (rank=0) at core/sr_module.c:999 ret = -762743888 type = 0x939e78 "PROC_MAIN" __func__ = "init_child" #20 0x00000000004339e0 in main_loop () at main.c:1942 i = 8 pid = 3440811 si = 0x0 si_desc = "udp receiver child=7 sock=[2605:84c0:4a:204:1::6]:5080\000\000\270\322&\260\377\377\000\000\360t\211\322\377\377\000\000\020\211\206\260\377\377\000\000\300t\211\322\377\377\000\000\320\377\377\377\200\377\377\377\340t\211\322\377\377\000\000\360t\211\322\377\377\000\000\360t\211\322\377\377\000\000\300t\211\322\377\377\000" nrprocs = 8 woneinit = 1 __func__ = "main_loop" #21 0x0000000000440020 in main (argc=11, argv=0xffffd2897a88) at main.c:3256 cfg_stream = 0x2c8702a0 c = -1 r = 0 tmp = 0xffffd289fe34 "" tmp_len = 65535 port = 5060 proto = 0 aproto = 0 ahost = 0x0 aport = 0 options = 0x8fd0b8 ":f:cm:M:dVIhEeb:B:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:" ret = -1 seed = 1933999959 rfd = 4 debug_save = 0 debug_flag = 0 dont_fork_cnt 
= 2 n_lst = 0xffffb3276390 <__pthread_initialize_minimal+688> p = 0xffffb3114348 <__libc_start_main+160> "" st = {st_dev = 25, st_ino = 18453, st_mode = 16877, st_nlink = 3, st_uid = 0, st_gid = 987, st_rdev = 0, __pad1 = 0, st_size = 80, st_blksize = 65536, __pad2 = 0, st_blocks = 0, st_atim = {tv_sec = 1708716961, tv_nsec = 749999981}, st_mtim = {tv_sec = 1730276508, tv_nsec = 825303614}, st_ctim = {tv_sec = 1730276531, tv_nsec = 494692804}, __glibc_reserved = {0, 0}} l1 = 256 tbuf = "\330\375F\263\377\377", '\000' <repeats 58 times>, "x\376F\263\377\377\000\000h\376F\263\377\377\000\000\b\376F\263\377\377\000\000(\376F\263\377\377\000\000\070\376F\263\377\377\000\000\250\376F\263\377\377\000\000\270\376F\263\377\377\000\000\310\376F\263\377\377\000\000H\376F\263\377\377\000\000X\376F\263\377\377", '\000' <repeats 18 times>, "\330\375F\263\377\377", '\000' <repeats 42 times>... option_index = 12 long_options = {{name = 0x8ff528 "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x8fa228 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x8ff530 "alias", has_arg = 1, flag = 0x0, val = 1024}, {name = 0x8ff538 "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x8ff540 "substdef", has_arg = 1, flag = 0x0, val = 1026}, {name = 0x8ff550 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x8ff560 "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x8ff570 "loadmodule", has_arg = 1, flag = 0x0, val = 1029}, {name = 0x8ff580 "modparam", has_arg = 1, flag = 0x0, val = 1030}, {name = 0x8ff590 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, {name = 0x8ff5a0 "debug", has_arg = 1, flag = 0x0, val = 1032}, {name = 0x8ff5a8 "cfg-print", has_arg = 0, flag = 0x0, val = 1033}, {name = 0x8ff5b8 "atexit", has_arg = 1, flag = 0x0, val = 1034}, {name = 0x8ff5c0 "all-errors", has_arg = 0, flag = 0x0, val = 1035}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}} __func__ = "main" ```
Just for completeness: in order for the TLS fixes from the update to work, it's also necessary to set `tls_threads_mode = 2`. But as you suggested in the linked PR, it's probably another issue.
This issue is stale because it has been open 6 weeks with no activity. Remove stale label or comment or this will be closed in 2 weeks.
Closed #4019 as completed.
Fixed by https://github.com/kamailio/kamailio/pull/4020.