Hi,
we have crashes on some of our kamailio hosts:
``` /usr/sbin/kamailio[1833]: CRITICAL: <core> [core/pass_fd.c:277]: receive_fd(): EOF on 9 /usr/sbin/kamailio[1820]: ALERT: <core> [main.c:738]: handle_sigs(): child process 1831 exited by a signal 6 /usr/sbin/kamailio[1820]: ALERT: <core> [main.c:741]: handle_sigs(): core was not generated /usr/sbin/kamailio[1820]: INFO: <core> [main.c:764]: handle_sigs(): terminating due to SIGCHLD ```
## Problem
Kamailio segfaults when processing tcp replies.
## Config file and use case
We're running a somewhat complex Kemi/python configuration. It routes internal udp traffic from Asterisk to PSTN providers with various protocols (udp/tcp/tls).
I was able to isolate the problem on a test rig to a simple test setup. It happens if kamailio uses a outgoing tcp socket, udp is not affected. The segfault occurs always after approx. 12-13 register/auth/ok cycles (8 packets, each). No other messages or calls are involved at all. I can reproduce the case now with a short register timeout within 4-5 minutes. Registers are not directly processed in kamailio (no usrloc at al).
A downgrade to 5.1.6 shows a similar problem.
## Kamailio version / os
Prebuilt 5.2.1 from deb.kamailio.org in a Ubuntu 16.04 lxc container.
kamailio -V ``` version: kamailio 5.2.1 (x86_64/linux)
flags: STATS: Off, USE_TCP, USE_TLS, USE_SCTP, TLS_HOOKS, USE_RAW_SOCKS, DISABLE_NAGLE, USE_MCAST, DNS_IP_HACK, SHM_MEM, SHM_MMAP, PKG_MALLOC, Q_MALLOC, F_MALLOC, TLSF_MALLOC, DBG_SR_MEMORY, USE_FUTEX, FAST_LOCK-ADAPTIVE_WAIT, USE_DNS_CACHE, USE_DNS_FAILOVER, USE_NAPTR, USE_DST_BLACKLIST, HAVE_RESOLV_RES ADAPTIVE_WAIT_LOOPS=1024, MAX_RECV_BUFFER_SIZE 262144 MAX_URI_SIZE 1024, BUF_SIZE 65535, DEFAULT PKG_SIZE 8MB poll method support: poll, epoll_lt, epoll_et, sigio_rt, select. id: unknown compiled with gcc 5.3.1 ```
## gdb debug info
Core dumps where analyzed with the source and dbg packages from deb.kamailio.org.
(gdb) list ``` 251 clear_branches(); 252 253 if(unlikely(ksr_route_locks_set!=NULL && msg->callid && msg->callid->body.s 254 && msg->callid->body.len >0)) { 255 cidlockidx = get_hash1_raw(msg->callid->body.s, msg->callid->body.len); 256 cidlockidx = cidlockidx % ksr_route_locks_set->size; 257 cidlockset = 1; 258 } 259 260 if(msg->first_line.type == SIP_REQUEST) { ```
(gdb) bt full
``` #0 0x0000000000596b86 in receive_msg ( buf=0x2969590 "SIP/2.0 401 Unauthorized\r\nVia: SIP/2.0/TCP 81.27.163.30:5062;branch=z9hG4bK4473.59e6c5c0a45c67b063e075a8f9f8eccb.0\r\nVia: SIP/2.0/UDP 100.72.253.100:5060;rport=5060;received=100.72.253.100; branch=z9hG4"..., len=573, rcv_info=0x7f256b187e08) at core/receive.c:256 msg = 0x7f2577648d90 ctx = {rec_lev = -878175776, run_flags = 32767, last_retcode = 6442811, jmp_env = {{__jmpbuf = {8108147, 134217728, 8388608, 6, 140736610180220, 139798687284720, 1, 70364449210377}, __mask_was_saved = 878175761, __saved_mask = {__val = {139798687284848, 140736610179776, 139798687284720, 42949672963, 139798687286050, 8388608, 139798687286053, 140736610179792, 6445147, 0, 0, 255, 0, 140736610180220, 139798687284720, 2314885530818453536}}}}} bctx = 0x0 ret = 1 stats_on = 0 tvb = {tv_sec = 140736610180220, tv_usec = 139798687285480} tve = {tv_sec = 139798687284720, tv_usec = 38654722047} tz = {tz_minuteswest = 2014554944, tz_dsttime = 1} diff = 0 inb = { s = 0x2969590 "SIP/2.0 401 Unauthorized\r\nVia: SIP/2.0/TCP 81.27.163.30:5062;branch=z9hG4bK4473.59e6c5c0a45c67b063e075a8f9f8eccb.0\r\nVia: SIP/2.0/UDP 100.72.253.100:5060;rport=5060;received=100.72.25 3.100;branch=z9hG4"..., len = 573} netinfo = {data = {s = 0x7bb873 "INFO", len = 134217728}, rcv = 0x800000, dst = 0x6} keng = 0x0 evp = {data = 0x7fffcba81900, rcv = 0x7f256b187e08, dst = 0x0} cidlockidx = 2555451508 cidlockset = 0 errsipmsg = 0 __func__ = "receive_msg" #1 0x000000000062cb6c in receive_tcp_msg ( tcpbuf=0x7f256b1880e8 "SIP/2.0 401 Unauthorized\r\nVia: SIP/2.0/TCP 81.27.163.30:5062;branch=z9hG4bK4473.59e6c5c0a45c67b063e075a8f9f8eccb.0\r\nVia: SIP/2.0/UDP 100.72.253.100:5060;rport=5060;received=100.72. 253.100;branch=z9hG4"..., len=573, rcv_info=0x7f256b187e08, con=0x7f256b187df0) at core/tcp_read.c:1448 buf = 0x2969590 "SIP/2.0 401 Unauthorized\r\nVia: SIP/2.0/TCP 81.27.163.30:5062;branch=z9hG4bK4473.59e6c5c0a45c67b063e075a8f9f8eccb.0\r\nVia: SIP/2.0/UDP 100.72.253.100:5060;rport=5060;received=100.72.25 3.100;branch=z9hG4"... bsize = 65535 blen = 65535 __func__ = "receive_tcp_msg" #2 0x000000000062ec28 in tcp_read_req (con=0x7f256b187df0, bytes_read=0x7fffcba81c78, read_flags=0x7fffcba81c7c) at core/tcp_read.c:1631 bytes = 573 total_bytes = 573 resp = 1 size = 24 req = 0x7f256b187e70 dst = {send_sock = 0x0, to = {s = {sa_family = 61216, sa_data = "\255\000\000\000\000\000\270\034\250\313\377\177\000"}, sin = {sin_family = 61216, sin_port = 173, sin_addr = {s_addr = 0}, sin_zero = "\270\034\250\313\377\177\000"}, sin6 = {sin6_family = 61216, sin6_port = 173, sin6_flowinfo = 0, sin6_addr = {__in6_u = { __u6_addr8 = "\270\034\250\313\377\177\000\000\b\000\000\000\000\000\000", __u6_addr16 = {7352, 52136, 32767, 0, 8, 0, 0, 0}, __u6_addr32 = {3416792248, 32767, 8, 0}}}, sin6_scope_id = 20}}, id = 0, proto = 1 '\001', send_flags = {f = 0, blst_imask = 1}} c = 57 '9' ret = -878175312 __func__ = "tcp_read_req" #3 0x00000000006318c1 in handle_io (fm=0x7f2577636468, events=1, idx=-1) at core/tcp_read.c:1804 ret = 8 n = 8 read_flags = 1 con = 0x7f256b187df0 s = 9 resp = 140736610180352 t = 32549 __func__ = "handle_io" #4 0x000000000062017f in io_wait_loop_epoll (h=0xadef20 <io_w>, t=2, repeat=0) at core/io_wait.h:1065 n = 1 r = 0 fm = 0x7f2577636468 revents = 1 __func__ = "io_wait_loop_epoll" #5 0x000000000063375a in tcp_receive_loop (unix_sock=25) at core/tcp_read.c:1974 __func__ = "tcp_receive_loop" #6 0x000000000051a6ed in tcp_init_children () at core/tcp_main.c:4853 r = 0 i = 1 reader_fd_1 = 25 pid = 0 si_desc = "tcp receiver (generic)\000\000\r\216U\000\000\000\000\000\bqbw%\177\000\000\320x`w\001", '\000' <repeats 11 times>, "M\307]\000\000\000\000\000Q\243|\000\000\000\000\000\250\271aw%\177\000\000`Fx\000\000\000\000\000\000\000\000\000N\031\000\000s\270{\000\000\000\000\000\320_\347w%\177\000\000s\270{\000\000\000\000\000\000\000\000\b\000\000\000" si = 0x0 __func__ = "tcp_init_children" #7 0x000000000042619b in main_loop () at main.c:1735 i = 2 pid = 6477 si = 0x0 si_desc = "udp receiver child=1 sock=81.27.163.30:5060 (81.27.163.30:5060)\000\004\000\000\000\000\000\000\000\000\260\354j%\177\000\000\000\000\000\000\000\000\000\000\250\347\027k%\177\000\000p \250\313\377\177\000\000\034,M\000\000\000\000\000", '\377' <repeats 12 times>, "\n\000\000" nrprocs = 2 woneinit = 1 __func__ = "main_loop" #8 0x000000000042c9a5 in main (argc=15, argv=0x7fffcba82318) at main.c:2675 cfg_stream = 0x27eb010 c = -1 r = 0 tmp = 0x7fffcba83eb8 "" tmp_len = 1 port = 0 proto = 2021155176 options = 0x76c218 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:" ret = -1 seed = 856022880 rfd = 4 debug_save = 0 debug_flag = 0 dont_fork_cnt = 0 n_lst = 0x7f2578786718 p = 0xffffffff <error: Cannot access memory at address 0xffffffff> st = {st_dev = 93, st_ino = 98, st_nlink = 2, st_mode = 16832, st_uid = 378, st_gid = 378, __pad0 = 0, st_rdev = 0, st_size = 60, st_blksize = 4096, st_blocks = 0, st_atim = {tv_sec = 1549008770, tv_nsec = 369207448}, st_mtim = {tv_sec = 1549009064, tv_nsec = 813217177}, st_ctim = {tv_sec = 1549009064, tv_nsec = 813217177}, __glibc_reserved = {0, 0, 0}} __func__ = "main" ```
Let me know if you need more info, i can fully reproduce the problem.
Cheers,
Thomas