Hi,

we have crashes on some of our kamailio hosts:

/usr/sbin/kamailio[1833]: CRITICAL: <core> [core/pass_fd.c:277]: receive_fd(): EOF on 9
/usr/sbin/kamailio[1820]: ALERT: <core> [main.c:738]: handle_sigs(): child process 1831 exited by a signal 6
/usr/sbin/kamailio[1820]: ALERT: <core> [main.c:741]: handle_sigs(): core was not generated
/usr/sbin/kamailio[1820]: INFO: <core> [main.c:764]: handle_sigs(): terminating due to SIGCHLD

Problem

Kamailio segfaults when processing tcp replies.

Config file and use case

We're running a somewhat complex Kemi/python configuration. It routes internal udp traffic from Asterisk to PSTN providers with various protocols (udp/tcp/tls).

I was able to isolate the problem on a test rig to a simple test setup. It happens if kamailio uses a outgoing tcp socket, udp is not affected.
The segfault occurs always after approx. 12-13 register/auth/ok cycles (8 packets, each). No other messages or calls are involved at all. I can reproduce the case now with a short register timeout within 4-5 minutes.
Registers are not directly processed in kamailio (no usrloc at al).

A downgrade to 5.1.6 shows a similar problem.

Kamailio version / os

Prebuilt 5.2.1 from deb.kamailio.org in a Ubuntu 16.04 lxc container.

kamailio -V

version: kamailio 5.2.1 (x86_64/linux) 

flags: STATS: Off, USE_TCP, USE_TLS, USE_SCTP, TLS_HOOKS, USE_RAW_SOCKS, DISABLE_NAGLE, USE_MCAST, DNS_IP_HACK, SHM_MEM, SHM_MMAP, PKG_MALLOC, Q_MALLOC, F_MALLOC, TLSF_MALLOC, DBG_SR_MEMORY, USE_FUTEX, FAST_LOCK-ADAPTIVE_WAIT, USE_DNS_CACHE, USE_DNS_FAILOVER, USE_NAPTR, USE_DST_BLACKLIST, HAVE_RESOLV_RES
ADAPTIVE_WAIT_LOOPS=1024, MAX_RECV_BUFFER_SIZE 262144 MAX_URI_SIZE 1024, BUF_SIZE 65535, DEFAULT PKG_SIZE 8MB
poll method support: poll, epoll_lt, epoll_et, sigio_rt, select.
id: unknown 
compiled with gcc 5.3.1

gdb debug info

Core dumps where analyzed with the source and dbg packages from deb.kamailio.org.

(gdb) list

251             clear_branches();
252
253             if(unlikely(ksr_route_locks_set!=NULL && msg->callid && msg->callid->body.s
254                             && msg->callid->body.len >0)) {
255                     cidlockidx = get_hash1_raw(msg->callid->body.s, msg->callid->body.len);
256                     cidlockidx = cidlockidx % ksr_route_locks_set->size;
257                     cidlockset = 1;
258             }
259
260             if(msg->first_line.type == SIP_REQUEST) {

(gdb) bt full

#0  0x0000000000596b86 in receive_msg (
    buf=0x2969590 "SIP/2.0 401 Unauthorized\r\nVia: SIP/2.0/TCP 81.27.163.30:5062;branch=z9hG4bK4473.59e6c5c0a45c67b063e075a8f9f8eccb.0\r\nVia: SIP/2.0/UDP 100.72.253.100:5060;rport=5060;received=100.72.253.100;
branch=z9hG4"..., len=573, rcv_info=0x7f256b187e08) at core/receive.c:256
        msg = 0x7f2577648d90
        ctx = {rec_lev = -878175776, run_flags = 32767, last_retcode = 6442811, jmp_env = {{__jmpbuf = {8108147, 134217728, 8388608, 6, 140736610180220, 139798687284720, 1, 70364449210377}, 
              __mask_was_saved = 878175761, __saved_mask = {__val = {139798687284848, 140736610179776, 139798687284720, 42949672963, 139798687286050, 8388608, 139798687286053, 140736610179792, 6445147, 0, 0, 
                  255, 0, 140736610180220, 139798687284720, 2314885530818453536}}}}}
        bctx = 0x0
        ret = 1
        stats_on = 0
        tvb = {tv_sec = 140736610180220, tv_usec = 139798687285480}
        tve = {tv_sec = 139798687284720, tv_usec = 38654722047}
        tz = {tz_minuteswest = 2014554944, tz_dsttime = 1}
        diff = 0
        inb = {
          s = 0x2969590 "SIP/2.0 401 Unauthorized\r\nVia: SIP/2.0/TCP 81.27.163.30:5062;branch=z9hG4bK4473.59e6c5c0a45c67b063e075a8f9f8eccb.0\r\nVia: SIP/2.0/UDP 100.72.253.100:5060;rport=5060;received=100.72.25
3.100;branch=z9hG4"..., len = 573}
        netinfo = {data = {s = 0x7bb873 "INFO", len = 134217728}, rcv = 0x800000, dst = 0x6}
        keng = 0x0
        evp = {data = 0x7fffcba81900, rcv = 0x7f256b187e08, dst = 0x0}
        cidlockidx = 2555451508
        cidlockset = 0
        errsipmsg = 0
        __func__ = "receive_msg"
#1  0x000000000062cb6c in receive_tcp_msg (
    tcpbuf=0x7f256b1880e8 "SIP/2.0 401 Unauthorized\r\nVia: SIP/2.0/TCP 81.27.163.30:5062;branch=z9hG4bK4473.59e6c5c0a45c67b063e075a8f9f8eccb.0\r\nVia: SIP/2.0/UDP 100.72.253.100:5060;rport=5060;received=100.72.
253.100;branch=z9hG4"..., len=573, rcv_info=0x7f256b187e08, con=0x7f256b187df0) at core/tcp_read.c:1448
        buf = 0x2969590 "SIP/2.0 401 Unauthorized\r\nVia: SIP/2.0/TCP 81.27.163.30:5062;branch=z9hG4bK4473.59e6c5c0a45c67b063e075a8f9f8eccb.0\r\nVia: SIP/2.0/UDP 100.72.253.100:5060;rport=5060;received=100.72.25
3.100;branch=z9hG4"...
        bsize = 65535
        blen = 65535
        __func__ = "receive_tcp_msg"
#2  0x000000000062ec28 in tcp_read_req (con=0x7f256b187df0, bytes_read=0x7fffcba81c78, read_flags=0x7fffcba81c7c) at core/tcp_read.c:1631
        bytes = 573
        total_bytes = 573
        resp = 1
        size = 24
        req = 0x7f256b187e70
        dst = {send_sock = 0x0, to = {s = {sa_family = 61216, sa_data = "\255\000\000\000\000\000\270\034\250\313\377\177\000"}, sin = {sin_family = 61216, sin_port = 173, sin_addr = {s_addr = 0}, 
              sin_zero = "\270\034\250\313\377\177\000"}, sin6 = {sin6_family = 61216, sin6_port = 173, sin6_flowinfo = 0, sin6_addr = {__in6_u = {
                  __u6_addr8 = "\270\034\250\313\377\177\000\000\b\000\000\000\000\000\000", __u6_addr16 = {7352, 52136, 32767, 0, 8, 0, 0, 0}, __u6_addr32 = {3416792248, 32767, 8, 0}}}, sin6_scope_id = 20}}, 
          id = 0, proto = 1 '\001', send_flags = {f = 0, blst_imask = 1}}
        c = 57 '9'
        ret = -878175312
        __func__ = "tcp_read_req"
#3  0x00000000006318c1 in handle_io (fm=0x7f2577636468, events=1, idx=-1) at core/tcp_read.c:1804
        ret = 8
        n = 8
        read_flags = 1
        con = 0x7f256b187df0
        s = 9
        resp = 140736610180352
        t = 32549
        __func__ = "handle_io"
#4  0x000000000062017f in io_wait_loop_epoll (h=0xadef20 <io_w>, t=2, repeat=0) at core/io_wait.h:1065
        n = 1
        r = 0
        fm = 0x7f2577636468
        revents = 1
        __func__ = "io_wait_loop_epoll"
#5  0x000000000063375a in tcp_receive_loop (unix_sock=25) at core/tcp_read.c:1974
        __func__ = "tcp_receive_loop"
#6  0x000000000051a6ed in tcp_init_children () at core/tcp_main.c:4853
        r = 0
        i = 1
        reader_fd_1 = 25
        pid = 0
        si_desc = "tcp receiver (generic)\000\000\r\216U\000\000\000\000\000\bqbw%\177\000\000\320x`w\001", '\000' <repeats 11 times>, "M\307]\000\000\000\000\000Q\243|\000\000\000\000\000\250\271aw%\177\000\000`Fx\000\000\000\000\000\000\000\000\000N\031\000\000s\270{\000\000\000\000\000\320_\347w%\177\000\000s\270{\000\000\000\000\000\000\000\000\b\000\000\000"
        si = 0x0
        __func__ = "tcp_init_children"
#7  0x000000000042619b in main_loop () at main.c:1735
        i = 2
        pid = 6477
        si = 0x0
        si_desc = "udp receiver child=1 sock=81.27.163.30:5060 (81.27.163.30:5060)\000\004\000\000\000\000\000\000\000\000\260\354j%\177\000\000\000\000\000\000\000\000\000\000\250\347\027k%\177\000\000p \250\313\377\177\000\000\034,M\000\000\000\000\000", '\377' <repeats 12 times>, "\n\000\000"
        nrprocs = 2
        woneinit = 1
        __func__ = "main_loop"
#8  0x000000000042c9a5 in main (argc=15, argv=0x7fffcba82318) at main.c:2675
        cfg_stream = 0x27eb010
        c = -1
        r = 0
        tmp = 0x7fffcba83eb8 ""
        tmp_len = 1
        port = 0
        proto = 2021155176
        options = 0x76c218 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:"
        ret = -1
        seed = 856022880
        rfd = 4
        debug_save = 0
        debug_flag = 0
        dont_fork_cnt = 0
        n_lst = 0x7f2578786718
        p = 0xffffffff <error: Cannot access memory at address 0xffffffff>
        st = {st_dev = 93, st_ino = 98, st_nlink = 2, st_mode = 16832, st_uid = 378, st_gid = 378, __pad0 = 0, st_rdev = 0, st_size = 60, st_blksize = 4096, st_blocks = 0, st_atim = {tv_sec = 1549008770, 
            tv_nsec = 369207448}, st_mtim = {tv_sec = 1549009064, tv_nsec = 813217177}, st_ctim = {tv_sec = 1549009064, tv_nsec = 813217177}, __glibc_reserved = {0, 0, 0}}
        __func__ = "main"

Let me know if you need more info, i can fully reproduce the problem.

Cheers,

Thomas


You are receiving this because you are subscribed to this thread.
Reply to this email directly, view it on GitHub, or mute the thread.