Service is crashing from time to time and it crashed 13 times today. Please find the bt full traces below

Sig 11: 
(gdb) bt full
#0  0x00000000006a16fc in timer_list_expire (t=1339518316, h=0x7facee484fd0, slow_l=0x7facee488208, slow_mark=7936) at core/timer.c:846
        tl = 0x7facf048e1b8
        ret = 0
#1  0x00000000006a1c19 in timer_handler () at core/timer.c:922
        saved_ticks = 1339518316
        run_slow_timer = 0
        i = 768
        __FUNCTION__ = "timer_handler"
#2  0x00000000006a20ba in timer_main () at core/timer.c:961
No locals.
#3  0x000000000042ac0a in main_loop () at main.c:1753
        i = 32
        pid = 0
        si = 0x0
        si_desc = "udp receiver child=31 sock=10.161.224.211:5060", '\000' <repeats 11 times>, "\354\325K\346\060\375\350\003\000\000\000\314\000\000\000\365k\000\000\000\000\000\000P\272A\000\000\000\000\000P\343o%\374\177", '\000' <repeats 18 times>, "\320\335o%\374\177\000\000\215\006Z\000\000\000\000"
        nrprocs = 32
        woneinit = 1
        __FUNCTION__ = "main_loop"
#4  0x000000000043400c in main (argc=13, argv=0x7ffc256fe358) at main.c:2856
        cfg_stream = 0x2674010
        c = -1
        r = 0
        tmp = 0x7ffc256fff2c ""
        tmp_len = 32685
        port = -1767840736
        proto = 32685
        ahost = 0x0
        aport = 0
        options = 0x7e2058 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:"
        ret = -1
        seed = 1101782887
        rfd = 4
        debug_save = 0
        debug_flag = 0
        dont_fork_cnt = 0
        n_lst = 0x7fad96a1d5f7
        p = 0x7fad96a0e77c "free"
        st = {st_dev = 17, st_ino = 15237, st_nlink = 2, st_mode = 16877, st_uid = 105, st_gid = 112, __pad0 = 0, st_rdev = 0, st_size = 120, st_blksize = 4096, st_blocks = 0, st_atim = {
            tv_sec = 1607000056, tv_nsec = 100000000}, st_mtim = {tv_sec = 1607002632, tv_nsec = 716000000}, st_ctim = {tv_sec = 1607003887, tv_nsec = 44000000}, __glibc_reserved = {0, 0, 0}}
        tbuf = "\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\210'Ö\255\177\000\000\030\207\241\226\255\177\000\000\000\000\000\000\000\000\000\000\220\340o%\374\177\000\000`\261w%\374\177\000\000\240\341o%\374\177\000\000رw%\374\177\000\000\220\341o%\374\177\000\000\000\000\000\000\000\000\000\000<\177\241\226\255\177\000\000\000\000\000\000\000\000\000\000.N=\366\000\000\000\000\004\000\000\000\000\000\000\000\006\000\000\000\000\000\000\000\250u\302\226\255\177\000\000\266\210\241\226\255\177", '\000' <repeats 11 times>, "\341o%\374\177\000\000\060=\362\225\255\177\000\000\020\342o%\374\177\000\000\370\001\363\225\255\177\000\000\070\365\330\003\000\000\000\000\000\342o%\374\177\000\000\350*Ö"...
        option_index = 0
        long_options = {{name = 0x7e424f "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x7df854 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x7e4254 "alias", has_arg = 1, 
            flag = 0x0, val = 1024}, {name = 0x7e425a "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x7e4260 "substdef", has_arg = 1, flag = 0x0, val = 1026}, {
            name = 0x7e4269 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x7e4273 "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x7e427d "loadmodule", has_arg = 1, 
            flag = 0x0, val = 1029}, {name = 0x7e4288 "modparam", has_arg = 1, flag = 0x0, val = 1030}, {name = 0x7e4291 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, {
            name = 0x7e429c "debug", has_arg = 1, flag = 0x0, val = 1032}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}}
        __FUNCTION__ = "main"
(gdb) 

Sig 6:

(gdb) bt full
#0  0x00007fad95f56c37 in raise () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#1  0x00007fad95f5a028 in abort () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#2  0x000000000041e39d in sig_alarm_abort (signo=14) at main.c:687
        __FUNCTION__ = "sig_alarm_abort"
#3  <signal handler called>
No symbol table info available.
#4  0x00007fad960183c7 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#5  0x00007fad6f5c024b in futex_get (lock=0x7facee70baa0) at ../../core/mem/../futexlock.h:121
        v = 1
        i = 0
#6  0x00007fad6f5c2167 in get_peer_by_fqdn (fqdn=0x7faceeb377b0) at peermanager.c:240
        i = 0x7f4d8a
#7  0x00007fad6f5e6c31 in get_first_connected_route (cdp_session=0x7faceeb37768, r=0x7facee70a220, app_id=16777236, vendor_id=10415) at routing.c:110
        peers = {0x7facee9f4870, 0x18, 0x1, 0x0, 0x102ef763ea8, 0xc, 0x0, 0x11b00000000, 0x0, 0x7facef617fb0, 0x7facf0a3f2d0, 0x7facef2b5f90, 0x8, 0x7facee407000, 0x7ffc256fd0d0, 
          0x7ffc256fd0dc, 0x3007df3d8, 0x7fad6f617680, 0x41b610 <syslog@plt>, 0x7f4d8a}
        peer_count = 0
        prev_metric = 0
        i = 0x1007f4d8a
        p = 0x7ffc256fcff0
        j = 628084656
        least_recent_time = 7234707
        time_spec = {tv_sec = 140382874676976, tv_nsec = 1569531277302}
        __FUNCTION__ = "get_first_connected_route"
#8  0x00007fad6f5ea8b2 in get_routing_peer (cdp_session=0x7faceeb37768, m=0x7facef617fb0) at routing.c:272
        destination_realm = {s = 0x7facef6d38f0 "isbvpcrf001.zte.com", len = 19}
        destination_host = {s = 0x0, len = 0}
        avp = 0x7facef2b5f90
        avp_vendor = 0x0
        avp2 = 0x0
        group = {head = 0x0, tail = 0x0}
        p = 0x0
        rr = 0x0
        app_id = 16777236
        vendor_id = 10415
        __FUNCTION__ = "get_routing_peer"
#9  0x00007fad6f5ac72d in AAASendMessage (message=0x7facef617fb0, callback_f=0x0, callback_param=0x0) at diameter_comm.c:137
        cdp_session = 0x7faceeb37768
        p = 0x7facef7fdce0
        __FUNCTION__ = "AAASendMessage"
#10 0x00007fad6ef05dbb in rx_send_str (rx_session_id=0x7facf0849d50) at rx_str.c:157
        __FUNCTION__ = "rx_send_str"
        auth = 0x7faceeb37768
        str = 0x7facef617fb0
        avp = 0x7facef2b5f90
        x = "\000\000\000\001"
#11 0x00007fad6eeb25c4 in callback_dialog (dlg=0x7facf0036d70, type=8192, params=0x7fad72b2a1c0 <params>) at ims_qos_mod.c:453
        msg = 0x0
        parsed_cseq = 0x7facee4406c8
        rx_session_id = 0x7facf0849d50
---Type <return> to continue, or q <return> to quit---
        auth = 0x0
        p_session_data = 0x0
        current_fd = 0x0
        new_fd = 0x0
        current_has_video = 0
        new_has_video = 0
        must_unlock_aaa = 1
        __FUNCTION__ = "callback_dialog"
#12 0x00007fad728973af in run_dlg_callbacks (type=8192, dlg=0x7facf0036d70, req=0x0, rpl=0x0, dir=0, dlg_data=0x0) at dlg_cb.c:274
        cb = 0x7facf0849e68
        __FUNCTION__ = "run_dlg_callbacks"
#13 0x00007fad728d5315 in destroy_dlg (dlg=0x7facf0036d70) at dlg_hash.c:219
        ret = 0
        var = 0x7faceea76348
        __FUNCTION__ = "destroy_dlg"
#14 0x00007fad728d5b1b in destroy_dlg_table () at dlg_hash.c:291
        dlg = 0x0
        l_dlg = 0x7facf0036d70
        i = 120
        __FUNCTION__ = "destroy_dlg_table"
#15 0x00007fad7290b71e in mod_destroy () at ims_dialog.c:627
No locals.
#16 0x0000000000616ae1 in destroy_modules () at core/sr_module.c:746
        t = 0x7fad75fcae18
        foo = 0x7fad75fca710
        __FUNCTION__ = "destroy_modules"
#17 0x000000000041ce17 in cleanup (show_status=1) at main.c:563
        memlog = 0
        __FUNCTION__ = "cleanup"
#18 0x000000000041e6ef in shutdown_children (sig=15, show_status=1) at main.c:706
        __FUNCTION__ = "shutdown_children"
#19 0x0000000000421755 in handle_sigs () at main.c:806
        chld = 0
        chld_status = 139
        any_chld_stopped = 1
        memlog = 0
        __FUNCTION__ = "handle_sigs"
#20 0x000000000042bc85 in main_loop () at main.c:1817
        i = 32
        pid = 27887
        si = 0x0
        si_desc = "udp receiver child=31 sock=10.161.224.211:5060", '\000' <repeats 11 times>, "\354\325K\346\060\375\350\003\000\000\000\314\000\000\000\365k\000\000\000\000\000\000P\272A\000\000\000\000\000P\343o%\374\177", '\000' <repeats 18 times>, "\320\335o%\374\177\000\000\215\006Z\000\000\000\000"
        nrprocs = 32
        woneinit = 1
        __FUNCTION__ = "main_loop"
#21 0x000000000043400c in main (argc=13, argv=0x7ffc256fe358) at main.c:2856
        cfg_stream = 0x2674010
        c = -1
        r = 0
        tmp = 0x7ffc256fff2c ""
        tmp_len = 32685
---Type <return> to continue, or q <return> to quit---
        port = -1767840736
        proto = 32685
        ahost = 0x0
        aport = 0
        options = 0x7e2058 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:"
        ret = -1
        seed = 1101782887
        rfd = 4
        debug_save = 0
        debug_flag = 0
        dont_fork_cnt = 0
        n_lst = 0x7fad96a1d5f7
        p = 0x7fad96a0e77c "free"
        st = {st_dev = 17, st_ino = 15237, st_nlink = 2, st_mode = 16877, st_uid = 105, st_gid = 112, __pad0 = 0, st_rdev = 0, st_size = 120, st_blksize = 4096, st_blocks = 0, st_atim = {
            tv_sec = 1607000056, tv_nsec = 100000000}, st_mtim = {tv_sec = 1607002632, tv_nsec = 716000000}, st_ctim = {tv_sec = 1607003887, tv_nsec = 44000000}, __glibc_reserved = {0, 0, 0}}
        tbuf = "\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\210'Ö\255\177\000\000\030\207\241\226\255\177\000\000\000\000\000\000\000\000\000\000\220\340o%\374\177\000\000`\261w%\374\177\000\000\240\341o%\374\177\000\000رw%\374\177\000\000\220\341o%\374\177\000\000\000\000\000\000\000\000\000\000<\177\241\226\255\177\000\000\000\000\000\000\000\000\000\000.N=\366\000\000\000\000\004\000\000\000\000\000\000\000\006\000\000\000\000\000\000\000\250u\302\226\255\177\000\000\266\210\241\226\255\177", '\000' <repeats 11 times>, "\341o%\374\177\000\000\060=\362\225\255\177\000\000\020\342o%\374\177\000\000\370\001\363\225\255\177\000\000\070\365\330\003\000\000\000\000\000\342o%\374\177\000\000\350*Ö"...
        option_index = 0
        long_options = {{name = 0x7e424f "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x7df854 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x7e4254 "alias", has_arg = 1, 
            flag = 0x0, val = 1024}, {name = 0x7e425a "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x7e4260 "substdef", has_arg = 1, flag = 0x0, val = 1026}, {
            name = 0x7e4269 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x7e4273 "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x7e427d "loadmodule", has_arg = 1, 
            flag = 0x0, val = 1029}, {name = 0x7e4288 "modparam", has_arg = 1, flag = 0x0, val = 1030}, {name = 0x7e4291 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, {
            name = 0x7e429c "debug", has_arg = 1, flag = 0x0, val = 1032}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}}
        __FUNCTION__ = "main"
(gdb) 

I observed that service crash on Signal 11 and when service starts again it immediately crashes on Sig 6. After that service remains up for some time. Moreover, there are many following errors of udp_send_failed and tcp_read error. 

#0  0x00007f668aa94013 in recvfrom () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#1  0x00000000006638d0 in udp_rcv_loop () at core/udp_server.c:460
        len = 638
        buf = "SUBSCRIBE tel:+923492324215 SIP/2.0\r\nFrom: <tel:+923492324215>;tag=2869510412\r\nTo: <tel:+923492324215>\r\nCSeq: 722026762 SUBSCRIBE\r\nCall-ID: 2869510410_3596082716@10.100.217.241\r\nVia: SIP/2.0/UDP 10.100"...
        tmp = 0xdf831260 <error: Cannot access memory at address 0xdf831260>
        from = 0x7f666ac86e78
        fromlen = 16

and 

#0  0x00007f668aa94013 in recvfrom () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#1  0x00000000006638d0 in udp_rcv_loop () at core/udp_server.c:460
        len = 952
        buf = "REGISTER sip:ims.mnc006.mcc410.3gppnetwork.org SIP/2.0\r\nVia: SIP/2.0/UDP 10.100.213.50;rport;branch=z9hG4bKL60mek0i6jXu6V\r\nFrom: <sip:410060265715793@ims.mnc006.mcc410.3gppnetwork.org>;tag=jYbVBQiAfL1"...
        tmp = 0xdf831260 <error: Cannot access memory at address 0xdf831260>
        from = 0x7f666ac86e78
        fromlen = 16
        ri = {src_ip = {af = 2, len = 4, u = {addrl = {140072621270026, 140723275665552}, addr32 = {852845578, 32613, 2967179408, 32764}, addr16 = {25610, 13013, 32613,
                0, 37008, 45275, 32764, 0}, addr = "\nd\325\062e\177\000\000\220\220۰\374\177\000"}}, dst_ip = {af = 2, len = 4, u = {addrl = {3839926538, 0},
              addr32 = {3839926538, 0, 0, 0}, addr16 = {41226, 58592, 0, 0, 0, 0, 0, 0}, addr = "\n\241\340\344", '\000' <repeats 11 times>}}, src_port = 5060,
          dst_port = 5060, proto_reserved1 = 0, proto_reserved2 = 0, src_su = {s = {sa_family = 2, sa_data = "\023\304\nd\325\062\000\000\000\000\000\000\000"}, sin = {
              sin_family = 2, sin_port = 50195, sin_addr = {s_addr = 852845578}, sin_zero = "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 2, sin6_port = 50195,
              sin6_flowinfo = 852845578, sin6_addr = {__in6_u = {__u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0,
                    0}}}, sin6_scope_id = 0}}, bind_address = 0x7f666aa23d78, proto = 1 '\001'}
        evp = {data = 0x0, rcv = 0x0, dst = 0x0}
        printbuf = "P\221۰\374\177\000\000\005\364<\337e\177\000\000 \221۰\374\177\000\000\265˧\337e\177\000\000\340\223۰\374\177\000\000L\221۰\374\177\000\000\340\223۰\004\000\000\000\360\005|\000\016\000\000\000n\365\023o\016\000\000\000\177$\000\000\000\000\000\000P\221۰\374\177\000\000(6\373\337e\177\000\000`\211A\000\000\000\000\000`\227۰\374\177", '\000' <repeats 18 times>, "\020\222۰\374\177\000\000\035\261]", '\000' <repeats 21 times>, "\300\022\262\000\000\000\000\000I/e\025\000\000\000\000`\211A\000\000\000\000\000`\227۰\374\177", '\000' <repeats 19 times>, "\222"...
        i = -537187496
        j = 32613
        l = 32613
        __FUNCTION__ = "udp_rcv_loop"
#2  0x0000000000424c4c in main_loop () at main.c:1628

This VM PCSCF node is created on OpenStack. 

Regards

Hamid R. Hashmi



From: Daniel-Constantin Mierla <miconda@gmail.com>
Sent: Monday, December 7, 2020 5:53 PM
To: Kamailio (SER) - Users Mailing List <sr-users@lists.kamailio.org>; Hamid Hashmi <hamid2kviii@hotmail.com>; Henning Westerholt <hw@skalatan.de>
Subject: Re: [SR-Users] kamailio-5.2.0 Crash - udp_send failed
 

Hello,


get also the output of "bt full" from gdb.


I see you use ims moudules, they are known to have some limitations for various settings. Is this something that you can reproduce, or is something that happens from time to time without any pattern?


Cheers,
Daniel


On 06.12.20 20:13, Hamid Hashmi wrote:
Hello 

I have updated Kamailio from 5.2.0 to 5.4.2 but the service is still crashing with sig 11 and sig 6. Please find the attached 'bt full' traces.

Sig 11:
Core was generated by `/usr/local/sbin/kamailio -f /usr/local/etc/kamailio_pcscf/kamailio.cfg -P /var/'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x00000000006a16fc in timer_list_expire (t=1339518316, h=0x7facee484fd0, slow_l=0x7facee488208, slow_mark=7936) at core/timer.c:846
846                     _timer_rm_list(tl); /* detach */
(gdb) bt
#0  0x00000000006a16fc in timer_list_expire (t=1339518316, h=0x7facee484fd0, slow_l=0x7facee488208, slow_mark=7936) at core/timer.c:846
#1  0x00000000006a1c19 in timer_handler () at core/timer.c:922
#2  0x00000000006a20ba in timer_main () at core/timer.c:961
#3  0x000000000042ac0a in main_loop () at main.c:1753
#4  0x000000000043400c in main (argc=13, argv=0x7ffc256fe358) at main.c:2856
Sig 6:
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `/usr/local/sbin/kamailio -f /usr/local/etc/kamailio_pcscf/kamailio.cfg -P /var/'.
Program terminated with signal SIGABRT, Aborted.
#0  0x00007fad95f56c37 in raise () from /lib/x86_64-linux-gnu/libc.so.6
(gdb) bt
#0  0x00007fad95f56c37 in raise () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x00007fad95f5a028 in abort () from /lib/x86_64-linux-gnu/libc.so.6
#2  0x000000000041e39d in sig_alarm_abort (signo=14) at main.c:687
#3  <signal handler called>
#4  0x00007fad960183c7 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
#5  0x00007fad6f5c024b in futex_get (lock=0x7facee70baa0) at ../../core/mem/../futexlock.h:121
#6  0x00007fad6f5c2167 in get_peer_by_fqdn (fqdn=0x7faceeb377b0) at peermanager.c:240
#7  0x00007fad6f5e6c31 in get_first_connected_route (cdp_session=0x7faceeb37768, r=0x7facee70a220, app_id=16777236, vendor_id=10415) at routing.c:110
#8  0x00007fad6f5ea8b2 in get_routing_peer (cdp_session=0x7faceeb37768, m=0x7facef617fb0) at routing.c:272
#9  0x00007fad6f5ac72d in AAASendMessage (message=0x7facef617fb0, callback_f=0x0, callback_param=0x0) at diameter_comm.c:137
#10 0x00007fad6ef05dbb in rx_send_str (rx_session_id=0x7facf0849d50) at rx_str.c:157
#11 0x00007fad6eeb25c4 in callback_dialog (dlg=0x7facf0036d70, type=8192, params=0x7fad72b2a1c0 <params>) at ims_qos_mod.c:453
#12 0x00007fad728973af in run_dlg_callbacks (type=8192, dlg=0x7facf0036d70, req=0x0, rpl=0x0, dir=0, dlg_data=0x0) at dlg_cb.c:274
#13 0x00007fad728d5315 in destroy_dlg (dlg=0x7facf0036d70) at dlg_hash.c:219
#14 0x00007fad728d5b1b in destroy_dlg_table () at dlg_hash.c:291
#15 0x00007fad7290b71e in mod_destroy () at ims_dialog.c:627
#16 0x0000000000616ae1 in destroy_modules () at core/sr_module.c:746
#17 0x000000000041ce17 in cleanup (show_status=1) at main.c:563
#18 0x000000000041e6ef in shutdown_children (sig=15, show_status=1) at main.c:706
#19 0x0000000000421755 in handle_sigs () at main.c:806
#20 0x000000000042bc85 in main_loop () at main.c:1817
#21 0x000000000043400c in main (argc=13, argv=0x7ffc256fe358) at main.c:2856

PS: I am using ubuntu 14.04.  

Regards

Hamid R. Hashmi


From: Henning Westerholt <hw@skalatan.de>
Sent: Saturday, September 26, 2020 12:04 PM
To: Kamailio (SER) - Users Mailing List <sr-users@lists.kamailio.org>
Cc: Hamid Hashmi <hamid2kviii@hotmail.com>
Subject: Re: kamailio-5.2.0 Crash - udp_send failed
 
Hello,

you are using a old and non released dev version of kamailio. These are meant only for development purposes.

Please update to a proper version (like the latest one based on 5.3 or 5.4 branch.

Then try to reproduce the bug again and report on the list or by creating an issue.

Cheers,

Henning


--
Henning Westerholt - https://skalatan.de/blog/
Kamailio services - https://skalatan.de/services


From: sr-users <sr-users-bounces@lists.kamailio.org> on behalf of Hamid Hashmi <hamid2kviii@hotmail.com>
Sent: Friday, September 25, 2020 9:28:07 PM
To: sr-users@lists.kamailio.org <sr-users@lists.kamailio.org>
Subject: [SR-Users] kamailio-5.2.0 Crash - udp_send failed