Hello,

the problem was that a structure in shared memory (the request cloned in tm) could have been used in parallel by different kamailio processes.

If there were two processes at the same time, parsing the PAI resulted in setting the header pointer to private memory. The other process could overtake it in processing, using the same cloned request, and this time the PAI pointer is set, but to another private memory zone. I added the locks for calling the callbacks, so the process that parses the PAI is the one cleaning it.

Performance should not be impacted that much: the transaction lock is used and will add sequential processing when there are two replies at the same time, which is not the common case.

Cheers,
Daniel

On 07/07/14 12:40, Igor Potjevlesch wrote:
Hello,

Can you explain the modification and the impact on our platform?
Is it for the pai problem?

Do you have an explanation for the km_val.c problem, which causes a crash in Kamailio too?

Regards,

Igor




2014-07-01 16:40 GMT+02:00 Daniel-Constantin Mierla <miconda@gmail.com>:
Hello,

can you give it a try with the patch from the following commit?

- http://git.sip-router.org/cgi-bin/gitweb.cgi/sip-router/?a=commit;h=da9d56be28e050dd0cb4aed50efcbda043a3e5cf

If all goes fine while testing, I will backport.

Cheers,
Daniel


On 26/06/14 12:58, Igor Potjevlesch wrote:
Hello,

Here is the result:

(gdb) frame 6

#6  0x00007f127cb6dde6 in acc_onreply (t=0x7f1274c157f0, req=0x7f1274c3ac08,
    reply=0x7f12804a6d70, code=200) at acc_logic.c:501
501                             clean_hdr_field(hdr);
(gdb) print hdr
$1 = (hdr_field_t *) 0x7f1274c3c238
(gdb) print *hdr
$2 = {type = HDR_PAI_T, name = {
    s = 0x7f1274c3b6cd "P-Asserted-Identity: <sip:0123456789@domain;user=phone>\r\nP-Sig-Options: Sending-Complete\r\n\r\nv=0\r\no=- 111851 1 IN IP4 A.B.C.D\r\ns=-\r\nt=0 0\r\nm=audio 21336 RTP/AVP 8 101 13\r\nc=IN IP4 A.B.C"..., len = 19}, body = {
    s = 0x7f1274c3b6e2 "<sip:0123456789@domain;user=phone>\r\nP-Sig-Options: Sending-Complete\r\n\r\nv=0\r\no=- 111851 1 IN IP4 A.B.C.D\r\ns=-\r\nt=0 0\r\nm=audio 21336 RTP/AVP 8 101 13\r\nc=IN IP4 A.B.C.D\r\na=rtpmap:101 tele"..., len = 44}, len = 67, parsed = 0x0, next = 0x7f1274c3c278}

(gdb) frame 4

#4  0x000000000056e5e6 in free_pai_ppi_body (pid_b=0x7f12803cb480)
    at parser/parse_ppi_pai.c:102
102                     pkg_free(pid_b);
(gdb) print *pid_b
$3 = {id = 0x0, num_ids = 0, next = 0x1d0}

This is the bt full :


#0  0x0000003d6f6328a5 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x0000003d6f634085 in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x0000000000546d3c in qm_debug_frag (qm=0x7f1280275010, f=0x7f12803cb450) at mem/q_malloc.c:142
        __FUNCTION__ = "qm_debug_frag"

#3  0x0000000000548b26 in qm_free (qm=0x7f1280275010, p=0x7f12803cb480, file=0x6276a0 "<core>: parser/parse_ppi_pai.c", func=0x627a00 "free_pai_ppi_body", line=102) at mem/q_malloc.c:464
        f = 0x7f12803cb450
        size = 139717434027144
        next = 0xf00000000
        prev = 0x7f127cd79e00
        __FUNCTION__ = "qm_free"

#4  0x000000000056e5e6 in free_pai_ppi_body (pid_b=0x7f12803cb480) at parser/parse_ppi_pai.c:102
        __FUNCTION__ = "free_pai_ppi_body"

#5  0x000000000054fee0 in clean_hdr_field (hf=0x7f1274c3c238) at parser/hf.c:126
        h_parsed = 0x7f1274c3c268
        __FUNCTION__ = "clean_hdr_field"

#6  0x00007f127cb6dde6 in acc_onreply (t=0x7f1274c157f0, req=0x7f1274c3ac08, reply=0x7f12804a6d70, code=200) at acc_logic.c:501
        new_uri_bk = {s = 0x7f1274b53cdf "sip:0987654321@GW SIP/2.0\r\nRecord-Route: <sip:A.B.C.D;lr=on>\r\nVia: SIP/2.0/UDP A.B.C.D;branch=z9hG4bK512b.82b197888826f6b60c0c63b79801294d.0\r\nVia: SIP/2.0/UDP A.B.C.D:2057;branch=z9hG4bK-12"..., len = 19}
        br = 0
        hdr = 0x7f1274c3c238
        __FUNCTION__ = "acc_onreply"

#7  0x00007f127cb6e30a in tmcb_func (t=0x7f1274c157f0, type=512, ps=0x7fff0b015580) at acc_logic.c:573
        __FUNCTION__ = "tmcb_func"

#8  0x00007f127ed68478 in run_trans_callbacks_internal (cb_lst=0x7f1274c15860, type=512, trans=0x7f1274c157f0, params=0x7fff0b015580) at t_hooks.c:290
        cbp = 0x7f1274ac0e90
        backup_from = 0x934630
        backup_to = 0x934638
        backup_dom_from = 0x934640
        backup_dom_to = 0x934648
        backup_uri_from = 0x934620
        backup_uri_to = 0x934628
        backup_xavps = 0x934760
        __FUNCTION__ = "run_trans_callbacks_internal"

#9  0x00007f127ed6868a in run_trans_callbacks_with_buf (type=512, rbuf=0x7f1274c158b0, req=0x7f1274c3ac08, repl=0x7f12804a6d70, flags=200) at t_hooks.c:336
        params = {req = 0x7f1274c3ac08, rpl = 0x7f12804a6d70, param = 0x7f1274ac0ea0, code = 200, flags = 200, branch = 0, t_rbuf = 0x7f1274c158b0, dst = 0x7f1274c15900, send_buf = {
            s = 0x7f1274c27620 "SIP/2.0 200 OK\r\nVia: SIP/2.0/UDP A.B.C.D:2057;branch=z9hG4bK-129F259C;rport=2057\r\nCall-ID: cb03dc02e909d3118f86009033290024@A.B.C.D\r\nFrom: <sip:0123456789@domain;user=phone>;epid=00903"..., len = 1021}}
        trans = 0x7f1274c157f0

#10 0x00007f127ed9ac06 in relay_reply (t=0x7f1274c157f0, p_msg=0x7f12804a6d70, branch=0, msg_status=200, cancel_data=0x7fff0b0158e0, do_put_on_wait=1) at t_reply.c:2001
        relay = 0
        save_clone = 0
        buf = 0x7f12804a7cc0 "SIP/2.0 200 OK\r\nVia: SIP/2.0/UDP A.B.C.D:2057;branch=z9hG4bK-129F259C;rport=2057\r\nCall-ID: cb03dc02e909d3118f86009033290024@A.B.C.D\r\nFrom: <sip:0123456789@domain;user=phone>;epid=00903"...
        res_len = 1021
        relayed_code = 200
        relayed_msg = 0x7f12804a6d70
        reply_bak = 0x7fff0b015730
        bm = {to_tag_val = {s = 0x7f1274c16d88 "", len = 5449343}}
        totag_retr = 0
        reply_status = RPS_COMPLETED
        uas_rb = 0x7f1274c158b0
        to_tag = 0x0
        reason = {s = 0x10b0156e0 <Address 0x10b0156e0 out of bounds>, len = 1}
        onsend_params = {req = 0x200924a64, rpl = 0x7f127edbaf90, param = 0x414cc0, code = 1, flags = 0, branch = 0, t_rbuf = 0x7f126a80c828, dst = 0x7f12804a6f68, send_buf = {s = 0xb015700 <Address 0xb015700 out of bounds>, len = 1024}}
        __FUNCTION__ = "relay_reply"

#11 0x00007f127ed9d0b7 in reply_received (p_msg=0x7f12804a6d70) at t_reply.c:2499
        msg_status = 200
        last_uac_status = 183
        ack = 0x40 <Address 0x40 out of bounds>
        ack_len = 0
        branch = 0
        reply_status = -2143420688
        onreply_route = 1
        cancel_data = {cancel_bitmap = 0, reason = {cause = 200, u = {text = {s = 0x0, len = 9586191}, e2e_cancel = 0x0, packed_hdrs = {s = 0x0, len = 9586191}}}}
        uac = 0x7f1274c15958
        t = 0x7f1274c157f0
        lack_dst = {send_sock = 0x7f12803e4110, to = {s = {sa_family = 20496, sa_data = "'\200\022\177\000\000\310\036#\000\000\000\000"}, sin = {sin_family = 20496, sin_port = 32807, sin_addr = {s_addr = 32530}, sin_zero = "\310\036#\000\000\000\000"}, sin6 = {
              sin6_family = 20496, sin6_port = 32807, sin6_flowinfo = 32530, sin6_addr = {__in6_u = {__u6_addr8 = "\310\036#\000\000\000\000\000\360\247=\200\022\177\000", __u6_addr16 = {7880, 35, 0, 0, 42992, 32829, 32530, 0}, __u6_addr32 = {2301640, 0, 2151524336,
                    32530}}}, sin6_scope_id = 2150060928}}, id = 32530, proto = 72 'H', send_flags = {f = 228 '\344', blst_imask = 61 '='}}
        backup_user_from = 0x934630
        backup_user_to = 0x934638
        backup_domain_from = 0x934640
        backup_domain_to = 0x934648
        backup_uri_from = 0x934620
        backup_uri_to = 0x934628
        backup_xavps = 0x934760
        replies_locked = 1
        branch_ret = 0
        prev_branch = 184637856
        blst_503_timeout = 32767
        hf = 0x7f12804a6d90
        onsend_params = {req = 0x7fff0b015960, rpl = 0x550b94, param = 0x231dc8, code = 0, flags = 3, branch = 0, t_rbuf = 0x7f1280275380, dst = 0x7f12803de418, send_buf = {s = 0x7fff0b015960 "`G\223", len = 5538037}}
        ctx = {rec_lev = 0, run_flags = 0, last_retcode = 0, jmp_env = {{__jmpbuf = {139717438500712, 3644308075193502665, 4279488, 140733378027408, 0, 0, 3644308075281583049, -3644194520509117495}, __mask_was_saved = 0, __saved_mask = {__val = {9586395,
                  1065161476041, 124554051613, 9586471, 139717437685488, 9587300, 9586197, 361695345073193192, 9586295, 9586274, 2151546560, 139717437685488, 139717437615640, 139717438500712, 4279488, 140733378027408}}}}}
        __FUNCTION__ = "reply_received"

#12 0x000000000045d837 in do_forward_reply (msg=0x7f12804a6d70, mode=0) at forward.c:777
        new_buf = 0x0
        dst = {send_sock = 0x0, to = {s = {sa_family = 0, sa_data = '\000' <repeats 13 times>}, sin = {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 0, sin6_port = 0, sin6_flowinfo = 0,
              sin6_addr = {__in6_u = {__u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, sin6_scope_id = 0}}, id = 0, proto = 0 '\000', send_flags = {f = 0 '\000', blst_imask = 0 '\000'}}
        new_len = 32530
        r = 1
        s = 0x464804a6d78 <Address 0x464804a6d78 out of bounds>
        len = 0
        __FUNCTION__ = "do_forward_reply"

#13 0x000000000045e0f8 in forward_reply (msg=0x7f12804a6d70) at forward.c:860
No locals.
#14 0x00000000004a58e7 in receive_msg (buf=0x924600 "SIP/2.0 200 OK\r\nVia: SIP/2.0/UDP 185.20.8.4;branch=z9hG4bK512b.82b197888826f6b60c0c63b79801294d.0;received=185.20.8.4\r\nVia: SIP/2.0/UDP 10.143.1.2:2057;branch=z9hG4bK-129F259C;rport=2057\r\nCall-ID: cb0"...,
    len=1124, rcv_info=0x7fff0b015c60) at receive.c:273
         msg = 0x7f12804a6d70
        ctx = {rec_lev = 8868984, run_flags = 0, last_retcode = 0, jmp_env = {{__jmpbuf = {0, 0, 0, 263853236176, 1, 0, 169653785368, 9586112}, __mask_was_saved = 184638568, __saved_mask = {__val = {139717436454816, 12884901899, 139717436454816, 4279488,
                  140733378027408, 140733378026464, 5477954, 0, 139717072962944, 50195, 169290548608, 9586112, 140733378026592, 140733378026512, 5474789, 4279488}}}}}
        ret = 32530
        inb = {s = 0x924600 "SIP/2.0 200 OK\r\nVia: SIP/2.0/UDP A.B.C.D;branch=z9hG4bK512b.82b197888826f6b60c0c63b79801294d.0;received=A.B.C.D\r\nVia: SIP/2.0/UDP A.B.C.D:2057;branch=z9hG4bK-129F259C;rport=2057\r\nCall-ID: cb0"..., len = 1124}
        __FUNCTION__ = "receive_msg"

#15 0x000000000053c9a8 in udp_rcv_loop () at udp_server.c:536
        len = 1124
        buf = "SIP/2.0 200 OK\r\nVia: SIP/2.0/UDP A.B.C.D;branch=z9hG4bK512b.82b197888826f6b60c0c63b79801294d.0;received=A.B.C.D\r\nVia: SIP/2.0/UDP A.B.C.D:2057;branch=z9hG4bK-129F259C;rport=2057\r\nCall-ID: cb0"...
        tmp = 0x9245c0 "10.143.1.10"
        from = 0x7f12803e3f68
        fromlen = 16
        ri = {src_ip = {af = 2, len = 4, u = {addrl = {403182777, 139717436454816}, addr32 = {403182777, 0, 2150315936, 32530}, addr16 = {5305, 6152, 0, 0, 14240, 32811, 32530, 0}, addr = "\271\024\b\030\000\000\000\000\240\067+\200\022\177\000"}}, dst_ip = {af = 2,
            len = 4, u = {addrl = {67638457, 0}, addr32 = {67638457, 0, 0, 0}, addr16 = {5305, 1032, 0, 0, 0, 0, 0, 0}, addr = "\271\024\b\004", '\000' <repeats 11 times>}}, src_port = 5060, dst_port = 5060, proto_reserved1 = 0, proto_reserved2 = 0, src_su = {s = {
              sa_family = 2, sa_data = "\023Ĺ\024\b\030\000\000\000\000\000\000\000"}, sin = {sin_family = 2, sin_port = 50195, sin_addr = {s_addr = 403182777}, sin_zero = "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 2, sin6_port = 50195,
              sin6_flowinfo = 403182777, sin6_addr = {__in6_u = {__u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, sin6_scope_id = 0}}, bind_address = 0x7f12802b3638, proto = 1 '\001'}
        __FUNCTION__ = "udp_rcv_loop"

#16 0x000000000046d42b in main_loop () at main.c:1617
        i = 1
        pid = 0
        si = 0x7f12802b3638
        si_desc = "udp receiver child=1 sock=A.B.C.D:5060\000D\200\022\177\000\000\000\206\063\200\022\177\000\000.\205^\000\000\000\000\000\020w^\000\000\000\000\000\275\005r/\000\000\000\000\300LA\000\000\000\000\000\220_\001\v\377\177", '\000' <repeats 18 times>"\320, ]\001\v\377\177\000\000\364\244K\000\000\000\000"
        nrprocs = 15
        __FUNCTION__ = "main_loop"

#17 0x0000000000470533 in main (argc=7, argv=0x7fff0b015f98) at main.c:2545
        cfg_stream = 0xe20010
        c = -1
        r = 0
        tmp = 0x7fff0b017f70 ""
        tmp_len = 0
        port = 0
        proto = 0
        options = 0x5e0a68 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:"
        ret = -1
        seed = 1972285608
        rfd = 4
debug_save = 0
        debug_flag = 0
        dont_fork_cnt = 0
        n_lst = 0x3d6f60fb88
        p = 0x5cab80 "H\211l$\330L\211d$\340H\215-o\244*"
        __FUNCTION__ = "main"

In the next mail you will find a new bt full of another Kamailio crash, this one in km_val.c: db_mysql_val2str



2014-06-25 18:26 GMT+02:00 Daniel-Constantin Mierla <miconda@gmail.com>:


2014-06-25 18:26 GMT+02:00 Daniel-Constantin Mierla <miconda@gmail.com>:
Hello,

can you give the output of:

frame 6
print hdr
print *hdr

frame 4
print *pid_b

Also, it would be good to have full trace for other details:

bt full

Cheers,
Daniel


On 25/06/14 14:49, Igor Potjevlesch wrote:
Hello,

This morning we updated Kamailio to 4.1.4 with your patch.
[...]


-- 
Daniel-Constantin Mierla - http://www.asipto.com
http://twitter.com/#!/miconda - http://www.linkedin.com/in/miconda


-- 
Daniel-Constantin Mierla - http://www.asipto.com
http://twitter.com/#!/miconda - http://www.linkedin.com/in/miconda