Steps:
(1) 9 users and 1 mcu registered the server
(2) All 9 users connect to the mcu through the server
(3) For a while, some users showed registration fail, we found Kamailio does not respond
to any further SIP message.
I checked this using `netstat -apn` and it showed:
```
Active Internet connections (servers and established)
Proto Recv-Q Send-Q Local Address Foreign Address State
PID/Program name
tcp 0 0 10.100.92.42:5060 0.0.0.0:* LISTEN
9384/kamailio
tcp 0 0 10.100.92.42:5061 0.0.0.0:* LISTEN
9384/kamailio
tcp 48963 0 10.100.92.42:5060 10.100.92.42:42177 ESTABLISHED
9375/kamailio
tcp 29621 0 10.100.92.42:5060 10.100.92.40:34392 ESTABLISHED
9375/kamailio
tcp 1617 0 10.100.92.42:43997 10.100.92.42:5062 ESTABLISHED
9375/kamailio
tcp 2506 0 10.100.92.42:5060 10.100.92.68:37386 ESTABLISHED
9383/kamailio
udp 0 0 127.0.0.1:50180 127.0.0.1:7722 ESTABLISHED
9384/kamailio
udp 0 0 127.0.0.1:47125 127.0.0.1:7722 ESTABLISHED
9378/kamailio
udp 0 0 127.0.0.1:55318 127.0.0.1:7722 ESTABLISHED
9375/kamailio
udp 0 0 127.0.0.1:44595 127.0.0.1:7722 ESTABLISHED
9382/kamailio
udp 0 0 127.0.0.1:41045 127.0.0.1:7722 ESTABLISHED
9383/kamailio
udp 0 0 127.0.0.1:52901 127.0.0.1:7722 ESTABLISHED
9380/kamailio
udp 0 0 127.0.0.1:39078 127.0.0.1:7722 ESTABLISHED
9376/kamailio
udp 0 0 127.0.0.1:36548 127.0.0.1:7722 ESTABLISHED
9377/kamailio
udp 0 0 127.0.0.1:55005 127.0.0.1:7722 ESTABLISHED
9374/kamailio
udp 0 0 127.0.0.1:53049 127.0.0.1:7722 ESTABLISHED
9381/kamailio
udp 0 0 127.0.0.1:44358 127.0.0.1:7722 ESTABLISHED
9374/kamailio
udp 0 0 127.0.0.1:60253 127.0.0.1:7722 ESTABLISHED
9379/kamailio
udp 0 0 10.100.92.42:5060 0.0.0.0:*
9374/kamailio
udp 0 0 ::ffff:127.0.0.1:7722 :::*
9354/rtpengine
raw 0 0 0.0.0.0:255 0.0.0.0:* 255
9374/kamailio
```
And I attached the gdb to the those processed which Recv-Q value were high, it showed:
PID [9375]
```
#0 0x400a7450 in recvfrom () from /lib/libpthread.so.0
No symbol table info available.
#1 0x00265848 in udp_rcv_loop () at udp_server.c:446
len = 472
buf = "REGISTER sip:10.100.92.42:5060 SIP/2.0\r\nVia: SIP/2.0/UDP
10.100.91.13:5060;rport;branch=z9hG4bK649620372\r\nFrom:
<sip:DEMOROOM_2@10.100.92.42>;tag=658334621\r\nTo:
<sip:DEMOROOM_2@10.100.92.42>\r\nCall-ID:"...
tmp = 0x49fbfc "10.100.92.43"
from = 0x402d5b24
fromlen = 16
ri = {src_ip = {af = 2, len = 4, u = {addrl = {224093194, 3200776404, 1935216,
1734536862}, addr32 = {224093194, 3200776404, 1935216, 1734536862}, addr16 = {25610,
3419, 63700, 48839, 34672, 29, 61086, 26466}, addr =
"\nd[\r\324\370\307\276p\207\035\000\236\356bg"}}, dst_ip = {af = 2, len = 4, u
= {addrl = {710697994, 0,
0, 0}, addr32 = {710697994, 0, 0, 0}, addr16 = {25610, 10844, 0, 0, 0, 0,
0, 0}, addr = "\nd\\*", '\000' <repeats 11 times>}}, src_port =
5060,
dst_port = 5060, proto_reserved1 = 0, proto_reserved2 = 0, src_su = {s =
{sa_family = 2, sa_data = "\023\304\nd[\r\000\000\000\000\000\000\000"}, sin =
{
sin_family = 2, sin_port = 50195, sin_addr = {s_addr = 224093194}, sin_zero
= "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 2, sin6_port = 50195,
sin6_flowinfo = 224093194, sin6_addr = {__in6_u = {__u6_addr8 =
'\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0},
__u6_addr32 = {0, 0, 0, 0}}},
sin6_scope_id = 0}}, bind_address = 0x40261e14, proto = 1 '\001'}
__FUNCTION__ = "udp_rcv_loop"
#2 0x000c90a0 in main_loop () at main.c:1629
i = 0
pid = 0
si = 0x40261e14
si_desc = "udp receiver child=0
sock=10.100.92.42:5060\000\244\372\307\276\304y2A\000\000,\000\000\000X\000 \000\062@
\313'@h\t;\000\230\325&@\000\000\000\000x\021\002\000\060K1@\000\000\000\000d\372\307\276\260\275'\000\230\331\070\000X\311\070\000\030\000\000\000\000PVA\000\000\000\000\000\000\a@Lu\223A"
r = 1074200576
nrprocs = 1
__FUNCTION__ = "main_loop"
#3 0x000d0830 in main (argc=5, argv=0xbec7fd04) at main.c:2581
cfg_stream = 0x4b5008
c = -1
r = 0
tmp = 0x40070ac4 "P\002\006@\a"
tmp_len = -1094190232
port = 1074045560
proto = -1094190164
options = 0x383880
":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:"
ret = -1
seed = 3985271057
rfd = 206
debug_save = 0
debug_flag = 0
dont_fork_cnt = 0
n_lst = 0x0
p = 0x5 <Address 0x5 out of bounds>
__FUNCTION__ = "main"
```
PID [9383]
```
#0 0x400a6a04 in __lll_lock_wait () from /lib/libpthread.so.0
#1 0x400a10b8 in pthread_mutex_lock () from /lib/libpthread.so.0
#2 0x002145b4 in tcpconn_do_send (fd=21, c=0x41969534,
buf=0x402cbcb0 "SIP/2.0 200 Keepalive\r\nVia: SIP/2.0/TCP
10.100.92.42:5062;rport=42177;branch=z9hG4bK416855936;received=10.100.92.42\r\nFrom:
<sip:10.100.92.42:42177>;tag=1996784611\r\nTo:
<sip:10.100.92.42:5060>;tag=e926"..., len=340, send_flags=..., resp=0xbec7daf8,
locked=0) at tcp_main.c:2520
#3 0x002130f0 in tcpconn_send_put (c=0x41969534,
buf=0x402cbcb0 "SIP/2.0 200 Keepalive\r\nVia: SIP/2.0/TCP
10.100.92.42:5062;rport=42177;branch=z9hG4bK416855936;received=10.100.92.42\r\nFrom:
<sip:10.100.92.42:42177>;tag=1996784611\r\nTo:
<sip:10.100.92.42:5060>;tag=e926"..., len=340, send_flags=...) at
tcp_main.c:2377
#4 0x002105c0 in tcp_send (dst=0xbec7e38c, from=0x0,
buf=0x402cbcb0 "SIP/2.0 200 Keepalive\r\nVia: SIP/2.0/TCP
10.100.92.42:5062;rport=42177;branch=z9hG4bK416855936;received=10.100.92.42\r\nFrom:
<sip:10.100.92.42:42177>;tag=1996784611\r\nTo:
<sip:10.100.92.42:5060>;tag=e926"..., len=340) at tcp_main.c:2133
#5 0x40e33164 in msg_send (dst=0xbec7e38c,
buf=0x402cbcb0 "SIP/2.0 200 Keepalive\r\nVia: SIP/2.0/TCP
10.100.92.42:5062;rport=42177;branch=z9hG4bK416855936;received=10.100.92.42\r\nFrom:
<sip:10.100.92.42:42177>;tag=1996784611\r\nTo:
<sip:10.100.92.42:5060>;tag=e926"..., len=340) at ../../forward.h:225
#6 0x40e362e8 in sl_reply_helper (msg=0x40342814, code=200, reason=0x402e1d6c
"Keepalive", tag=0x0) at sl_funcs.c:224
#7 0x40e373e0 in sl_send_reply (msg=0x40342814, code=200, reason=0x402e1d6c
"Keepalive") at sl_funcs.c:327
#8 0x40e2b30c in w_sl_send_reply (msg=0x40342814, p1=0x40282108
"\244\324(@\001", p2=0x40282140 "d\320(@ ") at sl.c:236
#9 0x00029b90 in do_action (h=0xbec7f110, a=0x4028cf34, msg=0x40342814) at action.c:1100
#10 0x00036ce8 in run_actions (h=0xbec7f110, a=0x4028cf34, msg=0x40342814) at
action.c:1583
#11 0x00029a2c in do_action (h=0xbec7f110, a=0x4028d120, msg=0x40342814) at action.c:1079
#12 0x00036ce8 in run_actions (h=0xbec7f110, a=0x40288ed8, msg=0x40342814) at
action.c:1583
#13 0x000263ac in do_action (h=0xbec7f110, a=0x40279db8, msg=0x40342814) at action.c:712
#14 0x00036ce8 in run_actions (h=0xbec7f110, a=0x4027321c, msg=0x40342814) at
action.c:1583
#15 0x00037538 in run_top_route (a=0x4027321c, msg=0x40342814, c=0x0) at action.c:1669
#16 0x0013ce80 in receive_msg (
buf=0x41969714 "OPTIONS sip:10.100.92.42:5060 SIP/2.0\r\nVia: SIP/2.0/TCP
10.100.92.42:5062;rport;branch=z9hG4bK416855936\r\nFrom:
<sip:10.100.92.42:42177>;tag=1996784611\r\nTo:
<sip:10.100.92.42:5060>\r\nCall-ID: 2059319404"..., len=404,
rcv_info=0x4196955c) at receive.c:216
#17 0x00248fbc in receive_tcp_msg (
tcpbuf=0x41969714 "OPTIONS sip:10.100.92.42:5060 SIP/2.0\r\nVia: SIP/2.0/TCP
10.100.92.42:5062;rport;branch=z9hG4bK416855936\r\nFrom:
<sip:10.100.92.42:42177>;tag=1996784611\r\nTo:
<sip:10.100.92.42:5060>\r\nCall-ID: 2059319404"..., len=404,
rcv_info=0x4196955c, con=0x41969534) at tcp_read.c:1230
#18 0x0024c484 in tcp_read_req (con=0x41969534, bytes_read=0xbec7f714,
read_flags=0xbec7f70c) at tcp_read.c:1431
#19 0x0024fdd8 in handle_io (fm=0x403321ac, events=1, idx=-1) at tcp_read.c:1655
#20 0x0023f814 in io_wait_loop_epoll (h=0x47d90c, t=2, repeat=0) at io_wait.h:1073
#21 0x00250fc8 in tcp_receive_loop (unix_sock=22) at tcp_read.c:1764
#22 0x00234674 in tcp_init_children () at tcp_main.c:4879
#23 0x000c9dec in main_loop () at main.c:1714
#24 0x000d0830 in main (argc=5, argv=0xbec7fd04) at main.c:2581
```
In process 9375, I found it got stuck in recvfrom () (udp_server.c:446), and the code is:
`len=recvfrom(bind_address->socket, buf, BUF_SIZE, 0, &from->s,&fromlen);`
It seems operate using blocking mode instead of a non-blocking one,
why???
Or maybe I miss a packet-check function like select() or poll()??
Thanks in advance. :)
---
Reply to this email directly or view it on GitHub:
https://github.com/kamailio/kamailio/issues/510