I observing a case when Kamailio stop processing calls.
What is found:
Lot of connections with CLOSE_WAIT state
[root@sbc-13 ~]# netstat -anp | grep 5060
tcp 129 0 100.85.118.66:5060 0.0.0.0:* LISTEN 56907/kamailio
tcp 153 0 100.85.118.66:5060 3.101.84.132:21929 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:32738 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:39084 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:28843 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:57939 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:27823 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:15820 CLOSE_WAIT -
tcp 0 0 100.85.118.66:5060 207.244.110.239:11587 ESTABLISHED 56907/kamailio
tcp 153 0 100.85.118.66:5060 3.101.84.132:11972 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:57245 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:54344 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:41416 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:3644 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:61657 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:49325 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:65334 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:48657 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:1612 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:7879 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:56626 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:57247 CLOSE_WAIT -
tcp 153 0 100.85.118.66:5060 3.101.84.132:30494 CLOSE_WAIT -
At this time on two core server, one core loaded 100% and another core is free.
And top show
%Cpu(s): 29.4 us, 26.5 sy
As about 26% CPU cycles proceed kernel call, using strace
utility I found Kamailio process (with top CSP usage) continusly make thus kernel calls.
recvfrom(58, 0x7ffdb46a5ab0, 232, MSG_PEEK, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
recvfrom(58, 0x7ffdb46a5ab0, 232, MSG_PEEK, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
recvfrom(58, 0x7ffdb46a5ab0, 232, MSG_PEEK, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
recvfrom(58, 0x7ffdb46a5ab0, 232, MSG_PEEK, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
recvfrom(58, 0x7ffdb46a5ab0, 232, MSG_PEEK, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
recvfrom(58, 0x7ffdb46a5ab0, 232, MSG_PEEK, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
recvfrom(58, 0x7ffdb46a5ab0, 232, MSG_PEEK, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
recvfrom(58, 0x7ffdb46a5ab0, 232, MSG_PEEK, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
recvfrom(58, 0x7ffdb46a5ab0, 232, MSG_PEEK, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
Then I check 58 process file descriptor
[root@sbc-13 ~]# ls -la /proc/56907/fd/58
lrwx------. 1 kamailio kamailio 64 Mar 3 10:49 /proc/56907/fd/58 -> 'socket:[3931924]'
Info about open socket
[root@sbc-13 ~]# lsof -a -p 56907| grep 3931924
kamailio 56907 kamailio 58u IPv4 3931924 0t0 TCP ip-100-85-118-66.us-west-1.compute.internal:sip->207.244.110.239:11495 (ESTABLISHED)
Info about Kamailio 56907 process
[root@sbc-13 ~]# kamcmd core.ps | tail
56903
tcp receiver (generic) child=4
56904
tcp receiver (generic) child=5
56905
tcp receiver (generic) child=6
56906
tcp receiver (generic) child=7
56907
tcp main process
Then using gdb I have created core file and back trace here
(gdb) bt full
#0 0x00007f07d3639b1e in recv () from /lib64/libpthread.so.0
No symbol table info available.
#1 0x0000000000528817 in tcpconn_read_haproxy (c=0x7f07ce98d930) at core/tcp_main.c:953
bytes = -1
retval = 0
size = 3454406656
port = 32519
p = 0x7ef068 ""
end = 0x7f07d335c4c0 <syslog> "\363\017\036\372H\201\354", <incomplete sequence \330>
src_ip = 0x6e9646 <qm_shm_malloc+103>
dst_ip = 0x7ffdb46a5c20
v2sig = "\r\n\r\n\000\r\nQUIT\n"
hdr = {v1 = {line = "\300\304\065\323\004", '\000' <repeats 11 times>, "\370ؘ\316\004\000\000\000t\251\205\000\000\000\000\000\320[j\264\375\177\000\000\004\331m\000\000\000\000\000\320[j\264\375\177\000\000\306\331m\000\000\000\000\000\b\342u\316\a\177\000\000\000\020\346\315\a\177\000\000\367b\201\000\000\000\000\000y\251\205\000n\004\000\000\260\235\201\000\000\000\000\000\247\227\201"}, v2 = {sig = "\300\304\065\323\004\000\000\000\000\000\000", ver_cmd = 0 '\000', fam = 0 '\000', len = 0, addr = {ip4 = {src_addr = 3466123512, dst_addr = 4, src_port = 43380, dst_port = 133}, ip6 = {src_addr = "\370ؘ\316\004\000\000\000t\251\205\000\000\000\000", dst_addr = "\320[j\264\375\177\000\000\004\331m\000\000\000\000", src_port = 23504, dst_port = 46186}, unx = {src_addr = "\370ؘ\316\004\000\000\000t\251\205\000\000\000\000\000\320[j\264\375\177\000\000\004\331m\000\000\000\000\000\320[j\264\375\177\000\000\306\331m\000\000\000\000\000\b\342u\316\a\177\000\000\000\020\346\315\a\177\000\000\367b\201\000\000\000\000\000y\251\205\000n\004\000\000\260\235\201\000\000\000\000\000\247\227\201\000\000\000\000\000\240C\000\000\000\000\000\000\000\020\346", <incomplete sequence \315>, dst_addr = "\a\177\000\000PB\226\316\a\177\000\000\000\020\346\315\a\177\000\000\274\332\366\312\a\177\000\000Ȧ\351͗\002\000\000\240\370\366\312\a\177\000\000S\342\366\312\a\177\000\000@\342u\316\a\177\000\000\000\000\000\000\000\000P\225\001\000\000\000\000\000\000\000\003\000\000\000\001\b\000\000\300[j\264\375\177\000\000R\031Q\000\000\000\000\000hV\313\322\a\177\000"}}}}
__func__ = "tcpconn_read_haproxy"
#2 0x0000000000529cc8 in tcpconn_new (sock=58, su=0x7ffdb46a5e00, local_addr=0x7f07d2cb5668, ba=0x7f07d2cb5610, type=2, state=3) at core/tcp_main.c:1164
c = 0x7f07ce98d930
rd_b_size = 16384
ret = 32519
__func__ = "tcpconn_new"
#3 0x000000000055194e in handle_new_connect (si=0x7f07d2cb5610) at core/tcp_main.c:4259
su = {s = {sa_family = 2, sa_data = ",\347\317\364n\357\000\000\000\000\000\000\000"}, sin = {sin_family = 2, sin_port = 59180, sin_addr = {s_addr = 4017026255}, sin_zero = "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 2, sin6_port = 59180, sin6_flowinfo = 4017026255, sin6_addr = {__in6_u = {__u6_addr8 = "\000\000\000\000\000\000\000\000h\360~\000\000\000\000", __u6_addr16 = {0, 0, 0, 0, 61544, 126, 0, 0}, __u6_addr32 = {0, 0, 8319080, 0}}}, sin6_scope_id = 67108864}, sas = {ss_family = 2, __ss_padding = ",\347\317\364n\357\000\000\000\000\000\000\000\000h\360~\000\000\000\000\000\000\000\000\004\000\000\000\000\360^j\264\375\177\000\000\023FU\000\000\000\000\000\230_\226\316\a\177\000\000\300\225\264\000\000\000\000\000\377\377\377\377\001\000\000\000\060٘\316\a\177\000\000\060٘\316\a\177\000\000\377\377\377\377\377\377\377\377\220^j\264\375\177\000\000\000\000\000\000\000\000\000\000\300\304\065\323\a\177\000", __ss_align = 8235966803625406752}}
sock_name = {s = {sa_family = 23984, sa_data = "j\264\375\177\000\000\376\023Q\000\000\000\000"}, sin = {sin_family = 23984, sin_port = 46186, sin_addr = {s_addr = 32765}, sin_zero = "\376\023Q\000\000\000\000"}, sin6 = {sin6_family = 23984, sin6_port = 46186, sin6_flowinfo = 32765, sin6_addr = {__in6_u = {__u6_addr8 = "\376\023Q\000\000\000\000\000\310ژ\316\a\177\000", __u6_addr16 = {5118, 81, 0, 0, 56008, 52888, 32519, 0}, __u6_addr32 = {5313534, 0, 3466123976, 32519}}}, sin6_scope_id = 3454980136}, sas = {ss_family = 23984, __ss_padding = "j\264\375\177\000\000\376\023Q\000\000\000\000\000\310ژ\316\a\177\000\000(\320\356\315\a\177\000\000\030\"\202\000_\376\377\377\000\000\000\000\000\000\000\000\300\304\065\323\a\177\000\000\363:\204\000\000\000\000\000h\360~\000\000\000\000\000\000\000\000\004\000\000\000\000\360^j\264\375\177\000\000\245\222T\000\000\000\000\000\250\376\314\322\002\000\000\000\000\000\000\000\002\000\000\000Dc\201\000\241\001\000", __ss_align = 0}}
sock_name_len = 32519
dst_su = 0x7f07d2cb5668
tcpconn = 0xffffffffce98d930
su_len = 16
new_sock = 58
__func__ = "handle_new_connect"
#4 0x0000000000554aac in handle_io (fm=0x7f07d2ccfa40, ev=1, idx=-1) at core/tcp_main.c:4517
ret = 16
__func__ = "handle_io"
#5 0x000000000051c992 in io_wait_loop_epoll (h=0xaf4760 <io_h>, t=5, repeat=0) at core/io_wait.h:1070
n = 1
r = 0
fm = 0x7f07d2ccfa40
revents = 1
__func__ = "io_wait_loop_epoll"
#6 0x000000000055800f in tcp_main_loop () at core/tcp_main.c:4808
si = 0x0
r = 8
__func__ = "tcp_main_loop"
#7 0x000000000042ca0b in main_loop () at main.c:1788
i = 8
pid = 0
si = 0x0
si_desc = "udp receiver child=7 sock=[2600:1f1c:6d5:aa03::7]:5060\000\061\063\062:5060)\000\000\000\000\060\000\000\000\370bj\264\375\177\000\000\020bj\264\375\177\000\000\000\334ѯ\344lFߨE\201\000\000\000\000\000\250\203\306\322k\000\000\000\363:\204\000\000\000\000\000\270o\305\322\a\177\000"
nrprocs = 8
woneinit = 1
__func__ = "main_loop"
#8 0x0000000000435d95 in main (argc=11, argv=0x7ffdb46a6828) at main.c:2895
cfg_stream = 0x156b2a0
c = -1
r = 0
tmp = 0x7ffdb46a6ee3 ""
tmp_len = 0
port = 0
proto = 0
ahost = 0x0
aport = 0
options = 0x7f1e48 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:"
ret = -1
seed = 2702569434
rfd = 4
debug_save = 0
debug_flag = 0
dont_fork_cnt = 2
n_lst = 0x7ffdb46d4268
p = 0x7f07d3309db5 <handle_intel.constprop+197> "H\205\300uf\213t$\004M\215\216\244"
st = {st_dev = 24, st_ino = 19244, st_nlink = 2, st_mode = 16832, st_uid = 985, st_gid = 979, __pad0 = 0, st_rdev = 0, st_size = 40, st_blksize = 4096, st_blocks = 0, st_atim = {tv_sec = 1613817804, tv_nsec = 114998890}, st_mtim = {tv_sec = 1614768566, tv_nsec = 7667804}, st_ctim = {tv_sec = 1614768566, tv_nsec = 7667804}, __glibc_reserved = {0, 0, 0}}
tbuf = '\000' <repeats 16 times>, "\214\344'\323\a\177\000\000\330Am\264\375\177\000\000\000\000\000\000 ", '\000' <repeats 19 times>, "X_\376\323\a\177\000\000\000ej\264\375\177\000\000\a\000\000\000\000\000\000\000\020U\376\323\a\177\000\000\a\000\000\000\t\000\000\000\210\351 \324\a\177\000\000\034\021\377\323\a\177\000\000\000\000\000\000\000\000\000\000H\037\377\323\a\177\000\000Pdj\264\375\177\000\000\260\344'\323\a\177\000\000\300\363\037\324\a\177\000\000\000\000\000\000\000\000\000\000\320dj\264\375\177", '\000' <repeats 26 times>, "\200W\376\323\a\177\000\000"...
option_index = 12
long_options = {{name = 0x7f4056 "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x7ef4e1 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x7f405b "alias", has_arg = 1, flag = 0x0, val = 1024}, {name = 0x7f4061 "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x7f4067 "substdef", has_arg = 1, flag = 0x0, val = 1026}, {name = 0x7f4070 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x7f407a "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x7f4084 "loadmodule", has_arg = 1, flag = 0x0, val = 1029}, {name = 0x7f408f "modparam", has_arg = 1, flag = 0x0, val = 1030}, {name = 0x7f4098 "log-engine", has_arg = 1, flag = 0x0, val = 1031}, {name = 0x7f40a3 "debug", has_arg = 1, flag = 0x0, val = 1032}, {name = 0x7f40a9 "cfg-print", has_arg = 0, flag = 0x0, val = 1033}, {name = 0x7f40b3 "atexit", has_arg = 1, flag = 0x0, val = 1034}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}}
__func__ = "main"
We use enabled haproxy protocol.
Loos as Kamailio does not properly close the socket when the socket closed on the client-side.
kamailio -v
[root@sbc-13 ~]# kamailio -v
version: kamailio 5.5.0-dev4 (x86_64/linux) 06656a
flags: USE_TCP, USE_TLS, USE_SCTP, TLS_HOOKS, USE_RAW_SOCKS, DISABLE_NAGLE, USE_MCAST, DNS_IP_HACK, SHM_MMAP, PKG_MALLOC, Q_MALLOC, F_MALLOC, TLSF_MALLOC, DBG_SR_MEMORY, USE_FUTEX, FAST_LOCK-ADAPTIVE_WAIT, USE_DNS_CACHE, USE_DNS_FAILOVER, USE_NAPTR, USE_DST_BLOCKLIST, HAVE_RESOLV_RES, TLS_PTHREAD_MUTEX_SHARED
ADAPTIVE_WAIT_LOOPS 1024, MAX_RECV_BUFFER_SIZE 262144, MAX_URI_SIZE 1024, BUF_SIZE 65535, DEFAULT PKG_SIZE 8MB
poll method support: poll, epoll_lt, epoll_et, sigio_rt, select.
id: 06656a
compiled on 04:01:58 Mar 1 2021 with gcc 8.3.1
[root@sbc-13 ~]# cat /etc/os-release
NAME="CentOS Linux"
VERSION="8"
ID="centos"
ID_LIKE="rhel fedora"
VERSION_ID="8"
PLATFORM_ID="platform:el8"
PRETTY_NAME="CentOS Linux 8"
ANSI_COLOR="0;31"
CPE_NAME="cpe:/o:centos:centos:8"
HOME_URL="https://centos.org/"
BUG_REPORT_URL="https://bugs.centos.org/"
CENTOS_MANTISBT_PROJECT="CentOS-8"
CENTOS_MANTISBT_PROJECT_VERSION="8"
—
You are receiving this because you are subscribed to this thread.
Reply to this email directly, view it on GitHub, or unsubscribe.