Daniel-Constantin Mierla writes:
If you can reproduce it, watch what the timer processes do during that time frame. Get the list of processes with 'kamctl ps', then when the issue is exposed, grab the backtraces of all processes with:
kamctl trap
A file is created with the backtraces taken with gdb. Sent it over here to analyze, if you cannot spot an issue there.
Two traps were taken more than 10 sec apart when the number of transactions and shared memory usage started to grow. In both traps, process PID: 6049 DSC: timer was in the exactly same state (below).
Looks like it is stuck in an acc to db operation. How long is such an operation allowed to last before an error is generated and the timer process becomes free to do something else?
acc module db_insert_mode has value 2 and there is 2 async_workers (process numbers 6056 and 5057 below). db_mysql README does not mention if it supports async insert.
-- Juha
---start 6049 ----------------------------------------------------- [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". 0x00007f7dc30107e4 in __GI___poll (fds=0x7fff233831e8, nfds=1, timeout=2000) at ../sysdeps/unix/sysv/linux/poll.c:29 29 ../sysdeps/unix/sysv/linux/poll.c: No such file or directory. #0 0x00007f7dc30107e4 in __GI___poll (fds=0x7fff233831e8, nfds=1, timeout=2000) at ../sysdeps/unix/sysv/linux/poll.c:29 resultvar = 18446744073709551100 sc_ret = <optimized out> #1 0x00007f7dc2064efa in ?? () from /usr/lib/x86_64-linux-gnu/libmariadb.so.3 No symbol table info available. #2 0x00007f7dc20652da in ?? () from /usr/lib/x86_64-linux-gnu/libmariadb.so.3 No symbol table info available. #3 0x00007f7dc2071425 in mysql_get_parameters () from /usr/lib/x86_64-linux-gnu/libmariadb.so.3 No symbol table info available. #4 0x00007f7dc2071626 in mysql_get_parameters () from /usr/lib/x86_64-linux-gnu/libmariadb.so.3 No symbol table info available. #5 0x00007f7dc2067c1e in ?? () from /usr/lib/x86_64-linux-gnu/libmariadb.so.3 No symbol table info available. #6 0x00007f7dc2068654 in ?? () from /usr/lib/x86_64-linux-gnu/libmariadb.so.3 No symbol table info available. #7 0x00007f7dc206d4ed in ?? () from /usr/lib/x86_64-linux-gnu/libmariadb.so.3 No symbol table info available. #8 0x00007f7dc2070513 in ?? () from /usr/lib/x86_64-linux-gnu/libmariadb.so.3 No symbol table info available. #9 0x00007f7dc206f230 in mysql_ping () from /usr/lib/x86_64-linux-gnu/libmariadb.so.3 No symbol table info available. #10 0x00007f7dc20bbaba in db_mysql_submit_query (_h=0x7f7dc237f3a0, _s=0x7f7dc210e1e0 <sql_str>) at km_dbase.c:83 t = 1583739979 i = 0 code = 21923 __func__ = "db_mysql_submit_query" #11 0x00007f7dc20fb35e in db_do_submit_query (_h=0x7f7dc237f3a0, _query=0x7f7dc210e1e0 <sql_str>, submit_query=0x7f7dc20bb71d <db_mysql_submit_query>) at db_query.c:58 ret = -1039426148 tvb = {tv_sec = 94160937318932, tv_usec = 140178106086304} tve = {tv_sec = 200798118576, tv_usec = 1782411427864} tz = {tz_minuteswest = 24, tz_dsttime = 65147} tdiff = 32637 __func__ = "db_do_submit_query" #12 0x00007f7dc20fdd29 in db_do_insert_cmd (_h=0x7f7dc237f3a0, _k=0x7f7dc04f88a0 <db_keys>, _v=0x7f7dc04f8b80 <db_vals>, _n=24, val2str=0x7f7dc20b9d9c <db_mysql_val2str>, submit_query=0x7f7dc20bb71d <db_mysql_submit_query>, mode=0) at db_query.c:243 off = 804 ret = 415 __func__ = "db_do_insert_cmd" #13 0x00007f7dc20fe396 in db_do_insert (_h=0x7f7dc237f3a0, _k=0x7f7dc04f88a0 <db_keys>, _v=0x7f7dc04f8b80 <db_vals>, _n=24, val2str=0x7f7dc20b9d9c <db_mysql_val2str>, submit_query=0x7f7dc20bb71d <db_mysql_submit_query>) at db_query.c:258 No locals. #14 0x00007f7dc20c08a4 in db_mysql_insert (_h=0x7f7dc237f3a0, _k=0x7f7dc04f88a0 <db_keys>, _v=0x7f7dc04f8b80 <db_vals>, _n=24) at km_dbase.c:493 No locals. #15 0x00007f7dc07342a0 in db_cluster_insert (_h=0x7f7dc29bb650, _k=0x7f7dc04f88a0 <db_keys>, _v=0x7f7dc04f8b80 <db_vals>, _n=24) at dbcl_api.c:379 ret = -1 i = 5 k = 32637 sec = 36091306 rc = 0 rok = 0 j = 0 dbh = 0x7f7dc237f3a0 cls = 0x7f7dc239b980 __func__ = "db_cluster_insert" #16 0x00007f7dc04c18a0 in acc_db_request (rq=0x7f7d9e36d540) at acc.c:476 m = 24 n = -1640573632 i = 24 o = 17 t = 0x7f7d9e36e668 dtime = -4.6749039984044258e+154 __func__ = "acc_db_request" #17 0x00007f7dc04d4aca in acc_onreply (t=0x7f7d9e3b0650, req=0x7f7d9e357650, reply=0xffffffffffffffff, code=408) at acc_logic.c:604 new_uri_bk = {s = 0x7f7d9e36dc40 "sip:xxx@x.x.x.x:5060;transport=udp203.29INVITE sip:xxx@xxx.xxx SIP/2.0\r\nRecord-Route: sip:x.x.x.x;lr;did=851.8452\r\nVia: SIP/2.0/UDP x.x.x.x;branch"..., len = 50} br = 0 hdr = 0x7f7dc15f3c0f <msg_send_buffer+17006> cmsg = 0x7f7d9e36d540 cmsg_len = 6968 preq = 0x7f7d9e36d540 mstart = 0x7f7d9e36d540 mend = 0x7f7d9e36f078 __func__ = "acc_onreply" #18 0x00007f7dc04d5783 in tmcb_func (t=0x7f7d9e3b0650, type=512, ps=0x7fff23383a50) at acc_logic.c:684 __func__ = "tmcb_func" #19 0x00007f7dc15fff4d in run_trans_callbacks_internal (cb_lst=0x7f7d9e3b06c8, type=512, trans=0x7f7d9e3b0650, params=0x7fff23383a50) at t_hooks.c:254 cbp = 0x7f7d9e339720 backup_from = 0x55a38cc8dc90 <def_list+16> backup_to = 0x55a38cc8dc98 <def_list+24> backup_dom_from = 0x55a38cc8dca0 <def_list+32> backup_dom_to = 0x55a38cc8dca8 <def_list+40> backup_uri_from = 0x55a38cc8dc80 <def_list> backup_uri_to = 0x55a38cc8dc88 <def_list+8> backup_xavps = 0x55a38cce95c0 <_xavp_list_head> __func__ = "run_trans_callbacks_internal" #20 0x00007f7dc1600180 in run_trans_callbacks_with_buf (type=512, rbuf=0x7f7d9e3b0720, req=0x7f7d9e357650, repl=0xffffffffffffffff, flags=0) at t_hooks.c:297 params = {req = 0x7f7d9e357650, rpl = 0xffffffffffffffff, param = 0x7f7d9e339730, code = 408, flags = 0, branch = 0, t_rbuf = 0x7f7d9e3b0720, dst = 0x7f7d9e3b0770, send_buf = {s = 0x7f7d9e3cf530 "SIP/2.0 408 Request Timeout\r\nVia: SIP/2.0/UDP x.x.x.x;branch=z9hG4bKf819.22eaf74987b5f436d71e6e7ac8c4d607.0\r\nVia: SIP/2.0/UDP x.x.x.x:5060;rport=5060;branch=z9hG4bKac410218704\r\nFrom: "xxx"..., len = 489}} trans = 0x7f7d9e3b0650 #21 0x00007f7dc15b04f5 in relay_reply (t=0x7f7d9e3b0650, p_msg=0xffffffffffffffff, branch=0, msg_status=408, cancel_data=0x7fff23383c30, do_put_on_wait=0) at t_reply.c:2021 relay = 0 save_clone = 0 buf = 0x7f7dc271d010 "SIP/2.0 408 Request Timeout\r\nVia: SIP/2.0/UDP x.x.x.x;branch=z9hG4bKf819.22eaf74987b5f436d71e6e7ac8c4d607.0\r\nVia: SIP/2.0/UDP x.x.x.x:5060;rport=5060;branch=z9hG4bKac410218704\r\nFrom: "xxx"... res_len = 489 relayed_code = 408 relayed_msg = 0xffffffffffffffff reply_bak = 0x0 bm = {to_tag_val = {s = 0x7f7dc271d146 "f148edd50b878efc51a72f445b390303-e1110000\r\nCall-ID: 173781355593202084446@x.x.x.x\r\nCSeq: 1 INVITE\r\nServer: OpenSIPg SIP Proxy (5.3.2-6 (x86_64/linux))\r\nContent-Length: 0\r\n\r\n", len = 41}} totag_retr = 0 reply_status = RPS_COMPLETED uas_rb = 0x7f7d9e3b0720 to_tag = 0x7f7dc1677750 <tm_tag> reason = {s = 0x55a38cb96cc4 "Request Timeout", len = 15} onsend_params = {req = 0x0, rpl = 0x0, param = 0x0, code = 0, flags = 0, branch = 0, t_rbuf = 0x0, dst = 0x0, send_buf = {s = 0x7fff23383bc0 "\220<8#\377\177", len = -1}} ip = {af = 0, len = 0, u = {addrl = {xxx, xxx}, addr32 = {xxx, xxx, xxx, xxx}, addr16 = {xxx, xxx, xxx, xxx, xxx, xxx, xxx, 0}, addr = "xxx"}} __func__ = "relay_reply" #22 0x00007f7dc15ec23d in fake_reply (t=0x7f7d9e3b0650, branch=0, code=408) at timer.c:295 cancel_data = {cancel_bitmap = 0, reason = {cause = 0, u = {text = {s = 0x0, len = -1640298928}, e2e_cancel = 0x0, packed_hdrs = {s = 0x0, len = -1640298928}}}} do_cancel_branch = 1 reply_status = 32637 #23 0x00007f7dc15ec6dd in final_response_handler (r_buf=0x7f7d9e3b0878, t=0x7f7d9e3b0650) at timer.c:455 silent = 0 branch_ret = 12582912 prev_branch = 0 now = 0 #24 0x00007f7dc15ec78f in retr_buf_handler (ticks=577460907, tl=0x7f7d9e3b0898, p=0x3e8) at timer.c:511 rbuf = 0x7f7d9e3b0878 fr_remainder = 0 retr_remainder = 0 retr_interval = 0 new_retr_interval_ms = 0 crt_retr_interval_ms = 140177493637904 t = 0x7f7d9e3b0650 __func__ = "retr_buf_handler" #25 0x000055a38c85794f in timer_list_expire (t=577460907, h=0x7f7d9dbafb70, slow_l=0x7f7d9dbb0330, slow_mark=48237) at core/timer.c:857 tl = 0x7f7d9e3b0898 ret = 0 #26 0x000055a38c857df7 in timer_handler () at core/timer.c:922 saved_ticks = 577460907 run_slow_timer = 0 i = 109 __func__ = "timer_handler" #27 0x000055a38c8582c0 in timer_main () at core/timer.c:961 No locals. #28 0x000055a38c7bc306 in main_loop () at main.c:1743 i = 8 pid = 0 si = 0x0 si_desc = "udp receiver child=7 sock=xxx" nrprocs = 8 woneinit = 1 __func__ = "main_loop" #29 0x000055a38c7c4bb9 in main (argc=17, argv=0x7fff233843f8) at main.c:2802 cfg_stream = 0x55a38d35f260 c = -1 r = 0 tmp = 0x7fff23384f3f "" tmp_len = 17 port = 0 proto = 590889976 ahost = 0x0 aport = 0 options = 0x55a38cb3a618 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:" ret = -1 seed = 1148589326 rfd = 4 debug_save = 0 debug_flag = 0 dont_fork_cnt = 0 n_lst = 0x0 p = 0x0 st = {st_dev = 21, st_ino = 1930, st_nlink = 2, st_mode = 16832, st_uid = 109, st_gid = 115, __pad0 = 0, st_rdev = 0, st_size = 40, st_blksize = 4096, st_blocks = 0, st_atim = {tv_sec = 1572122046, tv_nsec = 75984014}, st_mtim = {tv_sec = 1583490964, tv_nsec = 420466802}, st_ctim = {tv_sec = 1583492513, tv_nsec = 260094288}, __glibc_reserved = {0, 0, 0}} tbuf = "\000\000\000\000\000\000\000\000\b\000\000\000\000\000\000\000\060\207\025\303}\177\000\000\000\000\000\000\000\000\000\000\240\221\070#\377\177\000\000\364\212\023\303}\177\000\000\b\000\000\000\000\000\000\000\230\222\070#\377\177\000\000\060\207\025\303}\177\000\000\230A8#\377\177\000\000\224A8#\377\177", '\000' <repeats 18 times>, "\230\222\070#\377\177\000\000\250\221\070#\377\177\000\000'.\n\303}\177\000\000&\260be\000\000\000\000\000C8#\377\177\000\000\300\212\225\001\000\000\000\000`B8#\377\177\000\000PB8#\377\177\000\000\230A8#\377\177\000\000X\207\025\303}\177\000\000\001", '\000' <repeats 31 times>... option_index = 0 long_options = {{name = 0x55a38cb3cfca "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x55a38cb37438 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x55a38cb3cfcf "alias", has_arg = 1, flag = 0x0, val = 1024}, {name = 0x55a38cb3cfd5 "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x55a38cb3cfdb "substdef", has_arg = 1, flag = 0x0, val = 1026}, {name = 0x55a38cb3cfe4 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x55a38cb3cfee "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}} __func__ = "main" [Inferior 1 (process 6049) detached] ---end 6049 -------------------------------------------------------
---start 6056 ----------------------------------------------------- [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". 0x00007f7dc30f4834 in __libc_recvfrom (fd=12, buf=0x7fff23383cd0, len=8, flags=0, addr=..., addrlen=0x0) at ../sysdeps/unix/sysv/linux/recvfrom.c:27 27 ../sysdeps/unix/sysv/linux/recvfrom.c: No such file or directory. #0 0x00007f7dc30f4834 in __libc_recvfrom (fd=12, buf=0x7fff23383cd0, len=8, flags=0, addr=..., addrlen=0x0) at ../sysdeps/unix/sysv/linux/recvfrom.c:27 resultvar = 18446744073709551104 sc_ret = <optimized out> #1 0x000055a38c986b74 in async_task_run (idx=1) at core/async_task.c:269 ptask = 0x2200000021 received = 1060615238 __func__ = "async_task_run" #2 0x000055a38c9859a8 in async_task_child_init (rank=0) at core/async_task.c:185 pid = 0 i = 0 __func__ = "async_task_child_init" #3 0x000055a38c8fd83d in init_child (rank=0) at core/sr_module.c:822 ret = 32 type = 0x55a38cb7d6cb "PROC_MAIN" __func__ = "init_child" #4 0x000055a38c7bc5e8 in main_loop () at main.c:1753 i = 8 pid = 6049 si = 0x0 si_desc = "udp receiver child=7 sock=xxx" nrprocs = 8 woneinit = 1 __func__ = "main_loop" #5 0x000055a38c7c4bb9 in main (argc=17, argv=0x7fff233843f8) at main.c:2802 cfg_stream = 0x55a38d35f260 c = -1 r = 0 tmp = 0x7fff23384f3f "" tmp_len = 17 port = 0 proto = 590889976 ahost = 0x0 aport = 0 options = 0x55a38cb3a618 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:" ret = -1 seed = 1148589326 rfd = 4 debug_save = 0 debug_flag = 0 dont_fork_cnt = 0 n_lst = 0x0 p = 0x0 st = {st_dev = 21, st_ino = 1930, st_nlink = 2, st_mode = 16832, st_uid = 109, st_gid = 115, __pad0 = 0, st_rdev = 0, st_size = 40, st_blksize = 4096, st_blocks = 0, st_atim = {tv_sec = 1572122046, tv_nsec = 75984014}, st_mtim = {tv_sec = 1583490964, tv_nsec = 420466802}, st_ctim = {tv_sec = 1583492513, tv_nsec = 260094288}, __glibc_reserved = {0, 0, 0}} tbuf = "\000\000\000\000\000\000\000\000\b\000\000\000\000\000\000\000\060\207\025\303}\177\000\000\000\000\000\000\000\000\000\000\240\221\070#\377\177\000\000\364\212\023\303}\177\000\000\b\000\000\000\000\000\000\000\230\222\070#\377\177\000\000\060\207\025\303}\177\000\000\230A8#\377\177\000\000\224A8#\377\177", '\000' <repeats 18 times>, "\230\222\070#\377\177\000\000\250\221\070#\377\177\000\000'.\n\303}\177\000\000&\260be\000\000\000\000\000C8#\377\177\000\000\300\212\225\001\000\000\000\000`B8#\377\177\000\000PB8#\377\177\000\000\230A8#\377\177\000\000X\207\025\303}\177\000\000\001", '\000' <repeats 31 times>... option_index = 0 long_options = {{name = 0x55a38cb3cfca "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x55a38cb37438 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x55a38cb3cfcf "alias", has_arg = 1, flag = 0x0, val = 1024}, {name = 0x55a38cb3cfd5 "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x55a38cb3cfdb "substdef", has_arg = 1, flag = 0x0, val = 1026}, {name = 0x55a38cb3cfe4 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x55a38cb3cfee "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}} __func__ = "main" [Inferior 1 (process 6056) detached] ---end 6056 -------------------------------------------------------
---start 6057 ----------------------------------------------------- [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". 0x00007f7dc30f4834 in __libc_recvfrom (fd=12, buf=0x7fff23383cd0, len=8, flags=0, addr=..., addrlen=0x0) at ../sysdeps/unix/sysv/linux/recvfrom.c:27 27 ../sysdeps/unix/sysv/linux/recvfrom.c: No such file or directory. #0 0x00007f7dc30f4834 in __libc_recvfrom (fd=12, buf=0x7fff23383cd0, len=8, flags=0, addr=..., addrlen=0x0) at ../sysdeps/unix/sysv/linux/recvfrom.c:27 resultvar = 18446744073709551104 sc_ret = <optimized out> #1 0x000055a38c986b74 in async_task_run (idx=2) at core/async_task.c:269 ptask = 0x2300000022 received = 1497148970 __func__ = "async_task_run" #2 0x000055a38c9859a8 in async_task_child_init (rank=0) at core/async_task.c:185 pid = 0 i = 1 __func__ = "async_task_child_init" #3 0x000055a38c8fd83d in init_child (rank=0) at core/sr_module.c:822 ret = 32 type = 0x55a38cb7d6cb "PROC_MAIN" __func__ = "init_child" #4 0x000055a38c7bc5e8 in main_loop () at main.c:1753 i = 8 pid = 6049 si = 0x0 si_desc = "udp receiver child=7 sock=xxx" nrprocs = 8 woneinit = 1 __func__ = "main_loop" #5 0x000055a38c7c4bb9 in main (argc=17, argv=0x7fff233843f8) at main.c:2802 cfg_stream = 0x55a38d35f260 c = -1 r = 0 tmp = 0x7fff23384f3f "" tmp_len = 17 port = 0 proto = 590889976 ahost = 0x0 aport = 0 options = 0x55a38cb3a618 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:x:X:Y:" ret = -1 seed = 1148589326 rfd = 4 debug_save = 0 debug_flag = 0 dont_fork_cnt = 0 n_lst = 0x0 p = 0x0 st = {st_dev = 21, st_ino = 1930, st_nlink = 2, st_mode = 16832, st_uid = 109, st_gid = 115, __pad0 = 0, st_rdev = 0, st_size = 40, st_blksize = 4096, st_blocks = 0, st_atim = {tv_sec = 1572122046, tv_nsec = 75984014}, st_mtim = {tv_sec = 1583490964, tv_nsec = 420466802}, st_ctim = {tv_sec = 1583492513, tv_nsec = 260094288}, __glibc_reserved = {0, 0, 0}} tbuf = "\000\000\000\000\000\000\000\000\b\000\000\000\000\000\000\000\060\207\025\303}\177\000\000\000\000\000\000\000\000\000\000\240\221\070#\377\177\000\000\364\212\023\303}\177\000\000\b\000\000\000\000\000\000\000\230\222\070#\377\177\000\000\060\207\025\303}\177\000\000\230A8#\377\177\000\000\224A8#\377\177", '\000' <repeats 18 times>, "\230\222\070#\377\177\000\000\250\221\070#\377\177\000\000'.\n\303}\177\000\000&\260be\000\000\000\000\000C8#\377\177\000\000\300\212\225\001\000\000\000\000`B8#\377\177\000\000PB8#\377\177\000\000\230A8#\377\177\000\000X\207\025\303}\177\000\000\001", '\000' <repeats 31 times>... option_index = 0 long_options = {{name = 0x55a38cb3cfca "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x55a38cb37438 "version", has_arg = 0, flag = 0x0, val = 118}, {name = 0x55a38cb3cfcf "alias", has_arg = 1, flag = 0x0, val = 1024}, {name = 0x55a38cb3cfd5 "subst", has_arg = 1, flag = 0x0, val = 1025}, {name = 0x55a38cb3cfdb "substdef", has_arg = 1, flag = 0x0, val = 1026}, {name = 0x55a38cb3cfe4 "substdefs", has_arg = 1, flag = 0x0, val = 1027}, {name = 0x55a38cb3cfee "server-id", has_arg = 1, flag = 0x0, val = 1028}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}} __func__ = "main" [Inferior 1 (process 6057) detached] ---end 6057 -------------------------------------------------------