Version: 5.3.3 `065668`
Platform: CentOS 7 (3.10.0-862.14.4.el7.x86_64)
I seem to have a persistent intermittent crash on all five gateways which use `http_async_client` to query an HTTP API.
In all cases, the crash is on attempt to access or reference an AVP called `$avp(is_subscribe)`:
```
Program terminated with signal 11, Segmentation fault.
#0 0x00000000005d479b in match_by_name (avp=0xabcdefed, id=45, name=0x7fff9379c668)
at core/usr_avp.c:378
378 if (id==avp->id && avp->flags&AVP_NAME_STR &&
hMissing separate debuginfos, use: debuginfo-install libuuid-2.23.2-52.el7_5.1.x86_64 nss-pem-1.0.3-4.el7.x86_64
(gdb) where
#0 0x00000000005d479b in match_by_name (avp=0xabcdefed, id=45, name=0x7fff9379c668)
at core/usr_avp.c:378
#1 0x00000000005d54bf in search_next_avp (s=0x7fff9379c660, val=0x7fff9379c630)
at core/usr_avp.c:499
#2 0x00000000005d4f04 in search_avp (ident=..., val=0x7fff9379c630, state=0x7fff9379c660)
at core/usr_avp.c:465
#3 0x00000000005d4913 in search_first_avp (flags=1, name=..., val=0x7fff9379c630,
s=0x7fff9379c660) at core/usr_avp.c:414
#4 0x00007f3f6ba529fd in ops_is_avp_set (msg=0xb04f60 <_faked_msg>, ap=0x7f3f73296ed8)
at avpops_impl.c:1821
#5 0x00007f3f6ba60d47 in w_is_avp_set (msg=0xb04f60 <_faked_msg>, param=0x7f3f73296ed8 "",
op=0x0) at avpops.c:1059
#6 0x0000000000480f3d in do_action (h=0x7fff9379ce70, a=0x7f3f732a8b30,
msg=0xb04f60 <_faked_msg>) at core/action.c:1077
#7 0x000000000048da7c in run_actions (h=0x7fff9379ce70, a=0x7f3f732a8b30,
msg=0xb04f60 <_faked_msg>) at core/action.c:1576
#8 0x000000000048e13d in run_actions_safe (h=0x7fff9379dae0, a=0x7f3f732a8b30,
msg=0xb04f60 <_faked_msg>) at core/action.c:1640
#9 0x0000000000438ff3 in rval_get_int (h=0x7fff9379dae0, msg=0xb04f60 <_faked_msg>,
i=0x7fff9379d318, rv=0x7f3f732a8d70, cache=0x0) at core/rvalue.c:915
#10 0x000000000043d5a3 in rval_expr_eval_int (h=0x7fff9379dae0, msg=0xb04f60 <_faked_msg>,
res=0x7fff9379d318, rve=0x7f3f732a8d68) at core/rvalue.c:1913
#11 0x000000000043d9f6 in rval_expr_eval_int (h=0x7fff9379dae0, msg=0xb04f60 <_faked_msg>,
res=0x7fff9379d7cc, rve=0x7f3f732a9598) at core/rvalue.c:1921
#12 0x000000000048097b in do_action (h=0x7fff9379dae0, a=0x7f3f732aa9c0,
msg=0xb04f60 <_faked_msg>) at core/action.c:1047
#13 0x000000000048da7c in run_actions (h=0x7fff9379dae0, a=0x7f3f732aa9c0,
msg=0xb04f60 <_faked_msg>) at core/action.c:1576
#14 0x000000000048e205 in run_top_route (a=0x7f3f732aa9c0, msg=0xb04f60 <_faked_msg>, c=0x0)
at core/action.c:1661
#15 0x00007f3f705f7d0e in async_http_cb (reply=0x7f3f2b7ee620, param=0x7f3f2b7eca18)
at async_http.c:256
#16 0x00007f3f705f0dab in check_multi_info (g=0x7f3f2abeb9e8) at http_multi.c:597
#17 0x00007f3f705e84ec in event_cb (fd=6, kind=2, userp=0x1d7cdf0) at http_multi.c:145
#18 0x00007f3f70131a14 in event_process_active_single_queue (activeq=0x1c6f820, base=0x1d3ad90)
at event.c:1350
#19 event_process_active (base=<optimized out>) at event.c:1420
#20 event_base_loop (base=0x1d3ad90, flags=0) at event.c:1621
#21 0x00007f3f705f490f in async_http_run_worker (worker=0x7f3f2abd1bb0) at async_http.c:92
#22 0x00007f3f705dcb17 in child_init (rank=0) at http_async_client_mod.c:366
#23 0x000000000057a810 in init_mod_child (m=0x7f3f73245168, rank=0) at core/sr_module.c:780
#24 0x000000000057a4ac in init_mod_child (m=0x7f3f73245d58, rank=0) at core/sr_module.c:776
#25 0x000000000057a4ac in init_mod_child (m=0x7f3f73246340, rank=0) at core/sr_module.c:776
#26 0x000000000057a4ac in init_mod_child (m=0x7f3f732467f8, rank=0) at core/sr_module.c:776
#27 0x000000000057a4ac in init_mod_child (m=0x7f3f73246e38, rank=0) at core/sr_module.c:776
#28 0x000000000057a4ac in init_mod_child (m=0x7f3f73247210, rank=0) at core/sr_module.c:776
#29 0x000000000057a4ac in init_mod_child (m=0x7f3f73247978, rank=0) at core/sr_module.c:776
#30 0x000000000057a4ac in init_mod_child (m=0x7f3f73247da0, rank=0) at core/sr_module.c:776
#31 0x000000000057af5a in init_child (rank=0) at core/sr_module.c:825
#32 0x0000000000426bc1 in main_loop () at main.c:1753
#33 0x000000000042e63a in main (argc=13, argv=0x7fff9379edb8) at main.c:2802
```
Most recently, it is here:
```
route {
$avp(is_subscribe) = 0;
...
$http_req(method) = 'GET';
$http_req(suspend) = 1;
$http_req(hdr) = "x-api-key: " + $sel(cfg_get.api.whitelist_key);
t_newtran();
http_async_query("https://some_host/check_ip_in_whi
telist?ip=$(si{s.escape.common})", "WHITELIST_CHECK");
}
...
route[WHITELIST_CHECK] {
if(!is_avp_set("$avp(is_subscribe)")) {
xlog("L_INFO", "[R-HTTP-REPLY:$ci] !> AVP is_subscribe is undefined -- cannot process further\n");
sl_send_reply("500", "Internal Server Error");
exit;
}
```
However, adding `is_avp_set()` was my attempt to put a safe-guard around unsafe access of this AVP. It didn't work:
```
(gdb) print *name
$1 = {s = 0x7f3f73261835 "is_subscribe)", len = 12}
(gdb) print avp
$2 = (avp_t *) 0xabcdefed
(gdb) print avp->id
Cannot access memory at address 0xabcdefed
```
Note that I am making the assumption above that AVPs created in the main request route will be available in the `http_async_client` callback, because it is just a suspended TM transaction underneath -- right? So, transaction-persistent variables such as AVP should be visible throughout the lifetime of the transaction just as for a conventional async TM transaction, I would think.
Digging further into the mesage being handled in `async_http_cb()`, it is always a normal HTTP `200 OK` reply:
```
(gdb) set print elements 0
(gdb) frame 15
#15 0x00007f3f705f7d0e in async_http_cb (reply=0x7f3f2b7ee620, param=0x7f3f2b7eca18)
at async_http.c:256
256 if (run_top_route(act, fmsg, 0)<0) {
(gdb) print *reply
$5 = {retcode = 200, result = 0x7f3f2b7ec560, error = '\000' <repeats 255 times>}
(gdb) print *(reply->result)
$6 = {
s = 0x7f3f2b7ee798 "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 16\r\nConnection: keep-alive\r\nDate: Wed, 15 Apr 2020 15:06:52 GMT\r\nx-amzn-RequestId: d3a44941-e0c4-4cb9-9144-ea54d929c300\r\nx-amz-apigw-id: LCJqBF7CIAMFXCw=\r\nX-Amzn-Trace-Id: Root=1-5e97230c-4fe1af7d072275d95c231a4c;Sampled=0\r\nX-Cache: Miss from cloudfront\r\nHia: 1.1 ade18dc841d2e1cc8ef49611c5d4c93e.cloudfront.net (CloudFront)\r\nX-Amz-Cf-Pop: IAD89-C2\r\nX-Amz-Cf-Id: 3s1S2kvOgL7L4ZKMlWwQnfIxHmWFGz0PHbcNFefYA1tZ1j0AGy3JHg==\r\n\r\n{\"status\":false}\300\300\300\300", len = 504}
```
Any insights would be appreciated! These crashes are happening basically hourly from nowhere, after over a year of undisturbed operation.
--
You are receiving this because you are subscribed to this thread.
Reply to this email directly or view it on GitHub:
https://github.com/kamailio/kamailio/issues/2286
@eschmidbauer I created a dedicated issue on tracker for troubleshooting related issues to in-memory presentity records, let's continue there:
* https://github.com/kamailio/kamailio/issues/2294
Attach there the output for `bt full` taken from core file with gdb. Also, add the parameters you set for presence* modules.
--
You are receiving this because you are subscribed to this thread.
Reply to this email directly or view it on GitHub:
https://github.com/kamailio/kamailio/commit/f917318c1e5fefa4106eaaf46123506…
here is a trace from the coredump:
```
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x00007f12607da547 in atomic_cmpxchg_int (var=0x8, old=0, new_v=1) at ../../core/mem/../atomic/atomic_x86.h:224
224 ATOMIC_FUNC_CMPXCHG(cmpxchg, "cmpxchgl %2, %1", int , int)
(gdb) up
#1 0x00007f12607da57b in futex_get (lock=0x8) at ../../core/mem/../futexlock.h:99
99 v=atomic_cmpxchg(lock, 0, 1); /* lock if 0 */
(gdb)
#2 0x00007f12607fe0d0 in update_db_subs_timer_dbnone (no_lock=0) at subscribe.c:2079
2079 lock_get(&subs_htable[i].lock);
(gdb) up
#3 0x00007f1260802121 in timer_db_update (ticks=61825161, param=0x0) at subscribe.c:2441
2441 update_db_subs_timer_dbnone(no_lock);
(gdb) up
#4 0x00005568ade05b2e in sr_wtimer_exec (ticks=61825161, param=0x0) at core/timer_proc.c:433
433 wt->f(ticks, wt->param);
(gdb) up
#5 0x00005568ade04b58 in fork_sync_timer (child_id=-1, desc=0x5568ae01a3a1 "secondary timer", make_sock=1, f=0x5568ade057a8 <sr_wtimer_exec>, param=0x0, interval=1000) at core/timer_proc.c:267
267 f(TICKS_TO_S(ts1), param); /* ticks in sec for compatibility with old
(gdb) up
#6 0x00005568ade05ece in sr_wtimer_start () at core/timer_proc.c:459
459 if(fork_sync_timer(-1 /*PROC_TIMER*/, "secondary timer", 1,
(gdb) up
#7 0x00005568adc05e24 in main_loop () at main.c:1452
1452 if(sr_wtimer_start()<0) {
(gdb) up
#8 0x00005568adc123e7 in main (argc=9, argv=0x7ffd9501a8a8) at main.c:2845
2845 ret=main_loop();
(gdb) up
```
--
You are receiving this because you are subscribed to this thread.
Reply to this email directly or view it on GitHub:
https://github.com/kamailio/kamailio/commit/f917318c1e5fefa4106eaaf46123506…