[Users] Memory Errors

Martin Klisch martin at campus-merseburg.de
Tue May 15 13:45:33 CEST 2007


Hi,

I use acc and uac. Here are the backtrace and frame info for the core dump.
I don't know much about debugging, but I hope these things are useful:

i bet it's too much info. :)

--------------------snip-backtrace

(gdb) backtrace
#0  0xff2c0888 in memcpy () from
/platform/SUNW,Sun-Fire-V210/lib/libc_psr.so.1
#1  0xfeb86948 in restore_from_reply (t=0xfc9fab20, type=2, p=0x0) at
from.c:574
#2  0xff02bb64 in run_trans_callbacks (type=2, trans=0xfc9eb7e8,
req=0x12c9cc, rpl=0xdfcd8, code=-56996184) at t_hooks.c:205
#3  0xff02ee2c in t_reply_matching (p_msg=0x140ac8, p_branch=0x3) at
t_lookup.c:842
#4  0xff02f550 in t_check (p_msg=0x140ac8, param_branch=0xffbffa7c) at
t_lookup.c:913
#5  0xff0388bc in reply_received (p_msg=0x12d000) at t_reply.c:1269
#6  0x00024dd4 in forward_reply (msg=0x140ac8) at forward.c:488
#7  0x00043b78 in receive_msg (buf=0x12d800 "", len=41462, rcv_info=0x1)
at receive.c:195
#8  0x0006f134 in udp_rcv_loop () at udp_server.c:465
#9  0x0003385c in main_loop () at main.c:834
#10 0x00035be4 in main (argc=3, argv=0xe7800) at main.c:1399



--------------snip frame

(gdb) frame 0
#0  0xff2c0888 in memcpy () from
/platform/SUNW,Sun-Fire-V210/lib/libc_psr.so.1
(gdb) list
579                             rpl->from->len,rpl->from->name.s);
580             l = del_lump( rpl, rpl->from->name.s-rpl->buf,
rpl->from->len, 0);
581             if (l==0) {
582                     LOG(L_ERR,"ERROR:uac:restore_from_reply: del lump
failed\n");
583                     return;
584             }
585
586             DBG("DBG:uac::restore_from_reply: inserting <%.*s>\n",
587                             new_val.len,new_val.s);
588             if (insert_new_lump_after( l, new_val.s, new_val.len,
0)==0) {
(gdb)
(gdb) frame 1
#1  0xfeb86948 in restore_from_reply (t=0xfc9fab20, type=2, p=0x0) at
from.c:574
574             memcpy( new_val.s, req->from->name.s, req->from->len);
(gdb) list
569             new_val.s = pkg_malloc( req->from->len );
570             if (p==0) {
571                     LOG(L_ERR,"ERROR:uac:restore_from_reply: no more
pkg mem\n");
572                     return;
573             }
574             memcpy( new_val.s, req->from->name.s, req->from->len);
575             new_val.len = req->from->len;
576
577
578             DBG("DBG:uac::restore_from_reply: removing <%.*s>\n",
(gdb)
(gdb) frame 2
#2  0xff02bb64 in run_trans_callbacks (type=2, trans=0xfc9eb7e8,
req=0x12c9cc, rpl=0xdfcd8, code=-56996184) at t_hooks.c:205
205                             cbp->callback( trans, type, &params );
(gdb) list
200             for (cbp=trans->tmcb_hl.first; cbp; cbp=cbp->next)  {
201                     if ( (cbp->types)&type ) {
202                             DBG("DBG: trans=%p, callback type %d, id
%d entered\n",
203                                     trans, type, cbp->id );
204                             params.param = &(cbp->param);
205                             cbp->callback( trans, type, &params );
206                     }
207             }
208             set_avp_list( backup );
209             params.extra1 = params.extra2 = 0;
(gdb)
(gdb) frame 3
#3  0xff02ee2c in t_reply_matching (p_msg=0x140ac8, p_branch=0x3) at
t_lookup.c:842
842                             run_trans_callbacks( TMCB_RESPONSE_IN, T,
T->uas.request, p_msg,
(gdb) list
837                             if (parse_headers(p_msg, HDR_TO_F, 0)==-1) {
838                                     LOG(L_ERR, "ERROR:
t_reply_matching: to parsing failed\n");
839                             }
840                     }
841                     if (!is_local(p_cell)) {
842                             run_trans_callbacks( TMCB_RESPONSE_IN, T,
T->uas.request, p_msg,
843                                     p_msg->REPLY_STATUS);
844                     }
845                     return 1;
846             } /* for cycle */
(gdb)
(gdb) frame 4
#4  0xff02f550 in t_check (p_msg=0x140ac8, param_branch=0xffbffa7c) at
t_lookup.c:913
913                             t_reply_matching( p_msg ,
(gdb) list
908                                                     LOG(L_ERR, "ERROR:
INVITE reply cannot be parsed\n");
909                                                     return -1;
910                                             }
911                             }
912
913                             t_reply_matching( p_msg ,
914                                    
param_branch!=0?param_branch:&local_branch );
915
916                     }
917     #ifdef EXTRA_DEBUG
(gdb) frame 5
#5  0xff0388bc in reply_received (p_msg=0x12d000) at t_reply.c:1269
1269            if (t_check(p_msg, &branch ) == -1) return 1;
(gdb) list
1264            struct ua_client *uac;
1265            struct cell *t;
1266            struct usr_avp **backup_list;
1267
1268            /* make sure we know the associated transaction ... */
1269            if (t_check(p_msg, &branch ) == -1) return 1;
1270
1271            /*... if there is none, tell the core router to fwd
statelessly */
1272            t = get_t();
1273            if ((t == 0) || (t == T_UNDEFINED)) return 1;
(gdb)
(gdb) frame 6
#6  0x00024dd4 in forward_reply (msg=0x140ac8) at forward.c:488
488                             if (mod->exports->response_f(msg)==0) goto
skip;
(gdb) list
483             /* quick hack, slower for multiple modules*/
484             for (mod=modules;mod;mod=mod->next){
485                     if ((mod->exports) && (mod->exports->response_f)){
486                             DBG("DEBUG:forward_reply: found module %s,
passing reply to it\n",
487                                             mod->exports->name);
488                             if (mod->exports->response_f(msg)==0) goto
skip;
489                     }
490             }
491
492             /* we have to forward the reply stateless, so we need
second via -bogdan*/
(gdb)
(gdb) frame 7
#7  0x00043b78 in receive_msg (buf=0x12d800 "", len=41462, rcv_info=0x1)
at receive.c:195
195                             forward_reply(msg);
(gdb) list
190                                     msg->REPLY_STATUS);
191                             update_stat( drp_rpls, 1);
192                             goto end; /* drop the message */
193                     } else {
194                             /* send the msg */
195                             forward_reply(msg);
196                             /* TODO - TX reply stat */
197                     }
198
199                     /* execute post reply-script callbacks */
(gdb)
(gdb) frame 8
#8  0x0006f134 in udp_rcv_loop () at udp_server.c:465
465                     receive_msg(buf, len, &ri);
(gdb) list
460                             continue;
461                     }
462
463
464                     /* receive_msg must free buf too!*/
465                     receive_msg(buf, len, &ri);
466
467             /* skip: do other stuff */
468
469             }
(gdb)
(gdb) frame 9
#9  0x0003385c in main_loop () at main.c:834
834                                             return udp_rcv_loop();
(gdb) list
829                                             bind_address=si; /*
shortcut */
830                                             if (init_child(chd_rank) <
0) {
831                                                     LOG(L_ERR,
"init_child failed\n");
832                                                     goto error;
833                                             }
834                                             return udp_rcv_loop();
835                                     }else{
836                                                    
pt[process_no].pid=pid; /*should be in shared mem.*/
837                                                    
snprintf(pt[process_no].desc, MAX_PT_DESC,
838                                                             "receiver
child=%d sock= %s:%s", i,
(gdb)
(gdb) frame 10
#10 0x00035be4 in main (argc=3, argv=0xe7800) at main.c:1399
1399            ret=main_loop();
(gdb) list
1394                                                    r);
1395                    goto error;
1396            };
1397
1398
1399            ret=main_loop();
1400
1401    error:
1402            /*kill everything*/
1403            kill_all_children(SIGTERM);
(gdb)


> Thanks Martin,
>
> it looks like the crash occurs in a function registered as callback to
> TM for the TMCB_RESPONSE_IN event.
>
> there are a couple of modules doing this - acc, osp, siptrace, uac - but I
> cannot figure out which one. Can you get more info from the debugger?
> gdb provides more info - like the corresponding files for the called
> functions...
>
> also which modules (from above) are you using?
>
> regards,
> bogdan
>
> Martin Klisch wrote:
>> Hi Bogdan-Andrei,
>>
>> here is the backtrace:
>> root at sip-b:/var/core # pstack
>> core_sip-b_openser_40002_40002_1178955651_10547
>> core 'core_sip-b_openser_40002_40002_1178955651_10547' of 10547:
>> /opt/SER/SER0/bin/openser -P /var/run/SER0.pid -f /opt/SER/SER0/etc/SE
>>  ff2c0888 memcpy   (fc9fab20, 2, 0, feb868c0, 2, 4a8) + 3c8
>>  ff02bb5c run_trans_callbacks (2, fc9eb7e8, 12c9cc, dfcd8, fc9a4ea8, 1)
>> + bc
>>  ff02ee24 t_reply_matching (140ac8, 3, e788c, 3, 0, 1) + 12e4
>>  ff02f548 t_check  (140ac8, ffbffa7c, 22c2e0, 3, 0, 2) + 268
>>  ff0388b4 reply_received (140ac8, 3, ff05df54, 0, ff0388a0, ec400) + 14
>>  00024dcc forward_reply (140ac8, 3, e0400, 0, fc804ae8, a1f7) + 10c
>>  00043b70 receive_msg (12d800, a1f6, 1, 0, 0, 13c4) + 6d0
>>  0006f12c udp_rcv_loop (fc890, fc800, e7800, fc800, 10c890, dfc00) + a4c
>>  00033854 main_loop (fc8040e8, 2500000, 0, e7800, 38, dfc00) + 5f4
>>  00035bdc main     (3, e7800, ffbffde4, 3, ffffffff, e7800) + 1cbc
>>  000174fc _start   (0, 0, 0, 0, 0, 0) + 5c
>>
>>
>>
>>> Hi Martin,
>>>
>>> once you spot the error again, please follow the wiki instructions and
>>> get the dump for pkg memory.
>>>
>>> changing from db_mode 3 to 1 should not affect the consumption of
>>> private memory (the location cache is kept in shared memory).
>>>
>>> If you still have the core file, please post the backtrace.
>>>
>>> Thanks and regards,
>>> Bogdan
>>>
>>> Martin Klisch wrote:
>>>
>>>> Hi,
>>>>
>>>> I cannot reproduce the errors; they only appear at random. In the last
>>>> 5 days they appeared two times. Five days ago I moved the daemon to
>>>> another Sun Fire with less RAM and switched from db_mode 3 to 1 - could
>>>> this be the reason? I have moved it back to the other machine now.
>>>>
>>>> On Saturday it dumped core after the memory errors. Would the core dump
>>>> file help?
>>>>
>>>> bye, martin
>>>>
>>>>
>>>>
>>>>> Martin,
>>>>>
>>>>> the errors refer to a potential problem related to private memory
>>>>> (not the shared one), so you need to send the signal (as explained on
>>>>> the wiki) to the process printing such errors (like 20106). Get and
>>>>> post the mem dump for pkg memory.
>>>>>
>>>>> regards,
>>>>> bogdan
>>>>>
>>>>> Klaus Darilion wrote:
>>>>>
>>>>>
>>>>>> http://openser.org/dokuwiki/doku.php/troubleshooting:memory
>>>>>>
>>>>>> Martin Klisch wrote:
>>>>>>
>>>>>>
>>>>>>> Hi,
>>>>>>>
>>>>>>> i have some memory errors after running OpenSER one day:
>>>>>>> May 15 02:46:56 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 381148
>>>>>>> local0.error] ERROR: build_req_buf_from_sip_req: out of memory
>>>>>>> May 15 02:46:56 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 933771
>>>>>>> local0.error] ERROR:tm:print_uac_request: no pkg_mem
>>>>>>> May 15 02:46:56 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 826633
>>>>>>> local0.error] ERROR:tm:t_forward_nonack: failure to add branches
>>>>>>> May 15 02:47:00 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 381148
>>>>>>> local0.error] ERROR: build_req_buf_from_sip_req: out of memory
>>>>>>> May 15 02:47:00 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 933771
>>>>>>> local0.error] ERROR:tm:print_uac_request: no pkg_mem
>>>>>>> May 15 02:47:00 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 826633
>>>>>>> local0.error] ERROR:tm:t_forward_nonack: failure to add branches
>>>>>>> May 15 02:47:03 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 381148
>>>>>>> local0.error] ERROR: build_req_buf_from_sip_req: out of memory
>>>>>>> May 15 02:47:03 sip-b /opt/SER/SER0/bin/openser[20106]: [ID 933771
>>>>>>> local0.error] ERROR:tm:print_uac_request: no pkg_mem
>>>>>>>
>>>>>>> Version: OpenSER 1.2.0
>>>>>>> OS: Solaris 10
>>>>>>> Arch: Sparc
>>>>>>>
>>>>>>> Is there a memory leak in any module? Or is my memory too low?
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> _______________________________________________
>>>>>>> Users mailing list
>>>>>>>
>>>>>>> http://openser.org/cgi-bin/mailman/listinfo/users
>>>>>>>
>>>>>>>
>>>>>> _______________________________________________
>>>>>> Users mailing list
>>>>>> Users at openser.org
>>>>>> http://openser.org/cgi-bin/mailman/listinfo/users
>>>>>>
>>>>>>
>>>>>>
>>>>
>>>>
>>>>
>>>
>>
>>
>>
>>
>
>






More information about the Users mailing list