[SR-Users] kamailio crash periodically in timer handler

Péter Barabás peter.barabas at arenim.com
Mon Apr 27 23:59:32 CEST 2015


Hi,

you gave me an idea where to find the error.
We modified the ul_publish.c source in order to achieve the following results:

-          when client sends a REGISTER, an implicit PUBLISH is called so in each registration, publication should also happen

-          when tcp socket is lost, we want that the contacts of the lost user get a presence NOTIFY with open=0 state to show the unregistered (lost) state

Here is the ul_publish.c source, maybe the bug is in it. But in our dev system, it works like a charm:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/parser.h>
#include <time.h>

#include "../../parser/parse_expires.h"
#include "../../parser/msg_parser.h"
#include "../../str.h"
#include "../../dset.h"
#include "../usrloc/usrloc.h"
#include "../usrloc/ul_callback.h"
#include "../../modules/tm/tm_load.h"
#include "../pua/pua.h"
#include "pua_usrloc.h"

#define BUF_LEN   256
#define REG_BY_PUBLISH

#ifdef REG_BY_PUBLISH

static char* device_buffer = NULL;
static char* state_buffer = NULL;
static char* content_buffer = NULL;
static void replace_string(char** dst, const char* source, int len)
{
                if (*dst)
                {
                               free(*dst);
                               *dst = NULL;
                }

                if (source)
                {
                               *dst = malloc(len+1);
                               strncpy(*dst, source, len);
                               (*dst)[len] = 0;
                }
}

#endif

int pua_set_publish(struct sip_msg* msg , char* s1, char* s2)
{
#ifdef REG_BY_PUBLISH
                const char* first_delimiter = NULL;
                const char* second_delimiter = NULL;
                // Prepare place to store data
                replace_string(&device_buffer, NULL, 0);
                replace_string(&state_buffer, NULL, 0);
                replace_string(&content_buffer, NULL, 0);

                if (s1)
                {
                               first_delimiter = strchr(s1, ';');
                               if (first_delimiter)
                                               second_delimiter = strchr(first_delimiter+1, ';');

                               if (first_delimiter)
                               {
                                               replace_string(&device_buffer, s1, first_delimiter - s1);

                                               if (second_delimiter)
                                               {
                                                               replace_string(&state_buffer, first_delimiter+1, second_delimiter - first_delimiter - 1);
                                                               replace_string(&content_buffer, second_delimiter+1, strlen(s1) - (second_delimiter - s1 + 1));
                                               }
                               }
                }
                if (strlen(s1) && (!device_buffer || !state_buffer || !content_buffer))
                {
                               LM_WARN("Failed to parse PresenceInReg header\n");
                               return 1;
                }

                if (device_buffer)
                               LM_DBG("Device: %s\n", device_buffer);
                if (state_buffer)
                               LM_DBG("State: %s\n", state_buffer);
                if (content_buffer)
                               LM_DBG("Content: %s\n", content_buffer);
#endif
                pua_ul_publish= 1;
                if(pua_ul_bmask!=0)
                               setbflag(0, pua_ul_bflag);
                return 1;
}

int pua_unset_publish(struct sip_msg* msg, unsigned int flags, void* param)
{
//            LM_ERR("unset send publish\n");
                pua_ul_publish= 0;

                if(pua_ul_bmask!=0)
                               resetbflag(0, pua_ul_bflag);
                return 1;
}


/* for debug purpose only */
void print_publ(publ_info_t* p)
{
                LM_DBG("publ:\n");
                LM_DBG("uri= %.*s\n", p->pres_uri->len, p->pres_uri->s);
                LM_DBG("id= %.*s\n", p->id.len, p->id.s);
                LM_DBG("expires= %d\n", p->expires);
}

str* build_pidf(ucontact_t* c
#ifdef REG_BY_PUBLISH
                , int open, const char* tuple_id, const char* content
#endif
)
{
                xmlDocPtr  doc = NULL;
                xmlNodePtr root_node = NULL;
                xmlNodePtr tuple_node = NULL;
                xmlNodePtr status_node = NULL;
                xmlNodePtr basic_node = NULL;
#ifdef REG_BY_PUBLISH
                xmlNodePtr note_node = NULL;
#endif
                str *body= NULL;
                str pres_uri= {NULL, 0};
                char buf[BUF_LEN];
                char* at= NULL;

                if(c->expires< (int)time(NULL))
                {
                               LM_ERR("PUBLISH: found expired \n\n");
#ifdef REG_BY_PUBLISH
                               open = 0;
#else
                               return NULL;
#endif
                }

                pres_uri.s = buf;
                if(pres_prefix.s)
                {
                               memcpy(pres_uri.s, pres_prefix.s, pres_prefix.len);
                               pres_uri.len+= pres_prefix.len;
                               memcpy(pres_uri.s+ pres_uri.len, ":", 1);
                               pres_uri.len+= 1;
                }
                if(pres_uri.len + c->aor->len+ 1 > BUF_LEN)
                {
                               LM_ERR("buffer size overflown\n");
                               return NULL;
                }

                memcpy(pres_uri.s+ pres_uri.len, c->aor->s, c->aor->len);
                pres_uri.len+= c->aor->len;

                at = memchr(c->aor->s, '@', c->aor->len);
                if(!at)
                {
                               if(pres_uri.len + 2 + default_domain.len > BUF_LEN)
                               {
                                               LM_ERR("buffer size overflown\n");
                                               return NULL;
                               }

                               pres_uri.s[pres_uri.len++]= '@';
                               memcpy(pres_uri.s+ pres_uri.len, default_domain.s, default_domain.len);
                               pres_uri.len+= default_domain.len;
                }
                pres_uri.s[pres_uri.len]= '\0';

                /* create the Publish body  */
                doc = xmlNewDoc(BAD_CAST "1.0");
                if(doc==0)
                {
                               LM_ERR("Failed to create new xml\n");
                               return NULL;
                }
                root_node = xmlNewNode(NULL, BAD_CAST "presence");
                if(root_node==0)
                {
                               LM_ERR("Cannot obtain root node\n");
                               goto error;
                }
                xmlDocSetRootElement(doc, root_node);

                xmlNewProp(root_node, BAD_CAST "xmlns",
                                               BAD_CAST "urn:ietf:params:xml:ns:pidf");
                xmlNewProp(root_node, BAD_CAST "xmlns:dm",
                                               BAD_CAST "urn:ietf:params:xml:ns:pidf:data-model");
                xmlNewProp(root_node, BAD_CAST  "xmlns:rpid",
                                               BAD_CAST "urn:ietf:params:xml:ns:pidf:rpid" );
                xmlNewProp(root_node, BAD_CAST "xmlns:c",
                                               BAD_CAST "urn:ietf:params:xml:ns:pidf:cipid");
                xmlNewProp(root_node, BAD_CAST "entity", BAD_CAST pres_uri.s);

                tuple_node =xmlNewChild(root_node, NULL, BAD_CAST "tuple", NULL) ;

#ifdef REG_BY_PUBLISH
                // Override default tuple id
                xmlNewProp(tuple_node, BAD_CAST "id", BAD_CAST tuple_id);
#endif

                if( tuple_node ==NULL)
                {
                               LM_ERR("while adding child\n");
                               goto error;
                }

                status_node = xmlNewChild(tuple_node, NULL, BAD_CAST "status", NULL) ;
                if( status_node ==NULL)
                {
                               LM_ERR("while adding child\n");
                               goto error;
                }

#ifdef REG_BY_PUBLISH
                basic_node = xmlNewChild(status_node, NULL, BAD_CAST "basic",
                               open ? (BAD_CAST "open") : (BAD_CAST "close")) ;
#else
                basic_node = xmlNewChild(status_node, NULL, BAD_CAST "basic",
                               BAD_CAST "open");
#endif

                if( basic_node ==NULL)
                {
                               LM_ERR("while adding child\n");
                               goto error;
                }

#ifdef REG_BY_PUBLISH
                /* LM_ERR("Content is %s \n", content); */

                note_node = xmlNewChild(tuple_node, NULL, BAD_CAST "note", BAD_CAST content);
                if ( note_node == NULL)
                {
                               LM_ERR("while adding child\n");
                               goto error;
                }
#endif

                body = (str*)pkg_malloc(sizeof(str));
                if(body == NULL)
                {
                               LM_ERR("while allocating memory\n");
                               return NULL;
                }
                memset(body, 0, sizeof(str));

                xmlDocDumpFormatMemory(doc,(unsigned char**)(void*)&body->s,&body->len,1);

                LM_DBG("new_body:\n%.*s\n",body->len, body->s);

                /*free the document */
                xmlFreeDoc(doc);
                xmlCleanupParser();

                return body;

error:
                if(body)
                {
                               if(body->s)
                                               xmlFree(body->s);
                               pkg_free(body);
                }
                if(doc)
                               xmlFreeDoc(doc);
                return NULL;
}

void ul_publish(ucontact_t* c, int type, void* param)
{
                str* body= NULL;
                str uri= {NULL, 0};
                char* at= NULL;
                publ_info_t* publ= NULL;
                int size= 0;
                str content_type;
                int error;

                content_type.s= "application/pidf+xml";
                content_type.len= 20;

#ifndef REG_BY_PUBLISH
                if(pua_ul_publish==0 && pua_ul_bmask==0)
                {
                               LM_INFO("should not send ul publish\n");
                               return;
                }
#endif
                if(pua_ul_bmask!=0 && (c->cflags & pua_ul_bmask)==0)
                {
                               LM_INFO("not marked for publish\n");
                               return;
                }

                if(type & UL_CONTACT_DELETE) {
                               LM_DBG("\nDELETE type\n");
                } else {
                               if(type & UL_CONTACT_INSERT) {
                                               LM_DBG("\nINSERT type\n");
                               } else {
                                               if(type & UL_CONTACT_UPDATE) {
                                                               LM_DBG("\nUPDATE type\n");
                                               } else {
                                                               if(type & UL_CONTACT_EXPIRE) {
                                                                              LM_DBG("\nEXPIRE type\n");
                                                               }
                                               }
                               }
                }
#ifdef REG_BY_PUBLISH
                int online = type & UL_CONTACT_INSERT || type & UL_CONTACT_UPDATE;
                if (online && state_buffer)
                               online = strcmp(state_buffer, "closed") != 0;

                body= build_pidf(c, online, device_buffer ? device_buffer : "device", content_buffer ? content_buffer : "");
                if(online && (body == NULL || body->s == NULL))
                               goto error;
#else
                if(type & UL_CONTACT_INSERT)
                {
                               body= build_pidf(c);
                               if(body == NULL || body->s == NULL)
                                               goto error;
                }
                else
                               body = NULL;
#endif

                uri.s = (char*)pkg_malloc(sizeof(char)*(c->aor->len+default_domain.len+6));
                if(uri.s == NULL)
                               goto error;

                memcpy(uri.s, "sip:", 4);
                uri.len = 4;
                memcpy(uri.s+ uri.len, c->aor->s, c->aor->len);
                uri.len+= c->aor->len;
                at = memchr(c->aor->s, '@', c->aor->len);
                if(!at)
                {
                               uri.s[uri.len++]= '@';
                               memcpy(uri.s+ uri.len, default_domain.s, default_domain.len);
                               uri.len+= default_domain.len;
                }
                LM_DBG("uri= %.*s\n", uri.len, uri.s);

                size= sizeof(publ_info_t)+ sizeof(str)+( uri.len
                                               +c->callid.len+ 12 + content_type.len)*sizeof(char);

                if(body)
                               size+= sizeof(str)+ body->len* sizeof(char);

                publ= (publ_info_t*)pkg_malloc(size);
                if(publ== NULL)
                {
                               LM_ERR("no more share memory\n");
                               goto error;
                }
                memset(publ, 0, size);
                size= sizeof(publ_info_t);

                publ->pres_uri= (str*)((char*)publ + size);
                size+= sizeof(str);
                publ->pres_uri->s= (char*)publ+ size;
                memcpy(publ->pres_uri->s, uri.s, uri.len);
                publ->pres_uri->len= uri.len;
                size+= uri.len;

                if(body)
                {
                               publ->body= (str*)( (char*)publ + size);
                               size+= sizeof(str);

                               publ->body->s= (char*)publ + size;
                               memcpy(publ->body->s, body->s, body->len);
                               publ->body->len= body->len;
                               size+= body->len;
                }
                publ->id.s= (char*)publ+ size;
                memcpy(publ->id.s, "UL_PUBLISH.", 11);
                memcpy(publ->id.s+11, c->callid.s, c->callid.len);
                publ->id.len= 11+ c->callid.len;
                size+= publ->id.len;

                publ->content_type.s= (char*)publ+ size;
                memcpy(publ->content_type.s, content_type.s, content_type.len);
                publ->content_type.len= content_type.len;
                size+= content_type.len;


                if(type & UL_CONTACT_EXPIRE || type & UL_CONTACT_DELETE)
                               publ->expires= 0;
                else
#ifndef REG_BY_PUBLISH
                               publ->expires= c->expires - (int)time(NULL);
#else
                               publ->expires = 900;
#endif

                if(type & UL_CONTACT_INSERT)
                               publ->flag|= INSERT_TYPE;
                else
                               publ->flag|= UPDATE_TYPE;

                publ->source_flag|= UL_PUBLISH;
                publ->event|= PRESENCE_EVENT;
                publ->extra_headers= NULL;
                print_publ(publ);
                if((error=pua_send_publish(publ))< 0)
                {
#ifdef REG_BY_PUBLISH
                               LM_ERR("while sending publish\n");
#else
                               LM_ERR("while sending publish for ul event %d\n", type);
#endif
                               if(type & UL_CONTACT_UPDATE && error == ERR_PUBLISH_NO_BODY) {
                                               /* This error can occur if Kamailio was restarted/stopped and for any reason couldn't store a pua
                                               * entry in 'pua' DB table. It can also occur if 'pua' table is cleaned externally while Kamailio
                                               * is stopped so cannot retrieve these entries from DB when restarting.
                                               * In these cases, when a refresh registration for that user creates an UPDATE action in pua_usrloc,
                                               * pua 'ul_publish()' would fail since the appropiate entry doesn't exist in pua hast table ("New
                                                * PUBLISH and no body found - invalid request").
                                               * This code solves this problem by invoking an INSERT action if an UPDATE action failed due to the
                                                * above error. It will however generate a new presentity entry in the presence server (until the
                                               * previous one expires), but this is a minor issue. */
                                               LM_ERR("UPDATE action generated a PUBLISH without body -> invoking INSERT action\n");
                                               ul_publish(c, UL_CONTACT_INSERT, param);
                                               return;
                               }
                }

                pua_ul_publish= 0;

error:

                if(publ)
                               pkg_free(publ);

                if(body)
                {
                               if(body->s)
                                               xmlFree(body->s);
                               pkg_free(body);
                }

                if(uri.s)
                               pkg_free(uri.s);
                pua_ul_publish= 0;

                return;

}

Péter

From: Daniel-Constantin Mierla [mailto:miconda at gmail.com]
Sent: Monday, April 27, 2015 11:41 PM
To: Péter Barabás; Kamailio (SER) - Users Mailing List
Subject: Re: [SR-Users] kamailio crash periodically in timer handler

Hello,

got dragged by some other tasks for a while...

Can you list here all your loaded modules? Also, are you having event routes in your config?

At this moment it looks like either a buffer overflow somewhere or a free of an shared memory element without detaching from timer. I will need more details to narrow it down.

Cheers,
Daniel
On 23/04/15 17:54, Péter Barabás wrote:
Hi,

here it is:

$1 = {next = 0x0, prev = 0x7f64eb5b6bd0, expire = 1432560038, initial_timeout = 80, data = 0x7f64ebc3a888, f = 0x7f6510a2c165 <wait_handler>, flags = 513, slow_idx = 0}

Péter

From: sr-users [mailto:sr-users-bounces at lists.sip-router.org] On Behalf Of Daniel-Constantin Mierla
Sent: Thursday, April 23, 2015 5:07 PM
To: Kamailio (SER) - Users Mailing List
Subject: Re: [SR-Users] kamailio crash periodically in timer handler

Hello,

can you get from gdb the output of:

p *tl

Cheers,
Daniel
On 23/04/15 17:00, Péter Barabás wrote:
Hi,

we have a serious problem with Kamailio v4.2.3, it crashes after some days. We have checked the core dump and the gdb output is the following for bt and full backtrace:

#0  0x0000000000602291 in timer_list_expire (t=1432559962, h=0x7f64eb5b6bd0, slow_l=0x7f64eb5ba328, slow_mark=43873) at timer.c:877
#1  0x0000000000602746 in timer_handler () at timer.c:953
#2  0x0000000000602bb9 in timer_main () at timer.c:992
#3  0x00000000004a833c in main_loop () at main.c:1700
#4  0x00000000004ad857 in main (argc=13, argv=0x7fff96697d38) at main.c:2578

#0  0x0000000000602291 in timer_list_expire (t=1432559962, h=0x7f64eb5b6bd0, slow_l=0x7f64eb5ba328, slow_mark=43873) at timer.c:877
        tl = 0x7f64ebc3a908
        ret = 0
#1  0x0000000000602746 in timer_handler () at timer.c:953
        saved_ticks = 1432559962
        run_slow_timer = 0
        i = 865
        __FUNCTION__ = "timer_handler"
#2  0x0000000000602bb9 in timer_main () at timer.c:992
No locals.
#3  0x00000000004a833c in main_loop () at main.c:1700
        i = 4
        pid = 0
        si = 0x0
        si_desc = "udp receiver child=3 sock=127.0.0.1:5060\000\375\362\250/\313\345\v\004\000\000\000\000\000\000\000\004\257\263=\000\000\000\000\200TA\000\000\000\000\000\060}i\226\377\177", '\000' <repeats 19 times>, "{i\226\377\177\000\000\376\366R\000\000\000\000\000\377\377\377\377\377\177\000\000@\350\250\000\000\000\000"
        nrprocs = 4
        __FUNCTION__ = "main_loop"
#4  0x00000000004ad857 in main (argc=13, argv=0x7fff96697d38) at main.c:2578
        cfg_stream = 0x1b94010
        c = -1
        r = 0
        tmp = 0x7fff96699b17 ""
        tmp_len = 0
        port = 1
        proto = 32767
        options = 0x703e08 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:"
        ret = -1
        seed = 2591880768
        rfd = 4
        debug_save = 0
        debug_flag = 0
        dont_fork_cnt = 0
        n_lst = 0x0
        p = 0x7fff96697c50 ""
        __FUNCTION__ = "main"

In the logs it can be seen:

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4635]: CRITICAL: <core> [pass_fd.c:293]: receive_fd(): EOF on 21
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: ALERT: <core> [main.c:784]: handle_sigs(): child process 4623 exited by a signal 11
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: ALERT: <core> [main.c:787]: handle_sigs(): core was generated
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [main.c:799]: handle_sigs(): terminating due to SIGCHLD
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4631]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4628]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4634]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4615]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4617]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4626]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4625]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4624]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4632]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4622]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4621]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4620]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4619]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4618]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4616]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4635]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4614]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4629]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4627]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4633]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4630]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [mem/f_malloc.c:599]: fm_free(): freeing a free fragment (0x7f64eca19950/0x7f64eca19988) - ignore
Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [mem/f_malloc.c:599]: fm_free(): freeing a free fragment (0x7f64ed587f20/0x7f64ed587f58) - ignore

Kamailio runs on Ubuntu trusty. The interesting thing that we run Kamailio 4.2.3 in an other environment (on Debian Squeeze) and it works perfect, no crashes. The main difference is that in Debian environment we have less number of users and the firewall is not so strength. But the configuration and the source is the same for both.

Please help us to solve this problem. Thanks.

Péter

_______________________________________________

SIP Express Router (SER) and Kamailio (OpenSER) - sr-users mailing list

sr-users at lists.sip-router.org<mailto:sr-users at lists.sip-router.org>

http://lists.sip-router.org/cgi-bin/mailman/listinfo/sr-users




--

Daniel-Constantin Mierla

http://twitter.com/#!/miconda<http://twitter.com/#%21/miconda> - http://www.linkedin.com/in/miconda

Kamailio World Conference, May 27-29, 2015

Berlin, Germany - http://www.kamailioworld.com



--

Daniel-Constantin Mierla

http://twitter.com/#!/miconda - http://www.linkedin.com/in/miconda

Kamailio World Conference, May 27-29, 2015

Berlin, Germany - http://www.kamailioworld.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.sip-router.org/pipermail/sr-users/attachments/20150427/6d7e2b3b/attachment.html>


More information about the sr-users mailing list