Hi,

 

you gave me an idea where to find the error.

We modified the ul_publish.c source in order to achieve the following results:

-          when client sends a REGISTER, an implicit PUBLISH is called so in each registration, publication should also happen

-          when tcp socket is lost, we want that the contacts of the lost user get a presence NOTIFY with open=0 state to show the unregistered (lost) state

 

Here is the ul_publish.c source, maybe the bug is in it. But in our dev system, it works like a charm:

 

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <libxml/parser.h>

#include <time.h>

 

#include "../../parser/parse_expires.h"

#include "../../parser/msg_parser.h"

#include "../../str.h"

#include "../../dset.h"

#include "../usrloc/usrloc.h"

#include "../usrloc/ul_callback.h"

#include "../../modules/tm/tm_load.h"

#include "../pua/pua.h"

#include "pua_usrloc.h"

 

#define BUF_LEN   256

#define REG_BY_PUBLISH

 

#ifdef REG_BY_PUBLISH

 

static char* device_buffer = NULL;

static char* state_buffer = NULL;

static char* content_buffer = NULL;

static void replace_string(char** dst, const char* source, int len)

{

                if (*dst)

                {

                               free(*dst);

                               *dst = NULL;

                }

               

                if (source)

                {             

                               *dst = malloc(len+1);

                               strncpy(*dst, source, len);

                               (*dst)[len] = 0;

                }

}

 

#endif

 

int pua_set_publish(struct sip_msg* msg , char* s1, char* s2)

{

#ifdef REG_BY_PUBLISH

                const char* first_delimiter = NULL;

                const char* second_delimiter = NULL;

                // Prepare place to store data

                replace_string(&device_buffer, NULL, 0);

                replace_string(&state_buffer, NULL, 0);

                replace_string(&content_buffer, NULL, 0);

               

                if (s1)

                {

                               first_delimiter = strchr(s1, ';');

                               if (first_delimiter)

                                               second_delimiter = strchr(first_delimiter+1, ';');

                              

                               if (first_delimiter)

                               {

                                               replace_string(&device_buffer, s1, first_delimiter - s1);

                                              

                                               if (second_delimiter)

                                               {

                                                               replace_string(&state_buffer, first_delimiter+1, second_delimiter - first_delimiter - 1);

                                                               replace_string(&content_buffer, second_delimiter+1, strlen(s1) - (second_delimiter - s1 + 1));

                                               }

                               }

                }

                if (strlen(s1) && (!device_buffer || !state_buffer || !content_buffer))

                {

                               LM_WARN("Failed to parse PresenceInReg header\n");

                               return 1;

                }

               

                if (device_buffer)

                               LM_DBG("Device: %s\n", device_buffer);

                if (state_buffer)

                               LM_DBG("State: %s\n", state_buffer);

                if (content_buffer)

                               LM_DBG("Content: %s\n", content_buffer);

#endif

                pua_ul_publish= 1;

                if(pua_ul_bmask!=0)

                               setbflag(0, pua_ul_bflag);

                return 1;

}

 

int pua_unset_publish(struct sip_msg* msg, unsigned int flags, void* param)

{

//            LM_ERR("unset send publish\n");

                pua_ul_publish= 0;

               

                if(pua_ul_bmask!=0)

                               resetbflag(0, pua_ul_bflag);

                return 1;

}

 

               

/* for debug purpose only */

void print_publ(publ_info_t* p)

{

                LM_DBG("publ:\n");

                LM_DBG("uri= %.*s\n", p->pres_uri->len, p->pres_uri->s);

                LM_DBG("id= %.*s\n", p->id.len, p->id.s);

                LM_DBG("expires= %d\n", p->expires);

}             

 

str* build_pidf(ucontact_t* c

#ifdef REG_BY_PUBLISH

                , int open, const char* tuple_id, const char* content

#endif

)

{

                xmlDocPtr  doc = NULL;

                xmlNodePtr root_node = NULL;

                xmlNodePtr tuple_node = NULL;

                xmlNodePtr status_node = NULL;

                xmlNodePtr basic_node = NULL;

#ifdef REG_BY_PUBLISH

                xmlNodePtr note_node = NULL;

#endif

                str *body= NULL;

                str pres_uri= {NULL, 0};

                char buf[BUF_LEN];

                char* at= NULL;

 

                if(c->expires< (int)time(NULL))

                {

                               LM_ERR("PUBLISH: found expired \n\n");

#ifdef REG_BY_PUBLISH

                               open = 0;

#else

                               return NULL;

#endif

                }

 

                pres_uri.s = buf;

                if(pres_prefix.s)

                {

                               memcpy(pres_uri.s, pres_prefix.s, pres_prefix.len);

                               pres_uri.len+= pres_prefix.len;

                               memcpy(pres_uri.s+ pres_uri.len, ":", 1);

                               pres_uri.len+= 1;

                }

                if(pres_uri.len + c->aor->len+ 1 > BUF_LEN)

                {

                               LM_ERR("buffer size overflown\n");

                               return NULL;

                }

 

                memcpy(pres_uri.s+ pres_uri.len, c->aor->s, c->aor->len);

                pres_uri.len+= c->aor->len;

 

                at = memchr(c->aor->s, '@', c->aor->len);

                if(!at)

                {

                               if(pres_uri.len + 2 + default_domain.len > BUF_LEN)

                               {

                                               LM_ERR("buffer size overflown\n");

                                               return NULL;

                               }

 

                               pres_uri.s[pres_uri.len++]= '@';

                               memcpy(pres_uri.s+ pres_uri.len, default_domain.s, default_domain.len);

                               pres_uri.len+= default_domain.len;                    

                }

                pres_uri.s[pres_uri.len]= '\0';

 

                /* create the Publish body  */

                doc = xmlNewDoc(BAD_CAST "1.0");

                if(doc==0)

                {

                               LM_ERR("Failed to create new xml\n");

                               return NULL;

                }

                root_node = xmlNewNode(NULL, BAD_CAST "presence");

                if(root_node==0)

                {

                               LM_ERR("Cannot obtain root node\n");

                               goto error;

                }

                xmlDocSetRootElement(doc, root_node);

 

                xmlNewProp(root_node, BAD_CAST "xmlns",

                                               BAD_CAST "urn:ietf:params:xml:ns:pidf");

                xmlNewProp(root_node, BAD_CAST "xmlns:dm",

                                               BAD_CAST "urn:ietf:params:xml:ns:pidf:data-model");

                xmlNewProp(root_node, BAD_CAST  "xmlns:rpid",

                                               BAD_CAST "urn:ietf:params:xml:ns:pidf:rpid" );

                xmlNewProp(root_node, BAD_CAST "xmlns:c",

                                               BAD_CAST "urn:ietf:params:xml:ns:pidf:cipid");

                xmlNewProp(root_node, BAD_CAST "entity", BAD_CAST pres_uri.s);

 

                tuple_node =xmlNewChild(root_node, NULL, BAD_CAST "tuple", NULL) ;

               

#ifdef REG_BY_PUBLISH

                // Override default tuple id

                xmlNewProp(tuple_node, BAD_CAST "id", BAD_CAST tuple_id);

#endif

 

                if( tuple_node ==NULL)

                {

                               LM_ERR("while adding child\n");

                               goto error;

                }

               

                status_node = xmlNewChild(tuple_node, NULL, BAD_CAST "status", NULL) ;

                if( status_node ==NULL)

                {

                               LM_ERR("while adding child\n");

                               goto error;

                }

               

#ifdef REG_BY_PUBLISH

                basic_node = xmlNewChild(status_node, NULL, BAD_CAST "basic",

                               open ? (BAD_CAST "open") : (BAD_CAST "close")) ;

#else

                basic_node = xmlNewChild(status_node, NULL, BAD_CAST "basic",

                               BAD_CAST "open");

#endif

 

                if( basic_node ==NULL)

                {

                               LM_ERR("while adding child\n");

                               goto error;

                }

               

#ifdef REG_BY_PUBLISH

                /* LM_ERR("Content is %s \n", content); */

               

                note_node = xmlNewChild(tuple_node, NULL, BAD_CAST "note", BAD_CAST content);

                if ( note_node == NULL)

                {

                               LM_ERR("while adding child\n");

                               goto error;

                }

#endif

 

                body = (str*)pkg_malloc(sizeof(str));

                if(body == NULL)

                {

                               LM_ERR("while allocating memory\n");

                               return NULL;

                }

                memset(body, 0, sizeof(str));

 

                xmlDocDumpFormatMemory(doc,(unsigned char**)(void*)&body->s,&body->len,1);

 

                LM_DBG("new_body:\n%.*s\n",body->len, body->s);

 

                /*free the document */

                xmlFreeDoc(doc);

                xmlCleanupParser();

 

                return body;

 

error:

                if(body)

                {

                               if(body->s)

                                               xmlFree(body->s);

                               pkg_free(body);

                }

                if(doc)

                               xmlFreeDoc(doc);

                return NULL;

}             

 

void ul_publish(ucontact_t* c, int type, void* param)

{

                str* body= NULL;

                str uri= {NULL, 0};

                char* at= NULL;

                publ_info_t* publ= NULL;

                int size= 0;

                str content_type;

                int error;

 

                content_type.s= "application/pidf+xml";

                content_type.len= 20;

 

#ifndef REG_BY_PUBLISH

                if(pua_ul_publish==0 && pua_ul_bmask==0)

                {

                               LM_INFO("should not send ul publish\n");

                               return;

                }

#endif

                if(pua_ul_bmask!=0 && (c->cflags & pua_ul_bmask)==0)

                {

                               LM_INFO("not marked for publish\n");

                               return;

                }

 

                if(type & UL_CONTACT_DELETE) {

                               LM_DBG("\nDELETE type\n");

                } else {

                               if(type & UL_CONTACT_INSERT) {

                                               LM_DBG("\nINSERT type\n");

                               } else {

                                               if(type & UL_CONTACT_UPDATE) {

                                                               LM_DBG("\nUPDATE type\n");

                                               } else {

                                                               if(type & UL_CONTACT_EXPIRE) {

                                                                              LM_DBG("\nEXPIRE type\n");

                                                               }

                                               }

                               }

                }

#ifdef REG_BY_PUBLISH

                int online = type & UL_CONTACT_INSERT || type & UL_CONTACT_UPDATE;

                if (online && state_buffer)

                               online = strcmp(state_buffer, "closed") != 0;

               

                body= build_pidf(c, online, device_buffer ? device_buffer : "device", content_buffer ? content_buffer : "");

                if(online && (body == NULL || body->s == NULL))

                               goto error;

#else

                if(type & UL_CONTACT_INSERT)

                {

                               body= build_pidf(c);

                               if(body == NULL || body->s == NULL)

                                               goto error;

                }

                else

                               body = NULL;

#endif

 

                uri.s = (char*)pkg_malloc(sizeof(char)*(c->aor->len+default_domain.len+6));

                if(uri.s == NULL)

                               goto error;

 

                memcpy(uri.s, "sip:", 4);

                uri.len = 4;

                memcpy(uri.s+ uri.len, c->aor->s, c->aor->len);

                uri.len+= c->aor->len;

                at = memchr(c->aor->s, '@', c->aor->len);

                if(!at)

                {

                               uri.s[uri.len++]= '@';

                               memcpy(uri.s+ uri.len, default_domain.s, default_domain.len);

                               uri.len+= default_domain.len;                

                }

                LM_DBG("uri= %.*s\n", uri.len, uri.s);

               

                size= sizeof(publ_info_t)+ sizeof(str)+( uri.len

                                               +c->callid.len+ 12 + content_type.len)*sizeof(char);

               

                if(body)

                               size+= sizeof(str)+ body->len* sizeof(char);

 

                publ= (publ_info_t*)pkg_malloc(size);

                if(publ== NULL)

                {

                               LM_ERR("no more share memory\n");

                               goto error;

                }

                memset(publ, 0, size);

                size= sizeof(publ_info_t);

 

                publ->pres_uri= (str*)((char*)publ + size);

                size+= sizeof(str);

                publ->pres_uri->s= (char*)publ+ size;

                memcpy(publ->pres_uri->s, uri.s, uri.len);

                publ->pres_uri->len= uri.len;

                size+= uri.len;

 

                if(body)

                {

                               publ->body= (str*)( (char*)publ + size);

                               size+= sizeof(str);

 

                               publ->body->s= (char*)publ + size;

                               memcpy(publ->body->s, body->s, body->len);

                               publ->body->len= body->len;

                               size+= body->len;

                }

                publ->id.s= (char*)publ+ size;

                memcpy(publ->id.s, "UL_PUBLISH.", 11);

                memcpy(publ->id.s+11, c->callid.s, c->callid.len);

                publ->id.len= 11+ c->callid.len;

                size+= publ->id.len;

 

                publ->content_type.s= (char*)publ+ size;

                memcpy(publ->content_type.s, content_type.s, content_type.len);

                publ->content_type.len= content_type.len;

                size+= content_type.len;

 

 

                if(type & UL_CONTACT_EXPIRE || type & UL_CONTACT_DELETE)

                               publ->expires= 0;

                else

#ifndef REG_BY_PUBLISH

                               publ->expires= c->expires - (int)time(NULL);

#else

                               publ->expires = 900;

#endif

 

                if(type & UL_CONTACT_INSERT)

                               publ->flag|= INSERT_TYPE;

                else

                               publ->flag|= UPDATE_TYPE;

 

                publ->source_flag|= UL_PUBLISH;

                publ->event|= PRESENCE_EVENT;

                publ->extra_headers= NULL;

                print_publ(publ);

                if((error=pua_send_publish(publ))< 0)

                {

#ifdef REG_BY_PUBLISH

                               LM_ERR("while sending publish\n");

#else

                               LM_ERR("while sending publish for ul event %d\n", type);

#endif

                               if(type & UL_CONTACT_UPDATE && error == ERR_PUBLISH_NO_BODY) {

                                               /* This error can occur if Kamailio was restarted/stopped and for any reason couldn't store a pua

                                               * entry in 'pua' DB table. It can also occur if 'pua' table is cleaned externally while Kamailio

                                               * is stopped so cannot retrieve these entries from DB when restarting.

                                               * In these cases, when a refresh registration for that user creates an UPDATE action in pua_usrloc,

                                               * pua 'ul_publish()' would fail since the appropiate entry doesn't exist in pua hast table ("New

                                                * PUBLISH and no body found - invalid request").

                                               * This code solves this problem by invoking an INSERT action if an UPDATE action failed due to the

                                                * above error. It will however generate a new presentity entry in the presence server (until the

                                               * previous one expires), but this is a minor issue. */

                                               LM_ERR("UPDATE action generated a PUBLISH without body -> invoking INSERT action\n");

                                               ul_publish(c, UL_CONTACT_INSERT, param);

                                               return;

                               }

                }

 

                pua_ul_publish= 0;

 

error:

 

                if(publ)

                               pkg_free(publ);

 

                if(body)

                {

                               if(body->s)

                                               xmlFree(body->s);

                               pkg_free(body);

                }

               

                if(uri.s)

                               pkg_free(uri.s);

                pua_ul_publish= 0;

 

                return;

 

}

 

Péter

 

From: Daniel-Constantin Mierla [mailto:miconda@gmail.com]
Sent: Monday, April 27, 2015 11:41 PM
To: Péter Barabás; Kamailio (SER) - Users Mailing List
Subject: Re: [SR-Users] kamailio crash periodically in timer handler

 

Hello,

got dragged by some other tasks for a while...

Can you list here all your loaded modules? Also, are you having event routes in your config?

At this moment it looks like either a buffer overflow somewhere or a free of an shared memory element without detaching from timer. I will need more details to narrow it down.

Cheers,
Daniel

On 23/04/15 17:54, Péter Barabás wrote:

Hi,

 

here it is:

 

$1 = {next = 0x0, prev = 0x7f64eb5b6bd0, expire = 1432560038, initial_timeout = 80, data = 0x7f64ebc3a888, f = 0x7f6510a2c165 <wait_handler>, flags = 513, slow_idx = 0}

 

Péter

 

From: sr-users [mailto:sr-users-bounces@lists.sip-router.org] On Behalf Of Daniel-Constantin Mierla
Sent: Thursday, April 23, 2015 5:07 PM
To: Kamailio (SER) - Users Mailing List
Subject: Re: [SR-Users] kamailio crash periodically in timer handler

 

Hello,

can you get from gdb the output of:

p *tl

Cheers,
Daniel

On 23/04/15 17:00, Péter Barabás wrote:

Hi,

 

we have a serious problem with Kamailio v4.2.3, it crashes after some days. We have checked the core dump and the gdb output is the following for bt and full backtrace:

 

#0  0x0000000000602291 in timer_list_expire (t=1432559962, h=0x7f64eb5b6bd0, slow_l=0x7f64eb5ba328, slow_mark=43873) at timer.c:877

#1  0x0000000000602746 in timer_handler () at timer.c:953

#2  0x0000000000602bb9 in timer_main () at timer.c:992

#3  0x00000000004a833c in main_loop () at main.c:1700

#4  0x00000000004ad857 in main (argc=13, argv=0x7fff96697d38) at main.c:2578

 

#0  0x0000000000602291 in timer_list_expire (t=1432559962, h=0x7f64eb5b6bd0, slow_l=0x7f64eb5ba328, slow_mark=43873) at timer.c:877

        tl = 0x7f64ebc3a908

        ret = 0

#1  0x0000000000602746 in timer_handler () at timer.c:953

        saved_ticks = 1432559962

        run_slow_timer = 0

        i = 865

        __FUNCTION__ = "timer_handler"

#2  0x0000000000602bb9 in timer_main () at timer.c:992

No locals.

#3  0x00000000004a833c in main_loop () at main.c:1700

        i = 4

        pid = 0

        si = 0x0

        si_desc = "udp receiver child=3 sock=127.0.0.1:5060\000\375\362\250/\313\345\v\004\000\000\000\000\000\000\000\004\257\263=\000\000\000\000\200TA\000\000\000\000\000\060}i\226\377\177", '\000' <repeats 19 times>, "{i\226\377\177\000\000\376\366R\000\000\000\000\000\377\377\377\377\377\177\000\000@\350\250\000\000\000\000"

        nrprocs = 4

        __FUNCTION__ = "main_loop"

#4  0x00000000004ad857 in main (argc=13, argv=0x7fff96697d38) at main.c:2578

        cfg_stream = 0x1b94010

        c = -1

        r = 0

        tmp = 0x7fff96699b17 ""

        tmp_len = 0

        port = 1

        proto = 32767

        options = 0x703e08 ":f:cm:M:dVIhEeb:l:L:n:vKrRDTN:W:w:t:u:g:P:G:SQ:O:a:A:"

        ret = -1

        seed = 2591880768

        rfd = 4

        debug_save = 0

        debug_flag = 0

        dont_fork_cnt = 0

        n_lst = 0x0

        p = 0x7fff96697c50 ""

        __FUNCTION__ = "main"

 

In the logs it can be seen:

 

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4635]: CRITICAL: <core> [pass_fd.c:293]: receive_fd(): EOF on 21

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: ALERT: <core> [main.c:784]: handle_sigs(): child process 4623 exited by a signal 11

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: ALERT: <core> [main.c:787]: handle_sigs(): core was generated

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [main.c:799]: handle_sigs(): terminating due to SIGCHLD

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4631]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4628]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4634]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4615]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4617]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4626]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4625]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4624]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4632]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4622]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4621]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4620]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4619]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4618]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4616]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4635]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4614]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4629]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4627]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4633]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4630]: INFO: <core> [main.c:850]: sig_usr(): signal 15 received

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [mem/f_malloc.c:599]: fm_free(): freeing a free fragment (0x7f64eca19950/0x7f64eca19988) - ignore

Apr 23 08:07:31 ctsip1 /usr/local/sbin/kamailio[4612]: INFO: <core> [mem/f_malloc.c:599]: fm_free(): freeing a free fragment (0x7f64ed587f20/0x7f64ed587f58) - ignore

 

Kamailio runs on Ubuntu trusty. The interesting thing that we run Kamailio 4.2.3 in an other environment (on Debian Squeeze) and it works perfect, no crashes. The main difference is that in Debian environment we have less number of users and the firewall is not so strength. But the configuration and the source is the same for both.

 

Please help us to solve this problem. Thanks.

 

Péter

_______________________________________________
SIP Express Router (SER) and Kamailio (OpenSER) - sr-users mailing list
sr-users@lists.sip-router.org
http://lists.sip-router.org/cgi-bin/mailman/listinfo/sr-users




-- 
Daniel-Constantin Mierla
http://twitter.com/#!/miconda - http://www.linkedin.com/in/miconda
Kamailio World Conference, May 27-29, 2015
Berlin, Germany - http://www.kamailioworld.com



-- 
Daniel-Constantin Mierla
http://twitter.com/#!/miconda - http://www.linkedin.com/in/miconda
Kamailio World Conference, May 27-29, 2015
Berlin, Germany - http://www.kamailioworld.com