[sr-dev] git:master: ims_charging: fixed deadlock when interim CCA timeout occurs

Carlos Ruiz Diaz carlos.ruizdiaz at gmail.com
Thu Oct 24 16:12:43 CEST 2013


Module: sip-router
Branch: master
Commit: fc4f2216f867b00a6685abdf51b8165572f24f69
URL:    http://git.sip-router.org/cgi-bin/gitweb.cgi/sip-router/?a=commit;h=fc4f2216f867b00a6685abdf51b8165572f24f69

Author: Carlos Ruiz Diaz <carlos.ruizdiaz at gmail.com>
Committer: Carlos Ruiz Diaz <carlos.ruizdiaz at gmail.com>
Date:   Thu Oct 24 11:05:17 2013 -0300

ims_charging: fixed deadlock when interim CCA timeout occurs

---

 modules/ims_charging/dialog.c          |    2 +
 modules/ims_charging/ims_ro.c          |   12 +++++++-
 modules/ims_charging/ro_session_hash.h |    4 +-
 modules/ims_charging/ro_timer.c        |   47 ++++++++++++++++++++-----------
 4 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/modules/ims_charging/dialog.c b/modules/ims_charging/dialog.c
index 3e0861e..1b2453b 100644
--- a/modules/ims_charging/dialog.c
+++ b/modules/ims_charging/dialog.c
@@ -39,7 +39,9 @@ void dlg_reply(struct dlg_cell *dlg, int type, struct dlg_cb_params *_params) {
 			LM_ERR("Ro Session object is NULL...... aborting\n");
 			return;
 		}
+
 		ro_session_entry = &(ro_session_table->entries[session->h_entry]);
+
 		ro_session_lock(ro_session_table, ro_session_entry);
 
 		if (session->active) {
diff --git a/modules/ims_charging/ims_ro.c b/modules/ims_charging/ims_ro.c
index 724c463..72a6fe5 100644
--- a/modules/ims_charging/ims_ro.c
+++ b/modules/ims_charging/ims_ro.c
@@ -642,6 +642,16 @@ error:
     	cdpb.AAASessionsUnlock(auth->hash);
     	cdpb.AAADropCCAccSession(auth);
     }
+
+    shm_free(i_req);
+    //
+    // since callback function will be never called because of the error, we need to release the lock on the session
+    // to it can be reused later.
+    //
+    struct ro_session_entry *ro_session_entry = &(ro_session_table->entries[ro_session->h_entry]);
+    unref_ro_session_unsafe(ro_session, 1, ro_session_entry);//unref from the initial timer that fired this event.
+    ro_session_unlock(ro_session_table, ro_session_entry);
+
     return;
 }
 
@@ -1029,7 +1039,7 @@ static void resume_on_initial_ccr(int is_timeout, void *param, AAAMessage *cca,
     if (is_timeout) {
         update_stat(ccr_timeouts, 1);
         LM_ERR("Transaction timeout - did not get CCA\n");
-	error_code =  RO_RETURN_ERROR;
+        error_code =  RO_RETURN_ERROR;
         goto error0;
     }
 
diff --git a/modules/ims_charging/ro_session_hash.h b/modules/ims_charging/ro_session_hash.h
index d57ef28..105bd37 100644
--- a/modules/ims_charging/ro_session_hash.h
+++ b/modules/ims_charging/ro_session_hash.h
@@ -78,7 +78,7 @@ extern struct ro_session_table *ro_session_table;
  * \param _entry locked entry
  */
 #define ro_session_lock(_table, _entry) \
-		lock_set_get( (_table)->locks, (_entry)->lock_idx);
+		{ LM_DBG("LOCKING %d", (_entry)->lock_idx); lock_set_get( (_table)->locks, (_entry)->lock_idx); LM_DBG("LOCKED %d", (_entry)->lock_idx);}
 
 
 /*!
@@ -87,7 +87,7 @@ extern struct ro_session_table *ro_session_table;
  * \param _entry locked entry
  */
 #define ro_session_unlock(_table, _entry) \
-		lock_set_release( (_table)->locks, (_entry)->lock_idx);
+		{ LM_DBG("UNLOCKING %d", (_entry)->lock_idx); lock_set_release( (_table)->locks, (_entry)->lock_idx); LM_DBG("UNLOCKED %d", (_entry)->lock_idx); }
 
 /*!
  * \brief Reference an ro_session without locking
diff --git a/modules/ims_charging/ro_timer.c b/modules/ims_charging/ro_timer.c
index 1340c3b..797b696 100644
--- a/modules/ims_charging/ro_timer.c
+++ b/modules/ims_charging/ro_timer.c
@@ -256,19 +256,22 @@ void resume_ro_session_ontimeout(struct interim_ccr *i_req) {
 	time_t now = time(0);
 	time_t used_secs;
 	struct ro_session_entry *ro_session_entry = NULL;
+	int call_terminated = 0;
 
 	if (!i_req) {
 		LM_ERR("This is so wrong: i_req is NULL\n");
 		return;
 	}
 
+	ro_session_entry = &(ro_session_table->entries[i_req->ro_session->h_entry]);
+
 	LM_DBG("credit=%d credit_valid_for=%d", i_req->new_credit, i_req->credit_valid_for);
 
 	used_secs = now - i_req->ro_session->last_event_timestamp;
 
 	/* check to make sure diameter server is giving us sane values */
 	if (i_req->new_credit > i_req->credit_valid_for) {
-		LM_WARN("That's weird, Diameter server gave us credit with a lower validity period :D. Setting reserved time to validity perioud instead \n");
+		LM_WARN("That's weird, Diameter server gave us credit with a lower validity period :D. Setting reserved time to validity period instead \n");
 		i_req->new_credit = i_req->credit_valid_for;
 	}
 
@@ -321,8 +324,21 @@ void resume_ro_session_ontimeout(struct interim_ccr *i_req) {
 		i_req->ro_session->event_type = no_more_credit;
 		int whatsleft = i_req->ro_session->reserved_secs - used_secs;
 		if (whatsleft <= 0) {
-			LM_WARN("Immediately killing call due to no more credit\n");
+			// TODO we need to handle this situation more precisely.
+			// in case CCR times out, we get a call shutdown but the error message assumes it was due to a lack of credit.
+			//
+			LM_WARN("Immediately killing call due to no more credit *OR* no CCA received (timeout) after reservation request\n");
+
+			//
+			// we need to unlock the session or else we might get a deadlock on dlg_terminated() dialog callback.
+			// Do not unref the session because it will be made inside the dlg_terminated() function.
+			//
+
+			//unref_ro_session_unsafe(i_req->ro_session, 1, ro_session_entry);
+			ro_session_unlock(ro_session_table, ro_session_entry);
+
 			dlgb.lookup_terminate_dlg(i_req->ro_session->dlg_h_entry, i_req->ro_session->dlg_h_id, NULL );
+			call_terminated = 1;
 		}
 		else {
 			LM_DBG("No more credit for user - letting call run out of money in [%i] seconds", whatsleft);
@@ -337,10 +353,13 @@ void resume_ro_session_ontimeout(struct interim_ccr *i_req) {
 		}
 	}
 
-	ro_session_entry = &(ro_session_table->entries[i_req->ro_session->h_entry]);
-
-	unref_ro_session_unsafe(i_req->ro_session, 1, ro_session_entry);//unref from the initial timer that fired this event.
-	ro_session_unlock(ro_session_table, ro_session_entry);
+	//
+	// if call was forcefully terminated, the lock was released before dlgb.lookup_terminate_dlg() function call.
+	//
+	if (!call_terminated) {
+		unref_ro_session_unsafe(i_req->ro_session, 1, ro_session_entry);//unref from the initial timer that fired this event.
+		ro_session_unlock(ro_session_table, ro_session_entry);
+	}
 
 	shm_free(i_req);
 	LM_DBG("Exiting async ccr interim nicely");
@@ -350,26 +369,21 @@ void resume_ro_session_ontimeout(struct interim_ccr *i_req) {
  * If we cant we need to put a new timer to kill the call at the appropriate time
  */
 void ro_session_ontimeout(struct ro_tl *tl) {
-	time_t now,  used_secs, call_time;
+	time_t now, used_secs, call_time;
 
 	LM_DBG("We have a fired timer [p=%p] and tl=[%i].\n", tl, tl->timeout);
 
 	/* find the session id for this timer*/
 	struct ro_session_entry *ro_session_entry = NULL;
-
-	struct ro_session* ro_session;
-	ro_session = ((struct ro_session*) ((char *) (tl)
-			- (unsigned long) (&((struct ro_session*) 0)->ro_tl)));
+	struct ro_session* ro_session = ((struct ro_session*) ((char *) (tl) - (unsigned long) (&((struct ro_session*) 0)->ro_tl)));
 
 	if (!ro_session) {
 		LM_ERR("Can't find a session. This is bad");
 		return;
 	}
 
-//	LM_ALERT("LOCKING... ");	
 	ro_session_entry = &(ro_session_table->entries[ro_session->h_entry]);
 	ro_session_lock(ro_session_table, ro_session_entry);
-//	LM_ALERT("LOCKED!");
 	
 	LM_DBG("event-type=%d", ro_session->event_type);
 	
@@ -428,7 +442,6 @@ void ro_session_ontimeout(struct ro_tl *tl) {
 				ref_ro_session_unsafe(ro_session, 1);
 			}
 			LM_ERR("Immediately killing call due to unknown error\n");
-			dlgb.lookup_terminate_dlg(ro_session->dlg_h_entry, ro_session->dlg_h_id, NULL );
 		}
 
 		break;
@@ -439,15 +452,15 @@ void ro_session_ontimeout(struct ro_tl *tl) {
 			LM_INFO("Call/session must be ended - no more funds.\n");
 		else if (ro_session->event_type == unknown_error)
 			LM_ERR("last event caused an error. We will now tear down this session.\n");
-
-		dlgb.lookup_terminate_dlg(ro_session->dlg_h_entry, ro_session->dlg_h_id, NULL );
 	}
 
+
 	update_stat(killed_calls, 1);
 
-	unref_ro_session_unsafe(ro_session, 1, ro_session_entry); //unref from the initial timer that fired this event.
+	//unref_ro_session_unsafe(ro_session, 1, ro_session_entry); //unref from the initial timer that fired this event.
 	ro_session_unlock(ro_session_table, ro_session_entry);
 
+	dlgb.lookup_terminate_dlg(ro_session->dlg_h_entry, ro_session->dlg_h_id, NULL);
 	return;
 }
 




More information about the sr-dev mailing list