Module: sip-router Branch: master Commit: b46c6f66e1aa64bc038d7495915c0fd646fcf8fe URL: http://git.sip-router.org/cgi-bin/gitweb.cgi/sip-router/?a=commit;h=b46c6f66...
Author: Daniel-Constantin Mierla miconda@gmail.com Committer: Daniel-Constantin Mierla miconda@gmail.com Date: Tue Apr 10 00:00:05 2012 +0200
db_cluster: if a db query fails, mark connection inactive
- default inactive time is 300sec - value can be changed via inactive_interval parameter
---
modules_k/db_cluster/db_cluster_mod.c | 3 ++ modules_k/db_cluster/dbcl_api.c | 44 ++++++++++++++++++++++++--------- modules_k/db_cluster/dbcl_data.c | 28 +++++++++++++++++++++ modules_k/db_cluster/dbcl_data.h | 7 ++++- 4 files changed, 69 insertions(+), 13 deletions(-)
diff --git a/modules_k/db_cluster/db_cluster_mod.c b/modules_k/db_cluster/db_cluster_mod.c index 12f642c..c0b78cb 100644 --- a/modules_k/db_cluster/db_cluster_mod.c +++ b/modules_k/db_cluster/db_cluster_mod.c @@ -36,6 +36,8 @@ int db_cluster_bind_api(db_func_t *dbb); int dbcl_con_param(modparam_t type, void *val); int dbcl_cls_param(modparam_t type, void *val);
+int dbcl_inactive_interval = 300; + /*! \brief * DB Cluster module interface */ @@ -50,6 +52,7 @@ static cmd_export_t cmds[] = { static param_export_t params[] = { {"connection", STR_PARAM|USE_FUNC_PARAM, (void*)dbcl_con_param}, {"cluster", STR_PARAM|USE_FUNC_PARAM, (void*)dbcl_cls_param}, + {"inactive_interval", INT_PARAM, &dbcl_inactive_interval}, {0, 0, 0} };
diff --git a/modules_k/db_cluster/dbcl_api.c b/modules_k/db_cluster/dbcl_api.c index 430ea98..9f79f35 100644 --- a/modules_k/db_cluster/dbcl_api.c +++ b/modules_k/db_cluster/dbcl_api.c @@ -47,7 +47,7 @@ db1_con_t *dbh=NULL;\ dbcl_cls_t *cls=NULL;\ cls = (dbcl_cls_t*)_h->tail;\ - ret = 0;\ + ret = -1;\ for(i=DBCL_PRIO_SIZE-1; i>0; i--)\ {\ switch(cls->rlist[i].mode) {\ @@ -55,8 +55,7 @@ case 'S':\ for(j=0; j<cls->rlist[i].clen; j++)\ {\ - if(cls->rlist[i].clist[j] != NULL && cls->rlist[i].clist[j]->flags!=0\ - && cls->rlist[i].clist[j]->dbh != NULL)\ + if(dbcl_valid_con(cls->rlist[i].clist[j])==0)\ {\ LM_DBG("serial operation - cluster [%.*s] (%d/%d)\n",\ cls->name.len, cls->name.s, i, j);\ @@ -65,6 +64,11 @@ if (ret==0) {\ cls->usedcon = cls->rlist[i].clist[j];\ return 0;\ + } else {\ + LM_DBG("serial operation - failre on cluster"\ + " [%.*s] (%d/%d)\n",\ + cls->name.len, cls->name.s, i, j);\ + dbcl_inactive_con(cls->rlist[i].clist[j]);\ }\ }\ }\ @@ -74,8 +78,7 @@ for(k=0; k<cls->rlist[i].clen; k++)\ {\ j = (process_no + k + cls->rlist[i].crt) % cls->rlist[i].clen;\ - if(cls->rlist[i].clist[j] != NULL && cls->rlist[i].clist[j]->flags!=0\ - && cls->rlist[i].clist[j]->dbh != NULL)\ + if(dbcl_valid_con(cls->rlist[i].clist[j])==0)\ {\ LM_DBG("round robin operation - cluster [%.*s] (%d/%d)\n",\ cls->name.len, cls->name.s, i, j);\ @@ -86,6 +89,11 @@ cls->usedcon = cls->rlist[i].clist[j];\ cls->rlist[i].crt = (j+1) % cls->rlist[i].clen;\ return 0;\ + } else {\ + LM_DBG("round robin operation - failre on cluster"\ + " [%.*s] (%d/%d)\n",\ + cls->name.len, cls->name.s, i, j);\ + dbcl_inactive_con(cls->rlist[i].clist[j]);\ }\ }\ }\ @@ -110,7 +118,7 @@ db1_con_t *dbh=NULL;\ dbcl_cls_t *cls=NULL;\ cls = (dbcl_cls_t*)_h->tail;\ - ret = 0;\ + ret = -1;\ rok = 0;\ rc = 0;\ for(i=DBCL_PRIO_SIZE-1; i>0; i--)\ @@ -120,8 +128,7 @@ case 'S':\ for(j=0; j<cls->wlist[i].clen; j++)\ {\ - if(cls->wlist[i].clist[j] != NULL && cls->wlist[i].clist[j]->flags!=0\ - && 
cls->wlist[i].clist[j]->dbh != NULL)\ + if(dbcl_valid_con(cls->wlist[i].clist[j])==0)\ {\ LM_DBG("serial operation - cluster [%.*s] (%d/%d)\n",\ cls->name.len, cls->name.s, i, j);\ @@ -130,6 +137,11 @@ if (ret==0) {\ cls->usedcon = cls->wlist[i].clist[j];\ return 0;\ + } else {\ + LM_DBG("serial operation - failure on cluster"\ + " [%.*s] (%d/%d)\n",\ + cls->name.len, cls->name.s, i, j);\ + dbcl_inactive_con(cls->wlist[i].clist[j]);\ }\ }\ }\ @@ -139,8 +151,7 @@ for(k=0; k<cls->wlist[i].clen; k++)\ {\ j = (process_no + k + cls->wlist[i].crt) % cls->wlist[i].clen;\ - if(cls->wlist[i].clist[j] != NULL && cls->wlist[i].clist[j]->flags!=0\ - && cls->wlist[i].clist[j]->dbh != NULL)\ + if(dbcl_valid_con(cls->wlist[i].clist[j])==0)\ {\ LM_DBG("round robin operation - cluster [%.*s] (%d/%d)\n",\ cls->name.len, cls->name.s, i, j);\ @@ -151,6 +162,11 @@ cls->usedcon = cls->wlist[i].clist[j];\ cls->wlist[i].crt = (j+1) % cls->wlist[i].clen;\ return 0;\ + } else {\ + LM_DBG("round robin operation - failure on cluster"\ + " [%.*s] (%d/%d)\n",\ + cls->name.len, cls->name.s, i, j);\ + dbcl_inactive_con(cls->wlist[i].clist[j]);\ }\ }\ }\ @@ -159,8 +175,7 @@ case 'P':\ for(j=0; j<cls->wlist[i].clen; j++)\ {\ - if(cls->wlist[i].clist[j] != NULL && cls->wlist[i].clist[j]->flags!=0\ - && cls->wlist[i].clist[j]->dbh != NULL)\ + if(dbcl_valid_con(cls->wlist[i].clist[j])==0)\ {\ LM_DBG("parallel operation - cluster [%.*s] (%d/%d)\n",\ cls->name.len, cls->name.s, i, j);\ @@ -169,6 +184,11 @@ if(rc==0) {\ cls->usedcon = cls->wlist[i].clist[j];\ rok = 1;\ + } else {\ + LM_DBG("parallel operation - failure on cluster"\ + " [%.*s] (%d/%d)\n",\ + cls->name.len, cls->name.s, i, j);\ + dbcl_inactive_con(cls->wlist[i].clist[j]);\ }\ ret |= rc;\ }\ diff --git a/modules_k/db_cluster/dbcl_data.c b/modules_k/db_cluster/dbcl_data.c index bb5d5fb..9533421 100644 --- a/modules_k/db_cluster/dbcl_data.c +++ b/modules_k/db_cluster/dbcl_data.c @@ -32,6 +32,7 @@ #include "../../dprint.h" #include 
"../../hashes.h" #include "../../trim.h" +#include "../../timer.h" #include "../../mem/mem.h" #include "../../mem/shm_mem.h"
@@ -123,6 +124,33 @@ int dbcl_init_con(str *name, str *url) return 0; }
+int dbcl_valid_con(dbcl_con_t *sc) +{ + if(sc==NULL || sc->flags!=0 || sc->dbh!=NULL) + return -1; + if(sc->sinfo==NULL) + return 0; + if(sc->sinfo->state & DBCL_CON_INACTIVE) + { + if(sc->sinfo->aticks>0 && sc->sinfo->aticks<get_ticks()) + return -1; + sc->sinfo->aticks = 0; + sc->sinfo->state &= ~DBCL_CON_INACTIVE; + } + return 0; +} + +extern int dbcl_inactive_interval; + +int dbcl_inactive_con(dbcl_con_t *sc) +{ + if(sc==NULL || sc->sinfo==NULL) + return -1; + sc->sinfo->aticks = get_ticks() + dbcl_inactive_interval; + sc->sinfo->state |= DBCL_CON_INACTIVE; + return 0; +} + int dbcl_parse_con_param(char *val) { str name; diff --git a/modules_k/db_cluster/dbcl_data.h b/modules_k/db_cluster/dbcl_data.h index cc7a64b..ffb2c9f 100644 --- a/modules_k/db_cluster/dbcl_data.h +++ b/modules_k/db_cluster/dbcl_data.h @@ -40,10 +40,12 @@ #define DBCL_PRIO_SIZE 10 #define DBCL_CLIST_SIZE 5
+#define DBCL_CON_INACTIVE 1 + typedef struct dbcl_shared { int state; - int count; + unsigned int aticks; } dbcl_shared_t;
typedef struct dbcl_con @@ -84,6 +86,9 @@ int dbcl_init_connections(dbcl_cls_t *cls); int dbcl_close_connections(dbcl_cls_t *cls); dbcl_cls_t *dbcl_get_cluster(str *name);
+int dbcl_valid_con(dbcl_con_t *sc); +int dbcl_inactive_con(dbcl_con_t *sc); + int dbcl_parse_con_param(char *val); int dbcl_parse_cls_param(char *val); #endif /* KM_DBASE_H */
daniel,
i tried with latest master and the result was that db_cluster queries do not work at all.
whatever the first module in .cfg file that uses mysql is, i get these kind of errors:
Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR: db_cluster [dbcl_api.c:294]: invalid mode #000 (0) Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR: <core> [db.c:389]: error in db_query Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR: <core> [db.c:428]: querying version for table dialplan Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR: dialplan [dp_db.c:106]: error during table version check. Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR: dialplan [dialplan.c:200]: could not initialize data Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR: <core> [sr_module.c:939]: init_mod(): Error while initializing module dialplan (/usr/lib/sip-proxy/modules/dialplan.so)
in the above it was dialplan.
mysql logging only shows Connect and nothing else.
-- juha
Hello,
did you have priority less than 9 or some priority with no connection? If yes, then try the last version from git master, I just pushed a fix.
If it was not that case, let me know.
Cheers, Daniel
On 4/10/12 7:46 PM, Juha Heinanen wrote:
daniel,
i tried with latest master and the result was that db_cluster queries do not work at all.
whatever the first module in .cfg file that uses mysql is, i get these kind of errors:
Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR: db_cluster [dbcl_api.c:294]: invalid mode #000 (0) Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR:<core> [db.c:389]: error in db_query Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR:<core> [db.c:428]: querying version for table dialplan Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR: dialplan [dp_db.c:106]: error during table version check. Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR: dialplan [dialplan.c:200]: could not initialize data Apr 10 20:41:39 sip /usr/sbin/sip-proxy[22399]: ERROR:<core> [sr_module.c:939]: init_mod(): Error while initializing module dialplan (/usr/lib/sip-proxy/modules/dialplan.so)
in the above it was dialplan.
mysql logging only shows Connect and nothing else.
-- juha
sr-dev mailing list sr-dev@lists.sip-router.org http://lists.sip-router.org/cgi-bin/mailman/listinfo/sr-dev
Daniel-Constantin Mierla writes:
did you have priority less than 9 or some priority with no connection? If yes, then try the last version from git master, I just pushed a fix.
i had in the test:
modparam("db_cluster", "cluster", "sip_proxy_cluster=>sip_proxy_con1=9s9s;sip_proxy_con2=8s8s")')
i'll try again.
-- juha
daniel,
db_cluster worked with latest master. i'll try tomorrow what happens when connections stop/start working.
-- juha
Juha Heinanen writes:
db_cluster worked with latest master. i'll try tomorrow what happens when connections stop/start working.
unfortunately in the working test last night my sip proxy was not configured to use db_cluster. now when i started tests again i noticed that db_cluster still does not work. it does not matter if priority 9 is used or not. no db queries are made with this db_cluster definition:
modparam("db_cluster", "cluster", "sip_proxy_cluster=>sip_proxy_con1=8s8s;sip_proxy_con2=7s7s")')
and sip proxy does not start because it cannot get table versions:
Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR: <core> [db.c:389]: error in db_query Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR: <core> [db.c:428]: querying version for table dialplan Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR: dialplan [dp_db.c:106]: error during table version check. Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR: dialplan [dialplan.c:200]: could not initialize data Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR: <core> [sr_module.c:939]: init_mod(): Error while initializing module dialplan (/usr/lib/sip-proxy/modules/dialplan.so)
-- juha
Hello,
the last commit should fix it, it was just pushed to master. An inverted condition was not properly done.
Cheers, Daniel
On 4/11/12 8:19 AM, Juha Heinanen wrote:
Juha Heinanen writes:
db_cluster worked with latest master. i'll try tomorrow what happens when connections stop/start working.
unfortunately in the working test last night my sip proxy was not configured to use db_cluster. now when i started tests again i noticed that db_cluster still does not work. it does not matter if priority 9 is used or not. no db queries are made with this db_cluster definition:
modparam("db_cluster", "cluster", "sip_proxy_cluster=>sip_proxy_con1=8s8s;sip_proxy_con2=7s7s")')
and sip proxy does not start because it cannot get table versions:
Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR:<core> [db.c:389]: error in db_query Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR:<core> [db.c:428]: querying version for table dialplan Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR: dialplan [dp_db.c:106]: error during table version check. Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR: dialplan [dialplan.c:200]: could not initialize data Apr 11 09:14:35 sip /usr/sbin/sip-proxy[16956]: ERROR:<core> [sr_module.c:939]: init_mod(): Error while initializing module dialplan (/usr/lib/sip-proxy/modules/dialplan.so)
-- juha
Daniel-Constantin Mierla writes:
the last commit should fix it, it was just pushed to master. An inverted condition was not properly done.
now sip proxy started using db_cluster, but i'm still getting errors to syslog when i stopped higher priority mysql server at 192.98.102.13:
Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1351]: ERROR: <core> [db_query.c:156]: error while submitting query Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1349]: ERROR: db_mysql [km_dbase.c:122]: driver error on query: Can't connect to MySQL server on '192.98.102.13' (115) Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1349]: ERROR: <core> [db_query.c:156]: error while submitting query Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1349]: ERROR: db_mysql [km_dbase.c:122]: driver error on query: Lost connection to MySQL server at 'reading initial communication packet', system error: 111 Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1349]: ERROR: <core> [db_query.c:127]: error while submitting query
sip proxy does work by using lower priority mysql server.
so it looks like disabling of higher priority server is not working.
i have not added inactive_interval parameter, so the default 300 should be in effect.
-- juha
Hello,
I hope it is fixed as part of my most recent commit. The condition to reactivate the connection after inactive_interval was wrong.
Thanks for testing, Daniel
On 4/11/12 12:46 PM, Juha Heinanen wrote:
Daniel-Constantin Mierla writes:
the last commit should fix it, it was just pushed to master. An inverted condition was not properly done.
now sip proxy started using db_cluster, but i'm still getting errors to syslog when i stopped higher priority mysql server at 192.98.102.13:
Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1351]: ERROR:<core> [db_query.c:156]: error while submitting query Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1349]: ERROR: db_mysql [km_dbase.c:122]: driver error on query: Can't connect to MySQL server on '192.98.102.13' (115) Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1349]: ERROR:<core> [db_query.c:156]: error while submitting query Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1349]: ERROR: db_mysql [km_dbase.c:122]: driver error on query: Lost connection to MySQL server at 'reading initial communication packet', system error: 111 Apr 11 13:38:54 sip /usr/sbin/sip-proxy[1349]: ERROR:<core> [db_query.c:127]: error while submitting query
sip proxy does work by using lower priority mysql server.
so it looks like disabling of higher priority server is not working.
i have not added inactive_interval parameter, so the default 300 should be in effect.
-- juha
Daniel-Constantin Mierla writes:
I hope it is fixed as part of my most recent commit. The condition to reactivate the connection after inactive_interval was wrong.
yes, now my tests show that db_cluster properly switches to lower priority server when higher priority dies and then after 5 minutes back to higher priority server when it is up again.
-- juha