Hi Daniel,
On my initial test in prod, I can see already discrepancies:
root@kamailio2:~# kamctl rpc dlg.stats_active
{
"jsonrpc": "2.0",
"result": {
"starting": 4,
"connecting": 55,
"answering": 2,
"ongoing": 253,
"all": 314
},
"id": 23360
}
root@kamailio2:~# kamctl rpc stats.get_statistics all | grep dialog
"dialog:active_dialogs = 18446744073709551604",
"dialog:early_dialogs = 15",
"dialog:expired_dialogs = 0",
"dialog:failed_dialogs = 120",
"dialog:processed_dialogs = 340",
root@kamailio2:~#
Now we can go past 9223372036854776000 ??? I don't understand that.
That was right after restarting (green active dialogs, yellow early dialogs):
Here is multiple runs of the stats.get_statistics:
root@kamailio2:~# kamctl rpc stats.get_statistics all | grep dialog
"dialog:active_dialogs = 18446744073709551614",
"dialog:early_dialogs = 8",
"dialog:expired_dialogs = 0",
"dialog:failed_dialogs = 315",
"dialog:processed_dialogs = 863",
root@kamailio2:~# kamctl rpc stats.get_statistics all | grep dialog
"dialog:active_dialogs = 18446744073709551614",
"dialog:early_dialogs = 8",
"dialog:expired_dialogs = 0",
"dialog:failed_dialogs = 315",
"dialog:processed_dialogs = 864",
root@kamailio2:~# kamctl rpc stats.get_statistics all | grep dialog
"dialog:active_dialogs = 18446744073709551614",
"dialog:early_dialogs = 8",
"dialog:expired_dialogs = 0",
"dialog:failed_dialogs = 315",
"dialog:processed_dialogs = 864",
root@kamailio2:~# kamctl rpc stats.get_statistics all | grep dialog
"dialog:active_dialogs = 18446744073709551613",
"dialog:early_dialogs = 10",
"dialog:expired_dialogs = 0",
"dialog:failed_dialogs = 320",
"dialog:processed_dialogs = 866",
root@kamailio2:~# kamctl rpc stats.get_statistics all | grep dialog
"dialog:active_dialogs = 0",
"dialog:early_dialogs = 8",
"dialog:expired_dialogs = 0",
"dialog:failed_dialogs = 322",
"dialog:processed_dialogs = 870",
root@kamailio2:~# kamctl rpc stats.get_statistics all | grep dialog
"dialog:active_dialogs = 1",
"dialog:early_dialogs = 9",
"dialog:expired_dialogs = 0",
"dialog:failed_dialogs = 323",
"dialog:processed_dialogs = 873",
root@kamailio2:~# kamctl rpc stats.get_statistics all | grep dialog
"dialog:active_dialogs = 18446744073709551615",
"dialog:early_dialogs = 9",
"dialog:expired_dialogs = 0",
"dialog:failed_dialogs = 324",
"dialog:processed_dialogs = 875",
root@kamailio2:~# kamctl rpc stats.get_statistics all | grep dialog
"dialog:active_dialogs = 18446744073709551615",
"dialog:early_dialogs = 10",
"dialog:expired_dialogs = 0",
"dialog:failed_dialogs = 325",
"dialog:processed_dialogs = 876",
root@kamailio2:~#
I tried to have a script gathering metrics:
root@kamailio2:~# cat dialogs.sh
#!/bin/bash
while true; do
DIALOGS=`kamctl rpc dlg.list | grep h_entry | sort | wc -l`
METRICS=`kamctl rpc stats.get_statistics all | grep -e "dialog:active" -e "dialog:early" | tr -d " " | tr -d "\n"`
NEWMETRICS=`kamctl rpc dlg.stats_active | jq -c .result`
echo "`date +'%Y-%m-%d %H:%M:%S.%N'` - dlg.list:$DIALOGS - stats.get_statistics:$METRICS - dlg.stats_active:$NEWMETRICS"
sleep 1
done
root@kamailio2:~#
But, after a little kamailio "starts slowing down" until it dies (had to completely reboot the server).
root@kamailio2:~# ./dialogs.sh
2018-07-13 10:57:14.204158716 - dlg.list:419 - stats.get_statistics:"dialog:active_dialogs=13","dialog:early_dialogs=12", - dlg.stats_active:{"starting":5,"connecting":62,"answering":7,"ongoing":288,"all":362}
2018-07-13 10:57:15.437165579 - dlg.list:422 - stats.get_statistics:"dialog:active_dialogs=15","dialog:early_dialogs=11", - dlg.stats_active:{"starting":4,"connecting":59,"answering":10,"ongoing":289,"all":362}
2018-07-13 10:57:16.678432892 - dlg.list:423 - stats.get_statistics:"dialog:active_dialogs=14","dialog:early_dialogs=9", - dlg.stats_active:{"starting":2,"connecting":58,"answering":9,"ongoing":289,"all":358}
2018-07-13 10:57:17.915458776 - dlg.list:422 - stats.get_statistics:"dialog:active_dialogs=13","dialog:early_dialogs=9", - dlg.stats_active:{"starting":4,"connecting":60,"answering":7,"ongoing":289,"all":360}
2018-07-13 10:57:19.149467641 - dlg.list:420 - stats.get_statistics:"dialog:active_dialogs=13","dialog:early_dialogs=8", - dlg.stats_active:{"starting":3,"connecting":60,"answering":5,"ongoing":290,"all":358}
2018-07-13 10:57:20.386457513 - dlg.list:415 - stats.get_statistics:"dialog:active_dialogs=14","dialog:early_dialogs=7", - dlg.stats_active:{"starting":4,"connecting":59,"answering":5,"ongoing":290,"all":358}
2018-07-13 10:57:21.627241266 - dlg.list:412 - stats.get_statistics:"dialog:active_dialogs=15","dialog:early_dialogs=7", - dlg.stats_active:{"starting":3,"connecting":60,"answering":6,"ongoing":292,"all":361}
2018-07-13 10:57:22.867957903 - dlg.list:415 - stats.get_statistics:"dialog:active_dialogs=14","dialog:early_dialogs=9", - dlg.stats_active:{"starting":2,"connecting":64,"answering":5,"ongoing":291,"all":362}
2018-07-13 10:57:24.099604068 - dlg.list:418 - stats.get_statistics:"dialog:active_dialogs=13","dialog:early_dialogs=10", - dlg.stats_active:{"starting":2,"connecting":65,"answering":3,"ongoing":294,"all":364}
2018-07-13 10:57:33.049706118 - dlg.list:411 - stats.get_statistics:"dialog:active_dialogs=8","dialog:early_dialogs=6", - dlg.stats_active:{"starting":4,"connecting":62,"answering":5,"ongoing":287,"all":358}
2018-07-13 10:57:34.290398926 - dlg.list:418 - stats.get_statistics:"dialog:active_dialogs=8","dialog:early_dialogs=6", - dlg.stats_active:{"starting":9,"connecting":62,"answering":5,"ongoing":287,"all":363}
2018-07-13 10:57:35.531092078 - dlg.list:419 - stats.get_statistics:"dialog:active_dialogs=8","dialog:early_dialogs=6", - dlg.stats_active:{"starting":9,"connecting":62,"answering":5,"ongoing":287,"all":363}
2018-07-13 10:57:36.762143019 - dlg.list:419 - stats.get_statistics:"dialog:active_dialogs=8","dialog:early_dialogs=6", - dlg.stats_active:{"starting":9,"connecting":62,"answering":5,"ongoing":287,"all":363}
2018-07-13 10:57:38.006519396 - dlg.list:420 - stats.get_statistics:"dialog:active_dialogs=8","dialog:early_dialogs=6", - dlg.stats_active:{"starting":10,"connecting":62,"answering":5,"ongoing":287,"all":364}
2018-07-13 10:57:39.254513350 - dlg.list:420 - stats.get_statistics:"dialog:active_dialogs=8","dialog:early_dialogs=6", - dlg.stats_active:{"starting":10,"connecting":62,"answering":5,"ongoing":287,"all":364}
2018-07-13 10:57:55.779414400 - dlg.list:431 - stats.get_statistics:"dialog:active_dialogs=2","dialog:early_dialogs=6", - dlg.stats_active:{"starting":8,"connecting":72,"answering":10,"ongoing":272,"all":362}
^C
root@kamailio2:~#
As you can see it starts taking >10s to reply some times, which I believe means it is dying, leaving a core and restarting:
root@kamailio2:/var/tmp# ls -lrt
total 47781984
-rw------- 1 root kamailio 8607031296 Jul 13 10:59 core.kamailio.24376.13cn4.1531504766
-rw------- 1 root kamailio 2128039936 Jul 13 11:01 core.kamailio.28422.13cn4.1531504860
-rw------- 1 root kamailio 2081087488 Jul 13 11:01 core.kamailio.28420.13cn4.1531504860
-rw------- 1 root kamailio 2136645632 Jul 13 11:01 core.kamailio.28421.13cn4.1531504860
-rw------- 1 root kamailio 2174545920 Jul 13 11:01 core.kamailio.28423.13cn4.1531504860
-rw------- 1 root kamailio 5265956864 Jul 13 11:07 core.kamailio.1335.13cn4.1531505030
-rw------- 1 root kamailio 5309550592 Jul 13 11:07 core.kamailio.1339.13cn4.1531505030
-rw------- 1 root kamailio 5294923776 Jul 13 11:07 core.kamailio.1336.13cn4.1531505030
-rw------- 1 root kamailio 5329113088 Jul 13 11:07 core.kamailio.1334.13cn4.1531505030
-rw------- 1 root kamailio 5338894336 Jul 13 11:07 core.kamailio.1338.13cn4.1531505030
-rw------- 1 root kamailio 5262925824 Jul 13 11:07 core.kamailio.1337.13cn4.1531505030
root@kamailio2:/var/tmp#
I'm grabbing the bt full
and info locals
from all and I'll add them here shortly. I will have to rollback until you have time to check all this.
—
You are receiving this because you are subscribed to this thread.
Reply to this email directly, view it on GitHub, or mute the thread.