Commit 761d42b1 authored by Jake Miles's avatar Jake Miles
Browse files

measures message in-flight time using in-memory hash table

parent 608b4f0d
......@@ -2,13 +2,59 @@ const _ = require('lodash')
const metrics = require('./metrics')
const inFlight = metrics.channelMessagesInFlight
const durations = metrics.channelMessageDuration
function launch(channel, recipient) {
inFlight.inc({ channel })
/*
A hash table of timers, one per recipient message sent.
Each key is channel+recipient, and each value is an array of stop-timer lambdas
provided by prom-client to record the duration of the timer.
Note that this could consume considerable memory
if many thousands of messages (message = channel+recipient)
are in flight at once.
A scalable alternative would be to store each channel+recipient's launch timestamp
in a data store, and retrieve it to record the duration at land time.
Using a data store for queued messages would also provide durability if used instead
of DBUS as signald's queue, so worth exploring overall.
*/
const timers = {}
function startTimer(channel, recipient) {
const key = channel+recipient
timers[key] = [...timers[key] || [],
durations.startTimer({ channel, recipient })]
}
function stopTimer(channel, recipient) {
const key = channel+recipient
// since these come from outside, let's not assume this can't happen
if (! timers[key])
console.error("Received message receipt for a message without a timer!")
else {
const stop = timers[key][0]
stop()
const remaining = timers[key].slice(1)
if (! remaining)
delete timers[key]
else
timers[key] = remaining
}
}
function launch(channel, recipient) {
inFlight.inc({ channel })
startTimer(channel, recipient)
}
function land(channel, recipient) {
function land(channel, recipient) {
inFlight.dec({ channel })
stopTimer(channel, recipient)
}
module.exports = {
......
......@@ -18,12 +18,20 @@ const channelMessagesInFlight = new prometheus.Gauge(register({
labelNames: ['channel']
}))
const channelMessageDuration = new prometheus.Gauge(register({
name: 'channel_message_duration',
help: 'An estimate of the duration of each channel+recipient message broadcast ' +
'from sending it to signald to receiving isReceipt: true from signald.',
labelNames: ['channel', 'recipient']
}))
module.exports = {
registry,
collectDefaults,
signaldMessages,
channelMessages,
channelMessagesInFlight
channelMessagesInFlight,
channelMessageDuration
}
......
......@@ -12,7 +12,7 @@ const {
const run = async (db, sock) => {
logger.log('--- Initializing Registrar...')
// metrics.collectDefaults()
metrics.collectDefaults()
logger.log('----- Starting collection of default prometheus metrics for Registrar service.')
logger.log(`----- Starting Registrar api server...`)
......
......@@ -23,7 +23,6 @@
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "The number of messages per channel that signalboost has sent but not yet received receipt of from signald. Average in flight messages per second over the last hour.",
"fieldConfig": {
"defaults": {
"custom": {}
......@@ -39,7 +38,7 @@
"y": 0
},
"hiddenSeries": false,
"id": 20,
"id": 22,
"legend": {
"avg": false,
"current": false,
......@@ -65,7 +64,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(channel_messages_in_flight[1h])",
"expr": "avg(channel_message_duration)",
"interval": "",
"legendFormat": "",
"refId": "A"
......@@ -75,7 +74,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "channel_messages_in_flight: avg / sec over last 1h",
"title": "channel_message_duration: avg ",
"tooltip": {
"shared": true,
"sort": 0,
......@@ -117,8 +116,8 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Purple is process heap in bytes. Lower yellow line is nodejs heap used.",
"datasource": "Prometheus",
"description": "Time at 1.00 is time app is up.",
"fieldConfig": {
"defaults": {
"custom": {}
......@@ -128,13 +127,13 @@
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"h": 9,
"w": 12,
"x": 12,
"y": 0
},
"hiddenSeries": false,
"id": 8,
"id": 2,
"legend": {
"avg": false,
"current": false,
......@@ -160,29 +159,17 @@
"steppedLine": false,
"targets": [
{
"expr": "nodejs_heap_size_used_bytes",
"expr": "up",
"interval": "",
"legendFormat": "",
"refId": "A"
},
{
"expr": "nodejs_heap_space_size_available_bytes",
"interval": "",
"legendFormat": "",
"refId": "B"
},
{
"expr": "process_heap_bytes",
"interval": "",
"legendFormat": "",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Heap available / used",
"title": "Uptime",
"tooltip": {
"shared": true,
"sort": 0,
......@@ -225,6 +212,7 @@
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "The number of messages per channel that signalboost has sent but not yet received receipt of from signald. Average in flight messages per second over the last hour.",
"fieldConfig": {
"defaults": {
"custom": {}
......@@ -240,7 +228,7 @@
"y": 8
},
"hiddenSeries": false,
"id": 18,
"id": 20,
"legend": {
"avg": false,
"current": false,
......@@ -266,7 +254,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(signald_message_count[1h]) ",
"expr": "rate(channel_messages_in_flight[1h])",
"interval": "",
"legendFormat": "",
"refId": "A"
......@@ -276,7 +264,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "signald_message_count (avg / sec over last 1h)",
"title": "channel_messages_in_flight: avg / sec over last 1h",
"tooltip": {
"shared": true,
"sort": 0,
......@@ -319,6 +307,7 @@
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Purple is process heap in bytes. Lower yellow line is nodejs heap used.",
"fieldConfig": {
"defaults": {
"custom": {}
......@@ -331,10 +320,10 @@
"h": 8,
"w": 12,
"x": 12,
"y": 8
"y": 9
},
"hiddenSeries": false,
"id": 6,
"id": 8,
"legend": {
"avg": false,
"current": false,
......@@ -360,23 +349,29 @@
"steppedLine": false,
"targets": [
{
"expr": "process_open_fds",
"expr": "nodejs_heap_size_used_bytes",
"interval": "",
"legendFormat": "",
"refId": "A"
},
{
"expr": "process_max_fds",
"expr": "nodejs_heap_space_size_available_bytes",
"interval": "",
"legendFormat": "",
"refId": "B"
},
{
"expr": "process_heap_bytes",
"interval": "",
"legendFormat": "",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "File descriptors: max / open",
"title": "Heap available / used",
"tooltip": {
"shared": true,
"sort": 0,
......@@ -434,7 +429,7 @@
"y": 16
},
"hiddenSeries": false,
"id": 16,
"id": 18,
"legend": {
"avg": false,
"current": false,
......@@ -460,7 +455,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(channel_message_count[1h])",
"expr": "rate(signald_message_count[1h]) ",
"interval": "",
"legendFormat": "",
"refId": "A"
......@@ -470,7 +465,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "channel_message_count (avg / sec over last 1h)",
"title": "signald_message_count (avg / sec over last 1h)",
"tooltip": {
"shared": true,
"sort": 0,
......@@ -525,10 +520,10 @@
"h": 8,
"w": 12,
"x": 12,
"y": 16
"y": 17
},
"hiddenSeries": false,
"id": 10,
"id": 6,
"legend": {
"avg": false,
"current": false,
......@@ -554,17 +549,23 @@
"steppedLine": false,
"targets": [
{
"expr": "nodejs_active_requests_total",
"expr": "process_open_fds",
"interval": "",
"legendFormat": "",
"refId": "A"
},
{
"expr": "process_max_fds",
"interval": "",
"legendFormat": "",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Process active requests",
"title": "File descriptors: max / open",
"tooltip": {
"shared": true,
"sort": 0,
......@@ -606,8 +607,7 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"description": "Time at 1.00 is time app is up.",
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {}
......@@ -617,13 +617,13 @@
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"hiddenSeries": false,
"id": 2,
"id": 16,
"legend": {
"avg": false,
"current": false,
......@@ -649,7 +649,7 @@
"steppedLine": false,
"targets": [
{
"expr": "up",
"expr": "rate(channel_message_count[1h])",
"interval": "",
"legendFormat": "",
"refId": "A"
......@@ -659,7 +659,101 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Uptime",
"title": "channel_message_count (avg / sec over last 1h)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 25
},
"hiddenSeries": false,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "nodejs_active_requests_total",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Process active requests",
"tooltip": {
"shared": true,
"sort": 0,
......@@ -714,7 +808,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 33
"y": 32
},
"hiddenSeries": false,
"id": 14,
......@@ -808,7 +902,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 41
"y": 40
},
"hiddenSeries": false,
"id": 12,
......@@ -912,5 +1006,5 @@
"timezone": "",
"title": "Signalboost node app",
"uid": "fV4wThmMk",
"version": 7
"version": 8
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment