audit-log-pipeline-metrics.json
{ "id": null, "uid": "audit-log-pipeline-metrics", "title": "Audit Log Pipeline Metrics", "tags": [ "audit-logs", "pipeline", "observability" ], "timezone": "browser", "schemaVersion": 16, "version": 0, "refresh": "5s", "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "templating": { "list": [ { "name": "datasource", "type": "datasource", "label": "Data Source", "query": "prometheus", "refresh": 1, "hide": 0 }, { "name": "service", "type": "query", "label": "Service", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "query": "label_values(pending_events_published_total, exported_job)", "refresh": 2, "sort": 1, "multi": true, "includeAll": true, "allValue": ".*" } ] }, "panels": [ { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 100, "panels": [], "title": "Health", "type": "row" }, { "id": 1, "gridPos": { "x": 0, "y": 1, "w": 8, "h": 4 }, "type": "stat", "title": "Total Pending Events Published", "description": "Cumulative count of audit log messages written to the pending_events stream across all event sessions. These events are queued and waiting to be shuttled to the audit_logs stream.", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(pending_events_published_total{exported_job=~\"$service\"})", "legendFormat": "Pending Events Published", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" } } }, "options": { "textMode": "value", "colorMode": "value", "graphMode": "area", "orientation": "auto", "reduceOptions": { "values": false, "calcs": [ "lastNotNull" ] } } }, { "id": 2, "gridPos": { "x": 8, "y": 1, "w": 8, "h": 4 }, "type": "stat", "title": "Total Audit Events Published", "description": "Cumulative count of audit log messages successfully shuttled from the pending_events stream to the audit_logs stream. Represents completed audit log publications.", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(audit_logs_shuttled_total{exported_job=~\"$service\"})", "legendFormat": "Audit Events Published", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" } } }, "options": { "textMode": "value", "colorMode": "value", "graphMode": "area", "orientation": "auto", "reduceOptions": { "values": false, "calcs": [ "lastNotNull" ] } } }, { "id": 3, "gridPos": { "x": 16, "y": 1, "w": 8, "h": 4 }, "type": "stat", "title": "Current Audit Consumer Lag", "description": "Number of pending audit log messages waiting to be consumed by the audit consumer service. Higher values indicate the consumer is falling behind. 
Thresholds: green (<100), yellow (100-499), red (≥500).", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "audit_logs_consumer_lag{exported_job=~\"$service\"}", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "thresholds" }, "thresholds": { "mode": "absolute", "steps": [ { "value": null, "color": "green" }, { "value": 100, "color": "yellow" }, { "value": 500, "color": "red" } ] } } }, "options": { "textMode": "value", "colorMode": "background", "graphMode": "area", "orientation": "auto", "reduceOptions": { "values": false, "calcs": [ "lastNotNull" ] } } }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, "id": 101, "panels": [], "title": "Throughput", "type": "row" }, { "id": 4, "gridPos": { "x": 0, "y": 6, "w": 12, "h": 8 }, "type": "timeseries", "title": "Pending Events Published Rate", "description": "Rate of audit log messages being written to the pending_events stream per second. Shows the throughput of event generation across services.", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(pending_events_published_total{exported_job=~\"$service\"}[1m])", "legendFormat": "{{exported_job}} - pending published", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 5, "gridPos": { "x": 12, "y": 6, "w": 12, "h": 8 }, "type": "timeseries", "title": "Audit Events Published Rate", "description": "Rate of audit log messages being shuttled to the audit_logs stream per second. This should closely track the pending events published rate under normal operation.", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(audit_logs_shuttled_total{exported_job=~\"$service\"}[1m])", "legendFormat": "{{exported_job}} - audit published", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 6, "gridPos": { "x": 0, "y": 14, "w": 24, "h": 8 }, "type": "timeseries", "title": "Pending Publish vs Audit Publish Rate Comparison", "description": "Side-by-side comparison of pending event publication rate versus audit event publication rate. Helps identify bottlenecks in the shuttle pipeline. 
Rates should be approximately equal under normal conditions.", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(pending_events_published_total{exported_job=~\"$service\"}[1m])", "legendFormat": "{{exported_job}} - pending published", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(audit_logs_shuttled_total{exported_job=~\"$service\"}[1m])", "legendFormat": "{{exported_job}} - audit published", "refId": "B" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 }, "id": 102, "panels": [], "title": "Audit Consumer Lag", "type": "row" }, { "id": 7, "gridPos": { "x": 0, "y": 23, "w": 12, "h": 8 }, "type": "timeseries", "title": "Audit Consumer Lag Over Time", "description": "Historical trend of audit consumer lag showing how quickly audit logs are being processed. Sustained increases indicate the consumer cannot keep up with the production rate.", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "audit_logs_consumer_lag{exported_job=~\"$service\"}", "legendFormat": "{{exported_job}} - audit consumer lag", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 50, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean", "min" ] } } }, { "id": 8, "gridPos": { "x": 12, "y": 23, "w": 12, "h": 8 }, "type": "timeseries", "title": "Audit Consumer Lag with Pending Publish Rate Context", "description": "Consumer lag plotted alongside the pending event publication rate to correlate lag spikes with periods of high throughput. Useful for capacity planning and identifying performance issues.", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "audit_logs_consumer_lag{exported_job=~\"$service\"}", "legendFormat": "{{exported_job}} - audit consumer lag", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(pending_events_published_total{exported_job=~\"$service\"}[1m])", "legendFormat": "{{exported_job}} - pending publish rate", "refId": "B" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } }, "overrides": [ { "matcher": { "id": "byRegexp", "options": ".*pending publish rate" }, "properties": [ { "id": "custom.axisPlacement", "value": "right" } ] } ] }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } } ], "time": { "from": "now-15m", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ] } }
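
To load this dashboard into a running Grafana instance, one option is Grafana's HTTP API. The sketch below is illustrative only: the Grafana URL, the GRAFANA_TOKEN environment variable, the folder ID, and the local file path are assumptions for the example rather than anything defined by this file; the payload shape follows Grafana's standard POST /api/dashboards/db import endpoint.

# import_dashboard.py -- minimal sketch: push audit-log-pipeline-metrics.json to Grafana.
# Requires the third-party "requests" package. GRAFANA_URL, GRAFANA_TOKEN, and folderId
# are placeholders, not part of the dashboard file itself.
import json
import os

import requests

grafana_url = os.environ.get("GRAFANA_URL", "http://localhost:3000")  # assumed local instance
api_token = os.environ["GRAFANA_TOKEN"]  # assumed Grafana service-account/API token

with open("audit-log-pipeline-metrics.json") as f:
    dashboard = json.load(f)

payload = {
    "dashboard": dashboard,  # "id" is null in the file, so Grafana creates a new dashboard
    "folderId": 0,           # General folder; adjust as needed
    "overwrite": True,       # replace an existing dashboard with the same uid
}

resp = requests.post(
    f"{grafana_url}/api/dashboards/db",
    headers={"Authorization": f"Bearer {api_token}"},
    json=payload,
    timeout=10,
)
resp.raise_for_status()
print(resp.json().get("url"))  # URL of the imported dashboard

Because "id" is null and "uid" is set, Grafana should create the dashboard under that uid (or replace an existing one when "overwrite" is true). Once imported, the Data Source and Service variables from the templating block control which Prometheus data source and exported_job values the panels query.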
