Skip to main content
Glama
layer-cache-metrics.json (80.9 kB)
{ "id": null, "uid": "layer-cache-metrics", "title": "LayerCache Metrics", "tags": [ "layercache", "cache", "postgres", "retry", "persistence" ], "timezone": "browser", "schemaVersion": 16, "version": 0, "refresh": "5s", "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "templating": { "list": [ { "name": "datasource", "type": "datasource", "label": "Data Source", "query": "prometheus", "refresh": 1, "hide": 0 }, { "name": "service", "type": "query", "label": "Service", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "query": "label_values({__name__=~\"layer_cache_requests_total|layer_cache_persister_write_attempted\"}, exported_job)", "refresh": 2, "sort": 1, "multi": true, "includeAll": true, "allValue": ".*" }, { "name": "cache", "type": "query", "label": "Cache", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "query": "label_values(layer_cache_persister_write_attempted{exported_job=~\"${service}\"}, cache_name)", "refresh": 2, "sort": 1, "multi": true, "includeAll": true, "allValue": ".*" }, { "name": "operation", "type": "query", "label": "Operation", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "query": "label_values(layer_cache_persistence_latency_seconds_bucket, operation)", "refresh": 2, "sort": 1, "multi": true, "includeAll": true, "allValue": ".*" } ] }, "panels": [ { "id": 200, "type": "row", "title": "Overview & System Health", "gridPos": { "x": 0, "y": 0, "w": 24, "h": 1 }, "collapsed": false }, { "id": 300, "type": "stat", "title": "System Health - Overall Error Rate", "section": "overview", "gridPos": { "x": 0, "y": 1, "w": 8, "h": 4 }, "targets": [ { "expr": "(sum(rate(layer_cache_persister_write_failed_retryable_total[5m])) + sum(rate(s3_write_attempts_total{result=~\"throttle|error_.*\"}[5m]))) / sum(rate(layer_cache_persister_write_attempted[5m]))", "refId": "A", "datasource": { "type": "prometheus", "uid": "${datasource}" } } ], "fieldConfig": { "defaults": { "unit": "percentunit", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" }, { "value": 0.01, "color": "yellow" }, { "value": 0.05, "color": "red" } ] } } }, "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "id": 301, "type": "stat", "title": "System Health - S3 Queue Depth", "section": "overview", "gridPos": { "x": 8, "y": 1, "w": 8, "h": 4 }, "targets": [ { "expr": "sum(s3_write_queue_depth)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${datasource}" } } ], "fieldConfig": { "defaults": { "unit": "none", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" }, { "value": 100, "color": "yellow" }, { "value": 500, "color": "red" } ] } } }, "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "id": 302, "type": "stat", "title": "System Health - Retry Queue Depth", "section": "overview", "gridPos": { "x": 16, "y": 1, "w": 8, "h": 4 }, "targets": [ { "expr": "sum(layer_cache_retry_queue_depth)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${datasource}" } } ], "fieldConfig": { "defaults": { "unit": "none", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" }, { "value": 10, "color": "yellow" }, { "value": 100, "color": "red" } ] } } }, "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "id": 8, "gridPos": { "x": 0, "y": 5, "w": 24, "h": 8 }, "type": "stat", "title": "Total Items in Retry Queue", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(layer_cache_retry_queue_depth{exported_job=~\"${service}\", cache_name=~\"${cache}\"})", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "thresholds" }, 
"thresholds": { "mode": "absolute", "steps": [ { "value": null, "color": "green" }, { "value": 10, "color": "yellow" }, { "value": 100, "color": "red" } ] } } }, "options": { "textMode": "value_and_name", "colorMode": "background", "graphMode": "area", "orientation": "auto", "reduceOptions": { "values": false, "calcs": [ "lastNotNull" ] } } }, { "id": 34, "gridPos": { "x": 0, "y": 19, "w": 24, "h": 8 }, "type": "timeseries", "title": "Cache Resolution Distribution Over Time", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(layer_cache_backend_resolved_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\", result=\"hit\"}[1m]))", "legendFormat": "Foyer", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(layer_cache_backend_resolved_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\", result=\"hit\"}[1m]))", "legendFormat": "S3", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(layer_cache_backend_resolved_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\", result=\"hit\"}[1m]))", "legendFormat": "PostgreSQL", "refId": "C" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(layer_cache_backend_resolved_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", result=\"miss\"}[1m]))", "legendFormat": "Not Found", "refId": "D" } ], "fieldConfig": { "defaults": { "custom": { "fillOpacity": 10, "lineWidth": 2, "spanNulls": false }, "unit": "reqps" } }, "options": { "legend": { "displayMode": "list", "placement": "bottom" }, "tooltip": { "mode": "multi", "sort": "desc" } } }, { "id": 35, "gridPos": { "x": 0, "y": 27, "w": 8, "h": 8 }, "type": "bargauge", "title": "${cache} Resolution by Backend", "repeat": "cache", "repeatDirection": "h", 
"maxPerRow": 3, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (backend) (rate(layer_cache_backend_resolved_total{exported_job=~\"${service}\", cache_name=\"${cache}\"}[5m]))", "legendFormat": "{{backend}}", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "reqps", "min": 0, "displayName": "${__field.labels.backend}" } }, "options": { "orientation": "horizontal", "displayMode": "gradient", "showUnfilled": true, "text": { "titleSize": 12, "valueSize": 18 } } }, { "id": 201, "type": "row", "title": "Foyer Cache", "gridPos": { "x": 0, "y": 60, "w": 24, "h": 1 }, "collapsed": false }, { "id": 28, "gridPos": { "x": 0, "y": 69, "w": 24, "h": 8 }, "type": "timeseries", "title": "${cache} - End-to-End Request Latency (p50, p95, p99)", "repeat": "cache", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum(rate(layer_cache_request_latency_ms_bucket{exported_job=~\"${service}\", cache_name=\"${cache}\"}[$__interval])) by (le))", "legendFormat": "p50", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(layer_cache_request_latency_ms_bucket{exported_job=~\"${service}\", cache_name=\"${cache}\"}[$__interval])) by (le))", "legendFormat": "p95", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum(rate(layer_cache_request_latency_ms_bucket{exported_job=~\"${service}\", cache_name=\"${cache}\"}[$__interval])) by (le))", "legendFormat": "p99", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { 
"displayMode": "table", "placement": "bottom", "calcs": [ "lastNotNull", "max", "mean" ] } }, "interval": "1m" }, { "id": 29, "gridPos": { "x": 0, "y": 77, "w": 24, "h": 8 }, "type": "timeseries", "title": "${cache} - Backend Hit Latency Comparison (p95)", "repeat": "cache", "repeatDirection": "v", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=\"${cache}\", result=\"hit\"}[1m])) by (backend, le))", "legendFormat": "{{backend}}", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "bottom", "calcs": [ "lastNotNull", "max", "mean" ] } }, "interval": "1m" }, { "id": 30, "gridPos": { "x": 0, "y": 85, "w": 24, "h": 8 }, "type": "timeseries", "title": "Overall Cache Miss Rate", "repeatDirection": "v", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(layer_cache_request_latency_ms_count{result=\"miss\"}[5m])) by (cache_name) / sum(rate(layer_cache_request_latency_ms_count[5m])) by (cache_name)", "legendFormat": "{{cache_name}}", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "percentunit", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" }, "min": 0, "max": 1 } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "bottom", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 303, "type": "timeseries", "title": "Fallback Tracking (S3\u2192PostgreSQL)", "section": "overview", "gridPos": { 
"x": 0, "y": 35, "w": 24, "h": 8 }, "targets": [ { "expr": "sum by (exported_job, cache_name) (rate(layer_cache_read_fallback_total{from_backend=\"s3\", to_backend=\"postgres\"}[5m]))", "legendFormat": "{{exported_job}}/{{cache_name}} - S3\u2192PG fallback", "refId": "A", "datasource": { "type": "prometheus", "uid": "${datasource}" } } ], "fieldConfig": { "defaults": { "unit": "ops", "custom": { "lineWidth": 2, "fillOpacity": 10 } } }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } } }, { "id": 202, "type": "row", "title": "S3 Persistence", "gridPos": { "x": 0, "y": 130, "w": 24, "h": 1 }, "collapsed": false }, { "id": 36, "gridPos": { "x": 0, "y": 139, "w": 12, "h": 8 }, "type": "timeseries", "title": "S3 Write Queue Depth", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (exported_job, cache_name) (s3_write_queue_depth{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"})", "legendFormat": "{{exported_job}}/{{cache_name}}", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } }, "alert": { "name": "S3 Queue Growing", "conditions": [ { "evaluator": { "params": [ 100 ], "type": "gt" }, "query": { "params": [ "A", "5m", "now" ] }, "type": "query" } ], "message": "S3 write queue depth exceeds 100 for 5 minutes" } }, { "id": 3801, "type": "gauge", "title": "S3 Write Attempts - Success Rate Percentage", "gridPos": { "x": 12, "y": 139, "w": 12, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": 
"${datasource}" }, "expr": "sum(rate(s3_write_attempts_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\", result=\"success\"}[5m])) / sum(rate(s3_write_attempts_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[5m]))", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "percentunit", "min": 0, "max": 1, "decimals": 2, "color": { "mode": "palette-classic" }, "thresholds": { "mode": "absolute", "steps": [ { "value": null, "color": "blue" } ] } } }, "options": { "showThresholdLabels": false, "showThresholdMarkers": false, "text": {} } }, { "id": 38, "gridPos": { "x": 0, "y": 155, "w": 12, "h": 8 }, "type": "timeseries", "title": "S3 Write Success Rate", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(s3_write_attempts_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\", result=\"success\"}[5m])) by (exported_job, cache_name)", "legendFormat": "{{exported_job}}/{{cache_name}} - Success", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(s3_write_attempts_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\", result=\"retry\"}[5m])) by (exported_job, cache_name)", "legendFormat": "{{exported_job}}/{{cache_name}} - Retry", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(s3_write_attempts_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\", result=\"dead_letter_queue\"}[5m])) by (exported_job, cache_name)", "legendFormat": "{{exported_job}}/{{cache_name}} - DLQ", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": 
"table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 3802, "type": "timeseries", "title": "S3 Upload Retry Rate", "gridPos": { "x": 12, "y": 155, "w": 12, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(s3_write_attempts_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\", result=\"retry\"}[5m])) by (exported_job, cache_name)", "legendFormat": "{{exported_job}}/{{cache_name}} - Retries", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 4001, "type": "timeseries", "title": "S3 Worker Queue Depth (Per Worker)", "gridPos": { "x": 0, "y": 147, "w": 12, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "s3_worker_queue_depth{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}", "legendFormat": "{{exported_job}}/{{cache_name}}/worker-{{worker_id}}", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 4002, "type": "timeseries", "title": "S3 Worker Active Uploads (Per Worker)", "gridPos": { "x": 12, "y": 147, "w": 12, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": 
"${datasource}" }, "expr": "s3_worker_active_uploads{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}", "legendFormat": "{{exported_job}}/{{cache_name}}/worker-{{worker_id}}", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 304, "type": "timeseries", "title": "S3 Write Error Breakdown by Category", "section": "s3", "gridPos": { "x": 0, "y": 179, "w": 24, "h": 8 }, "targets": [ { "expr": "sum by (exported_job, cache_name, result) (rate(s3_write_attempts_total{result=~\"error_.*\"}[5m]))", "legendFormat": "{{exported_job}}/{{cache_name}} - {{result}}", "refId": "A", "datasource": { "type": "prometheus", "uid": "${datasource}" } } ], "fieldConfig": { "defaults": { "unit": "ops", "custom": { "lineWidth": 2, "fillOpacity": 10, "stacking": { "mode": "normal" } } } }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } } }, { "id": 3803, "type": "timeseries", "title": "S3 Dead Letter Queue Rate", "gridPos": { "x": 0, "y": 187, "w": 12, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(s3_write_attempts_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\", result=\"dead_letter_queue\"}[5m])) by (exported_job, cache_name)", "legendFormat": "{{exported_job}}/{{cache_name}} - DLQ entries/sec", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, 
"legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 305, "type": "timeseries", "title": "S3 Dead Letter Queue Depth", "section": "s3", "gridPos": { "x": 12, "y": 187, "w": 12, "h": 8 }, "targets": [ { "expr": "sum by (exported_job, cache_name) (s3_dlq_depth)", "legendFormat": "{{exported_job}}/{{cache_name}} - DLQ depth", "refId": "A", "datasource": { "type": "prometheus", "uid": "${datasource}" } } ], "fieldConfig": { "defaults": { "unit": "none", "custom": { "lineWidth": 2, "fillOpacity": 10 }, "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" }, { "value": 1, "color": "red" } ] } } }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } } }, { "id": 203, "type": "row", "title": "PostgreSQL Persistence", "gridPos": { "x": 0, "y": 266, "w": 24, "h": 1 }, "collapsed": false }, { "id": 11, "gridPos": { "x": 0, "y": 291, "w": 12, "h": 8 }, "type": "timeseries", "title": "Persister - Write Failures (Initial Attempts)", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_write_failed_retryable{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - retryable", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_write_failed_permanent{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - permanent", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_write_failed_retryable{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\", error_kind!=\"\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}}/S3 - {{error_kind}}", 
"refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 14, "gridPos": { "x": 0, "y": 299, "w": 12, "h": 8 }, "type": "timeseries", "title": "Persister - Error Source (Write)", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_pg_error{exported_job=~\"${service}\", cache_name=~\"${cache}\", operation=\"write\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - PostgreSQL", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_nats_error{exported_job=~\"${service}\", cache_name=~\"${cache}\", operation=\"write\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - NATS", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_both_error{exported_job=~\"${service}\", cache_name=~\"${cache}\", operation=\"write\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - Both", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 15, "gridPos": { "x": 12, "y": 299, "w": 12, "h": 8 }, "type": "timeseries", "title": "Persister - Error Source (Evict)", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": 
"rate(layer_cache_persister_pg_error{exported_job=~\"${service}\", cache_name=~\"${cache}\", operation=\"evict\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - PostgreSQL", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_nats_error{exported_job=~\"${service}\", cache_name=~\"${cache}\", operation=\"evict\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - NATS", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_both_error{exported_job=~\"${service}\", cache_name=~\"${cache}\", operation=\"evict\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - Both", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 16, "gridPos": { "x": 0, "y": 307, "w": 24, "h": 8 }, "type": "timeseries", "title": "Persister - Event Kind Distribution", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_event_by_kind{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - {{event_kind}}", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "bottom", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 26, "gridPos": { "x": 0, "y": 315, "w": 24, "h": 8 }, "type": "timeseries", "title": "PostgreSQL - ${operation} End-to-End Latency (p50, p95, p99)", "repeat": "operation", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum(rate(layer_cache_persistence_latency_seconds_bucket{operation=\"${operation}\", exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[1m])) by (le)) * 1000", "legendFormat": "p50", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(layer_cache_persistence_latency_seconds_bucket{operation=\"${operation}\", exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[1m])) by (le)) * 1000", "legendFormat": "p95", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum(rate(layer_cache_persistence_latency_seconds_bucket{operation=\"${operation}\", exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[1m])) by (le)) * 1000", "legendFormat": "p99", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "bottom", "calcs": [ "lastNotNull", "max", "mean", "min" ] } } }, { "id": 2, "gridPos": { "x": 0, "y": 323, "w": 12, "h": 8 }, "type": "timeseries", "title": "Retry Queue - Enqueue Rate", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_retry_queue_enqueued{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - enqueued", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, 
"custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 3, "gridPos": { "x": 12, "y": 323, "w": 12, "h": 8 }, "type": "timeseries", "title": "Retry Queue - Success Rate", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_retry_queue_success{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - success", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 4, "gridPos": { "x": 0, "y": 331, "w": 12, "h": 8 }, "type": "timeseries", "title": "Retry Queue - Failure Rate", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_retry_queue_failed{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - failed", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 5, "gridPos": { "x": 12, "y": 331, "w": 12, "h": 8 }, "type": "timeseries", "title": "Retry Queue - Permanent Failure Rate", "datasource": { "type": "prometheus", "uid": 
"${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_retry_queue_permanent_failure{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - permanent failure", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 6, "gridPos": { "x": 0, "y": 339, "w": 12, "h": 8 }, "type": "timeseries", "title": "Retry Queue - Current Depth", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (exported_job, cache_name) (layer_cache_retry_queue_depth{exported_job=~\"${service}\", cache_name=~\"${cache}\"})", "legendFormat": "{{exported_job}}/{{cache_name}} - queue depth", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "auto" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 9, "gridPos": { "x": 12, "y": 339, "w": 12, "h": 8 }, "type": "timeseries", "title": "Retry Queue - Depth by Cache", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (exported_job, cache_name) (layer_cache_retry_queue_depth{exported_job=~\"${service}\", cache_name=~\"${cache}\"})", "legendFormat": "{{exported_job}}/{{cache_name}}", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": 
"palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 30, "showPoints": "never", "stacking": { "mode": "normal" } } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max" ] } } }, { "id": 3301, "type": "gauge", "title": "Foyer Resolution Rate", "section": "foyer", "gridPos": { "x": 0, "y": 13, "w": 8, "h": 6 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(layer_cache_backend_resolved_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\", result=\"hit\"}[5m])) / sum(rate(layer_cache_requests_total{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[5m]))", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "percentunit", "min": 0, "max": 1, "color": { "mode": "fixed", "fixedColor": "blue" } } } }, { "id": 3302, "type": "gauge", "title": "S3 Resolution Rate", "section": "s3", "gridPos": { "x": 8, "y": 13, "w": 8, "h": 6 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(layer_cache_backend_resolved_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\", result=\"hit\"}[5m])) / sum(rate(layer_cache_requests_total{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[5m]))", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "percentunit", "min": 0, "max": 1, "color": { "mode": "fixed", "fixedColor": "blue" } } } }, { "id": 3303, "type": "gauge", "title": "PostgreSQL Resolution Rate", "section": "postgres", "gridPos": { "x": 16, "y": 13, "w": 8, "h": 6 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": 
"sum(rate(layer_cache_backend_resolved_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\", result=\"hit\"}[5m])) / sum(rate(layer_cache_requests_total{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[5m]))", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "percentunit", "min": 0, "max": 1, "color": { "mode": "fixed", "fixedColor": "blue" } } } }, { "id": 2701, "gridPos": { "x": 0, "y": 61, "w": 24, "h": 8 }, "type": "timeseries", "title": "Foyer - Read Latency (p50, p95, p99)", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\"}[$__interval])) by (le))", "legendFormat": "p50 - Foyer", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\"}[$__interval])) by (le))", "legendFormat": "p95 - Foyer", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\"}[$__interval])) by (le))", "legendFormat": "p99 - Foyer", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "bottom", "calcs": [ "lastNotNull", "max", "mean" ] } }, "interval": "1m", "section": "foyer" }, { "id": 2702, "gridPos": { "x": 0, "y": 131, "w": 24, "h": 8 }, "type": "timeseries", "title": "S3 - Read Latency (p50, p95, p99)", "datasource": 
{ "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[$__interval])) by (le))", "legendFormat": "p50 - S3", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[$__interval])) by (le))", "legendFormat": "p95 - S3", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[$__interval])) by (le))", "legendFormat": "p99 - S3", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "bottom", "calcs": [ "lastNotNull", "max", "mean" ] } }, "interval": "1m", "section": "s3" }, { "id": 2703, "gridPos": { "x": 0, "y": 267, "w": 24, "h": 8 }, "type": "timeseries", "title": "PostgreSQL - Read Latency (p50, p95, p99)", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[$__interval])) by (le))", "legendFormat": "p50 - PostgreSQL", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", 
cache_name=~\"${cache}\", backend=\"postgres\"}[$__interval])) by (le))", "legendFormat": "p95 - PostgreSQL", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum(rate(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[$__interval])) by (le))", "legendFormat": "p99 - PostgreSQL", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "bottom", "calcs": [ "lastNotNull", "max", "mean" ] } }, "interval": "1m", "section": "postgres" }, { "id": 3101, "gridPos": { "x": 0, "y": 101, "w": 24, "h": 8 }, "type": "heatmap", "title": "Foyer Backend - Read Latency Distribution", "section": "foyer", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(increase(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\"}[$__interval])) by (le)", "legendFormat": "{{le}}", "refId": "A", "format": "heatmap" } ], "fieldConfig": { "defaults": { "custom": { "hideFrom": { "tooltip": false, "viz": false, "legend": false } } } }, "options": { "calculate": false, "cellGap": 2, "color": { "exponent": 0.5, "fill": "dark-orange", "mode": "scheme", "scheme": "Oranges", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-09 }, "legend": { "show": true }, "rowsFrame": { "layout": "auto" }, "tooltip": { "show": true, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "reverse": false, "unit": "ms" } }, "interval": "1m" }, { "id": 3102, "gridPos": { "x": 0, "y": 171, "w": 24, "h": 8 }, "type": "heatmap", "title": "S3 Backend - Read Latency 
Distribution", "section": "s3", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(increase(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[$__interval])) by (le)", "legendFormat": "{{le}}", "refId": "A", "format": "heatmap" } ], "fieldConfig": { "defaults": { "custom": { "hideFrom": { "tooltip": false, "viz": false, "legend": false } } } }, "options": { "calculate": false, "cellGap": 2, "color": { "exponent": 0.5, "fill": "dark-orange", "mode": "scheme", "scheme": "Oranges", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-09 }, "legend": { "show": true }, "rowsFrame": { "layout": "auto" }, "tooltip": { "show": true, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "reverse": false, "unit": "ms" } }, "interval": "1m" }, { "id": 3103, "gridPos": { "x": 0, "y": 275, "w": 24, "h": 8 }, "type": "heatmap", "title": "PostgreSQL Backend - Read Latency Distribution", "section": "postgres", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(increase(layer_cache_read_latency_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[$__interval])) by (le)", "legendFormat": "{{le}}", "refId": "A", "format": "heatmap" } ], "fieldConfig": { "defaults": { "custom": { "hideFrom": { "tooltip": false, "viz": false, "legend": false } } } }, "options": { "calculate": false, "cellGap": 2, "color": { "exponent": 0.5, "fill": "dark-orange", "mode": "scheme", "scheme": "Oranges", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-09 }, "legend": { "show": true }, "rowsFrame": { "layout": "auto" }, "tooltip": { "show": true, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "reverse": false, 
"unit": "ms" } }, "interval": "1m" }, { "id": 1002, "gridPos": { "x": 0, "y": 195, "w": 12, "h": 8 }, "type": "timeseries", "title": "S3 - Write Operations (Initial Attempts)", "section": "s3", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_write_attempted{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - attempted", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_write_success{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - success", "refId": "B" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 1003, "gridPos": { "x": 0, "y": 283, "w": 12, "h": 8 }, "type": "timeseries", "title": "PostgreSQL - Write Operations (Initial Attempts)", "section": "postgres", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_write_attempted{exported_job=~\"${service}\", cache_name=~\"${cache}\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - attempted", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_persister_write_success{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[1m])", "legendFormat": "{{exported_job}}/{{cache_name}} - success", "refId": "B" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": 
"palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 2102, "gridPos": { "x": 12, "y": 195, "w": 12, "h": 8 }, "type": "timeseries", "title": "S3 - Write Duration (p50, p95, p99)", "section": "s3", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum(rate(layer_cache_persister_write_duration_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])) by (le))", "legendFormat": "p50 - S3", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(layer_cache_persister_write_duration_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])) by (le))", "legendFormat": "p95 - S3", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum(rate(layer_cache_persister_write_duration_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])) by (le))", "legendFormat": "p99 - S3", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean", "min" ] } } }, { "id": 3804, "gridPos": { "x": 0, "y": 163, "w": 24, "h": 8 }, "type": "timeseries", "title": "S3 - Disk Write Duration (p50, p95, p99)", "section": "s3", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, 
"expr": "histogram_quantile(0.50, sum(rate(s3_disk_write_duration_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])) by (le))", "legendFormat": "p50 - Disk Write", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(s3_disk_write_duration_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])) by (le))", "legendFormat": "p95 - Disk Write", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum(rate(s3_disk_write_duration_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])) by (le))", "legendFormat": "p99 - Disk Write", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean", "min" ] } } }, { "id": 2103, "gridPos": { "x": 12, "y": 283, "w": 12, "h": 8 }, "type": "timeseries", "title": "PostgreSQL - Write Duration (p50, p95, p99)", "section": "postgres", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum(rate(layer_cache_persister_write_duration_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[1m])) by (le))", "legendFormat": "p50 - PostgreSQL", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(layer_cache_persister_write_duration_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[1m])) by (le))", "legendFormat": "p95 - PostgreSQL", "refId": "B" }, { "datasource": { "type": 
"prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum(rate(layer_cache_persister_write_duration_ms_bucket{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[1m])) by (le))", "legendFormat": "p99 - PostgreSQL", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean", "min" ] } } }, { "id": 2602, "gridPos": { "x": 0, "y": 203, "w": 24, "h": 8 }, "type": "timeseries", "title": "S3 - ${operation} End-to-End Latency (p50, p95, p99)", "repeat": "operation", "section": "s3", "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum(rate(layer_cache_persistence_latency_seconds_bucket{operation=\"${operation}\", exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])) by (le)) * 1000", "legendFormat": "p50", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum(rate(layer_cache_persistence_latency_seconds_bucket{operation=\"${operation}\", exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])) by (le)) * 1000", "legendFormat": "p95", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum(rate(layer_cache_persistence_latency_seconds_bucket{operation=\"${operation}\", exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"s3\"}[1m])) by (le)) * 1000", "legendFormat": "p99", "refId": "C" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } 
}, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "bottom", "calcs": [ "lastNotNull", "max", "mean", "min" ] } } }, { "id": 1901, "type": "timeseries", "title": "Foyer - Evict Rate", "section": "foyer", "gridPos": { "x": 0, "y": 93, "w": 12, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_evictions_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\"}[5m])", "legendFormat": "Foyer Evictions", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 1903, "type": "timeseries", "title": "PostgreSQL - Evict Rate", "section": "postgres", "gridPos": { "x": 0, "y": 347, "w": 12, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_evictions_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[5m])", "legendFormat": "PostgreSQL Evictions", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 2001, "type": "timeseries", "title": "Foyer - Evict Failures", "section": "foyer", "gridPos": { "x": 12, "y": 93, "w": 12, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { 
"datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_eviction_failures_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\"}[5m])", "legendFormat": "Foyer Eviction Failures", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 2003, "type": "timeseries", "title": "PostgreSQL - Evict Failures", "section": "postgres", "gridPos": { "x": 12, "y": 347, "w": 12, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_eviction_failures_total{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[5m])", "legendFormat": "PostgreSQL Eviction Failures", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ops", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 2301, "type": "timeseries", "title": "Foyer - Evict Duration (Average)", "section": "foyer", "gridPos": { "x": 0, "y": 109, "w": 24, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_evict_duration_ms_sum{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\"}[5m]) / rate(layer_cache_evict_duration_ms_count{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"foyer\"}[5m])", "legendFormat": "Foyer Avg Eviction 
Duration", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } }, { "id": 2303, "type": "timeseries", "title": "PostgreSQL - Evict Duration (Average)", "section": "postgres", "gridPos": { "x": 0, "y": 355, "w": 24, "h": 8 }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "rate(layer_cache_evict_duration_ms_sum{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[5m]) / rate(layer_cache_evict_duration_ms_count{exported_job=~\"${service}\", cache_name=~\"${cache}\", backend=\"postgres\"}[5m])", "legendFormat": "PostgreSQL Avg Eviction Duration", "refId": "A" } ], "fieldConfig": { "defaults": { "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { "lineWidth": 2, "fillOpacity": 10, "showPoints": "never" } } }, "options": { "tooltip": { "mode": "multi", "sort": "desc" }, "legend": { "displayMode": "table", "placement": "right", "calcs": [ "lastNotNull", "max", "mean" ] } } } ], "time": { "from": "now-15m", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ] } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/systeminit/si'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.