Security MCP Server

alerts.yml•7.55 KiB

---
# Prometheus alerting rules for the MCP server and the infrastructure around it.
#
# Conventions used by every rule in this file:
#   - labels.severity is one of: info, warning, critical
#   - labels.category names the concern the alert belongs to
#     (availability, reliability, performance, capacity, infrastructure,
#     observability)
#   - `for:` is how long the expression must hold before the alert fires
groups:
  # Scrape-level availability: a target is considered down when `up == 0`.
  - name: general-availability
    rules:
      # Any scrape target, regardless of job.
      - alert: TargetDown
        expr: up == 0
        for: 2m
        labels:
          severity: warning
          category: availability
        annotations:
          summary: "Target down: {{ $labels.job }} on {{ $labels.instance }}"
          description: "Prometheus target {{ $labels.job }} ({{ $labels.instance }}) has been down for 2 minutes."

      # Same condition restricted to job="mcp-server", with a shorter window
      # and a higher severity.
      - alert: MCPServerDown
        expr: up{job="mcp-server"} == 0
        for: 1m
        labels:
          severity: critical
          category: availability
        annotations:
          summary: "MCP server metrics endpoint down"
          description: "The mcp-server metrics endpoint {{ $labels.instance }} is unreachable for 1 minute."

  # Per-tool failure / timeout ratios and error rates.
  # In the ratio rules, clamp_min(..., 1e-6) floors the denominator so a tool
  # whose execution rate is 0 yields a ratio of 0 instead of NaN (0 / 0).
  - name: mcp-reliability
    rules:
      - alert: MCPFailureRateHigh
        expr: |
          sum by (tool) (rate(mcp_tool_execution_total{success="false"}[10m]))
          /
          clamp_min(sum by (tool) (rate(mcp_tool_execution_total[10m])), 1e-6)
          > 0.2
        for: 10m
        labels:
          severity: warning
          category: reliability
        annotations:
          summary: "High failure rate for {{ $labels.tool }}"
          description: "Failure ratio > 20% over 10m for tool {{ $labels.tool }}."

      # Same ratio as MCPFailureRateHigh with a 50% threshold.
      - alert: MCPFailureRateSevere
        expr: |
          sum by (tool) (rate(mcp_tool_execution_total{success="false"}[10m]))
          /
          clamp_min(sum by (tool) (rate(mcp_tool_execution_total[10m])), 1e-6)
          > 0.5
        for: 10m
        labels:
          severity: critical
          category: reliability
        annotations:
          summary: "Severe failure rate for {{ $labels.tool }}"
          description: "Failure ratio > 50% over 10m for tool {{ $labels.tool }}."

      - alert: MCPTimeoutRateHigh
        expr: |
          sum by (tool) (rate(mcp_tool_execution_total{timed_out="true"}[10m]))
          /
          clamp_min(sum by (tool) (rate(mcp_tool_execution_total[10m])), 1e-6)
          > 0.1
        for: 10m
        labels:
          severity: warning
          category: reliability
        annotations:
          summary: "Timeouts > 10% for {{ $labels.tool }}"
          description: "Timeout rate exceeds 10% over 10m for tool {{ $labels.tool }}."

      # Fires on any non-zero validation-error rate that persists for 10m.
      - alert: MCPValidationErrorsObserved
        expr: |
          sum by (tool) (rate(mcp_tool_errors_total{error_type="validation_error"}[10m])) > 0
        for: 10m
        labels:
          severity: info
          category: reliability
        annotations:
          summary: "Input validation errors for {{ $labels.tool }}"
          description: "Sustained validation errors over 10m. Investigate client inputs or configuration."

      # Every error_type except validation_error and timeout, which have their
      # own alerts above. Grouped by (tool, error_type), so each type alerts
      # separately.
      - alert: MCPExecutionErrorsSpike
        expr: |
          sum by (tool, error_type) (
            rate(mcp_tool_errors_total{error_type!~"validation_error|timeout"}[10m])
          ) > 0.05
        for: 10m
        labels:
          severity: warning
          category: reliability
        annotations:
          summary: "Execution errors for {{ $labels.tool }} (type={{ $labels.error_type }})"
          description: "Non-validation, non-timeout errors > 0.05/s over 10m."

  # Per-tool latency quantiles estimated from the
  # mcp_tool_execution_seconds histogram buckets (5m rate window).
  - name: mcp-latency
    rules:
      - alert: MCPLatencyP95High
        expr: |
          histogram_quantile(
            0.95,
            sum by (le, tool) (rate(mcp_tool_execution_seconds_bucket[5m]))
          ) > 5
        for: 10m
        labels:
          severity: warning
          category: performance
        annotations:
          summary: "High p95 latency for {{ $labels.tool }}"
          description: "p95 execution time > 5s for 10m."

      - alert: MCPLatencyP99Severe
        expr: |
          histogram_quantile(
            0.99,
            sum by (le, tool) (rate(mcp_tool_execution_seconds_bucket[5m]))
          ) > 10
        for: 10m
        labels:
          severity: critical
          category: performance
        annotations:
          summary: "Severe p99 latency for {{ $labels.tool }}"
          description: "p99 execution time > 10s for 10m."

  # In-flight executions as reported by mcp_tool_active.
  # NOTE(review): thresholds 4 and 8 presumably relate to the server's
  # configured concurrency limits - confirm against the server configuration.
  - name: mcp-concurrency
    rules:
      - alert: MCPActiveExecutionsHigh
        expr: mcp_tool_active > 4
        for: 10m
        labels:
          severity: warning
          category: capacity
        annotations:
          summary: "High active executions for {{ $labels.tool }}"
          description: "Active executions > 4 for 10m. Review concurrency limits or workload spikes."

      - alert: MCPActiveExecutionsCritical
        expr: mcp_tool_active > 8
        for: 10m
        labels:
          severity: critical
          category: capacity
        annotations:
          summary: "Critical active executions for {{ $labels.tool }}"
          description: "Active executions > 8 for 10m. Possible runaway concurrency."

  # Memory usage of the mcp-server container relative to its memory limit.
  # The `> 0` filter on the limit drops series whose limit is 0: dividing by
  # a zero limit yields +Inf, which would satisfy both thresholds and keep the
  # alerts firing permanently.
  # NOTE(review): a 0 limit is presumably what the exporter reports for a
  # container with no memory limit configured - confirm for this deployment.
  - name: container-observability
    rules:
      - alert: MCPContainerMemoryPressure
        expr: |
          (
            container_memory_usage_bytes{container="mcp-server"}
            /
            (container_spec_memory_limit_bytes{container="mcp-server"} > 0)
          ) > 0.9
        for: 10m
        labels:
          severity: warning
          category: infrastructure
        annotations:
          summary: "High memory usage in mcp-server container"
          description: "mcp-server memory usage > 90% of limit for 10m."

      - alert: MCPContainerMemoryCritical
        expr: |
          (
            container_memory_usage_bytes{container="mcp-server"}
            /
            (container_spec_memory_limit_bytes{container="mcp-server"} > 0)
          ) > 0.95
        for: 10m
        labels:
          severity: critical
          category: infrastructure
        annotations:
          summary: "Critical memory usage in mcp-server container"
          description: "mcp-server memory usage > 95% of limit for 10m."

  # Host-level CPU and memory, per instance, from node_* metrics.
  - name: node-infrastructure
    rules:
      # CPU usage is computed as 1 minus the average idle-mode rate per instance.
      - alert: NodeHighCPU
        expr: 1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) > 0.8
        for: 10m
        labels:
          severity: warning
          category: infrastructure
        annotations:
          summary: "High CPU on {{ $labels.instance }}"
          description: "CPU usage > 80% for 10m."

      - alert: NodeLowMemoryAvailable
        expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) < 0.2
        for: 10m
        labels:
          severity: warning
          category: infrastructure
        annotations:
          summary: "Low memory available on {{ $labels.instance }}"
          description: "Available memory < 20% for 10m."

  # Prometheus monitoring its own rule evaluation.
  - name: prometheus-self
    rules:
      - alert: PrometheusRuleEvaluationErrors
        expr: rate(prometheus_rule_evaluation_failures_total[5m]) > 0
        for: 5m
        labels:
          severity: warning
          category: observability
        annotations:
          summary: "Prometheus rule evaluation errors"
          description: "Rule evaluation failures observed over 5m."
# Optional (enable if breaker metric exists):
# To enable, remove the leading "# " from each line below; the entries are
# indented so that they nest as an additional item under the top-level
# `groups:` key.
# NOTE(review): the two alerts look like alternatives for two different
# encodings of the breaker state (a `state="open"` label vs. a numeric value
# where >= 2 means OPEN) - presumably only one matches the exporter; confirm
# before enabling both.
#   - name: mcp-circuit-breaker
#     rules:
#       - alert: MCPCircuitBreakerOpenLabeled
#         expr: mcp_circuit_breaker_state{state="open"} == 1
#         for: 5m
#         labels:
#           severity: critical
#           category: reliability
#         annotations:
#           summary: "Circuit breaker OPEN for {{ $labels.tool }}"
#           description: "Breaker has been OPEN for 5m for tool {{ $labels.tool }}."
#
#       - alert: MCPCircuitBreakerOpenNumeric
#         expr: mcp_circuit_breaker_state >= 2
#         for: 5m
#         labels:
#           severity: critical
#           category: reliability
#         annotations:
#           summary: "Circuit breaker OPEN (numeric) for {{ $labels.tool }}"
#           description: "Breaker numeric state indicates OPEN for 5m."

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nordeim/Security-MCP-Server-v3'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

alerts.yml•7.55 KiB