CodeGraph CLI MCP Server

prometheus-rules.yml•15.9 KiB

# =============================================================================
# CodeGraph Production Monitoring Rules
# Prometheus alerting rules for zero-downtime deployments
# =============================================================================
#
# Conventions used in this file:
#   - Request-rate and histogram expressions are aggregated with
#     `sum by (job, instance)` (plus `le` for histograms) so that extra series
#     labels such as status, method or path do not split the result. `job` and
#     `instance` are kept because annotations and routing reference them.
#   - Multi-line PromQL uses a literal block scalar (`|`).

groups:
  # ==========================================================================
  # Application Health and Availability
  # ==========================================================================
  - name: codegraph.application.health
    rules:
      - alert: CodeGraphDown
        expr: up{job="codegraph-api"} == 0
        for: 30s
        labels:
          severity: critical
          component: application
          runbook: "https://docs.codegraph.example.com/runbooks/application-down"
        annotations:
          summary: "CodeGraph API is down"
          description: "CodeGraph API instance {{ $labels.instance }} has been down for more than 30 seconds"
          impact: "Complete service unavailability"
          action: "Immediate investigation required - check pod status, logs, and resource availability"

      - alert: CodeGraphHealthCheckFailing
        expr: codegraph_health_check_status != 1
        for: 2m
        labels:
          severity: warning
          component: health
        annotations:
          summary: "CodeGraph health check failing"
          description: "Health check for {{ $labels.instance }} has been failing for 2 minutes"
          action: "Check application logs and dependent services"

      - alert: CodeGraphHighErrorRate
        # Both sides are summed per instance before dividing. Without the
        # aggregation, binary-operator label matching pairs each 5xx series
        # only with the denominator series carrying the same status label,
        # which makes the ratio 100% whenever any 5xx response is served.
        expr: |
          (
            sum by (job, instance) (
              rate(http_requests_total{job="codegraph-api",status=~"5.."}[5m])
            )
            /
            sum by (job, instance) (
              rate(http_requests_total{job="codegraph-api"}[5m])
            )
          ) * 100 > 5
        for: 5m
        labels:
          severity: warning
          component: application
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value }}% for {{ $labels.instance }}"
          action: "Check application logs for error patterns and investigate root cause"

  # ==========================================================================
  # Performance and Latency
  # ==========================================================================
  - name: codegraph.performance
    rules:
      - alert: CodeGraphHighLatency
        # NOTE(review): fires above 0.5 (500 ms if the metric is in seconds, as
        # its name suggests) while the `target` annotation states sub-50ms.
        # Confirm the threshold is intentionally looser than the target.
        expr: |
          histogram_quantile(0.95,
            sum by (le, job, instance) (
              rate(http_request_duration_seconds_bucket{job="codegraph-api"}[5m])
            )
          ) > 0.5
        for: 10m
        labels:
          severity: warning
          component: performance
        annotations:
          summary: "High request latency"
          description: "95th percentile latency is {{ $value }}s for {{ $labels.instance }}"
          target: "Sub-50ms P95 latency per architecture requirements"
          action: "Monitor query performance, check database connections, review memory usage"

      - alert: CodeGraphExtremeLatency
        expr: |
          histogram_quantile(0.99,
            sum by (le, job, instance) (
              rate(http_request_duration_seconds_bucket{job="codegraph-api"}[5m])
            )
          ) > 2.0
        for: 5m
        labels:
          severity: critical
          component: performance
        annotations:
          summary: "Extreme request latency detected"
          description: "99th percentile latency is {{ $value }}s for {{ $labels.instance }}"
          action: "Immediate investigation required - potential system overload or resource contention"

      - alert: CodeGraphLowThroughput
        # Summed per instance so that a rarely-hit label combination does not
        # trigger the alert while overall traffic is healthy.
        expr: |
          sum by (job, instance) (
            rate(http_requests_total{job="codegraph-api"}[5m])
          ) < 1
        for: 15m
        labels:
          severity: warning
          component: performance
        annotations:
          summary: "Unusually low request throughput"
          description: "Request rate is {{ $value }} req/s for {{ $labels.instance }}"
          action: "Check if this is expected traffic pattern or investigate connectivity issues"

  # ==========================================================================
  # Resource Utilization
  # ==========================================================================
  - name: codegraph.resources
    rules:
      - alert: CodeGraphHighMemoryUsage
        # Resident memory converted from bytes by dividing by 1024 three times.
        expr: |
          (process_resident_memory_bytes{job="codegraph-api"} / 1024 / 1024 / 1024) > 0.8
        for: 10m
        labels:
          severity: warning
          component: memory
        annotations:
          summary: "High memory usage"
          description: "Memory usage is {{ $value }}GB for {{ $labels.instance }}"
          target: "Architecture target: <500MB for 100k LOC"
          action: "Monitor memory leaks, check cache sizes, review graph data retention"

      - alert: CodeGraphMemoryExhaustion
        expr: |
          (process_resident_memory_bytes{job="codegraph-api"} / 1024 / 1024 / 1024) > 1.5
        for: 2m
        labels:
          severity: critical
          component: memory
        annotations:
          summary: "Memory exhaustion risk"
          description: "Memory usage is {{ $value }}GB for {{ $labels.instance }}"
          action: "Immediate action required - consider pod restart or traffic reduction"

      - alert: CodeGraphHighCPUUsage
        # rate() of CPU seconds is cores in use; * 100 expresses it as a
        # percentage of one core.
        expr: rate(process_cpu_seconds_total{job="codegraph-api"}[5m]) * 100 > 80
        for: 15m
        labels:
          severity: warning
          component: cpu
        annotations:
          summary: "High CPU utilization"
          description: "CPU usage is {{ $value }}% for {{ $labels.instance }}"
          action: "Monitor query complexity, check for infinite loops, review algorithm efficiency"

      - alert: CodeGraphDiskSpaceLow
        # Used fraction = (size - available) / size, excluding tmpfs and lxcfs
        # filesystems. Not restricted to a job, so it covers every scraped node.
        expr: |
          (
            (
              node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs"}
              - node_filesystem_avail_bytes{fstype!~"tmpfs|fuse.lxcfs"}
            )
            / node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs"}
          ) * 100 > 85
        for: 5m
        labels:
          severity: warning
          component: storage
        annotations:
          summary: "Low disk space"
          description: "Disk usage is {{ $value }}% on {{ $labels.device }}"
          action: "Monitor RocksDB growth, implement data retention policies, consider storage expansion"

  # ==========================================================================
  # Database and Storage
  # ==========================================================================
  - name: codegraph.database
    rules:
      - alert: CodeGraphRocksDBErrors
        expr: increase(rocksdb_errors_total{job="codegraph-api"}[5m]) > 0
        for: 1m
        labels:
          severity: warning
          component: database
        annotations:
          summary: "RocksDB errors detected"
          description: "{{ $value }} RocksDB errors in the last 5 minutes for {{ $labels.instance }}"
          action: "Check RocksDB logs, verify disk health, monitor I/O metrics"

      - alert: CodeGraphSlowQueries
        expr: |
          histogram_quantile(0.95,
            sum by (le, job, instance) (
              rate(graph_query_duration_seconds_bucket{job="codegraph-api"}[5m])
            )
          ) > 0.1
        for: 10m
        labels:
          severity: warning
          component: performance
        annotations:
          summary: "Slow graph queries detected"
          description: "95th percentile query time is {{ $value }}s for {{ $labels.instance }}"
          target: "Architecture target: sub-50ms query latency"
          action: "Review query patterns, check index efficiency, monitor graph size"

      - alert: CodeGraphCacheHitRateLow
        # Hit rate = hits / (hits + misses) over a 10 minute window.
        expr: |
          (
            rate(graph_cache_hits_total{job="codegraph-api"}[10m])
            /
            (
              rate(graph_cache_hits_total{job="codegraph-api"}[10m])
              + rate(graph_cache_misses_total{job="codegraph-api"}[10m])
            )
          ) * 100 < 85
        for: 15m
        labels:
          severity: warning
          component: cache
        annotations:
          summary: "Low cache hit rate"
          description: "Cache hit rate is {{ $value }}% for {{ $labels.instance }}"
          target: "Architecture target: >92% cache hit rate"
          action: "Review cache configuration, monitor cache eviction patterns, check memory allocation"

  # ==========================================================================
  # Vector Search Performance
  # ==========================================================================
  - name: codegraph.vector
    rules:
      - alert: CodeGraphVectorSearchSlow
        expr: |
          histogram_quantile(0.95,
            sum by (le, job, instance) (
              rate(vector_search_duration_seconds_bucket{job="codegraph-api"}[5m])
            )
          ) > 0.2
        for: 10m
        labels:
          severity: warning
          component: vector-search
        annotations:
          summary: "Slow vector search performance"
          description: "95th percentile vector search time is {{ $value }}s for {{ $labels.instance }}"
          action: "Check FAISS index efficiency, monitor embedding generation, review query complexity"

      - alert: CodeGraphEmbeddingGenerationSlow
        expr: |
          histogram_quantile(0.95,
            sum by (le, job, instance) (
              rate(embedding_generation_duration_seconds_bucket{job="codegraph-api"}[5m])
            )
          ) > 0.1
        for: 10m
        labels:
          severity: warning
          component: embeddings
        annotations:
          summary: "Slow embedding generation"
          description: "95th percentile embedding generation time is {{ $value }}s for {{ $labels.instance }}"
          target: "Architecture target: <100ms P95"
          action: "Monitor model performance, check CPU/GPU utilization, review batch processing"

  # ==========================================================================
  # Deployment and Release Monitoring
  # ==========================================================================
  - name: codegraph.deployment
    rules:
      - alert: CodeGraphDeploymentRollback
        expr: increase(deployment_rollback_total{job="codegraph-api"}[1h]) > 0
        for: 1m
        labels:
          severity: critical
          component: deployment
        annotations:
          summary: "Deployment rollback occurred"
          description: "{{ $value }} rollback(s) detected in the last hour for {{ $labels.instance }}"
          action: "Investigate rollback cause, review deployment logs, assess system stability"

      - alert: CodeGraphNewVersionDeployed
        # NOTE(review): changes() counts value changes within one series. If
        # codegraph_version_info is an info-style metric (constant value, the
        # version carried in a label, as the description's use of
        # $labels.version suggests), a new version creates a new series and
        # this expression stays at 0. Confirm against the exporter.
        expr: changes(codegraph_version_info[30m]) > 0
        for: 1m
        labels:
          severity: info
          component: deployment
        annotations:
          summary: "New version deployed"
          description: "New CodeGraph version deployed: {{ $labels.version }}"
          action: "Monitor system metrics closely for performance regressions or issues"

      - alert: CodeGraphPodRestartLoop
        # More than 2 restarts within 15 minutes. increase() over the window
        # is the direct form of the previous `rate(...[15m]) * 60 * 15`.
        expr: |
          increase(kube_pod_container_status_restarts_total{
            container="codegraph-api"
          }[15m]) > 2
        for: 5m
        labels:
          severity: warning
          component: kubernetes
        annotations:
          summary: "Pod restart loop detected"
          description: "Pod {{ $labels.pod }} is restarting frequently"
          action: "Check pod logs, review resource limits, investigate liveness/readiness probes"

  # ==========================================================================
  # Security and Compliance
  # ==========================================================================
  - name: codegraph.security
    rules:
      - alert: CodeGraphUnauthorizedAccess
        # Summed per instance so the threshold applies to the total 401 rate
        # rather than to each individual label combination.
        expr: |
          sum by (job, instance) (
            rate(http_requests_total{
              job="codegraph-api",
              status="401"
            }[5m])
          ) > 0.1
        for: 2m
        labels:
          severity: warning
          component: security
        annotations:
          summary: "High rate of unauthorized access attempts"
          description: "{{ $value }} unauthorized requests per second for {{ $labels.instance }}"
          action: "Review authentication logs, check for brute force attacks, verify API key management"

      - alert: CodeGraphSuspiciousActivity
        # Summed per instance across all 4xx status codes (401 included, so
        # this can fire together with CodeGraphUnauthorizedAccess).
        expr: |
          sum by (job, instance) (
            rate(http_requests_total{
              job="codegraph-api",
              status=~"4.."
            }[5m])
          ) > 1.0
        for: 5m
        labels:
          severity: warning
          component: security
        annotations:
          summary: "High rate of client errors"
          description: "{{ $value }} client errors per second for {{ $labels.instance }}"
          action: "Review error patterns, check for potential attacks or misconfigurations"

  # ==========================================================================
  # Business Logic and Application-Specific
  # ==========================================================================
  - name: codegraph.business
    rules:
      - alert: CodeGraphParsingSlow
        expr: |
          histogram_quantile(0.95,
            sum by (le, job, instance) (
              rate(code_parsing_duration_seconds_bucket{job="codegraph-api"}[5m])
            )
          ) > 1.0
        for: 10m
        labels:
          severity: warning
          component: parser
        annotations:
          summary: "Code parsing is slow"
          description: "95th percentile parsing time is {{ $value }}s for {{ $labels.instance }}"
          action: "Review parser efficiency, check file size limits, monitor language-specific performance"

      - alert: CodeGraphGraphBuilding
        expr: |
          histogram_quantile(0.95,
            sum by (le, job, instance) (
              rate(graph_building_duration_seconds_bucket{job="codegraph-api"}[5m])
            )
          ) > 2.0
        for: 15m
        labels:
          severity: warning
          component: graph
        annotations:
          summary: "Graph building is slow"
          description: "95th percentile graph building time is {{ $value }}s for {{ $labels.instance }}"
          action: "Monitor graph complexity, check dependency resolution, review incremental update logic"

      - alert: CodeGraphIndexingBacklog
        expr: codegraph_indexing_queue_size{job="codegraph-api"} > 100
        for: 10m
        labels:
          severity: warning
          component: indexing
        annotations:
          summary: "Indexing queue backlog"
          description: "Indexing queue has {{ $value }} items pending for {{ $labels.instance }}"
          action: "Monitor indexing throughput, check for stuck jobs, review resource allocation"

  # ==========================================================================
  # Kubernetes and Infrastructure
  # ==========================================================================
  - name: codegraph.kubernetes
    rules:
      - alert: CodeGraphPodNotReady
        expr: |
          kube_pod_status_ready{
            condition="false",
            pod=~"codegraph-api-.*"
          } == 1
        for: 5m
        labels:
          severity: warning
          component: kubernetes
        annotations:
          summary: "Pod not ready"
          description: "Pod {{ $labels.pod }} has been not ready for 5 minutes"
          action: "Check pod status, review readiness probe configuration, investigate resource constraints"

      - alert: CodeGraphServiceEndpointDown
        # Same expression as CodeGraphDown in codegraph.application.health,
        # with a longer `for` (1m vs 30s); both are severity critical, so an
        # outage lasting over a minute raises two alerts for the same target.
        expr: |
          up{job="codegraph-api"} == 0
        for: 1m
        labels:
          severity: critical
          component: service
        annotations:
          summary: "Service endpoint is down"
          description: "CodeGraph service endpoint {{ $labels.instance }} is unreachable"
          action: "Immediate investigation - check service configuration, network connectivity, pod status"

      - alert: CodeGraphHPAMaxReplicas
        # Current replica count has reached (or exceeds) the configured maximum.
        expr: |
          kube_horizontalpodautoscaler_status_current_replicas{
            horizontalpodautoscaler=~"codegraph-api.*"
          }
          >=
          kube_horizontalpodautoscaler_spec_max_replicas{
            horizontalpodautoscaler=~"codegraph-api.*"
          }
        for: 15m
        labels:
          severity: warning
          component: scaling
        annotations:
          summary: "HPA at maximum replicas"
          description: "HPA {{ $labels.horizontalpodautoscaler }} has reached maximum replicas"
          action: "Review scaling metrics, consider increasing max replicas or optimizing performance"

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/codegraph-rust'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

prometheus-rules.yml•15.9 KiB