Skip to main content
Glama

CodeGraph CLI MCP Server

by Jakedismo
prometheus-rules.yml (16.2 kB)
# =============================================================================
# CodeGraph Production Monitoring Rules
# Prometheus alerting rules for zero-downtime deployments
#
# Layout: one rule group per concern (health, performance, resources,
# database, vector search, deployment, security, business logic, Kubernetes).
# Every rule carries a `severity` and `component` label plus `summary`,
# `description` and `action` annotations.
# =============================================================================

groups:
  # ==========================================================================
  # Application Health and Availability
  # ==========================================================================
  - name: codegraph.application.health
    rules:
      # Scrape target has been unreachable for 30 seconds.
      - alert: CodeGraphDown
        expr: up{job="codegraph-api"} == 0
        for: 30s
        labels:
          severity: critical
          component: application
          runbook: "https://docs.codegraph.example.com/runbooks/application-down"
        annotations:
          summary: "CodeGraph API is down"
          description: "CodeGraph API instance {{ $labels.instance }} has been down for more than 30 seconds"
          impact: "Complete service unavailability"
          action: "Immediate investigation required - check pod status, logs, and resource availability"

      # Application-reported health gauge is anything other than 1.
      - alert: CodeGraphHealthCheckFailing
        expr: codegraph_health_check_status != 1
        for: 2m
        labels:
          severity: warning
          component: health
        annotations:
          summary: "CodeGraph health check failing"
          description: "Health check for {{ $labels.instance }} has been failing for 2 minutes"
          action: "Check application logs and dependent services"

      # Share of 5xx responses per instance, in percent.
      # Both sides are aggregated to (job, instance) before dividing: binary
      # operators match series on identical label sets, so without the
      # aggregation each 5xx series would only be divided by itself and the
      # non-5xx traffic would never enter the denominator.
      - alert: CodeGraphHighErrorRate
        expr: |
          (
            sum by (job, instance) (
              rate(http_requests_total{job="codegraph-api",status=~"5.."}[5m])
            )
            /
            sum by (job, instance) (
              rate(http_requests_total{job="codegraph-api"}[5m])
            )
          ) * 100 > 5
        for: 5m
        labels:
          severity: warning
          component: application
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value }}% for {{ $labels.instance }}"
          action: "Check application logs for error patterns and investigate root cause"

  # ==========================================================================
  # Performance and Latency
  # ==========================================================================
  - name: codegraph.performance
    rules:
      # P95 HTTP request latency above 500ms for 10 minutes.
      - alert: CodeGraphHighLatency
        expr: |
          histogram_quantile(0.95,
            rate(http_request_duration_seconds_bucket{job="codegraph-api"}[5m])
          ) > 0.5
        for: 10m
        labels:
          severity: warning
          component: performance
        annotations:
          summary: "High request latency"
          description: "95th percentile latency is {{ $value }}s for {{ $labels.instance }}"
          target: "Sub-50ms P95 latency per architecture requirements"
          action: "Monitor query performance, check database connections, review memory usage"

      # P99 HTTP request latency above 2s for 5 minutes.
      - alert: CodeGraphExtremeLatency
        expr: |
          histogram_quantile(0.99,
            rate(http_request_duration_seconds_bucket{job="codegraph-api"}[5m])
          ) > 2.0
        for: 5m
        labels:
          severity: critical
          component: performance
        annotations:
          summary: "Extreme request latency detected"
          description: "99th percentile latency is {{ $value }}s for {{ $labels.instance }}"
          action: "Immediate investigation required - potential system overload or resource contention"

      # Total request rate per instance below 1 req/s.
      # Summed per (job, instance) so that a single rarely-used label
      # combination (e.g. an uncommon status code) cannot trigger the alert
      # while the instance as a whole is serving normal traffic.
      - alert: CodeGraphLowThroughput
        expr: |
          sum by (job, instance) (
            rate(http_requests_total{job="codegraph-api"}[5m])
          ) < 1
        for: 15m
        labels:
          severity: warning
          component: performance
        annotations:
          summary: "Unusually low request throughput"
          description: "Request rate is {{ $value }} req/s for {{ $labels.instance }}"
          action: "Check if this is expected traffic pattern or investigate connectivity issues"

  # ==========================================================================
  # Resource Utilization
  # ==========================================================================
  - name: codegraph.resources
    rules:
      # Resident memory above 0.8 GiB (bytes / 1024^3) for 10 minutes.
      - alert: CodeGraphHighMemoryUsage
        expr: |
          (process_resident_memory_bytes{job="codegraph-api"} / 1024 / 1024 / 1024) > 0.8
        for: 10m
        labels:
          severity: warning
          component: memory
        annotations:
          summary: "High memory usage"
          description: "Memory usage is {{ $value }}GB for {{ $labels.instance }}"
          target: "Architecture target: <500MB for 100k LOC"
          action: "Monitor memory leaks, check cache sizes, review graph data retention"

      # Resident memory above 1.5 GiB for 2 minutes.
      - alert: CodeGraphMemoryExhaustion
        expr: |
          (process_resident_memory_bytes{job="codegraph-api"} / 1024 / 1024 / 1024) > 1.5
        for: 2m
        labels:
          severity: critical
          component: memory
        annotations:
          summary: "Memory exhaustion risk"
          description: "Memory usage is {{ $value }}GB for {{ $labels.instance }}"
          action: "Immediate action required - consider pod restart or traffic reduction"

      # CPU seconds consumed per second, scaled to percent (100 = one core).
      - alert: CodeGraphHighCPUUsage
        expr: rate(process_cpu_seconds_total{job="codegraph-api"}[5m]) * 100 > 80
        for: 15m
        labels:
          severity: warning
          component: cpu
        annotations:
          summary: "High CPU utilization"
          description: "CPU usage is {{ $value }}% for {{ $labels.instance }}"
          action: "Monitor query complexity, check for infinite loops, review algorithm efficiency"

      # Used share of each non-tmpfs / non-lxcfs filesystem above 85%.
      - alert: CodeGraphDiskSpaceLow
        expr: |
          (
            (node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs"} - node_filesystem_avail_bytes{fstype!~"tmpfs|fuse.lxcfs"})
            / node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs"}
          ) * 100 > 85
        for: 5m
        labels:
          severity: warning
          component: storage
        annotations:
          summary: "Low disk space"
          description: "Disk usage is {{ $value }}% on {{ $labels.device }}"
          action: "Monitor RocksDB growth, implement data retention policies, consider storage expansion"

  # ==========================================================================
  # Database and Storage
  # ==========================================================================
  - name: codegraph.database
    rules:
      # Any growth of the RocksDB error counter within the last 5 minutes.
      - alert: CodeGraphRocksDBErrors
        expr: increase(rocksdb_errors_total{job="codegraph-api"}[5m]) > 0
        for: 1m
        labels:
          severity: warning
          component: database
        annotations:
          summary: "RocksDB errors detected"
          description: "{{ $value }} RocksDB errors in the last 5 minutes for {{ $labels.instance }}"
          action: "Check RocksDB logs, verify disk health, monitor I/O metrics"

      # P95 graph query duration above 100ms for 10 minutes.
      - alert: CodeGraphSlowQueries
        expr: |
          histogram_quantile(0.95,
            rate(graph_query_duration_seconds_bucket{job="codegraph-api"}[5m])
          ) > 0.1
        for: 10m
        labels:
          severity: warning
          component: performance
        annotations:
          summary: "Slow graph queries detected"
          description: "95th percentile query time is {{ $value }}s for {{ $labels.instance }}"
          target: "Architecture target: sub-50ms query latency"
          action: "Review query patterns, check index efficiency, monitor graph size"

      # hits / (hits + misses) over 10 minutes, in percent, below 85.
      - alert: CodeGraphCacheHitRateLow
        expr: |
          (
            rate(graph_cache_hits_total{job="codegraph-api"}[10m])
            /
            (rate(graph_cache_hits_total{job="codegraph-api"}[10m]) + rate(graph_cache_misses_total{job="codegraph-api"}[10m]))
          ) * 100 < 85
        for: 15m
        labels:
          severity: warning
          component: cache
        annotations:
          summary: "Low cache hit rate"
          description: "Cache hit rate is {{ $value }}% for {{ $labels.instance }}"
          target: "Architecture target: >92% cache hit rate"
          action: "Review cache configuration, monitor cache eviction patterns, check memory allocation"

  # ==========================================================================
  # Vector Search Performance
  # ==========================================================================
  - name: codegraph.vector
    rules:
      # P95 vector search duration above 200ms for 10 minutes.
      - alert: CodeGraphVectorSearchSlow
        expr: |
          histogram_quantile(0.95,
            rate(vector_search_duration_seconds_bucket{job="codegraph-api"}[5m])
          ) > 0.2
        for: 10m
        labels:
          severity: warning
          component: vector-search
        annotations:
          summary: "Slow vector search performance"
          description: "95th percentile vector search time is {{ $value }}s for {{ $labels.instance }}"
          action: "Check FAISS index efficiency, monitor embedding generation, review query complexity"

      # P95 embedding generation duration above 100ms for 10 minutes.
      - alert: CodeGraphEmbeddingGenerationSlow
        expr: |
          histogram_quantile(0.95,
            rate(embedding_generation_duration_seconds_bucket{job="codegraph-api"}[5m])
          ) > 0.1
        for: 10m
        labels:
          severity: warning
          component: embeddings
        annotations:
          summary: "Slow embedding generation"
          description: "95th percentile embedding generation time is {{ $value }}s for {{ $labels.instance }}"
          target: "Architecture target: <100ms P95"
          action: "Monitor model performance, check CPU/GPU utilization, review batch processing"

  # ==========================================================================
  # Deployment and Release Monitoring
  # ==========================================================================
  - name: codegraph.deployment
    rules:
      # Any growth of the rollback counter within the last hour.
      - alert: CodeGraphDeploymentRollback
        expr: increase(deployment_rollback_total{job="codegraph-api"}[1h]) > 0
        for: 1m
        labels:
          severity: critical
          component: deployment
        annotations:
          summary: "Deployment rollback occurred"
          description: "{{ $value }} rollback(s) detected in the last hour for {{ $labels.instance }}"
          action: "Investigate rollback cause, review deployment logs, assess system stability"

      # Fires for each `version` label value that is reported now but was not
      # reported 30 minutes ago. `changes()` cannot be used for this: it only
      # counts changes of a series' sample value, and a new `version` label
      # value produces a new series instead of changing an existing one.
      # NOTE(review): assumes the version is exposed as the `version` label
      # (as the description annotation already does) - confirm against the
      # exporter.
      - alert: CodeGraphNewVersionDeployed
        expr: |
          count by (version) (codegraph_version_info)
          unless
          count by (version) (codegraph_version_info offset 30m)
        for: 1m
        labels:
          severity: info
          component: deployment
        annotations:
          summary: "New version deployed"
          description: "New CodeGraph version deployed: {{ $labels.version }}"
          action: "Monitor system metrics closely for performance regressions or issues"

      # More than 2 container restarts within 15 minutes.
      # increase(v[15m]) is rate(v[15m]) multiplied by the 900 seconds of the
      # window, i.e. the same value as the former `rate(...) * 60 * 15`.
      - alert: CodeGraphPodRestartLoop
        expr: |
          increase(kube_pod_container_status_restarts_total{
            container="codegraph-api"
          }[15m]) > 2
        for: 5m
        labels:
          severity: warning
          component: kubernetes
        annotations:
          summary: "Pod restart loop detected"
          description: "Pod {{ $labels.pod }} is restarting frequently"
          action: "Check pod logs, review resource limits, investigate liveness/readiness probes"

  # ==========================================================================
  # Security and Compliance
  # ==========================================================================
  - name: codegraph.security
    rules:
      # More than 0.1 HTTP 401 responses per second for 2 minutes.
      - alert: CodeGraphUnauthorizedAccess
        expr: |
          rate(http_requests_total{
            job="codegraph-api",
            status="401"
          }[5m]) > 0.1
        for: 2m
        labels:
          severity: warning
          component: security
        annotations:
          summary: "High rate of unauthorized access attempts"
          description: "{{ $value }} unauthorized requests per second for {{ $labels.instance }}"
          action: "Review authentication logs, check for brute force attacks, verify API key management"

      # More than 1 HTTP 4xx response per second for 5 minutes.
      - alert: CodeGraphSuspiciousActivity
        expr: |
          rate(http_requests_total{
            job="codegraph-api",
            status=~"4.."
          }[5m]) > 1.0
        for: 5m
        labels:
          severity: warning
          component: security
        annotations:
          summary: "High rate of client errors"
          description: "{{ $value }} client errors per second for {{ $labels.instance }}"
          action: "Review error patterns, check for potential attacks or misconfigurations"

  # ==========================================================================
  # Business Logic and Application-Specific
  # ==========================================================================
  - name: codegraph.business
    rules:
      # P95 code parsing duration above 1s for 10 minutes.
      - alert: CodeGraphParsingSlow
        expr: |
          histogram_quantile(0.95,
            rate(code_parsing_duration_seconds_bucket{job="codegraph-api"}[5m])
          ) > 1.0
        for: 10m
        labels:
          severity: warning
          component: parser
        annotations:
          summary: "Code parsing is slow"
          description: "95th percentile parsing time is {{ $value }}s for {{ $labels.instance }}"
          action: "Review parser efficiency, check file size limits, monitor language-specific performance"

      # P95 graph building duration above 2s for 15 minutes.
      - alert: CodeGraphGraphBuilding
        expr: |
          histogram_quantile(0.95,
            rate(graph_building_duration_seconds_bucket{job="codegraph-api"}[5m])
          ) > 2.0
        for: 15m
        labels:
          severity: warning
          component: graph
        annotations:
          summary: "Graph building is slow"
          description: "95th percentile graph building time is {{ $value }}s for {{ $labels.instance }}"
          action: "Monitor graph complexity, check dependency resolution, review incremental update logic"

      # Indexing queue gauge above 100 items for 10 minutes.
      - alert: CodeGraphIndexingBacklog
        expr: codegraph_indexing_queue_size{job="codegraph-api"} > 100
        for: 10m
        labels:
          severity: warning
          component: indexing
        annotations:
          summary: "Indexing queue backlog"
          description: "Indexing queue has {{ $value }} items pending for {{ $labels.instance }}"
          action: "Monitor indexing throughput, check for stuck jobs, review resource allocation"

  # ==========================================================================
  # Kubernetes and Infrastructure
  # ==========================================================================
  - name: codegraph.kubernetes
    rules:
      # Ready condition reported as "false" for a codegraph-api pod.
      - alert: CodeGraphPodNotReady
        expr: |
          kube_pod_status_ready{
            condition="false",
            pod=~"codegraph-api-.*"
          } == 1
        for: 5m
        labels:
          severity: warning
          component: kubernetes
        annotations:
          summary: "Pod not ready"
          description: "Pod {{ $labels.pod }} has been not ready for 5 minutes"
          action: "Check pod status, review readiness probe configuration, investigate resource constraints"

      # NOTE(review): same expression as CodeGraphDown (which fires after
      # 30s); both are severity critical, so an outage raises two alerts
      # unless one is inhibited or removed.
      - alert: CodeGraphServiceEndpointDown
        expr: |
          up{job="codegraph-api"} == 0
        for: 1m
        labels:
          severity: critical
          component: service
        annotations:
          summary: "Service endpoint is down"
          description: "CodeGraph service endpoint {{ $labels.instance }} is unreachable"
          action: "Immediate investigation - check service configuration, network connectivity, pod status"

      # Current replica count has reached the HPA's configured maximum.
      - alert: CodeGraphHPAMaxReplicas
        expr: |
          kube_horizontalpodautoscaler_status_current_replicas{
            horizontalpodautoscaler=~"codegraph-api.*"
          } >= kube_horizontalpodautoscaler_spec_max_replicas{
            horizontalpodautoscaler=~"codegraph-api.*"
          }
        for: 15m
        labels:
          severity: warning
          component: scaling
        annotations:
          summary: "HPA at maximum replicas"
          description: "HPA {{ $labels.horizontalpodautoscaler }} has reached maximum replicas"
          action: "Review scaling metrics, consider increasing max replicas or optimizing performance"

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/codegraph-rust'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.