KYC MCP Server

CTD-MCP
deploy
monitoring

alerts.yml•8.25 KiB

# Prometheus Alert Rules for KYC MCP Server
#
# One rule group covering host, Redis, application, container, TLS and
# process-level alerts. Every rule carries a `severity` label (warning or
# critical) and a `component` label naming the subsystem it watches.
groups:
  - name: kyc_mcp_server_alerts
    # Evaluate every rule in this group every 30 seconds.
    interval: 30s
    rules:
      # Instance down alert
      - alert: InstanceDown
        expr: up == 0
        for: 2m
        labels:
          severity: critical
          component: infrastructure
        annotations:
          summary: "Instance {{ $labels.instance }} is down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."

      # High CPU usage
      # rate() rather than irate(): irate only looks at the last two samples,
      # so a single quiet scrape can reset the `for` timer.
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
          component: system
        annotations:
          summary: "High CPU usage on {{ $labels.instance }}"
          description: "CPU usage is above 80% (current value: {{ $value }}%)"

      # Critical CPU usage
      # rate() rather than irate() for the same reason as HighCPUUsage.
      - alert: CriticalCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95
        for: 2m
        labels:
          severity: critical
          component: system
        annotations:
          summary: "Critical CPU usage on {{ $labels.instance }}"
          description: "CPU usage is above 95% (current value: {{ $value }}%)"

      # High memory usage
      - alert: HighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 80
        for: 5m
        labels:
          severity: warning
          component: system
        annotations:
          summary: "High memory usage on {{ $labels.instance }}"
          description: "Memory usage is above 80% (current value: {{ $value }}%)"

      # Critical memory usage
      - alert: CriticalMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 95
        for: 2m
        labels:
          severity: critical
          component: system
        annotations:
          summary: "Critical memory usage on {{ $labels.instance }}"
          description: "Memory usage is above 95% (current value: {{ $value }}%)"

      # High disk usage
      # Pseudo and read-only filesystem types are excluded via the fstype filter.
      - alert: HighDiskUsage
        expr: |-
          (
            node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs|squashfs|vfat"}
            - node_filesystem_avail_bytes{fstype!~"tmpfs|fuse.lxcfs|squashfs|vfat"}
          )
          / node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs|squashfs|vfat"}
          * 100 > 80
        for: 5m
        labels:
          severity: warning
          component: system
        annotations:
          summary: "High disk usage on {{ $labels.instance }}"
          description: "Disk usage on {{ $labels.mountpoint }} is above 80% (current value: {{ $value }}%)"

      # Critical disk usage
      - alert: CriticalDiskUsage
        expr: |-
          (
            node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs|squashfs|vfat"}
            - node_filesystem_avail_bytes{fstype!~"tmpfs|fuse.lxcfs|squashfs|vfat"}
          )
          / node_filesystem_size_bytes{fstype!~"tmpfs|fuse.lxcfs|squashfs|vfat"}
          * 100 > 90
        for: 2m
        labels:
          severity: critical
          component: system
        annotations:
          summary: "Critical disk usage on {{ $labels.instance }}"
          description: "Disk usage on {{ $labels.mountpoint }} is above 90% (current value: {{ $value }}%)"

      # High disk I/O
      # The per-second rate of I/O-time seconds is the fraction of time the
      # device was busy, so 0.8 means 80% busy.
      - alert: HighDiskIO
        expr: rate(node_disk_io_time_seconds_total[5m]) > 0.8
        for: 5m
        labels:
          severity: warning
          component: system
        annotations:
          summary: "High disk I/O on {{ $labels.instance }}"
          description: "Disk I/O time is high on {{ $labels.device }} (current value: {{ $value }})"

      # Redis down
      - alert: RedisDown
        expr: redis_up == 0
        for: 1m
        labels:
          severity: critical
          component: redis
        annotations:
          summary: "Redis is down on {{ $labels.instance }}"
          description: "Redis instance has been down for more than 1 minute."

      # High Redis memory usage
      # The `> 0` filter drops instances with no maxmemory configured
      # (reported as 0); dividing by 0 would yield +Inf and fire permanently.
      - alert: HighRedisMemoryUsage
        expr: redis_memory_used_bytes / (redis_memory_max_bytes > 0) * 100 > 80
        for: 5m
        labels:
          severity: warning
          component: redis
        annotations:
          summary: "High Redis memory usage on {{ $labels.instance }}"
          description: "Redis memory usage is above 80% (current value: {{ $value }}%)"

      # High number of rejected connections
      - alert: HighRejectedConnections
        expr: rate(redis_rejected_connections_total[5m]) > 0
        for: 2m
        labels:
          severity: warning
          component: redis
        annotations:
          summary: "Redis rejecting connections on {{ $labels.instance }}"
          description: "Redis is rejecting connections (rate: {{ $value }} per second)"

      # Application error rate
      # Both sides are aggregated to the same label set before dividing.
      # Without the sums, one-to-one vector matching would pair each 5xx
      # series only with itself and the ratio would always be 100%.
      - alert: HighErrorRate
        expr: |-
          sum by(instance, job) (rate(http_requests_total{status=~"5.."}[5m]))
          / sum by(instance, job) (rate(http_requests_total[5m]))
          * 100 > 5
        for: 5m
        labels:
          severity: warning
          component: application
        annotations:
          summary: "High error rate on {{ $labels.instance }}"
          description: "Error rate is above 5% (current value: {{ $value }}%)"

      # High response time
      - alert: HighResponseTime
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1
        for: 5m
        labels:
          severity: warning
          component: application
        annotations:
          summary: "High response time on {{ $labels.instance }}"
          description: "95th percentile response time is above 1 second (current value: {{ $value }}s)"

      # Rate limit exceeded frequently
      - alert: FrequentRateLimitExceeded
        expr: rate(rate_limit_exceeded_total[5m]) > 10
        for: 5m
        labels:
          severity: warning
          component: application
        annotations:
          summary: "Frequent rate limit exceeded on {{ $labels.instance }}"
          description: "Rate limit is being exceeded frequently (rate: {{ $value }} per second)"

      # Container restart
      # Detects a change in the container's start timestamp. The previous
      # expression took rate() of container_last_seen, a timestamp gauge that
      # rises on every scrape, so it was true for every running container.
      # NOTE(review): assumes cAdvisor's container_start_time_seconds is
      # scraped alongside the other container_* metrics - confirm.
      - alert: ContainerRestart
        expr: changes(container_start_time_seconds[5m]) > 0
        for: 1m
        labels:
          severity: warning
          component: docker
        annotations:
          summary: "Container {{ $labels.name }} restarted"
          description: "Container {{ $labels.name }} has restarted"

      # High container CPU usage
      - alert: HighContainerCPU
        expr: rate(container_cpu_usage_seconds_total[5m]) * 100 > 80
        for: 5m
        labels:
          severity: warning
          component: docker
        annotations:
          summary: "High CPU usage in container {{ $labels.name }}"
          description: "Container CPU usage is above 80% (current value: {{ $value }}%)"

      # High container memory usage
      # The `> 0` filter drops containers without a memory limit (reported as
      # 0); dividing by 0 would yield +Inf and fire permanently.
      - alert: HighContainerMemory
        expr: container_memory_usage_bytes / (container_spec_memory_limit_bytes > 0) * 100 > 80
        for: 5m
        labels:
          severity: warning
          component: docker
        annotations:
          summary: "High memory usage in container {{ $labels.name }}"
          description: "Container memory usage is above 80% (current value: {{ $value }}%)"

      # SSL certificate expiring soon
      # Seconds until expiry divided by 86400 gives days remaining.
      - alert: SSLCertificateExpiringSoon
        expr: (ssl_certificate_expiry_seconds - time()) / 86400 < 30
        for: 1h
        labels:
          severity: warning
          component: ssl
        annotations:
          summary: "SSL certificate expiring soon"
          description: "SSL certificate will expire in {{ $value }} days"

      # SSL certificate expired
      - alert: SSLCertificateExpired
        expr: ssl_certificate_expiry_seconds - time() < 0
        for: 1m
        labels:
          severity: critical
          component: ssl
        annotations:
          summary: "SSL certificate expired"
          description: "SSL certificate has expired"

      # Too many open file descriptors
      - alert: TooManyOpenFileDescriptors
        expr: process_open_fds / process_max_fds * 100 > 80
        for: 5m
        labels:
          severity: warning
          component: system
        annotations:
          summary: "Too many open file descriptors on {{ $labels.instance }}"
          description: "Open file descriptors usage is above 80% (current value: {{ $value }}%)"

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/CTD-Techs/CTD-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

alerts.yml•8.25 KiB