# Universal Crypto MCP - Alert Rules
# Critical alerts for payment processing and system health
#
# @author nirholas
# @license Apache-2.0
groups:
  # ═══════════════════════════════════════════════════════════════
  # Payment Alerts
  # ═══════════════════════════════════════════════════════════════
  - name: payments
    rules:
      # Gateway liveness: fires if the scrape target is down for 1m.
      - alert: PaymentProcessingDown
        expr: up{job="ucm-gateway"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Payment gateway is down"
          description: "UCM payment gateway has been unreachable for more than 1 minute"

      # Ratio of failed payments to total payments over a 5m window.
      - alert: PaymentFailureRateHigh
        expr: |
          sum(rate(ucm_payments_failed_total[5m]))
          / sum(rate(ucm_payments_total[5m])) > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High payment failure rate"
          description: "More than 10% of payments are failing"

      # Zero payment throughput: the 1h increase must stay at zero for
      # the full 2h "for" period before this fires.
      - alert: NoPaymentsReceived
        expr: sum(increase(ucm_payments_total[1h])) == 0
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: "No payments received in 2 hours"
          description: "The system hasn't processed any payments in the last 2 hours"

      # Informational: single payment above the $100 threshold.
      # No "for" clause — fires immediately on each matching sample.
      - alert: LargePaymentReceived
        expr: ucm_payment_amount > 100
        labels:
          severity: info
        annotations:
          summary: "Large payment received: ${{ $value }}"
          description: "A payment over $100 was received from {{ $labels.payer }}"
# ═══════════════════════════════════════════════════════════════
# Rate Limiting Alerts
# ═══════════════════════════════════════════════════════════════
- name: rate_limiting
rules:
- alert: HighRateLimitHits
expr: sum(rate(ucm_rate_limit_exceeded_total[5m])) > 100
for: 5m
labels:
severity: warning
annotations:
summary: "High rate limit violations"
description: "More than 100 rate limit violations per minute"
- alert: PotentialDDoS
expr: sum(rate(nginx_http_requests_total[1m])) > 10000
for: 2m
labels:
severity: critical
annotations:
summary: "Potential DDoS attack detected"
description: "Request rate exceeds 10,000/minute"
# ═══════════════════════════════════════════════════════════════
# Service Health Alerts
# ═══════════════════════════════════════════════════════════════
- name: service_health
rules:
- alert: MCPServerDown
expr: up{job="mcp-servers"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: "MCP server {{ $labels.instance }} is down"
description: "MCP server has been unreachable for more than 1 minute"
- alert: HighResponseTime
expr: |
histogram_quantile(0.95,
sum(rate(ucm_request_duration_seconds_bucket[5m])) by (le, route)
) > 2
for: 5m
labels:
severity: warning
annotations:
summary: "High response time on {{ $labels.route }}"
description: "95th percentile response time is over 2 seconds"
- alert: HighErrorRate
expr: |
sum(rate(ucm_requests_total{status=~"5.."}[5m]))
/ sum(rate(ucm_requests_total[5m])) > 0.05
for: 5m
labels:
severity: warning
annotations:
summary: "High error rate detected"
description: "More than 5% of requests are returning 5xx errors"
# ═══════════════════════════════════════════════════════════════
# Database Alerts
# ═══════════════════════════════════════════════════════════════
- name: database
rules:
- alert: PostgresDown
expr: pg_up == 0
for: 1m
labels:
severity: critical
annotations:
summary: "PostgreSQL is down"
description: "PostgreSQL database has been unreachable for more than 1 minute"
- alert: RedisDown
expr: redis_up == 0
for: 1m
labels:
severity: critical
annotations:
summary: "Redis is down"
description: "Redis cache has been unreachable for more than 1 minute"
- alert: DatabaseConnectionsHigh
expr: pg_stat_activity_count > 80
for: 5m
labels:
severity: warning
annotations:
summary: "High database connection count"
description: "PostgreSQL has more than 80 active connections"
- alert: RedisMemoryHigh
expr: redis_memory_used_bytes / redis_memory_max_bytes > 0.9
for: 5m
labels:
severity: warning
annotations:
summary: "Redis memory usage high"
description: "Redis is using more than 90% of available memory"
# ═══════════════════════════════════════════════════════════════
# Subscription Alerts
# ═══════════════════════════════════════════════════════════════
- name: subscriptions
rules:
- alert: SubscriptionExpiringSoon
expr: |
(ucm_subscription_expires_at - time()) < 86400 * 3
labels:
severity: info
annotations:
summary: "Subscription expiring soon"
description: "Subscription for {{ $labels.wallet }} expires in less than 3 days"
- alert: NewEnterpriseSubscription
expr: increase(ucm_subscriptions_total{tier="enterprise"}[1h]) > 0
labels:
severity: info
annotations:
summary: "New enterprise subscription! 🎉"
description: "A new enterprise subscription was created"
# ═══════════════════════════════════════════════════════════════
# Revenue Alerts
# ═══════════════════════════════════════════════════════════════
- name: revenue
rules:
- alert: DailyRevenueGoalMet
expr: sum(increase(ucm_revenue_total[24h])) > 1000
labels:
severity: info
annotations:
summary: "Daily revenue goal met! 💰"
description: "Revenue in the last 24 hours exceeded $1,000"
- alert: RevenueDrop
expr: |
sum(increase(ucm_revenue_total[24h]))
< sum(increase(ucm_revenue_total[24h] offset 1d)) * 0.5
for: 1h
labels:
severity: warning
annotations:
summary: "Significant revenue drop"
description: "Revenue is down more than 50% compared to yesterday"