#!/bin/bash
# PostgreSQL AI DBA Test Journey - Full 75-Minute Black Friday Simulation
# This script orchestrates the complete test scenario automatically
set -o pipefail
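# Note: set -e is not used so that individual workload failures do not abort the
# journey (see wait_for_workloads, which logs failures and continues).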
# Configuration
POSTGRES_HOST=${POSTGRES_HOST:-postgres}
POSTGRES_PORT=${POSTGRES_PORT:-5432}
POSTGRES_DB=${POSTGRES_DB:-postgres}
POSTGRES_USER=${POSTGRES_USER:-postgres}
POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-mysecretpassword}
# Colors for logging
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'
# Logging functions
log() {
echo -e "${BLUE}[$(date '+%H:%M:%S')] [INFO]${NC} $1"
}
log_phase() {
echo -e "${GREEN}[$(date '+%H:%M:%S')] PHASE $1:${NC} $2"
}
log_warning() {
echo -e "${YELLOW}[$(date '+%H:%M:%S')] [WARNING]${NC} $1"
}
log_error() {
echo -e "${RED}[$(date '+%H:%M:%S')] [ERROR]${NC} $1"
}
# Wait for PostgreSQL to be ready
wait_for_postgres() {
log "Waiting for PostgreSQL to be ready..."
for i in {1..60}; do
if PGPASSWORD="$POSTGRES_PASSWORD" psql -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -d "$POSTGRES_DB" -c "SELECT 1;" >/dev/null 2>&1; then
log "PostgreSQL is ready!"
return 0
fi
log " Attempt $i/60 - PostgreSQL not ready yet..."
sleep 5
done
log_error "PostgreSQL not ready after 5 minutes"
exit 1
}
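# A lighter-weight readiness probe, if the image ships the PostgreSQL client tools,
# is pg_isready (sketch only; assumes pg_isready is on PATH, not used by this script):
# pg_isready -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -t 5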
# Run pgbench workload in background
run_workload() {
local workload_file=$1
local clients=$2
local jobs=$3
local duration=$4
local description="$5"
log " Starting: $description"
log " File: $workload_file | Clients: $clients | Jobs: $jobs | Duration: ${duration}s"
# Check if workload file exists
if [[ ! -f "$workload_file" ]]; then
log_error "Workload file not found: $workload_file"
return 1
fi
# Test database connection first
if ! PGPASSWORD="$POSTGRES_PASSWORD" psql -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -d "$POSTGRES_DB" -c "SELECT 1;" >/dev/null 2>&1; then
log_error "Database connection failed for workload: $description"
return 1
fi
# The database name is passed as the trailing positional argument: before
# PostgreSQL 17, pgbench's -d flag meant --debug rather than --dbname.
PGPASSWORD="$POSTGRES_PASSWORD" pgbench \
-h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" \
-c "$clients" -j "$jobs" -T "$duration" \
-f "$workload_file" \
--progress=60 \
"$POSTGRES_DB" \
>/dev/null 2>&1 &
}
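# Usage sketch: run_workload launches pgbench in the background, so the caller
# must capture the PID from $! immediately after the call, e.g.:
# run_workload "normal_operations.sql" 10 2 600 "Regular shopping activity"
# pid1=$!
# Caveat: if run_workload returns early (missing file or failed connection test),
# $! still holds the PID of the previous background job, so the caller may end up
# waiting on the wrong process.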
# Wait for background processes
wait_for_workloads() {
local pids=("$@")
log " Waiting for ${#pids[@]} workload(s) to complete..."
local failed=0
for pid in "${pids[@]}"; do
wait "$pid"
local exit_code=$?
if [ "$exit_code" -eq 0 ]; then
log " Workload $pid completed successfully"
else
log_warning "Workload $pid failed with exit code $exit_code (continuing...)"
failed=$((failed + 1))
fi
done
if [ $failed -eq 0 ]; then
log " All workloads completed successfully"
else
log_warning "$failed workload(s) had issues, but continuing test journey"
fi
}
# Show progress during phases
show_progress() {
local phase_name="$1"
local duration=$2
local interval=30
local elapsed=0
while [ $elapsed -lt $duration ]; do
sleep $interval
elapsed=$((elapsed + interval))
local remaining=$((duration - elapsed))
log " $phase_name - ${elapsed}s elapsed, ${remaining}s remaining..."
done
}
# Main test journey
main() {
log "Starting PostgreSQL AI DBA Test Journey"
log "Scenario: ShopFast E-commerce Black Friday"
log "Duration: 60 minutes (3600 seconds)"
log "Goal: Generate realistic performance issues for AI analysis"
echo
wait_for_postgres
# Initialize pgbench tables (safe to re-run: -i recreates the standard pgbench_*
# tables; -s 1 keeps them small since the custom .sql workloads drive the real load)
log "Initializing pgbench tables..."
PGPASSWORD="$POSTGRES_PASSWORD" pgbench \
-h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" \
-i -s 1 "$POSTGRES_DB" >/dev/null 2>&1 || true
echo
log "Beginning Black Friday simulation..."
echo
# ============================================================================
# PHASE 1: Normal Operations (0-10 minutes)
# ============================================================================
log_phase "1" "Normal Operations (10 minutes)"
log "Expected: Baseline performance, healthy metrics"
run_workload "normal_operations.sql" 10 2 600 "Regular shopping activity"
pid1=$!
show_progress "Normal Operations" 600 &
progress_pid1=$!
wait_for_workloads $pid1
kill $progress_pid1 2>/dev/null || true
log "30-second buffer between phases..."
sleep 30
echo
# ============================================================================
# PHASE 2: Traffic Surge (10-20 minutes)
# ============================================================================
log_phase "2" "Traffic Surge - Black Friday Begins! (10 minutes)"
log "Expected: 3x traffic increase, slight performance degradation"
run_workload "heavy_browsing.sql" 30 4 600 "Heavy browsing traffic"
pid2a=$!
run_workload "product_searches.sql" 15 2 600 "Product search surge"
pid2b=$!
show_progress "Traffic Surge" 600 &
progress_pid2=$!
wait_for_workloads $pid2a $pid2b
kill $progress_pid2 2>/dev/null || true
log "30-second buffer between phases..."
sleep 30
echo
# ============================================================================
# PHASE 3: Lock Contention Storm (20-35 minutes)
# ============================================================================
log_phase "3" "Lock Contention Storm [HIGH LOAD] (15 minutes)"
log "Expected: Inventory deadlocks, long-running transactions"
log "This should trigger CRITICAL ALERTS in your AI agent!"
run_workload "inventory_updates.sql" 5 1 900 "Inventory management locks"
pid3a=$!
run_workload "purchase_attempts.sql" 20 4 900 "Competing purchase flows"
pid3b=$!
show_progress "Lock Contention Storm" 900 &
progress_pid3=$!
wait_for_workloads $pid3a $pid3b
kill $progress_pid3 2>/dev/null || true
log "30-second buffer between phases..."
sleep 30
echo
# ============================================================================
# PHASE 4: Query Performance Collapse (35-50 minutes)
# ============================================================================
log_phase "4" "Query Performance Collapse [CRITICAL] (15 minutes)"
log "Expected: Slow queries, sequential scans, cache misses"
log "AI should identify specific problematic queries"
run_workload "expensive_analytics.sql" 15 3 900 "Resource-intensive analytics"
pid4a=$!
run_workload "unoptimized_searches.sql" 10 2 900 "Unoptimized search patterns"
pid4b=$!
show_progress "Query Performance Collapse" 900 &
progress_pid4=$!
wait_for_workloads $pid4a $pid4b
kill $progress_pid4 2>/dev/null || true
log "30-second buffer between phases..."
sleep 30
echo
# ============================================================================
# PHASE 5: Connection Pool Exhaustion (50-65 minutes)
# ============================================================================
log_phase "5" "Connection Pool Exhaustion [CRITICAL] (15 minutes)"
log "Expected: Connection limit approached, idle connections"
log "This should trigger CRITICAL ALERTS - connection leak detected!"
run_workload "connection_leak.sql" 70 10 900 "Mobile app connection leak"
pid5=$!
show_progress "Connection Pool Exhaustion" 900 &
progress_pid5=$!
wait_for_workloads $pid5
kill $progress_pid5 2>/dev/null || true
log "30-second buffer between phases..."
sleep 30
echo
# ============================================================================
# PHASE 6: Recovery & Stabilization (65-75 minutes)
# ============================================================================
log_phase "6" "Recovery & Stabilization [RECOVERY] (10 minutes)"
log "Expected: Metrics return to normal, system stabilizes"
log "AI should detect recovery and provide post-incident analysis"
run_workload "recovery_load.sql" 15 3 600 "Gradual load reduction"
pid6=$!
show_progress "Recovery & Stabilization" 600 &
progress_pid6=$!
wait_for_workloads $pid6
kill $progress_pid6 2>/dev/null || true
echo
log "Black Friday simulation completed successfully!"
echo
log "What happened during the 60-minute journey:"
log " - Phase 1 (0-10m): Normal baseline operations"
log " - Phase 2 (10-20m): 3x traffic surge begins"
log " - Phase 3 (20-35m): Lock contention storm [HIGH LOAD]"
log " - Phase 4 (35-50m): Query performance collapse [CRITICAL]"
log " - Phase 5 (50-65m): Connection pool exhaustion [CRITICAL]"
log " - Phase 6 (65-75m): Recovery and stabilization [RECOVERY]"
echo
log "Now it's time for your AI DBA agent to analyze the data!"
log "Access Prometheus: http://localhost:9090"
log "Access Grafana: http://localhost:3000 (admin/admin123)"
log "Use prompts in: ./prompts/ directory"
echo
log "Key metrics to investigate:"
log " - pg_database_numbackends (connection count)"
log " - pg_locks_count (lock contention)"
log " - pg_stat_statements_mean_time (query performance)"
log " - pg_database_blks_hit / pg_database_blks_read (cache efficiency)"
echo
log "The load generator will now keep running with minimal background activity..."
# Keep container alive with minimal background load
while true; do
sleep 300 # 5 minutes
PGPASSWORD="$POSTGRES_PASSWORD" pgbench \
-h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" \
-c 2 -j 1 -T 60 \
-f "normal_operations.sql" \
"$POSTGRES_DB" \
>/dev/null 2>&1 || true
done
}
# Handle shutdown gracefully
cleanup() {
log "Shutting down load generator..."
# Kill any remaining background processes
jobs -p | xargs -r kill 2>/dev/null || true
exit 0
}
trap cleanup SIGTERM SIGINT
main "$@"