# version: "3.9" # The version key is obsolete in the Compose spec; podman-compose and Docker Compose v2+ both run this file without it
###############################################################################
# HOST SYSTEM TUNING FOR LOAD TESTING (run before docker compose up)
# See docs/docs/testing/performance.md for full details
#
# One-liner (TCP + VM + I/O tuning):
# sudo sysctl -w net.core.somaxconn=65535 net.core.netdev_max_backlog=65535 net.ipv4.tcp_max_syn_backlog=65535 net.ipv4.tcp_tw_reuse=1 net.ipv4.tcp_fin_timeout=15 net.ipv4.ip_local_port_range="1024 65535" vm.swappiness=10 fs.aio-max-nr=1048576
#
# Make persistent: sudo tee /etc/sysctl.d/99-mcp-loadtest.conf (see docs)
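#
# Persistence sketch (same values as the one-liner above; verify against the docs):
#   sudo tee /etc/sysctl.d/99-mcp-loadtest.conf <<'EOF'
#   net.core.somaxconn=65535
#   net.core.netdev_max_backlog=65535
#   net.ipv4.tcp_max_syn_backlog=65535
#   net.ipv4.tcp_tw_reuse=1
#   net.ipv4.tcp_fin_timeout=15
#   net.ipv4.ip_local_port_range=1024 65535
#   vm.swappiness=10
#   fs.aio-max-nr=1048576
#   EOF
#   sudo sysctl --system   # reload persistent settings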
###############################################################################
###############################################################################
# NETWORKS + VOLUMES - declared first so they can be referenced later
###############################################################################
networks:
mcpnet: # Single user-defined bridge network keeps traffic private
driver: bridge
volumes: # Named volumes survive podman-compose down/up
pgdata:
# pgdata18: # Enable for postgres 18+
mariadbdata:
mysqldata:
mongodata:
pgadmindata:
redisinsight_data:
nginx_cache:
grafanadata:
prometheusdata:
lokidata:
###############################################################################
# CORE SERVICE - MCP Gateway
###############################################################################
services:
# ──────────────────────────────────────────────────────────────────────
# Nginx Caching Proxy - High-performance reverse proxy with CDN-like caching
# ──────────────────────────────────────────────────────────────────────
nginx:
build:
context: ./infra/nginx
dockerfile: Dockerfile
image: mcpgateway/nginx-cache:latest
restart: unless-stopped
ports:
- "8080:80" # HTTP caching proxy (public-facing)
networks: [mcpnet]
depends_on:
gateway:
condition: service_healthy
volumes:
- nginx_cache:/var/cache/nginx # Persistent cache storage
- ./infra/nginx/nginx.conf:/etc/nginx/nginx.conf:ro # Mount config as read-only
# TCP kernel tuning for 3000 concurrent connections
# Note: most net.core.* sysctls are host-level and cannot be set per-container
# (net.core.somaxconn is the namespaced exception); net.ipv4.* sysctls that are
# network-namespace aware work here
sysctls:
- net.ipv4.tcp_fin_timeout=15 # Faster cleanup of FIN_WAIT2 sockets
- net.ipv4.ip_local_port_range=1024 65535 # More ephemeral ports for upstream
ulimits:
nofile:
soft: 65535
hard: 65535
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
deploy:
resources:
limits:
cpus: '4'
memory: 1G
reservations:
cpus: '2'
memory: 512M
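# Once the stack is up, a quick host-side smoke test of the caching proxy
# (a sketch; assumes /health is reachable through nginx, as the healthcheck above implies):
#   curl -i http://localhost:8080/health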
# ──────────────────────────────────────────────────────────────────────
# MCP Gateway - the main API server for the MCP stack
# ──────────────────────────────────────────────────────────────────────
gateway:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest} # Use the local latest image. Run `make docker-prod` to build it.
#image: ghcr.io/ibm/mcp-context-forge:1.0.0-BETA-1 # Use the release MCP Context Forge image
#image: ghcr.io/ibm/mcp-context-forge:0.7.0 # Testing migration from 0.7.0
build:
context: .
dockerfile: Containerfile.lite # Same one the Makefile builds
restart: unless-stopped
# NOTE: When using replicas > 1, access via nginx:8080 instead of direct port 4444
# ports:
# - "4444:4444" # Disabled for multi-replica mode
networks: [mcpnet]
# ──────────────────────────────────────────────────────────────────────
# Environment - pick ONE database URL line, comment the rest
# ──────────────────────────────────────────────────────────────────────
environment:
# ═══════════════════════════════════════════════════════════════════════════
# HTTP Server Selection: gunicorn vs granian
# ═══════════════════════════════════════════════════════════════════════════
# Performance comparison (2500 concurrent users, PostgreSQL backend):
# Gunicorn: ~2.7GB RAM, ~740% CPU, no backpressure (queues unbounded)
# Granian: ~4.0GB RAM, ~680% CPU, native backpressure (rejects excess with 503)
#
# Choose Gunicorn for: memory-constrained environments (32% less RAM)
# Choose Granian for: load spike protection, bursty traffic (graceful degradation)
# Both achieve the same RPS when the database is the bottleneck.
# ═══════════════════════════════════════════════════════════════════════════
- HTTP_SERVER=granian # Rust-based, native backpressure, +47% memory, -8% CPU
# - HTTP_SERVER=gunicorn # Python-based, battle-tested, lower memory usage
- HOST=0.0.0.0
- PORT=4444
# Transport: sse, streamablehttp, http, or all (default: all)
- TRANSPORT_TYPE=streamablehttp
# Database connection: Via PgBouncer (default) or direct PostgreSQL
# PgBouncer provides connection pooling for better performance under high concurrency
- DATABASE_URL=postgresql+psycopg://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@pgbouncer:6432/mcp
# Direct PostgreSQL connection (bypass PgBouncer - increase DB_POOL_SIZE if using):
# - DATABASE_URL=postgresql+psycopg://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/mcp
# SQLAlchemy query logging (useful for N+1 detection; noisy under load)
# NOTE: SQLALCHEMY_ECHO logs at INFO; set LOG_LEVEL=INFO/DEBUG to see output.
- SQLALCHEMY_ECHO=false
# - DATABASE_URL=mysql+pymysql://mysql:${MYSQL_PASSWORD:-changeme}@mariadb:3306/mcp
# - DATABASE_URL=mysql+pymysql://admin:${MARIADB_PASSWORD:-changeme}@mariadb:3306/mcp
# - DATABASE_URL=mongodb://admin:${MONGO_PASSWORD:-changeme}@mongodb:27017/mcp
- CACHE_TYPE=redis # backend for caching (memory, redis, database, or none)
- REDIS_URL=redis://redis:6379/0
# Redis parser: hiredis (C extension ~83x faster for large responses)
- REDIS_PARSER=hiredis
# Redis connection pool tuning for load testing (per-worker cap: 3 replicas × 16 workers × 150 = 7200 < redis maxclients 10000)
- REDIS_MAX_CONNECTIONS=150
- REDIS_SOCKET_TIMEOUT=5.0
- REDIS_SOCKET_CONNECT_TIMEOUT=5.0
- REDIS_HEALTH_CHECK_INTERVAL=30
# ═══════════════════════════════════════════════════════════════════════════
# Redis Startup Resilience (prevents crash-loop on Redis outage)
# ═══════════════════════════════════════════════════════════════════════════
# With exponential backoff: 2s, 4s, 8s, 16s, 30s (capped), 30s...
# 30 retries = ~5 minutes total wait before worker gives up
- REDIS_MAX_RETRIES=30 # Max attempts before worker exits (default: 30)
- REDIS_RETRY_INTERVAL_MS=2000 # Base interval, grows exponentially with jitter
- REDIS_MAX_BACKOFF_SECONDS=30 # Max backoff cap (jitter ±25% applied after)
# Auth Cache Configuration (reduces DB queries per auth request from 3-4 to 0-1)
- AUTH_CACHE_ENABLED=${AUTH_CACHE_ENABLED:-true}
- AUTH_CACHE_USER_TTL=300
- AUTH_CACHE_REVOCATION_TTL=120
- AUTH_CACHE_TEAM_TTL=300
- AUTH_CACHE_ROLE_TTL=300
- AUTH_CACHE_BATCH_QUERIES=true
- AUTH_CACHE_TEAMS_TTL=300
# Registry Cache Configuration (reduces DB queries for list endpoints)
- REGISTRY_CACHE_ENABLED=true
- REGISTRY_CACHE_TOOLS_TTL=300
- REGISTRY_CACHE_PROMPTS_TTL=300
- REGISTRY_CACHE_RESOURCES_TTL=300
- REGISTRY_CACHE_AGENTS_TTL=300
- REGISTRY_CACHE_SERVERS_TTL=300
- REGISTRY_CACHE_GATEWAYS_TTL=300
- REGISTRY_CACHE_CATALOG_TTL=300
# Admin Stats Cache Configuration (reduces aggregate queries for dashboard)
- ADMIN_STATS_CACHE_ENABLED=true
- ADMIN_STATS_CACHE_SYSTEM_TTL=60
- ADMIN_STATS_CACHE_OBSERVABILITY_TTL=30
- ADMIN_STATS_CACHE_TAGS_TTL=120
- ADMIN_STATS_CACHE_PLUGINS_TTL=120
- ADMIN_STATS_CACHE_PERFORMANCE_TTL=60
# Team member count cache (reduces N+1 queries)
- TEAM_MEMBER_COUNT_CACHE_ENABLED=true
- TEAM_MEMBER_COUNT_CACHE_TTL=300
# Metrics aggregation cache (reduces full table scans, see #1906)
- METRICS_CACHE_ENABLED=true
- METRICS_CACHE_TTL_SECONDS=120
# MCP Server Health Check
# Interval in seconds between health checks (default: 300)
- HEALTH_CHECK_INTERVAL=300
# Timeout in seconds for each health check request (default: 5)
- HEALTH_CHECK_TIMEOUT=5
# Consecutive failures before marking gateway offline (default: 3)
- UNHEALTHY_THRESHOLD=3
# Gateway URL validation timeout in seconds (default: 5)
- GATEWAY_VALIDATION_TIMEOUT=5
# Max concurrent health checks per worker (default: 10)
- MAX_CONCURRENT_HEALTH_CHECKS=10
# JWT Configuration - Choose ONE approach:
# Option 1: HMAC (Default - Simple deployments)
- JWT_ALGORITHM=HS256
- JWT_SECRET_KEY=my-test-key
# Option 2: RSA (Production - Asymmetric, uncomment and generate certs)
# - JWT_ALGORITHM=RS256
# - JWT_PUBLIC_KEY_PATH=/app/certs/jwt/public.pem
# - JWT_PRIVATE_KEY_PATH=/app/certs/jwt/private.pem
- JWT_AUDIENCE=mcpgateway-api
- JWT_ISSUER=mcpgateway
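# Example: mint a test token against the HMAC secret above (the same command the
# register_* services below use):
#   python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256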
- EMAIL_AUTH_ENABLED=true
- PLATFORM_ADMIN_EMAIL=admin@example.com
- PLATFORM_ADMIN_PASSWORD=changeme
- REQUIRE_TOKEN_EXPIRATION=false
- REQUIRE_JTI=${REQUIRE_JTI:-false}
- REQUIRE_USER_IN_DB=${REQUIRE_USER_IN_DB:-false}
- MCPGATEWAY_UI_ENABLED=true
- MCPGATEWAY_ADMIN_API_ENABLED=true
# Security configuration (using defaults)
- ENVIRONMENT=development
- SECURITY_HEADERS_ENABLED=true
- CORS_ALLOW_CREDENTIALS=true
- SECURE_COOKIES=false
## Uncomment to enable HTTPS
# - SSL=true
# - CERT_FILE=/app/certs/cert.pem
# - KEY_FILE=/app/certs/key.pem
# - KEY_FILE_PASSWORD=${KEY_FILE_PASSWORD} # Optional: Set in .env for passphrase-protected keys
# Uncomment to enable plugins
- PLUGINS_ENABLED=false
# Uncomment to enable catalog
- MCPGATEWAY_CATALOG_ENABLED=true
- MCPGATEWAY_CATALOG_FILE=/app/mcp-catalog.yml
# Authentication configuration
- AUTH_REQUIRED=true
- MCP_CLIENT_AUTH_ENABLED=true
- TRUST_PROXY_AUTH=false
# Logging configuration
# NOTE: LOG_LEVEL=INFO/DEBUG is required for SQLALCHEMY_ECHO output.
- LOG_LEVEL=${LOG_LEVEL:-ERROR} # Default ERROR to keep load-test logging cheap; raise to INFO/DEBUG to see SQLALCHEMY_ECHO output
- DISABLE_ACCESS_LOG=true # Disable uvicorn access logs for performance (massive I/O overhead)
# Template auto-reload disabled for performance (prevents re-parsing templates on each request)
- TEMPLATES_AUTO_RELOAD=false
- STRUCTURED_LOGGING_DATABASE_ENABLED=false # Disable DB logging for performance (use true only for debugging)
# Audit trail logging - disabled by default for performance
# WARNING: Causes a DB write on EVERY API request - can generate millions of rows during load testing!
- AUDIT_TRAIL_ENABLED=false # Set to true for compliance requirements (SOC2, HIPAA, etc.)
# Security event logging - disabled by default for performance
# WARNING: "all" level logs every request and causes massive DB write load
- SECURITY_LOGGING_ENABLED=false # Set to true to enable security event logging
- SECURITY_LOGGING_LEVEL=failures_only # Options: all, failures_only, high_severity
# Performance optimizations - disable CPU-intensive middlewares
# NOTE: Compression is disabled here because nginx in front already compresses responses.
# If you run the gateway without nginx, keep compression enabled - larger payloads otherwise cut throughput.
- COMPRESSION_ENABLED=false
# Optional middlewares - validation stays enabled; correlation ID and observability are disabled for maximum throughput
- VALIDATION_MIDDLEWARE_ENABLED=true
- CORRELATION_ID_ENABLED=false
- OBSERVABILITY_ENABLED=false
# ═══════════════════════════════════════════════════════════════════════════
# Database Connection Pool Configuration
# ═══════════════════════════════════════════════════════════════════════════
# Pool class options:
# - "null": NullPool - no application pooling, PgBouncer handles all pooling (recommended)
# - "queue": QueuePool - application-side pooling (use with direct PostgreSQL)
# - "auto": Automatic - NullPool if PgBouncer detected in URL, else QueuePool
#
# WITH PgBouncer (default in docker-compose):
# Option A: NullPool - safest, eliminates stale connection errors, ~10% slower
# - DB_POOL_CLASS=null
# Option B: QueuePool + pre_ping - better performance, validates before use
- DB_POOL_CLASS=queue
- DB_POOL_PRE_PING=true # Validate connections before use (SELECT 1)
- DB_POOL_SIZE=20 # Pool size per worker
- DB_MAX_OVERFLOW=10 # Extra connections under load
- DB_POOL_TIMEOUT=60 # Time to wait for connection before failing
- DB_POOL_RECYCLE=60 # Recycle well before PgBouncer's CLIENT_IDLE_TIMEOUT (300s, set below)
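# Sizing check with the values in this file: 3 replicas × 16 workers × (20 pool + 10 overflow)
# = 1440 client connections maximum, comfortably under PgBouncer MAX_CLIENT_CONN=5000 below.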
# ═══════════════════════════════════════════════════════════════════════════
# Database Startup Resilience (prevents crash-loop on DB outage)
# ═══════════════════════════════════════════════════════════════════════════
# With exponential backoff: 2s, 4s, 8s, 16s, 30s (capped), 30s...
# 30 retries = ~5 minutes total wait before worker gives up
- DB_MAX_RETRIES=30 # Max attempts before worker exits (default: 30)
- DB_RETRY_INTERVAL_MS=2000 # Base interval, grows exponentially with jitter
- DB_MAX_BACKOFF_SECONDS=30 # Max backoff cap (jitter ±25% applied after)
# Tool configuration for high-concurrency load testing
- TOOL_TIMEOUT=60 # Seconds before tool invocation times out
- MAX_TOOL_RETRIES=3 # Retry attempts for failed tool invocations
- TOOL_RATE_LIMIT=60000 # Max tool invocations per minute
- TOOL_CONCURRENT_LIMIT=1000 # Max concurrent tool invocations
- FEDERATION_TIMEOUT=30
# ═══════════════════════════════════════════════════════════════════════════
# HTTPX Client Connection Pool Configuration
# ═══════════════════════════════════════════════════════════════════════════
# Shared HTTP client for all outbound requests (federation, health checks,
# A2A, SSO, catalog). Provides ~20x better performance than per-request clients.
- HTTPX_MAX_CONNECTIONS=200 # Total connections in pool (default: 200)
- HTTPX_MAX_KEEPALIVE_CONNECTIONS=100 # Keepalive connections (default: 100)
- HTTPX_KEEPALIVE_EXPIRY=30.0 # Idle connection expiry (seconds)
- HTTPX_CONNECT_TIMEOUT=5.0 # TCP connection timeout (seconds)
- HTTPX_READ_TIMEOUT=120.0 # Response read timeout (seconds, high for slow tools)
- HTTPX_WRITE_TIMEOUT=30.0 # Request write timeout (seconds)
- HTTPX_POOL_TIMEOUT=10.0 # Wait for available connection (seconds)
- HTTPX_HTTP2_ENABLED=false # HTTP/2 support (requires server support)
- HTTPX_ADMIN_READ_TIMEOUT=30.0 # Admin UI/health check timeout (seconds)
# Worker and server tuning for high-concurrency load testing
- GUNICORN_WORKERS=16
# ═══════════════════════════════════════════════════════════════════════════
# Granian Backpressure Configuration (used when HTTP_SERVER=granian)
# ═══════════════════════════════════════════════════════════════════════════
# Backpressure provides overload protection by rejecting excess requests with
# immediate 503 responses instead of queuing them (which can cause OOM/timeouts).
# Per-replica capacity = GRANIAN_WORKERS × GRANIAN_BACKPRESSURE = 16 × 128 = 2048 concurrent (×3 replicas behind nginx)
# Requests beyond this limit receive immediate 503 (no queuing, no OOM)
- GRANIAN_WORKERS=16
- GRANIAN_BACKLOG=4096
- GRANIAN_BACKPRESSURE=128
- GRANIAN_HTTP1_BUFFER_SIZE=524288
- GRANIAN_RESPAWN_FAILED=true
# HTTP/2: Granian supports native HTTP/2 multiplexing, but not useful here because:
# - nginx sits in front and downgrades to HTTP/1.1 for upstream connections
# - nginx open-source doesn't support HTTP/2 to backends (only nginx Plus does)
# - Internal Docker network is fast enough that HTTP/2 gains are negligible
# To use HTTP/2, either bypass nginx or use Granian with TLS directly.
# - GRANIAN_HTTP=2
# ═══════════════════════════════════════════════════════════════════════════
# MCP Session Pool Configuration
# ═══════════════════════════════════════════════════════════════════════════
# Session pooling for MCP ClientSessions reduces per-request overhead from
# ~20ms to ~1-2ms (10-20x improvement). Sessions are isolated per user/tenant
# via identity hashing to prevent cross-user session sharing.
- MCP_SESSION_POOL_ENABLED=true # Enable session pooling (default: false, enabled for docker-compose)
- MCP_SESSION_POOL_MAX_PER_KEY=50 # Max sessions per (URL, identity, transport)
- MCP_SESSION_POOL_TTL=300.0 # Session TTL in seconds (default: 300)
- MCP_SESSION_POOL_HEALTH_CHECK_INTERVAL=60.0 # Idle time before health check (default: 60)
- MCP_SESSION_POOL_ACQUIRE_TIMEOUT=60.0 # Timeout waiting for session slot (default: 30)
- MCP_SESSION_POOL_CREATE_TIMEOUT=30.0 # Timeout creating new session (default: 30)
- MCP_SESSION_POOL_CIRCUIT_BREAKER_THRESHOLD=5 # Failures before circuit opens
- MCP_SESSION_POOL_CIRCUIT_BREAKER_RESET=60.0 # Seconds before circuit resets
- MCP_SESSION_POOL_IDLE_EVICTION=600.0 # Evict idle pool keys after (default: 600)
- MCP_SESSION_POOL_TRANSPORT_TIMEOUT=30.0 # Timeout for all HTTP operations (default: 30)
- MCP_SESSION_POOL_EXPLICIT_HEALTH_RPC=false # Force RPC on health checks (default: false)
# Configurable health check chain - ordered list of methods to try (JSON array)
# Options: ping, list_tools, list_prompts, list_resources, skip
# - MCP_SESSION_POOL_HEALTH_CHECK_METHODS=["ping", "skip"] # Try ping, skip if unsupported
- MCP_SESSION_POOL_HEALTH_CHECK_METHODS=["skip"] # skip, highest performance
- MCP_SESSION_POOL_HEALTH_CHECK_TIMEOUT=5.0 # Timeout per health check attempt
# ═══════════════════════════════════════════════════════════════════════════
# Execution Metrics Recording
# ═══════════════════════════════════════════════════════════════════════════
# Controls tool/resource/prompt/server/A2A execution metrics (one DB row per operation).
# Disable when using external observability to improve performance.
# Set to true if you need per-operation metrics in the database.
# Note: Does NOT affect log aggregation (METRICS_AGGREGATION_ENABLED) or Prometheus.
- DB_METRICS_RECORDING_ENABLED=false
# ═══════════════════════════════════════════════════════════════════════════
# Metrics Configuration
# ═══════════════════════════════════════════════════════════════════════════
# Raw metrics are deleted after hourly rollups exist (default: 1 hour retention).
# Rollups preserve all analytics (counts, p50/p95/p99) for 365 days.
#
# If using external observability (ELK, Datadog, Splunk), raw metrics are
# redundant - your external platform handles debugging and audit trails.
#
# Configurable settings (uncomment to override defaults):
# - METRICS_DELETE_RAW_AFTER_ROLLUP=true # Delete raw after rollup (default)
# - METRICS_DELETE_RAW_AFTER_ROLLUP_HOURS=1 # Raw retention when rollup exists
# - METRICS_CLEANUP_INTERVAL_HOURS=1 # Cleanup frequency (default: hourly)
# - METRICS_RETENTION_DAYS=7 # Fallback retention (rollup disabled)
#
# For debugging without external observability, increase raw retention:
# - METRICS_DELETE_RAW_AFTER_ROLLUP_HOURS=168 # Keep raw data 7 days
# Phoenix Observability Integration (uncomment when using Phoenix)
# - PHOENIX_ENDPOINT=${PHOENIX_ENDPOINT:-http://phoenix:6006}
# - OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT:-http://phoenix:4317}
# - OTEL_SERVICE_NAME=${OTEL_SERVICE_NAME:-mcp-gateway}
# - OTEL_TRACES_EXPORTER=${OTEL_TRACES_EXPORTER:-otlp}
# - OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-otlp}
# - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES:-deployment.environment=docker,service.namespace=mcp}
# TCP kernel tuning for high-concurrency MCP tool invocations
# Each tool call creates a new connection → many TIME_WAIT sockets
sysctls:
- net.ipv4.tcp_fin_timeout=15 # Faster cleanup of FIN_WAIT2 sockets (default: 60)
- net.ipv4.ip_local_port_range=1024 65535 # More ephemeral ports (default: 32768-60999)
ulimits:
nofile:
soft: 65535
hard: 65535
depends_on: # Default stack: PgBouncer + Redis (PgBouncer depends on Postgres)
pgbouncer:
condition: service_healthy # ▶ wait for connection pooler
redis:
condition: service_started
# Direct PostgreSQL (uncomment if bypassing PgBouncer):
# postgres:
# condition: service_healthy
# migration:
# condition: service_completed_successfully
healthcheck:
## HTTP healthcheck (enabled by default)
test: ["CMD", "python3", "-c", "import urllib.request; import json; resp = urllib.request.urlopen('http://localhost:4444/health', timeout=5); data = json.loads(resp.read()); exit(0 if data.get('status') == 'healthy' else 1)"]
## Uncomment for HTTPS healthcheck
# test: ["CMD", "curl", "-f", "https://localhost:4444/health"]
## Uncomment to skip SSL validation (self-signed certs)
# test: ["CMD", "curl", "-fk", "https://localhost:4444/health"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
# Scaling options:
# - Single instance: use port 4444 directly, replicas: 1
# - Multi-instance: comment out ports, set replicas: 2+, access via nginx:8080
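# Example (assumes Compose v2; --scale overrides the replicas value set below):
#   docker compose up -d --scale gateway=5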
# ──────────────────────────────────────────────────────────────────────
# Server Engine Selection (Default: Granian - Rust-based HTTP server)
# ──────────────────────────────────────────────────────────────────────
# Default is Granian. For Gunicorn with Uvicorn workers:
# command: ["./run-gunicorn.sh"]
deploy:
mode: replicated
replicas: 3
resources:
limits:
cpus: '8'
memory: 8G
reservations:
cpus: '4'
memory: 4G
# ──────────────────────────────────────────────────────────────────────
# Volume Mounts
# ──────────────────────────────────────────────────────────────────────
# Uncomment to mount catalog configuration and SSL certificates
# volumes:
# - ./mcp-catalog.yml:/app/mcp-catalog.yml:ro # mount catalog configuration
# - ./certs:/app/mcpgateway/certs:ro # mount certs folder read-only (includes both SSL and JWT keys)
#
# SSL/TLS Certificate Setup:
# 1. Generate certificates:
# - Without passphrase: make certs
# - With passphrase: make certs-passphrase
# 2. Uncomment the volumes mount above
# 3. Set SSL environment variables
# 4. If using passphrase-protected key, set KEY_FILE_PASSWORD in .env file
#
# For JWT asymmetric keys:
# 1. Generate keys: make certs-jwt
# 2. Uncomment volumes mount above
# 3. Switch JWT_ALGORITHM to RS256 and uncomment JWT_*_KEY_PATH variables
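# If you prefer raw openssl over `make certs-jwt`, a minimal sketch (output paths are
# an assumption - match them to the JWT_*_KEY_PATH values you mount):
#   mkdir -p certs/jwt
#   openssl genrsa -out certs/jwt/private.pem 4096
#   openssl rsa -in certs/jwt/private.pem -pubout -out certs/jwt/public.pem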
###############################################################################
# DATABASES - enable ONE of these blocks and adjust DATABASE_URL
###############################################################################
postgres:
image: postgres:18
ulimits:
nofile:
soft: 8192
hard: 8192
ports:
- "5433:5432" # Expose for baseline load testing (5433 to avoid conflict with local postgres)
# Performance tuning for high-load testing (3000 sustained users)
# WITH PgBouncer (default): 800 connections provides headroom for 700 pool + system overhead
# DIRECT connection mode: increase to ~4000 to cover 3 replicas × 16 workers × ~80 pool (raise DB_POOL_SIZE/DB_MAX_OVERFLOW accordingly)
command:
- "postgres"
- "-c"
- "max_connections=800" # Must exceed PgBouncer MAX_DB_CONNECTIONS (700) + overhead
- "-c"
- "shared_buffers=512MB"
- "-c"
- "work_mem=16MB"
- "-c"
- "effective_cache_size=1536MB"
- "-c"
- "maintenance_work_mem=128MB"
- "-c"
- "checkpoint_completion_target=0.9"
- "-c"
- "wal_buffers=16MB"
- "-c"
- "random_page_cost=1.1"
- "-c"
- "effective_io_concurrency=200"
- "-c"
- "max_worker_processes=8" # Total background workers (must be >= max_parallel_workers)
- "-c"
- "max_parallel_workers_per_gather=4" # Max workers per query's parallel operation
- "-c"
- "max_parallel_workers=8" # Total parallel workers available system-wide
# === HIGH-CONCURRENCY TUNING (3000 users) ===
# CRITICAL: idle_in_transaction_session_timeout prevents connection starvation
# Application code now properly closes transactions via get_db() commit-on-success pattern
# This timeout is a safety net for any edge cases
- "-c"
- "idle_in_transaction_session_timeout=300s" # Kill stuck transactions after 300s (aligned with PgBouncer)
- "-c"
- "statement_timeout=120s" # Kill runaway queries after 120s
- "-c"
- "synchronous_commit=off" # Async WAL writes (2-10x faster commits)
- "-c"
- "commit_delay=100" # Batch commits within 100μs window
# ═══════════════════════════════════════════════════════════════════════════
# AUTOVACUUM TUNING - High-insert workloads (metrics tables)
# ═══════════════════════════════════════════════════════════════════════════
# High insert rates cause dead tuple accumulation. These settings help
# PostgreSQL keep up with table bloat from metrics writes.
# Uncomment if experiencing performance degradation under sustained load:
# - "-c"
# - "autovacuum_naptime=30s" # Check more frequently (default: 60s)
# - "-c"
# - "autovacuum_vacuum_scale_factor=0.05" # Vacuum at 5% dead tuples (default: 0.2)
# - "-c"
# - "autovacuum_vacuum_cost_limit=1000" # More vacuum work per cycle (default: 200)
# === PG_STAT_STATEMENTS + AUTO_EXPLAIN ===
# Query performance tracking and slow query plan logging
# NOTE: Both extensions must be in the SAME shared_preload_libraries line!
# After enabling, run in psql:
# CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
# SELECT * FROM pg_stat_statements ORDER BY total_exec_time DESC LIMIT 10;
# - "-c"
# - "shared_preload_libraries=pg_stat_statements"
# - "shared_preload_libraries=pg_stat_statements,auto_explain" # Use this line to enable both
# - "-c"
# - "pg_stat_statements.track=all"
# - "-c"
#- "pg_stat_statements.max=10000"
# AUTO_EXPLAIN settings (uncomment if using combined shared_preload_libraries above)
# - "-c"
# - "auto_explain.log_min_duration=1000"
# - "-c"
# - "auto_explain.log_analyze=on"
# === ROLLBACK DEBUGGING (disabled for performance) ===
# - "-c"
# - "log_min_error_statement=error"
# - "-c"
# - "log_min_messages=warning"
# - "-c"
# - "log_error_verbosity=verbose"
# - "-c"
# - "log_line_prefix=%t [%p]: user=%u,db=%d,app=%a,client=%h "
# - "-c"
# - "log_lock_waits=on"
# - "-c"
# - "deadlock_timeout=1s"
# - "-c"
# - "log_temp_files=0"
# - "-c"
# - "log_checkpoints=on"
# - "-c"
# - "log_connections=on"
# - "-c"
# - "log_disconnections=on"
# - "-c"
# - "idle_in_transaction_session_timeout=60s"
environment:
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=mysecretpassword
- POSTGRES_DB=mcp
volumes:
# - pgdata:/var/lib/postgresql/data # mount point for postgres 17 and earlier
- pgdata:/var/lib/postgresql # mount point for the postgres 18+ image layout
networks: [mcpnet]
healthcheck:
test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER"]
interval: 30s
timeout: 5s
retries: 5
start_period: 20s
deploy:
resources:
limits:
cpus: '4'
memory: 8G
reservations:
cpus: '2'
memory: 2G
# ──────────────────────────────────────────────────────────────────────
# PgBouncer - Connection Pooler for PostgreSQL
# Reduces connection overhead, improves throughput under high concurrency.
# Enable by switching gateway DATABASE_URL to use pgbouncer:6432 instead of postgres:5432
# ──────────────────────────────────────────────────────────────────────
pgbouncer:
image: edoburu/pgbouncer:latest
restart: unless-stopped
networks: [mcpnet]
ulimits:
nofile:
soft: 65536
hard: 65536
ports:
- "6432:6432" # PgBouncer port (optional external access)
environment:
# Connection to upstream PostgreSQL
- DATABASE_URL=postgres://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/mcp
# PgBouncer listen port (default would be 5432, using 6432 to distinguish from PostgreSQL)
- LISTEN_PORT=6432
# Pool mode: transaction (recommended), session, or statement
# transaction: connection returned after each transaction (best for web apps)
- POOL_MODE=transaction
# ═══════════════════════════════════════════════════════════════════════════
# Connection Pool Tuning for 3000 Sustained Users
# PgBouncer handles connection multiplexing - many app connections share fewer DB connections
# ═══════════════════════════════════════════════════════════════════════════
# Client-side limits (from gateway workers via SQLAlchemy)
- MAX_CLIENT_CONN=5000 # Max app connections; must exceed (replicas × workers × pool)
- DEFAULT_POOL_SIZE=600 # Shared DB connections; sized for ~70 concurrent tx × 8x headroom
- MIN_POOL_SIZE=100 # Pre-warmed connections for instant response to load spikes
- RESERVE_POOL_SIZE=150 # Emergency pool for burst traffic beyond DEFAULT_POOL_SIZE
- RESERVE_POOL_TIMEOUT=2 # Seconds before tapping reserve pool
# Server-side limits (to PostgreSQL)
- MAX_DB_CONNECTIONS=700 # Max connections to PostgreSQL; must be < PG max_connections
- MAX_USER_CONNECTIONS=700 # Per-user limit; typically equals MAX_DB_CONNECTIONS
# Connection lifecycle
- SERVER_LIFETIME=3600 # Recycle server connections after 1 hour (prevents stale state)
- SERVER_IDLE_TIMEOUT=600 # Close unused server connections after 10 min
# Timeout settings
- QUERY_WAIT_TIMEOUT=60 # Max wait for available connection before failing request
- CLIENT_IDLE_TIMEOUT=300 # Close client connections idle >300s (DB_POOL_RECYCLE=60 recycles app connections well before this)
- SERVER_CONNECT_TIMEOUT=5 # Timeout for new connections to PostgreSQL
# Transaction cleanup - critical for avoiding idle-in-transaction buildup
# NOTE: In transaction pooling, session-level advisory locks (used by migrations)
# can stick unless the reset query clears them; DISCARD ALL is safest.
- SERVER_RESET_QUERY=DISCARD ALL # Reset connection state when returned to pool
- SERVER_RESET_QUERY_ALWAYS=1 # Always run reset query even after clean transactions
- IDLE_TRANSACTION_TIMEOUT=300 # Kill transactions idle > 300s (allows slow admin page rendering)
# Authentication
- AUTH_TYPE=scram-sha-256 # Match PostgreSQL auth method
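# To inspect live pool usage from the host (6432 is published above; a sketch - the
# postgres user may need to be listed as an admin/stats user depending on the image config):
#   psql -h localhost -p 6432 -U postgres pgbouncer -c 'SHOW POOLS;'
#   psql -h localhost -p 6432 -U postgres pgbouncer -c 'SHOW STATS;'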
depends_on:
postgres:
condition: service_healthy
healthcheck:
test: ["CMD", "pg_isready", "-h", "localhost", "-p", "6432"]
interval: 10s
timeout: 5s
retries: 3
start_period: 10s
deploy:
resources:
limits:
cpus: '1'
memory: 256M
reservations:
cpus: '0.5'
memory: 128M
# mariadb:
# image: mariadb:10.6
# environment:
# MARIADB_ROOT_PASSWORD: mysecretpassword
# MARIADB_DATABASE: mcp
# MARIADB_USER: mariadb
# MARIADB_PASSWORD: mysecretpassword
# volumes:
# - mariadbdata:/var/lib/mysql
# networks: [mcpnet]
# ports:
# - "3306:3306"
# healthcheck:
# test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
# interval: 30s
# timeout: 5s
# retries: 5
# start_period: 20s
# mariadb:
# image: mariadb:11
# environment:
# - MARIADB_ROOT_PASSWORD=mysecretpassword
# - MARIADB_DATABASE=mcp
# - MARIADB_USER=admin
# - MARIADB_PASSWORD=changeme
# volumes: [mariadbdata:/var/lib/mysql]
# networks: [mcpnet]
# mariadb:
# image: registry.redhat.io/rhel9/mariadb-106:12.0.2-ubi10
# environment:
# - MYSQL_ROOT_PASSWORD=mysecretpassword
# - MYSQL_DATABASE=mcp
# - MYSQL_USER=mysql
# - MYSQL_PASSWORD=changeme
# volumes: ["mariadbdata:/var/lib/mysql"]
# networks: [mcpnet]
# ports:
# - "3306:3306"
# healthcheck:
# test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root", "-pmysecretpassword"]
# interval: 30s
# timeout: 10s
# retries: 5
# start_period: 30s
# mongodb:
# image: mongo:7
# environment:
# - MONGO_INITDB_ROOT_USERNAME=admin
# - MONGO_INITDB_ROOT_PASSWORD=changeme
# - MONGO_INITDB_DATABASE=mcp
# volumes: [mongodata:/data/db]
# networks: [mcpnet]
# migration:
# #image: ghcr.io/ibm/mcp-context-forge:0.7.0 # Testing migration from 0.7.0
# image: mcpgateway/mcpgateway:latest # Use the local latest image. Run `make docker-prod` to build it.
# build:
# context: .
# dockerfile: Containerfile
# environment:
# - DATABASE_URL=postgresql+psycopg://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/mcp
# command: alembic -c mcpgateway/alembic.ini upgrade head
# depends_on:
# postgres:
# condition: service_healthy
# networks: [mcpnet]
###############################################################################
# CACHE
###############################################################################
redis:
image: redis:latest
ulimits:
nofile:
soft: 65536
hard: 65536
# Performance tuning for 1000+ RPS high-concurrency load testing
command:
- "redis-server"
- "--maxmemory"
- "1gb"
- "--maxmemory-policy"
- "allkeys-lru"
- "--tcp-backlog"
- "2048"
- "--timeout"
- "0"
- "--tcp-keepalive"
- "300"
- "--maxclients"
- "10000"
ports:
- "6379:6379" # expose only if you want host access
networks: [mcpnet]
deploy:
resources:
limits:
cpus: '2'
memory: 2G
reservations:
cpus: '1'
memory: 1G
###############################################################################
# MONITORING STACK (enabled with --profile monitoring)
# Usage: docker compose --profile monitoring up -d
# Access: Grafana http://localhost:3000 (admin/changeme)
# Prometheus http://localhost:9090
###############################################################################
# ──────────────────────────────────────────────────────────────────────
# Prometheus PostgreSQL Exporter - Database metrics
# Metrics: connections, query duration, locks, cache hit ratio
# ──────────────────────────────────────────────────────────────────────
postgres_exporter:
image: quay.io/prometheuscommunity/postgres-exporter:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9187:9187" # http://localhost:9187/metrics
environment:
- DATA_SOURCE_NAME=postgresql://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/mcp?sslmode=disable
- PG_EXPORTER_AUTO_DISCOVER_DATABASES=true
depends_on:
postgres:
condition: service_healthy
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Prometheus Redis Exporter - Cache metrics
# Metrics: memory, clients, commands/sec, keyspace stats
# ──────────────────────────────────────────────────────────────────────
redis_exporter:
image: oliver006/redis_exporter:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9121:9121" # http://localhost:9121/metrics
environment:
- REDIS_ADDR=redis://redis:6379
depends_on:
redis:
condition: service_started
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Prometheus PgBouncer Exporter - Connection pool metrics
# Metrics: active/waiting clients, server connections, pool stats
# ──────────────────────────────────────────────────────────────────────
pgbouncer_exporter:
image: prometheuscommunity/pgbouncer-exporter:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9127:9127" # http://localhost:9127/metrics
environment:
- PGBOUNCER_EXPORTER_CONNECTION_STRING=postgres://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@pgbouncer:6432/pgbouncer?sslmode=disable
depends_on:
pgbouncer:
condition: service_healthy
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Prometheus Nginx Exporter - Proxy metrics
# Metrics: active connections, requests/sec, response codes
# Requires stub_status enabled in nginx.conf (location /nginx_status)
# ──────────────────────────────────────────────────────────────────────
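# stub_status sketch for reference (the shipped infra/nginx/nginx.conf is authoritative):
#   location /nginx_status { stub_status; access_log off; }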
nginx_exporter:
image: nginx/nginx-prometheus-exporter:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9113:9113" # http://localhost:9113/metrics
command:
- '-nginx.scrape-uri=http://nginx:80/nginx_status'
depends_on:
nginx:
condition: service_healthy
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# cAdvisor - Container metrics (CPU, memory, network, disk I/O)
# Metrics: container_cpu_usage_seconds_total, container_memory_usage_bytes
# Dashboard: Grafana ID 14282 (Docker and cAdvisor)
# ──────────────────────────────────────────────────────────────────────
cadvisor:
image: gcr.io/cadvisor/cadvisor:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "8085:8080" # http://localhost:8085/metrics
privileged: true
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Prometheus - Metrics collection and storage
# Scrapes: gateway, postgres, redis, nginx, cadvisor
# Retention: 7 days (configurable via --storage.tsdb.retention.time)
# ──────────────────────────────────────────────────────────────────────
prometheus:
image: prom/prometheus:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9090:9090" # http://localhost:9090
volumes:
- ./infra/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- prometheusdata:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.retention.time=7d'
- '--web.enable-lifecycle'
depends_on:
- postgres_exporter
- redis_exporter
- nginx_exporter
- cadvisor
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Loki - Log aggregation system (like Prometheus, but for logs)
# Query logs with LogQL in Grafana
# ──────────────────────────────────────────────────────────────────────
loki:
image: grafana/loki:latest
restart: unless-stopped
networks: [mcpnet]
user: "0" # Run as root to avoid permission issues
ports:
- "3100:3100" # http://localhost:3100/ready
volumes:
- ./infra/monitoring/loki/loki-config.yaml:/etc/loki/local-config.yaml:ro
- lokidata:/loki
command: -config.file=/etc/loki/local-config.yaml
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Promtail - Log collector for Loki
# Collects logs from all containers via Docker socket
# ──────────────────────────────────────────────────────────────────────
promtail:
image: grafana/promtail:latest
restart: unless-stopped
networks: [mcpnet]
volumes:
- ./infra/monitoring/loki/promtail-config.yaml:/etc/promtail/config.yaml:ro
- /var/run/docker.sock:/var/run/docker.sock:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
command: -config.file=/etc/promtail/config.yaml
depends_on:
- loki
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Grafana - Dashboard visualization
# Default login: admin / changeme
# Recommended dashboards:
# - Docker/cAdvisor: 14282
# - PostgreSQL: 9628
# - Redis: 763
# - Nginx: 12708
# ──────────────────────────────────────────────────────────────────────
grafana:
image: grafana/grafana:latest
restart: unless-stopped
networks: [mcpnet]
user: "0" # Run as root to avoid permission issues with provisioning
ports:
- "3000:3000" # http://localhost:3000
environment:
- GF_SECURITY_ADMIN_PASSWORD=changeme
- GF_USERS_ALLOW_SIGN_UP=false
volumes:
- grafanadata:/var/lib/grafana
- ./infra/monitoring/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
- ./infra/monitoring/grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
depends_on:
- prometheus
profiles: ["monitoring"]
###############################################################################
# OPTIONAL ADMIN TOOLS - handy web UIs for DB & cache (disabled by default)
###############################################################################
pgadmin: # 🔧 Postgres admin UI
image: dpage/pgadmin4:9.11.0
environment:
- PGADMIN_DEFAULT_EMAIL=admin@example.com
- PGADMIN_DEFAULT_PASSWORD=changeme
ports:
- "5050:80" # http://localhost:5050
volumes:
- pgadmindata:/var/lib/pgadmin
networks: [mcpnet]
depends_on:
postgres:
condition: service_healthy
# ──────────────────────────────────────────────────────────────────────
# Redis Commander - a web-based Redis GUI
# ──────────────────────────────────────────────────────────────────────
redis_commander: # 🔧 Redis key browser
image: rediscommander/redis-commander:latest
restart: unless-stopped
networks: [mcpnet]
depends_on:
redis:
condition: service_started
ports:
- "8081:8081" # http://localhost:8081
environment:
- REDIS_HOSTS=local:redis:6379
- HTTP_USER=admin
- HTTP_PASSWORD=changeme
# # ──────────────────────────────────────────────────────────────────────
# # Redis Insight - a powerful Redis GUI (recently updated)
# # ──────────────────────────────────────────────────────────────────────
# redis_insight: # 🔧 Redis Insight GUI
# image: redis/redisinsight:latest
# container_name: redisinsight
# restart: unless-stopped
# networks: [mcpnet]
# ports:
# - "5540:5540" # Redis Insight UI (default 5540)
# depends_on: # Default stack: Postgres + Redis
# redis:
# condition: service_started
# # ──────────────────────────────────────────────────────────────────────
# # Persist data (config, logs, history) between restarts
# # ──────────────────────────────────────────────────────────────────────
# # volumes:
# # - ./redisinsight_data:/data
# volumes:
# - redisinsight_data:/data # <- persist data in named volume
# # ──────────────────────────────────────────────────────────────────────
# # Preconfigure Redis connection(s) via env vars
# # ──────────────────────────────────────────────────────────────────────
# environment:
# # Single connection (omit "*" since only one):
# - RI_REDIS_HOST=redis # <- your Redis hostname
# - RI_REDIS_PORT=6379 # <- your Redis port
# - RI_REDIS_USERNAME=default # <- ACL/username (Redis 6+)
# #- RI_REDIS_PASSWORD=changeme # <- Redis AUTH password
# #- RI_REDIS_TLS=true # <- enable TLS
# # Optional: validate self-signed CA instead of trusting all:
# # - RI_REDIS_TLS_CA_PATH=/certs/selfsigned.crt
# # - RI_REDIS_TLS_CERT_PATH=/certs/client.crt
# # - RI_REDIS_TLS_KEY_PATH=/certs/client.key
# # - RI_REDIS_TLS=true # (already set above)
# # ──────────────────────────────────────────────────────────────────
# # Core Redis Insight settings
# # ──────────────────────────────────────────────────────────────────
# - RI_APP_HOST=0.0.0.0 # <- listen on all interfaces
# - RI_APP_PORT=5540 # <- UI port (container-side)
# mongo_express: # 🔧 MongoDB GUI (works if mongodb service is enabled)
# image: mongo-express:1
# environment:
# - ME_CONFIG_MONGODB_ADMINUSERNAME=admin
# - ME_CONFIG_MONGODB_ADMINPASSWORD=changeme
# - ME_CONFIG_MONGODB_SERVER=mongodb
# ports:
# - "8082:8081" # http://localhost:8082
# networks: [mcpnet]
# depends_on:
# mongodb:
# condition: service_started
# phpmyadmin: # 🔧 MySQL / MariaDB GUI
# image: phpmyadmin:latest
# environment:
# - PMA_HOST=mysql # or mariadb
# - PMA_USER=mysql
# - PMA_PASSWORD=changeme
# - PMA_ARBITRARY=1 # allow login to any host if you switch DBs
# ports:
# - "8083:80" # http://localhost:8083
# networks: [mcpnet]
# depends_on:
# mysql:
# condition: service_started
###############################################################################
# OPTIONAL MCP SERVERS - drop-in helpers the Gateway can call
###############################################################################
###############################################################################
# Fast Time Server - High-performance time/timezone service for MCP
# Note: This is an amd64-only image. On ARM platforms (Apple Silicon),
# emulation may not work properly.
###############################################################################
fast_time_server:
image: ghcr.io/ibm/fast-time-server:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "8888:8080" # Map host port 8888 to container port 8080
# Use dual mode for both SSE (/sse) and Streamable HTTP (/http) endpoints
command: ["-transport=dual", "-listen=0.0.0.0", "-port=8080", "-log-level=info"]
###############################################################################
# Auto-registration service - registers fast_time_server with gateway
###############################################################################
register_fast_time:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
networks: [mcpnet]
depends_on:
gateway:
condition: service_healthy
fast_time_server:
condition: service_started
environment:
- JWT_SECRET_KEY=my-test-key
# This is a one-shot container that exits after registration
restart: "no"
entrypoint: ["/bin/sh", "-c"]
command:
- |
echo "Using latest gateway image with current JWT utility..."
echo "Waiting for services to be ready..."
# Wait for gateway to be ready using Python
python3 -c "
import time
import urllib.request
import urllib.error
for i in range(1, 61):
try:
with urllib.request.urlopen('http://gateway:4444/health', timeout=2) as response:
if response.status == 200:
print('✅ gateway is healthy')
break
except Exception:
pass
print(f'Waiting for gateway... ({i}/60)')
time.sleep(2)
else:
print('❌ Gateway failed to become healthy')
exit(1)
"
# Wait for fast_time_server to be ready using Python
python3 -c "
import time
import urllib.request
import urllib.error
for i in range(1, 31):
try:
with urllib.request.urlopen('http://fast_time_server:8080/health', timeout=2) as response:
if response.status == 200:
print('✅ fast_time_server is healthy')
break
except Exception:
pass
print(f'Waiting for fast_time_server... ({i}/30)')
time.sleep(2)
else:
print('❌ Fast time server failed to become healthy')
exit(1)
"
echo "Generating JWT token..."
echo "Environment: JWT_SECRET_KEY=$$JWT_SECRET_KEY"
echo "Running: python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256"
# Only capture stdout (the token), let warnings go to stderr
export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null)
echo "Generated token: $$MCPGATEWAY_BEARER_TOKEN"
# Decode the token to verify it has expiration
echo "Decoding token to verify claims..."
python3 -m mcpgateway.utils.create_jwt_token --decode "$$MCPGATEWAY_BEARER_TOKEN" 2>/dev/null || echo "Failed to decode token"
# Test authentication first
echo "Testing authentication..."
# Use Python to make HTTP requests
python3 -c "
import urllib.request
import urllib.error
import json
import sys
import os
import time
token = os.environ.get('MCPGATEWAY_BEARER_TOKEN', '')
def api_request(method, path, data=None):
'''Helper to make authenticated API requests.'''
url = f'http://gateway:4444{path}'
req = urllib.request.Request(url, method=method)
req.add_header('Authorization', f'Bearer {token}')
req.add_header('Content-Type', 'application/json')
if data:
req.data = json.dumps(data).encode('utf-8')
with urllib.request.urlopen(req) as response:
return json.loads(response.read().decode('utf-8'))
# Test version endpoint without auth
print('Checking gateway config...')
try:
with urllib.request.urlopen('http://gateway:4444/version') as response:
data = response.read().decode('utf-8')
print(f'Gateway version response (no auth): {data[:200]}')
except Exception as e:
print(f'Version check failed: {e}')
# Test version endpoint with auth
print('Testing authentication...')
try:
req = urllib.request.Request('http://gateway:4444/version')
req.add_header('Authorization', f'Bearer {token}')
with urllib.request.urlopen(req) as response:
data = response.read().decode('utf-8')
print(f'Auth test response: SUCCESS')
auth_success = True
except Exception as e:
print(f'Auth test response: FAILED - {e}')
auth_success = False
# Register fast_time_server with gateway using Streamable HTTP transport
print('Registering fast_time_server with gateway (Streamable HTTP)...')
# First check if gateway already exists and delete it
gateway_id = None
try:
gateways = api_request('GET', '/gateways')
for gw in gateways:
if gw.get('name') == 'fast_time':
print(f'Found existing gateway {gw[\"id\"]}, deleting...')
api_request('DELETE', f'/gateways/{gw[\"id\"]}')
print('Deleted existing gateway')
except Exception as e:
print(f'Note: Could not check/delete existing gateway: {e}')
# Delete existing virtual server if present (using fixed ID)
VIRTUAL_SERVER_ID = '9779b6698cbd4b4995ee04a4fab38737'
try:
api_request('DELETE', f'/servers/{VIRTUAL_SERVER_ID}')
print(f'Deleted existing virtual server {VIRTUAL_SERVER_ID}')
except Exception as e:
print(f'Note: No existing virtual server to delete (or error: {e})')
# Register the gateway
try:
result = api_request('POST', '/gateways', {
'name': 'fast_time',
'url': 'http://fast_time_server:8080/http',
'transport': 'STREAMABLEHTTP'
})
print(f'Registration response: {result}')
if 'id' in result:
gateway_id = result['id']
print(f'✅ Successfully registered fast_time_server (gateway_id: {gateway_id})')
else:
print('❌ Registration failed - no ID in response')
sys.exit(1)
except Exception as e:
print(f'❌ Registration failed: {e}')
sys.exit(1)
# Wait for tools to be synced from the gateway
print('Waiting for tools/resources/prompts to sync...')
for i in range(30):
time.sleep(1)
try:
tools = api_request('GET', '/tools')
# Filter tools from fast_time gateway (note: camelCase gatewayId)
fast_time_tools = [t for t in tools if t.get('gatewayId') == gateway_id]
if fast_time_tools:
print(f'Found {len(fast_time_tools)} tools from fast_time gateway')
break
except Exception as e:
pass
print(f'Waiting for sync... ({i+1}/30)')
else:
print('⚠️ No tools synced, continuing anyway...')
# Fetch all tools, resources, and prompts
# Note: Tools use gatewayId (camelCase), resources/prompts from catalog have no gatewayId
tool_ids = []
resource_ids = []
prompt_ids = []
try:
tools = api_request('GET', '/tools')
# Get tools from the fast_time gateway
tool_ids = [t['id'] for t in tools if t.get('gatewayId') == gateway_id]
print(f'Found tools: {[t[\"name\"] for t in tools if t.get(\"gatewayId\") == gateway_id]}')
except Exception as e:
print(f'Failed to fetch tools: {e}')
try:
resources = api_request('GET', '/resources')
# Include all resources (from catalog)
resource_ids = [r['id'] for r in resources]
print(f'Found resources: {[r[\"name\"] for r in resources]}')
except Exception as e:
print(f'Failed to fetch resources: {e}')
try:
prompts = api_request('GET', '/prompts')
# Include all prompts (from catalog)
prompt_ids = [p['id'] for p in prompts]
print(f'Found prompts: {[p[\"name\"] for p in prompts]}')
except Exception as e:
print(f'Failed to fetch prompts: {e}')
# Create virtual server with all tools, resources, and prompts
print('Creating virtual server...')
try:
# API expects payload wrapped in 'server' key
# Use fixed UUID for consistent server ID across restarts
server_payload = {
'server': {
'id': '9779b6698cbd4b4995ee04a4fab38737',
'name': 'Fast Time Server',
'description': 'Virtual server exposing Fast Time MCP tools, resources, and prompts',
'associated_tools': tool_ids,
'associated_resources': resource_ids,
'associated_prompts': prompt_ids
}
}
result = api_request('POST', '/servers', server_payload)
print(f'Virtual server created: {result}')
print(f'✅ Successfully created virtual server with {len(tool_ids)} tools, {len(resource_ids)} resources, {len(prompt_ids)} prompts')
except Exception as e:
print(f'❌ Failed to create virtual server: {e}')
sys.exit(1)
"
# Write the bearer token to a file for load testing
echo "Writing bearer token to /tmp/gateway-token.txt..."
echo "$$MCPGATEWAY_BEARER_TOKEN" > /tmp/gateway-token.txt
echo "Token written to /tmp/gateway-token.txt"
echo "✅ Setup complete!"
###############################################################################
# Fast Test Server - Ultra-fast Rust MCP server for performance testing
# Provides: echo, get_system_time, get_stats tools via MCP Streamable HTTP
# Also exposes REST API endpoints for baseline comparison
# Usage: docker compose --profile testing up -d
###############################################################################
fast_test_server:
build:
context: ./mcp-servers/rust/fast-test-server
dockerfile: Containerfile
image: mcpgateway/fast-test-server:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "8880:8880" # Port 8880 (avoids conflict with benchmark servers on 9000+)
environment:
- BIND_ADDRESS=0.0.0.0:8880
- RUST_LOG=info
# TCP kernel tuning for high-concurrency load testing
sysctls:
- net.ipv4.tcp_fin_timeout=15 # Faster cleanup of FIN_WAIT2 sockets
- net.ipv4.ip_local_port_range=1024 65535 # More ephemeral ports
- net.core.somaxconn=65535 # Max listen backlog
ulimits:
nofile:
soft: 65535
hard: 65535
healthcheck:
test: ["CMD", "curl", "-sf", "http://localhost:8880/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
deploy:
resources:
limits:
cpus: '2'
memory: 1G
reservations:
cpus: '0.5'
memory: 128M
profiles: ["testing", "monitoring"]
###############################################################################
# Auto-registration service - registers fast_test_server with gateway
###############################################################################
register_fast_test:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
networks: [mcpnet]
depends_on:
gateway:
condition: service_healthy
fast_test_server:
condition: service_healthy
environment:
- JWT_SECRET_KEY=my-test-key
restart: "no"
entrypoint: ["/bin/sh", "-c"]
command:
- |
echo "Registering fast_test_server with gateway..."
# Generate JWT token
export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null)
# Register using Python
python3 -c "
import urllib.request
import json
import os
import time
token = os.environ.get('MCPGATEWAY_BEARER_TOKEN', '')
def api_request(method, path, data=None):
url = f'http://gateway:4444{path}'
req = urllib.request.Request(url, method=method)
req.add_header('Authorization', f'Bearer {token}')
req.add_header('Content-Type', 'application/json')
if data:
req.data = json.dumps(data).encode('utf-8')
with urllib.request.urlopen(req) as response:
return json.loads(response.read().decode('utf-8'))
# Delete existing gateway if present
try:
gateways = api_request('GET', '/gateways')
for gw in gateways:
if gw.get('name') == 'fast_test':
print(f'Deleting existing gateway {gw[\"id\"]}...')
api_request('DELETE', f'/gateways/{gw[\"id\"]}')
except Exception as e:
print(f'Note: {e}')
# Register the gateway
try:
result = api_request('POST', '/gateways', {
'name': 'fast_test',
'url': 'http://fast_test_server:8880/mcp',
'transport': 'STREAMABLEHTTP'
})
print(f'✅ Registered fast_test_server: {result.get(\"id\", \"unknown\")}')
except Exception as e:
print(f'❌ Registration failed: {e}')
exit(1)
"
echo "✅ Registration complete!"
profiles: ["testing", "monitoring"]
###############################################################################
# Benchmark Server - Multi-server MCP benchmark tool
# Spawns multiple lightweight MCP servers for load testing
# Usage: make benchmark-up (or: docker compose --profile benchmark up -d)
#
# Environment variables:
# BENCHMARK_SERVER_COUNT - Number of servers to spawn (default: 10)
# BENCHMARK_START_PORT - Starting port number (default: 9000)
###############################################################################
benchmark_server:
build:
context: ./mcp-servers/go/benchmark-server
dockerfile: Dockerfile
image: mcpgateway/benchmark-server:latest
restart: unless-stopped
networks: [mcpnet]
command:
- "-transport=http"
- "-server-count=${BENCHMARK_SERVER_COUNT:-10}"
- "-start-port=${BENCHMARK_START_PORT:-9000}"
- "-tools=50"
- "-resources=20"
- "-prompts=10"
ports:
# Port range supports up to 100 servers (9000-9099)
# Actual servers spawned controlled by BENCHMARK_SERVER_COUNT
- "9000-9099:9000-9099"
# Note: No healthcheck - scratch-based Go image has no shell
# Verify health via: curl http://localhost:9000/health
deploy:
resources:
limits:
cpus: '2'
memory: 1G
reservations:
cpus: '0.5'
memory: 256M
profiles: ["benchmark"]
###############################################################################
# Auto-registration service - registers benchmark servers with gateway
# Uses BENCHMARK_SERVER_COUNT and BENCHMARK_START_PORT environment variables
###############################################################################
register_benchmark:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
networks: [mcpnet]
depends_on:
gateway:
condition: service_healthy
benchmark_server:
condition: service_started
environment:
- JWT_SECRET_KEY=my-test-key
- BENCHMARK_SERVER_COUNT=${BENCHMARK_SERVER_COUNT:-10}
- BENCHMARK_START_PORT=${BENCHMARK_START_PORT:-9000}
restart: "no"
entrypoint: ["/bin/sh", "-c"]
command:
- |
echo "Registering benchmark servers with gateway..."
# Wait for benchmark servers to start (no healthcheck available)
echo "Waiting for benchmark servers to start..."
sleep 5
# Generate JWT token
export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null)
# Register benchmark servers using environment variables
python3 -c "
import urllib.request
import urllib.error
import json
import os
token = os.environ.get('MCPGATEWAY_BEARER_TOKEN', '')
server_count = int(os.environ.get('BENCHMARK_SERVER_COUNT', '10'))
start_port = int(os.environ.get('BENCHMARK_START_PORT', '9000'))
headers = {
'Authorization': f'Bearer {token}',
'Content-Type': 'application/json'
}
def api_request(method, path, data=None):
url = f'http://gateway:4444{path}'
body = json.dumps(data).encode() if data else None
req = urllib.request.Request(url, data=body, headers=headers, method=method)
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode())
# Register benchmark servers
print(f'Registering {server_count} benchmark servers (ports {start_port}-{start_port + server_count - 1})...')
registered = 0
for port in range(start_port, start_port + server_count):
name = f'benchmark-{port}'
try:
result = api_request('POST', '/gateways', {
'name': name,
'url': f'http://benchmark_server:{port}/mcp',
'transport': 'STREAMABLEHTTP'
})
print(f'✅ Registered {name}: {result.get(\"id\", \"unknown\")}')
registered += 1
except urllib.error.HTTPError as e:
if e.code == 409:
print(f'⏭️ {name} already registered')
registered += 1
else:
print(f'❌ Failed to register {name}: HTTP {e.code}')
except Exception as e:
print(f'❌ Failed to register {name}: {e}')
print(f'✅ Registration complete: {registered}/{server_count} benchmark servers')
"
profiles: ["benchmark"]
###############################################################################
# Hashicorp Terraform MCP Server
# https://hub.docker.com/r/hashicorp/terraform-mcp-server
# https://github.com/hashicorp/terraform-mcp-server/blob/main/README.md
###############################################################################
# terraform-mcp-server:
# image: docker.io/hashicorp/terraform-mcp-server:dev
# container_name: terraform-mcp-server
# networks: [mcpnet]
# ports:
# - "8001:8080" # Map host port 8888 to container port 8080
# restart: unless-stopped
# environment:
# - TRANSPORT_MODE=streamable-http
# - TRANSPORT_HOST=0.0.0.0
# - TRANSPORT_PORT=8080
# - MCP_CORS_MODE=disabled
# healthcheck:
# test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
# interval: 30s
# timeout: 10s
# retries: 5
# start_period: 20s