#version: "3.9" # Supported by both podman-compose and Docker Compose v2+
###############################################################################
# HOST SYSTEM TUNING FOR LOAD TESTING (run before docker compose up)
# See docs/docs/testing/performance.md for full details
#
# One-liner (TCP + VM + I/O tuning):
# sudo sysctl -w net.core.somaxconn=65535 net.core.netdev_max_backlog=65535 net.ipv4.tcp_max_syn_backlog=65535 net.ipv4.tcp_tw_reuse=1 net.ipv4.tcp_fin_timeout=15 net.ipv4.ip_local_port_range="1024 65535" vm.swappiness=10 fs.aio-max-nr=1048576
#
# Make persistent: sudo tee /etc/sysctl.d/99-mcp-loadtest.conf (see docs)
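#
# A minimal sketch of the persistent variant, mirroring the one-liner above
# (same values; apply with `sudo sysctl --system` after writing the file):
#   # /etc/sysctl.d/99-mcp-loadtest.conf
#   net.core.somaxconn = 65535
#   net.core.netdev_max_backlog = 65535
#   net.ipv4.tcp_max_syn_backlog = 65535
#   net.ipv4.tcp_tw_reuse = 1
#   net.ipv4.tcp_fin_timeout = 15
#   net.ipv4.ip_local_port_range = 1024 65535
#   vm.swappiness = 10
#   fs.aio-max-nr = 1048576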
###############################################################################
###############################################################################
# DOCKER COMPOSE PROFILES
#
# Default (no profile): Gateway + Postgres + Redis + Nginx (HTTP only)
# --profile monitoring: Adds Prometheus, Grafana, Loki, exporters
# --profile testing: Adds Locust (web UI), A2A echo agent, fast_test_server + auto-registration
# --profile benchmark: Adds benchmark MCP servers for load testing
# --profile tls: Enables HTTPS via nginx_tls (auto-generates certs)
# --profile inspector: Adds MCP Inspector client (http://localhost:6274)
#
# TLS Quick Start:
# make compose-tls # HTTP:8080 + HTTPS:8443
# make compose-tls-https # Force HTTPS (HTTP redirects)
# curl -sk https://localhost:8443/health
#
# Custom certificates:
# mkdir -p certs && cp your-cert.pem certs/cert.pem && cp your-key.pem certs/key.pem
# make compose-tls
#
# Environment variables (TLS profile):
# NGINX_FORCE_HTTPS=true # Redirect all HTTP to HTTPS
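#
# Combining profiles (sketch): profiles stack, so a monitoring + load-test run is
#   docker compose --profile monitoring --profile testing up -d
#   docker compose --profile monitoring --profile testing down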
###############################################################################
###############################################################################
# NETWORKS + VOLUMES - declared first so they can be referenced later
###############################################################################
networks:
mcpnet: # Single user-defined bridge network keeps traffic private
driver: bridge
volumes: # Named volumes survive podman-compose down/up
pgdata:
# pgdata18: # Enable for postgres 18+
pgadmindata:
redisinsight_data:
nginx_cache:
grafanadata:
prometheusdata:
lokidata:
tempodata:
locust_token:
###############################################################################
# CORE SERVICE - MCP Gateway
###############################################################################
services:
# ──────────────────────────────────────────────────────────────────────
# Nginx Caching Proxy - High-performance reverse proxy with CDN-like caching
# ──────────────────────────────────────────────────────────────────────
nginx:
build:
context: ./infra/nginx
dockerfile: Dockerfile
image: mcpgateway/nginx-cache:latest
restart: unless-stopped
ports:
- "8080:80" # HTTP caching proxy (public-facing)
# - "8443:443" # HTTPS caching proxy (public-facing)
networks: [mcpnet]
depends_on:
gateway:
condition: service_healthy
volumes:
- nginx_cache:/var/cache/nginx # Persistent cache storage
- ./infra/nginx/nginx.conf:/etc/nginx/nginx.conf:ro # Mount config as read-only
# - ./certs:/app/certs:ro # Mount SSL certs for HTTPS backend verification
# TCP kernel tuning for 3000 concurrent connections
# Note: net.core.* sysctls are host-level and cannot be set per-container
# Only net.ipv4.* sysctls that are network-namespace aware work here
sysctls:
- net.ipv4.tcp_fin_timeout=15 # Faster cleanup of FIN_WAIT2 sockets
- net.ipv4.ip_local_port_range=1024 65535 # More ephemeral ports for upstream
ulimits:
nofile:
soft: 65535
hard: 65535
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
deploy:
resources:
limits:
cpus: '4'
memory: 1G
reservations:
cpus: '2'
memory: 512M
# ──────────────────────────────────────────────────────────────────────
# MCP Gateway - the main API server for the MCP stack
# ──────────────────────────────────────────────────────────────────────
gateway:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest} # Use the local latest image. Run `make docker-prod` to build it.
#image: ghcr.io/ibm/mcp-context-forge:1.0.0-BETA-2 # Use the release MCP Context Forge image
#image: ghcr.io/ibm/mcp-context-forge:0.7.0 # Testing migration from 0.7.0
build:
context: .
dockerfile: Containerfile.lite # Same one the Makefile builds
restart: unless-stopped
# NOTE: When using replicas > 1, access via nginx:8080 instead of direct port 4444
# ports:
# - "4444:4444" # Disabled for multi-replica mode
networks: [mcpnet]
# ──────────────────────────────────────────────────────────────────────
# Environment - pick ONE database URL line, comment the rest
# ──────────────────────────────────────────────────────────────────────
environment:
# ═══════════════════════════════════════════════════════════════════════════
# HTTP Server Selection: gunicorn vs granian
# ═══════════════════════════════════════════════════════════════════════════
# Performance comparison (2500 concurrent users, PostgreSQL backend):
# Gunicorn: ~2.7GB RAM, ~740% CPU, no backpressure (queues unbounded)
# Granian: ~4.0GB RAM, ~680% CPU, native backpressure (rejects excess with 503)
#
# Choose Gunicorn for: memory-constrained environments (32% less RAM)
# Choose Granian for: load spike protection, bursty traffic (graceful degradation)
# Both achieve same RPS when database is the bottleneck.
# ═══════════════════════════════════════════════════════════════════════════
# - HTTP_SERVER=granian # Rust-based, native backpressure, +47% memory, -8% CPU
- HTTP_SERVER=gunicorn # Python-based, battle-tested, lower memory usage
- HOST=0.0.0.0
- PORT=4444
# Domain for CORS/cookies (nginx default at http://localhost:8080)
- APP_DOMAIN=${APP_DOMAIN:-http://localhost:8080}
# Transport: sse, streamablehttp, http, or all (default: all)
- TRANSPORT_TYPE=streamablehttp
# Database connection: Via PgBouncer (default) or direct PostgreSQL
# PgBouncer provides connection pooling for better performance under high concurrency
- DATABASE_URL=postgresql+psycopg://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@pgbouncer:6432/mcp
# Direct PostgreSQL connection (bypass PgBouncer - increase DB_POOL_SIZE if using):
# - DATABASE_URL=postgresql+psycopg://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/mcp
# SQLAlchemy query logging (useful for N+1 detection; noisy under load)
# NOTE: SQLALCHEMY_ECHO logs at INFO; set LOG_LEVEL=INFO/DEBUG to see output.
- SQLALCHEMY_ECHO=false
- CACHE_TYPE=redis # backend for caching (memory, redis, database, or none)
- REDIS_URL=redis://redis:6379/0
# Redis parser: hiredis (C extension ~83x faster for large responses)
- REDIS_PARSER=hiredis
# Redis connection pool tuning for load testing (3 replicas × 24 workers × 100 = 7200 < 10000 maxclients)
- REDIS_MAX_CONNECTIONS=100
- REDIS_SOCKET_TIMEOUT=5.0
- REDIS_SOCKET_CONNECT_TIMEOUT=5.0
- REDIS_HEALTH_CHECK_INTERVAL=30
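# Sanity check (sketch): the connection math above must stay below Redis maxclients
# (10000, set on the redis service later in this file):
#   docker compose exec redis redis-cli config get maxclients
#   docker compose exec redis redis-cli info clients   # connected_clients should stay well under maxclients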
# ═══════════════════════════════════════════════════════════════════════════
# Redis Startup Resilience (prevents crash-loop on Redis outage)
# ═══════════════════════════════════════════════════════════════════════════
# With exponential backoff: 2s, 4s, 8s, 16s, 30s (capped), 30s...
# 30 retries ≈ 13-14 minutes of total wait before the worker gives up
- REDIS_MAX_RETRIES=30 # Max attempts before worker exits (default: 30)
- REDIS_RETRY_INTERVAL_MS=2000 # Base interval, grows exponentially with jitter
- REDIS_MAX_BACKOFF_SECONDS=30 # Max backoff cap (jitter ±25% applied after)
# Auth Cache Configuration (reduces DB queries per auth request from 3-4 to 0-1)
- AUTH_CACHE_ENABLED=${AUTH_CACHE_ENABLED:-true}
- AUTH_CACHE_USER_TTL=300
- AUTH_CACHE_REVOCATION_TTL=120
- AUTH_CACHE_TEAM_TTL=300
- AUTH_CACHE_ROLE_TTL=300
- AUTH_CACHE_BATCH_QUERIES=true
- AUTH_CACHE_TEAMS_TTL=300
# Registry Cache Configuration (reduces DB queries for list endpoints)
- REGISTRY_CACHE_ENABLED=true
- REGISTRY_CACHE_TOOLS_TTL=300
- REGISTRY_CACHE_PROMPTS_TTL=300
- REGISTRY_CACHE_RESOURCES_TTL=300
- REGISTRY_CACHE_AGENTS_TTL=300
- REGISTRY_CACHE_SERVERS_TTL=300
- REGISTRY_CACHE_GATEWAYS_TTL=300
- REGISTRY_CACHE_CATALOG_TTL=300
# Admin Stats Cache Configuration (reduces aggregate queries for dashboard)
- ADMIN_STATS_CACHE_ENABLED=true
- ADMIN_STATS_CACHE_SYSTEM_TTL=60
- ADMIN_STATS_CACHE_OBSERVABILITY_TTL=30
- ADMIN_STATS_CACHE_TAGS_TTL=120
- ADMIN_STATS_CACHE_PLUGINS_TTL=120
- ADMIN_STATS_CACHE_PERFORMANCE_TTL=60
# Team member count cache (reduces N+1 queries)
- TEAM_MEMBER_COUNT_CACHE_ENABLED=true
- TEAM_MEMBER_COUNT_CACHE_TTL=300
# Metrics aggregation cache (reduces full table scans, see #1906)
- METRICS_CACHE_ENABLED=true
- METRICS_CACHE_TTL_SECONDS=120
# MCP Server Health Check
# Interval in seconds between health checks (default: 300)
- HEALTH_CHECK_INTERVAL=300
# Timeout in seconds for each health check request (default: 5)
- HEALTH_CHECK_TIMEOUT=5
# Consecutive failures before marking gateway offline (default: 3)
- UNHEALTHY_THRESHOLD=3
# Gateway URL validation timeout in seconds (default: 5)
- GATEWAY_VALIDATION_TIMEOUT=5
# Max concurrent health checks per worker (default: 10)
- MAX_CONCURRENT_HEALTH_CHECKS=10
# JWT Configuration - Choose ONE approach:
# Option 1: HMAC (Default - Simple deployments)
- JWT_ALGORITHM=HS256
- JWT_SECRET_KEY=my-test-key
# Option 2: RSA (Production - Asymmetric, uncomment and generate certs)
# - JWT_ALGORITHM=RS256
# - JWT_PUBLIC_KEY_PATH=/app/certs/jwt/public.pem
# - JWT_PRIVATE_KEY_PATH=/app/certs/jwt/private.pem
- JWT_AUDIENCE=mcpgateway-api
- JWT_ISSUER=mcpgateway
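# Minting a token by hand for API calls (sketch; same invocation the registration
# containers below use, request routed through nginx on :8080):
#   TOKEN=$(docker compose exec -T gateway python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null)
#   curl -s -H "Authorization: Bearer $TOKEN" http://localhost:8080/version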
# Basic auth is DISABLED by default for security (API_ALLOW_BASIC_AUTH=false)
# Only set these if you explicitly enable Basic auth for backwards compatibility
# - API_ALLOW_BASIC_AUTH=true
# - BASIC_AUTH_USER=${BASIC_AUTH_USER:-admin}
# - BASIC_AUTH_PASSWORD=${BASIC_AUTH_PASSWORD:-changeme}
# Auth encryption secret + default user password
- AUTH_ENCRYPTION_SECRET=${AUTH_ENCRYPTION_SECRET:-my-test-salt}
- DEFAULT_USER_PASSWORD=${DEFAULT_USER_PASSWORD:-changeme}
# Admin UI uses email/password authentication
- EMAIL_AUTH_ENABLED=true
- PROTECT_ALL_ADMINS=${PROTECT_ALL_ADMINS:-true}
- PLATFORM_ADMIN_EMAIL=admin@example.com
- PLATFORM_ADMIN_PASSWORD=changeme
# Security defaults (tokens require expiration and JTI for revocation)
- REQUIRE_TOKEN_EXPIRATION=${REQUIRE_TOKEN_EXPIRATION:-true}
- REQUIRE_JTI=${REQUIRE_JTI:-true}
- REQUIRE_USER_IN_DB=${REQUIRE_USER_IN_DB:-false}
- MCPGATEWAY_UI_ENABLED=true
- MCPGATEWAY_ADMIN_API_ENABLED=true
# Security configuration (using defaults)
- ENVIRONMENT=development
- SECURITY_HEADERS_ENABLED=true
- CORS_ALLOW_CREDENTIALS=true
- SECURE_COOKIES=false
# ═══════════════════════════════════════════════════════════════════════════
# SSRF Protection (Server-Side Request Forgery)
# ═══════════════════════════════════════════════════════════════════════════
# Prevents gateway from accessing internal resources or cloud metadata services.
# Default: enabled with safe settings for dev/internal deployments.
# Cloud metadata (169.254.169.254, etc.) is ALWAYS blocked by default.
# - SSRF_PROTECTION_ENABLED=true # Master switch (default: true)
# - SSRF_ALLOW_LOCALHOST=true # Allow localhost (default: true for dev)
# - SSRF_ALLOW_PRIVATE_NETWORKS=true # Allow 10.x, 172.16.x, 192.168.x (default: true)
# - SSRF_DNS_FAIL_CLOSED=false # Reject on DNS failure (default: false = fail open)
# For strict production mode (external endpoints only):
# - SSRF_ALLOW_LOCALHOST=false
# - SSRF_ALLOW_PRIVATE_NETWORKS=false
# - SSRF_DNS_FAIL_CLOSED=true
# Uncomment to enable stateful sessions for Streamable HTTP transport
# - USE_STATEFUL_SESSIONS=true
# Uncomment to enable session affinity between downstream (from client) and upstream (to MCP server) sessions
# - MCPGATEWAY_SESSION_AFFINITY_ENABLED=true
## Uncomment to enable HTTPS (run `make certs` first)
# - SSL=true
# - CERT_FILE=/app/certs/cert.pem
# - KEY_FILE=/app/certs/key.pem
# For passphrase-protected keys: run `make certs-passphrase` and use:
# - KEY_FILE=/app/certs/key-encrypted.pem
# - KEY_FILE_PASSWORD=${KEY_FILE_PASSWORD}
# Plugin framework (enabled)
- PLUGINS_ENABLED=true
# MCP server catalog (enabled; catalog file is mounted under volumes below)
- MCPGATEWAY_CATALOG_ENABLED=true
- MCPGATEWAY_CATALOG_FILE=/app/mcp-catalog.yml
# Authentication configuration
- AUTH_REQUIRED=true
- MCP_CLIENT_AUTH_ENABLED=true
- TRUST_PROXY_AUTH=false
# Logging configuration
# NOTE: LOG_LEVEL=INFO/DEBUG is required for SQLALCHEMY_ECHO output.
- LOG_LEVEL=${LOG_LEVEL:-ERROR} # ERROR keeps log I/O low during load testing; raise to INFO/DEBUG for SQLALCHEMY_ECHO output
- DISABLE_ACCESS_LOG=true # Disable uvicorn access logs for performance (massive I/O overhead)
# Template auto-reload disabled for performance (prevents re-parsing templates on each request)
- TEMPLATES_AUTO_RELOAD=false
- STRUCTURED_LOGGING_DATABASE_ENABLED=false # Disable DB logging for performance (use true only for debugging)
# Audit trail logging - disabled by default for performance
# WARNING: Causes a DB write on EVERY API request - can generate millions of rows during load testing!
- AUDIT_TRAIL_ENABLED=false # Set to true for compliance requirements (SOC2, HIPAA, etc.)
# Security event logging - disabled by default for performance
# WARNING: "all" level logs every request and causes massive DB write load
- SECURITY_LOGGING_ENABLED=false # Set to true to enable security event logging
- SECURITY_LOGGING_LEVEL=failures_only # Options: all, failures_only, high_severity
# Performance optimizations - disable CPU-intensive middlewares
# NOTE: Compression is disabled here because nginx sits in front and already compresses;
# keep it enabled when running without a compressing proxy, otherwise larger payloads cause a throughput drop
- COMPRESSION_ENABLED=false
# Optional middlewares: request validation kept on; correlation ID middleware disabled for throughput
- VALIDATION_MIDDLEWARE_ENABLED=true
- JSON_SCHEMA_VALIDATION_STRICT=true
- CORRELATION_ID_ENABLED=false
- LLMCHAT_ENABLED=true
- OBSERVABILITY_ENABLED=false
# ═══════════════════════════════════════════════════════════════════════════
# Database Connection Pool Configuration
# ═══════════════════════════════════════════════════════════════════════════
# Pool class options:
# - "null": NullPool - no application pooling, PgBouncer handles all pooling (recommended)
# - "queue": QueuePool - application-side pooling (use with direct PostgreSQL)
# - "auto": Automatic - NullPool if PgBouncer detected in URL, else QueuePool
#
# WITH PgBouncer (default in docker-compose):
# Option A: NullPool - safest, eliminates stale connection errors, ~10% slower
# - DB_POOL_CLASS=null
# Option B: QueuePool + pre_ping - better performance, validates before use
- DB_POOL_CLASS=queue
- DB_POOL_PRE_PING=true # Validate connections before use (SELECT 1)
- DB_POOL_SIZE=20 # Pool size per worker
- DB_MAX_OVERFLOW=10 # Extra connections under load
- DB_POOL_TIMEOUT=60 # Time to wait for connection before failing
- DB_POOL_RECYCLE=60 # Recycle app connections in step with PgBouncer's CLIENT_IDLE_TIMEOUT (60s, set on the pgbouncer service)
# ═══════════════════════════════════════════════════════════════════════════
# Database Startup Resilience (prevents crash-loop on DB outage)
# ═══════════════════════════════════════════════════════════════════════════
# With exponential backoff: 2s, 4s, 8s, 16s, 30s (capped), 30s...
# 30 retries ≈ 13-14 minutes of total wait before the worker gives up
- DB_MAX_RETRIES=30 # Max attempts before worker exits (default: 30)
- DB_RETRY_INTERVAL_MS=2000 # Base interval, grows exponentially with jitter
- DB_MAX_BACKOFF_SECONDS=30 # Max backoff cap (jitter ±25% applied after)
# Tool configuration for high-concurrency load testing
- TOOL_TIMEOUT=60 # Seconds before tool invocation times out
- MAX_TOOL_RETRIES=3 # Retry attempts for failed tool invocations
- TOOL_RATE_LIMIT=60000 # Max tool invocations per minute
- TOOL_CONCURRENT_LIMIT=1000 # Max concurrent tool invocations
- FEDERATION_TIMEOUT=30
# ═══════════════════════════════════════════════════════════════════════════
# HTTPX Client Connection Pool Configuration
# ═══════════════════════════════════════════════════════════════════════════
# Shared HTTP client for all outbound requests (federation, health checks,
# A2A, SSO, catalog). Provides ~20x better performance than per-request clients.
- HTTPX_MAX_CONNECTIONS=500 # Total connections in pool (increased from 200 for high concurrency)
- HTTPX_MAX_KEEPALIVE_CONNECTIONS=300 # Keepalive connections (increased from 100)
- HTTPX_KEEPALIVE_EXPIRY=30.0 # Idle connection expiry (seconds)
- HTTPX_CONNECT_TIMEOUT=5.0 # TCP connection timeout (seconds)
- HTTPX_READ_TIMEOUT=120.0 # Response read timeout (seconds, high for slow tools)
- HTTPX_WRITE_TIMEOUT=30.0 # Request write timeout (seconds)
- HTTPX_POOL_TIMEOUT=10.0 # Wait for available connection (seconds)
- HTTPX_HTTP2_ENABLED=false # HTTP/2 support (requires server support)
- HTTPX_ADMIN_READ_TIMEOUT=30.0 # Admin UI/health check timeout (seconds)
# ═══════════════════════════════════════════════════════════════════════════
# Gunicorn Configuration (used when HTTP_SERVER=gunicorn)
# ═══════════════════════════════════════════════════════════════════════════
- GUNICORN_WORKERS=24 # Worker processes (match CPU cores)
- GUNICORN_TIMEOUT=120 # Worker timeout in seconds
- GUNICORN_GRACEFUL_TIMEOUT=60 # Grace period for worker shutdown
- GUNICORN_KEEP_ALIVE=30 # Keep-alive timeout (matches SSE keepalive)
# Worker recycling cleans up MCP SDK stuck task groups (anyio#695 workaround)
- GUNICORN_MAX_REQUESTS=1000000 # Recycle workers after 1M requests
- GUNICORN_MAX_REQUESTS_JITTER=100000 # ±100000 jitter prevents thundering herd
- GUNICORN_BACKLOG=4096 # Connection queue depth
# ═══════════════════════════════════════════════════════════════════════════
# Granian Backpressure Configuration (used when HTTP_SERVER=granian)
# ═══════════════════════════════════════════════════════════════════════════
# Backpressure provides overload protection by rejecting excess requests with
# immediate 503 responses instead of queuing them (which can cause OOM/timeouts).
# Total capacity = GRANIAN_WORKERS × GRANIAN_BACKPRESSURE = 16 × 128 = 2048 concurrent
# Requests beyond this limit receive immediate 503 (no queuing, no OOM)
- GRANIAN_WORKERS=16
- GRANIAN_BACKLOG=4096
- GRANIAN_BACKPRESSURE=128
- GRANIAN_HTTP1_BUFFER_SIZE=524288
- GRANIAN_RESPAWN_FAILED=true
# ───────────────────────────────────────────────────────────────────────
# Granian Worker Lifecycle (recycling to prevent resource leaks)
# ───────────────────────────────────────────────────────────────────────
# Workaround for granian issue where SSE connections may not be properly
# closed after client disconnect, causing CPU spin loops. See:
# - https://github.com/emmett-framework/granian/issues/286
# - https://github.com/IBM/mcp-context-forge/issues/2357
#
# GRANIAN_WORKERS_LIFETIME: Restart workers after this duration (min 60s)
# GRANIAN_WORKERS_MAX_RSS: Restart workers exceeding this memory (MiB)
#
# Using both provides natural jitter - workers hit memory limits at
# different times based on load, with lifetime as a backstop.
# - GRANIAN_WORKERS_LIFETIME=3600 # 1 hour max worker lifetime
# - GRANIAN_WORKERS_MAX_RSS=512 # 512 MiB max RSS per worker
# ───────────────────────────────────────────────────────────────────────
# HTTP/2: Granian supports native HTTP/2 multiplexing, but not useful here because:
# - nginx sits in front and downgrades to HTTP/1.1 for upstream connections
# - nginx open-source doesn't support HTTP/2 to backends (only nginx Plus does)
# - Internal Docker network is fast enough that HTTP/2 gains are negligible
# To use HTTP/2, either bypass nginx or use Granian with TLS directly.
# - GRANIAN_HTTP=2
# ═══════════════════════════════════════════════════════════════════════════
# MCP Session Pool Configuration
# ═══════════════════════════════════════════════════════════════════════════
# Session pooling for MCP ClientSessions reduces per-request overhead from
# ~20ms to ~1-2ms (10-20x improvement). Sessions are isolated per user/tenant
# via identity hashing to prevent cross-user session sharing.
- MCP_SESSION_POOL_ENABLED=true # Enable session pooling (default: false, enabled for docker-compose)
- MCP_SESSION_POOL_MAX_PER_KEY=200 # Max sessions per (URL, identity, transport) - increased from 150 for 4000+ users
- MCP_SESSION_POOL_TTL=300.0 # Session TTL in seconds (default: 300)
- MCP_SESSION_POOL_HEALTH_CHECK_INTERVAL=60.0 # Idle time before health check (default: 60)
- MCP_SESSION_POOL_ACQUIRE_TIMEOUT=60.0 # Timeout waiting for session slot (default: 30)
- MCP_SESSION_POOL_CREATE_TIMEOUT=30.0 # Timeout creating new session (default: 30)
- MCP_SESSION_POOL_CIRCUIT_BREAKER_THRESHOLD=5 # Failures before circuit opens
- MCP_SESSION_POOL_CIRCUIT_BREAKER_RESET=60.0 # Seconds before circuit resets
- MCP_SESSION_POOL_IDLE_EVICTION=600.0 # Evict idle pool keys after (default: 600)
- MCP_SESSION_POOL_TRANSPORT_TIMEOUT=30.0 # Timeout for all HTTP operations (default: 30)
- MCP_SESSION_POOL_EXPLICIT_HEALTH_RPC=false # Force RPC on health checks (default: false)
# Configurable health check chain - ordered list of methods to try (JSON array)
# Options: ping, list_tools, list_prompts, list_resources, skip
# - MCP_SESSION_POOL_HEALTH_CHECK_METHODS=["ping", "skip"] # Try ping, skip if unsupported
- MCP_SESSION_POOL_HEALTH_CHECK_METHODS=["skip"] # skip, highest performance
- MCP_SESSION_POOL_HEALTH_CHECK_TIMEOUT=5.0 # Timeout per health check attempt
# ═══════════════════════════════════════════════════════════════════════════
# CPU Spin Loop Mitigation (Issue #2360, anyio#695)
# ═══════════════════════════════════════════════════════════════════════════
# These settings mitigate CPU spin loops that can occur when SSE/MCP connections
# are cancelled and internal tasks don't respond to CancelledError. The spin
# happens in anyio's _deliver_cancellation method.
#
# See documentation: docs/docs/operations/cpu-spin-loop-mitigation.md
# GitHub Issue: https://github.com/IBM/mcp-context-forge/issues/2360
# Upstream Issue: https://github.com/agronholm/anyio/issues/695
#
# ─────────────────────────────────────────────────────────────────────────
# Layer 1: SSE Connection Protection
# ─────────────────────────────────────────────────────────────────────────
# Detect and close dead SSE connections before they cause spin loops.
- SSE_SEND_TIMEOUT=30.0 # ASGI send() timeout (default: 30.0)
- SSE_RAPID_YIELD_WINDOW_MS=1000 # Detection window in ms (default: 1000)
- SSE_RAPID_YIELD_MAX=50 # Max yields before disconnect (default: 50, 0=disabled)
# ─────────────────────────────────────────────────────────────────────────
# Layer 2: Cleanup Timeouts
# ─────────────────────────────────────────────────────────────────────────
# Limit how long cleanup waits for stuck tasks. Short timeouts (0.5s) reduce
# CPU waste during cancelled connection cleanup. Only affects cleanup, not
# normal operation.
- MCP_SESSION_POOL_CLEANUP_TIMEOUT=0.5 # Session __aexit__ timeout (default: 5.0)
- SSE_TASK_GROUP_CLEANUP_TIMEOUT=0.5 # SSE task group timeout (default: 5.0)
# ─────────────────────────────────────────────────────────────────────────
# Layer 3: EXPERIMENTAL - anyio Monkey-Patch
# ─────────────────────────────────────────────────────────────────────────
# Last resort: patches anyio to limit _deliver_cancellation iterations.
# Enable only if Layers 1-2 don't fully resolve the issue.
# WARNING: May be removed when anyio/MCP SDK fix upstream issue.
- ANYIO_CANCEL_DELIVERY_PATCH_ENABLED=true # Enable workaround - TESTING
- ANYIO_CANCEL_DELIVERY_MAX_ITERATIONS=500 # Max iterations before giving up (~60ms recovery)
# ═══════════════════════════════════════════════════════════════════════════
# Execution Metrics Recording
# ═══════════════════════════════════════════════════════════════════════════
# Controls tool/resource/prompt/server/A2A execution metrics (one DB row per operation).
# Disable when using external observability to improve performance.
# Set to true if you need per-operation metrics in the database.
# Note: Does NOT affect log aggregation (METRICS_AGGREGATION_ENABLED) or Prometheus.
- DB_METRICS_RECORDING_ENABLED=true
# ═══════════════════════════════════════════════════════════════════════════
# Metrics Configuration
# ═══════════════════════════════════════════════════════════════════════════
# Raw metrics are deleted after hourly rollups exist (default: 1 hour retention).
# Rollups preserve all analytics (counts, p50/p95/p99) for 365 days.
#
# If using external observability (ELK, Datadog, Splunk), raw metrics are
# redundant - your external platform handles debugging and audit trails.
#
# Configurable settings (uncomment to override defaults):
# - METRICS_DELETE_RAW_AFTER_ROLLUP=true # Delete raw after rollup (default)
# - METRICS_DELETE_RAW_AFTER_ROLLUP_HOURS=1 # Raw retention when rollup exists
# - METRICS_CLEANUP_INTERVAL_HOURS=1 # Cleanup frequency (default: hourly)
# - METRICS_RETENTION_DAYS=7 # Fallback retention (rollup disabled)
#
# For debugging without external observability, increase raw retention:
# - METRICS_DELETE_RAW_AFTER_ROLLUP_HOURS=168 # Keep raw data 7 days
# Phoenix Observability Integration (uncomment when using Phoenix)
# - PHOENIX_ENDPOINT=${PHOENIX_ENDPOINT:-http://phoenix:6006}
# - OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT:-http://phoenix:4317}
# - OTEL_SERVICE_NAME=${OTEL_SERVICE_NAME:-mcp-gateway}
# - OTEL_TRACES_EXPORTER=${OTEL_TRACES_EXPORTER:-otlp}
# - OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-otlp}
# - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES:-deployment.environment=docker,service.namespace=mcp}
# OpenTelemetry (Tempo / OTLP) - enabled automatically by `make monitoring-up`
- LOG_FORMAT=${LOG_FORMAT:-text} # text (human) or json (Loki-friendly)
- OTEL_ENABLE_OBSERVABILITY=${OTEL_ENABLE_OBSERVABILITY:-false}
- OTEL_TRACES_EXPORTER=${OTEL_TRACES_EXPORTER:-otlp}
- OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT:-http://tempo:4317}
- OTEL_EXPORTER_OTLP_PROTOCOL=${OTEL_EXPORTER_OTLP_PROTOCOL:-grpc}
- OTEL_SERVICE_NAME=${OTEL_SERVICE_NAME:-mcp-gateway}
- OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES:-deployment.environment=docker,service.namespace=mcp}
# TCP kernel tuning for high-concurrency MCP tool invocations
# Each tool call creates a new connection → many TIME_WAIT sockets
sysctls:
- net.ipv4.tcp_fin_timeout=15 # Faster cleanup of FIN_WAIT2 sockets (default: 60)
- net.ipv4.ip_local_port_range=1024 65535 # More ephemeral ports (default: 32768-60999)
ulimits:
nofile:
soft: 65535
hard: 65535
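# Verifying the per-container tuning took effect (sketch):
#   docker compose exec gateway cat /proc/sys/net/ipv4/tcp_fin_timeout      # expect 15
#   docker compose exec gateway cat /proc/sys/net/ipv4/ip_local_port_range  # expect 1024 65535
#   docker compose exec gateway sh -c 'ulimit -n'                           # expect 65535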
depends_on: # Default stack: PgBouncer + Redis (PgBouncer depends on Postgres)
pgbouncer:
condition: service_healthy # ▶ wait for connection pooler
redis:
condition: service_started
# Direct PostgreSQL (uncomment if bypassing PgBouncer):
# postgres:
# condition: service_healthy
# migration:
# condition: service_completed_successfully
healthcheck:
## HTTP healthcheck (enabled by default)
test: ["CMD", "python3", "-c", "import urllib.request; import json; resp = urllib.request.urlopen('http://localhost:4444/health', timeout=5); data = json.loads(resp.read()); exit(0 if data.get('status') == 'healthy' else 1)"]
## Uncomment for HTTPS healthcheck (requires valid SSL cert)
# test: ["CMD", "curl", "-f", "https://localhost:4444/health"]
# HTTPS healthcheck with SSL validation skipped (self-signed certs)
# test: ["CMD", "curl", "-fk", "https://localhost:4444/health"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
# Scaling options:
# - Single instance: use port 4444 directly, replicas: 1
# - Multi-instance: comment out ports, set replicas: 2+, access via nginx:8080
# ──────────────────────────────────────────────────────────────────────
# Server Engine Selection
# ──────────────────────────────────────────────────────────────────────
# The image defaults to Granian (Rust-based); this compose selects Gunicorn via
# HTTP_SERVER above. To force Gunicorn with Uvicorn workers via the entrypoint:
# command: ["./run-gunicorn.sh"]
deploy:
mode: replicated
replicas: 3
resources:
limits:
cpus: '8'
memory: 8G
reservations:
cpus: '4'
memory: 4G
# ──────────────────────────────────────────────────────────────────────
# Volume Mounts
# ──────────────────────────────────────────────────────────────────────
# Mount catalog configuration and SSL certificates
volumes:
- ./mcp-catalog.yml:/app/mcp-catalog.yml:ro # mount catalog configuration
# - ./certs:/app/certs:ro # mount certs folder read-only (includes both SSL and JWT keys)
#
# SSL/TLS Certificate Setup:
# 1. Generate certificates:
# - Without passphrase: make certs
# - With passphrase: make certs-passphrase
# 2. Uncomment the volumes mount above
# 3. Set SSL environment variables
# 4. If using passphrase-protected key, set KEY_FILE_PASSWORD in .env file
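#
# Optional sanity check after generating certificates (sketch, assuming the certs/ layout above):
#   openssl x509 -in certs/cert.pem -noout -subject -dates
#   openssl pkey -in certs/key.pem -noout    # prompts for the passphrase if the key is encrypted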
#
# For JWT asymmetric keys:
# 1. Generate keys: make certs-jwt
# 2. Uncomment volumes mount above
# 3. Switch JWT_ALGORITHM to RS256 and uncomment JWT_*_KEY_PATH variables
###############################################################################
# DATABASES - enable ONE of these blocks and adjust DATABASE_URL
###############################################################################
postgres:
image: postgres:18
shm_size: 256m # Increase from 64MB default to prevent shared memory exhaustion under load
ulimits:
nofile:
soft: 8192
hard: 8192
ports:
- "5433:5432" # Expose for baseline load testing (5433 to avoid conflict with local postgres)
# Performance tuning for high-load testing (3000 sustained users)
# WITH PgBouncer (default): 800 connections provides headroom for 700 pool + system overhead
# DIRECT connection mode: increase to ~4000 (e.g. 3 replicas × 16 workers × 80 connections per pool)
command:
- "postgres"
- "-c"
- "max_connections=800" # Must exceed PgBouncer MAX_DB_CONNECTIONS (700) + overhead
- "-c"
- "shared_buffers=512MB"
- "-c"
- "work_mem=16MB"
- "-c"
- "effective_cache_size=1536MB"
- "-c"
- "maintenance_work_mem=128MB"
- "-c"
- "checkpoint_completion_target=0.9"
- "-c"
- "wal_buffers=16MB"
- "-c"
- "random_page_cost=1.1"
- "-c"
- "effective_io_concurrency=200"
- "-c"
- "max_worker_processes=8" # Total background workers (must be >= max_parallel_workers)
- "-c"
- "max_parallel_workers_per_gather=4" # Max workers per query's parallel operation
- "-c"
- "max_parallel_workers=8" # Total parallel workers available system-wide
# === HIGH-CONCURRENCY TUNING (3000 users) ===
# CRITICAL: idle_in_transaction_session_timeout prevents connection starvation
# Application code now properly closes transactions via get_db() commit-on-success pattern
# This timeout is a safety net for any edge cases
- "-c"
- "idle_in_transaction_session_timeout=300s" # Kill stuck transactions after 300s (aligned with PgBouncer)
- "-c"
- "statement_timeout=120s" # Kill runaway queries after 120s
- "-c"
- "synchronous_commit=off" # Async WAL writes (2-10x faster commits)
- "-c"
- "commit_delay=100" # Batch commits within 100μs window
# ═══════════════════════════════════════════════════════════════════════════
# AUTOVACUUM TUNING - High-insert workloads (metrics tables)
# ═══════════════════════════════════════════════════════════════════════════
# High insert rates cause dead tuple accumulation. These settings help
# PostgreSQL keep up with table bloat from metrics writes.
# Uncomment if experiencing performance degradation under sustained load:
# - "-c"
# - "autovacuum_naptime=30s" # Check more frequently (default: 60s)
# - "-c"
# - "autovacuum_vacuum_scale_factor=0.05" # Vacuum at 5% dead tuples (default: 0.2)
# - "-c"
# - "autovacuum_vacuum_cost_limit=1000" # More vacuum work per cycle (default: 200)
# === PG_STAT_STATEMENTS + AUTO_EXPLAIN ===
# Query performance tracking and slow query plan logging
# NOTE: Both extensions must be in the SAME shared_preload_libraries line!
# After enabling, run in psql:
# CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
# SELECT * FROM pg_stat_statements ORDER BY total_exec_time DESC LIMIT 10;
# - "-c"
# - "shared_preload_libraries=pg_stat_statements"
# - "shared_preload_libraries=pg_stat_statements,auto_explain" # Use this line to enable both
# - "-c"
# - "pg_stat_statements.track=all"
# - "-c"
#- "pg_stat_statements.max=10000"
# AUTO_EXPLAIN settings (uncomment if using combined shared_preload_libraries above)
# - "-c"
# - "auto_explain.log_min_duration=1000"
# - "-c"
# - "auto_explain.log_analyze=on"
# === ROLLBACK DEBUGGING (disabled for performance) ===
# - "-c"
# - "log_min_error_statement=error"
# - "-c"
# - "log_min_messages=warning"
# - "-c"
# - "log_error_verbosity=verbose"
# - "-c"
# - "log_line_prefix=%t [%p]: user=%u,db=%d,app=%a,client=%h "
# - "-c"
# - "log_lock_waits=on"
# - "-c"
# - "deadlock_timeout=1s"
# - "-c"
# - "log_temp_files=0"
# - "-c"
# - "log_checkpoints=on"
# - "-c"
# - "log_connections=on"
# - "-c"
# - "log_disconnections=on"
# - "-c"
# - "idle_in_transaction_session_timeout=60s"
environment:
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=mysecretpassword
- POSTGRES_DB=mcp
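# Ad-hoc inspection with the credentials above (sketch; handy for the pg_stat_statements
# queries noted in the command block):
#   docker compose exec postgres psql -U postgres -d mcp
#   docker compose exec postgres psql -U postgres -d mcp -c "SELECT count(*) FROM pg_stat_activity;"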
volumes:
# - pgdata:/var/lib/postgresql/data # Postgres 17 and earlier
- pgdata:/var/lib/postgresql # Postgres 18+ image expects the parent directory mounted
networks: [mcpnet]
healthcheck:
test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER"]
interval: 30s
timeout: 5s
retries: 5
start_period: 20s
deploy:
resources:
limits:
cpus: '4'
memory: 8G
reservations:
cpus: '2'
memory: 2G
# ──────────────────────────────────────────────────────────────────────
# PgBouncer - Connection Pooler for PostgreSQL
# Reduces connection overhead, improves throughput under high concurrency.
# Used by default: the gateway DATABASE_URL points at pgbouncer:6432 (switch it to postgres:5432 to bypass)
# ──────────────────────────────────────────────────────────────────────
pgbouncer:
image: edoburu/pgbouncer:latest
restart: unless-stopped
networks: [mcpnet]
ulimits:
nofile:
soft: 65536
hard: 65536
ports:
- "6432:6432" # PgBouncer port (optional external access)
environment:
# Connection to upstream PostgreSQL
- DATABASE_URL=postgres://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/mcp
# PgBouncer listen port (default would be 5432, using 6432 to distinguish from PostgreSQL)
- LISTEN_PORT=6432
# Pool mode: transaction (recommended), session, or statement
# transaction: connection returned after each transaction (best for web apps)
- POOL_MODE=transaction
# ═══════════════════════════════════════════════════════════════════════════
# Connection Pool Tuning for 3000 Sustained Users
# PgBouncer handles connection multiplexing - many app connections share fewer DB connections
# ═══════════════════════════════════════════════════════════════════════════
# Client-side limits (from gateway workers via SQLAlchemy)
- MAX_CLIENT_CONN=5000 # Max app connections; must exceed (replicas × workers × pool)
- DEFAULT_POOL_SIZE=600 # Shared DB connections; sized for ~70 concurrent tx × 8x headroom
- MIN_POOL_SIZE=100 # Pre-warmed connections for instant response to load spikes
- RESERVE_POOL_SIZE=150 # Emergency pool for burst traffic beyond DEFAULT_POOL_SIZE
- RESERVE_POOL_TIMEOUT=2 # Seconds before tapping reserve pool
# Server-side limits (to PostgreSQL)
- MAX_DB_CONNECTIONS=700 # Max connections to PostgreSQL; must be < PG max_connections
- MAX_USER_CONNECTIONS=700 # Per-user limit; typically equals MAX_DB_CONNECTIONS
# Connection lifecycle
- SERVER_LIFETIME=3600 # Recycle server connections after 1 hour (prevents stale state)
- SERVER_IDLE_TIMEOUT=600 # Close unused server connections after 10 min
# Timeout settings
- QUERY_WAIT_TIMEOUT=60 # Max wait for available connection before failing request
- CLIENT_IDLE_TIMEOUT=60 # Close idle client connections after 60s (matches the gateway's DB_POOL_RECYCLE)
- SERVER_CONNECT_TIMEOUT=5 # Timeout for new connections to PostgreSQL
# Transaction cleanup - critical for avoiding idle-in-transaction buildup
# NOTE: In transaction pooling, session-level advisory locks (used by migrations)
# can stick unless the reset query clears them; DISCARD ALL is safest.
- SERVER_RESET_QUERY=DISCARD ALL # Reset connection state when returned to pool
- SERVER_RESET_QUERY_ALWAYS=1 # Always run reset query even after clean transactions
- IDLE_TRANSACTION_TIMEOUT=30 # Kill transactions idle > 30s to prevent connection pool exhaustion
# Authentication
- AUTH_TYPE=scram-sha-256 # Match PostgreSQL auth method
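# Watching pool utilization under load (sketch; uses psql from the postgres container and the
# default password above, same connection pattern as the pgbouncer_exporter below):
#   docker compose exec postgres psql "postgres://postgres:mysecretpassword@pgbouncer:6432/pgbouncer" -c "SHOW POOLS;"
#   docker compose exec postgres psql "postgres://postgres:mysecretpassword@pgbouncer:6432/pgbouncer" -c "SHOW STATS;"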
depends_on:
postgres:
condition: service_healthy
healthcheck:
test: ["CMD", "pg_isready", "-h", "localhost", "-p", "6432"]
interval: 10s
timeout: 5s
retries: 3
start_period: 10s
deploy:
resources:
limits:
cpus: '1'
memory: 256M
reservations:
cpus: '0.5'
memory: 128M
# migration:
# #image: ghcr.io/ibm/mcp-context-forge:0.7.0 # Testing migration from 0.7.0
# image: mcpgateway/mcpgateway:latest # Use the local latest image. Run `make docker-prod` to build it.
# build:
# context: .
# dockerfile: Containerfile
# environment:
# - DATABASE_URL=postgresql+psycopg://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/mcp
# command: alembic -c mcpgateway/alembic.ini upgrade head
# depends_on:
# postgres:
# condition: service_healthy
# networks: [mcpnet]
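# Manual alternative (sketch; assumes the gateway image ships alembic and the config path
# used by the commented service above):
#   docker compose exec gateway alembic -c mcpgateway/alembic.ini upgrade head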
###############################################################################
# CACHE
###############################################################################
redis:
image: redis:latest
ulimits:
nofile:
soft: 65536
hard: 65536
# Performance tuning for 1000+ RPS high-concurrency load testing
command:
- "redis-server"
- "--maxmemory"
- "1gb"
- "--maxmemory-policy"
- "allkeys-lru"
- "--tcp-backlog"
- "2048"
- "--timeout"
- "0"
- "--tcp-keepalive"
- "300"
- "--maxclients"
- "10000"
ports:
- "6379:6379" # expose only if you want host access
networks: [mcpnet]
deploy:
resources:
limits:
cpus: '2'
memory: 2G
reservations:
cpus: '1'
memory: 1G
###############################################################################
# MONITORING STACK (enabled with --profile monitoring)
# Usage: docker compose --profile monitoring up -d
# Access: Grafana http://localhost:3000 (admin/changeme)
# Prometheus http://localhost:9090
###############################################################################
# ──────────────────────────────────────────────────────────────────────
# Prometheus PostgreSQL Exporter - Database metrics
# Metrics: connections, query duration, locks, cache hit ratio
# ──────────────────────────────────────────────────────────────────────
postgres_exporter:
image: quay.io/prometheuscommunity/postgres-exporter:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9187:9187" # http://localhost:9187/metrics
environment:
- DATA_SOURCE_NAME=postgresql://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/mcp?sslmode=disable
- PG_EXPORTER_AUTO_DISCOVER_DATABASES=true
depends_on:
postgres:
condition: service_healthy
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Prometheus Redis Exporter - Cache metrics
# Metrics: memory, clients, commands/sec, keyspace stats
# ──────────────────────────────────────────────────────────────────────
redis_exporter:
image: oliver006/redis_exporter:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9121:9121" # http://localhost:9121/metrics
environment:
- REDIS_ADDR=redis://redis:6379
depends_on:
redis:
condition: service_started
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Prometheus PgBouncer Exporter - Connection pool metrics
# Metrics: active/waiting clients, server connections, pool stats
# ──────────────────────────────────────────────────────────────────────
pgbouncer_exporter:
image: prometheuscommunity/pgbouncer-exporter:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9127:9127" # http://localhost:9127/metrics
environment:
- PGBOUNCER_EXPORTER_CONNECTION_STRING=postgres://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@pgbouncer:6432/pgbouncer?sslmode=disable
depends_on:
pgbouncer:
condition: service_healthy
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Prometheus Nginx Exporter - Proxy metrics
# Metrics: active connections, requests/sec, response codes
# Requires stub_status enabled in nginx.conf (location /nginx_status)
# ──────────────────────────────────────────────────────────────────────
nginx_exporter:
image: nginx/nginx-prometheus-exporter:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9113:9113" # http://localhost:9113/metrics
command:
- '-nginx.scrape-uri=http://nginx:80/nginx_status'
depends_on:
nginx:
condition: service_healthy
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# cAdvisor - Container metrics (CPU, memory, network, disk I/O)
# Metrics: container_cpu_usage_seconds_total, container_memory_usage_bytes
# Dashboard: Grafana ID 14282 (Docker and cAdvisor)
# ──────────────────────────────────────────────────────────────────────
cadvisor:
image: gcr.io/cadvisor/cadvisor:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "8085:8080" # http://localhost:8085/metrics
privileged: true
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Prometheus - Metrics collection and storage
# Scrapes: gateway, postgres, redis, nginx, cadvisor
# Retention: 7 days (configurable via --storage.tsdb.retention.time)
# ──────────────────────────────────────────────────────────────────────
prometheus:
image: prom/prometheus:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9090:9090" # http://localhost:9090
volumes:
- ./infra/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- prometheusdata:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.retention.time=7d'
- '--web.enable-lifecycle'
depends_on:
- postgres_exporter
- redis_exporter
- nginx_exporter
- cadvisor
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Loki - Log aggregation system (like Prometheus, but for logs)
# Query logs with LogQL in Grafana
# ──────────────────────────────────────────────────────────────────────
loki:
image: grafana/loki:latest
restart: unless-stopped
networks: [mcpnet]
user: "0" # Run as root to avoid permission issues
ports:
- "3100:3100" # http://localhost:3100/ready
volumes:
- ./infra/monitoring/loki/loki-config.yaml:/etc/loki/local-config.yaml:ro
- lokidata:/loki
command: -config.file=/etc/loki/local-config.yaml
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Tempo - Distributed tracing backend (OTLP receiver + TraceQL query API)
# Completes the Grafana observability stack: Prometheus (metrics) + Loki (logs) + Tempo (traces)
# Query traces via Grafana Explore → Tempo datasource using TraceQL
# ──────────────────────────────────────────────────────────────────────
tempo:
image: grafana/tempo:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "3200:3200" # Tempo HTTP API / query frontend
- "4317:4317" # OTLP gRPC receiver
- "4318:4318" # OTLP HTTP receiver
command: ["-config.file=/etc/tempo.yaml"]
volumes:
- ./infra/monitoring/tempo/tempo.yaml:/etc/tempo.yaml:ro
- tempodata:/var/tempo
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Promtail - Log collector for Loki
# Collects logs from all containers via Docker socket
# ──────────────────────────────────────────────────────────────────────
promtail:
image: grafana/promtail:latest
restart: unless-stopped
networks: [mcpnet]
volumes:
- ./infra/monitoring/loki/promtail-config.yaml:/etc/promtail/config.yaml:ro
- /var/run/docker.sock:/var/run/docker.sock:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
command: -config.file=/etc/promtail/config.yaml
depends_on:
- loki
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Grafana - Dashboard visualization
# Default login: admin / changeme
# Recommended dashboards:
# - Docker/cAdvisor: 14282
# - PostgreSQL: 9628
# - Redis: 763
# - Nginx: 12708
# ──────────────────────────────────────────────────────────────────────
grafana:
image: grafana/grafana:latest
restart: unless-stopped
networks: [mcpnet]
user: "0" # Run as root to avoid permission issues with provisioning
ports:
- "3000:3000" # http://localhost:3000
environment:
- GF_SECURITY_ADMIN_PASSWORD=changeme
- GF_USERS_ALLOW_SIGN_UP=false
volumes:
- grafanadata:/var/lib/grafana
- ./infra/monitoring/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
- ./infra/monitoring/grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
depends_on:
- prometheus
- loki
- tempo
profiles: ["monitoring"]
###############################################################################
# OPTIONAL ADMIN TOOLS - handy web UIs for DB & cache (disabled by default)
###############################################################################
pgadmin: # 🔧 Postgres admin UI
image: dpage/pgadmin4:9.11.0
environment:
- PGADMIN_DEFAULT_EMAIL=admin@example.com
- PGADMIN_DEFAULT_PASSWORD=changeme
ports:
- "5050:80" # http://localhost:5050
volumes:
- pgadmindata:/var/lib/pgadmin
networks: [mcpnet]
depends_on:
postgres:
condition: service_healthy
profiles: ["monitoring"]
# ──────────────────────────────────────────────────────────────────────
# Redis Commander - a web-based Redis GUI
# ──────────────────────────────────────────────────────────────────────
redis_commander: # 🔧 Redis key browser
image: rediscommander/redis-commander:latest
restart: unless-stopped
networks: [mcpnet]
depends_on:
redis:
condition: service_started
ports:
- "8081:8081" # http://localhost:8081
environment:
- REDIS_HOSTS=local:redis:6379
- HTTP_USER=admin
- HTTP_PASSWORD=changeme
profiles: ["monitoring"]
# # ──────────────────────────────────────────────────────────────────────
# # Redis Insight - a powerful Redis GUI (recently updated)
# # ──────────────────────────────────────────────────────────────────────
# redis_insight: # 🔧 Redis Insight GUI
# image: redis/redisinsight:latest
# container_name: redisinsight
# restart: unless-stopped
# networks: [mcpnet]
# ports:
# - "5540:5540" # Redis Insight UI (default 5540)
# depends_on: # Default stack: Postgres + Redis
# redis:
# condition: service_started
# # ──────────────────────────────────────────────────────────────────────
# # Persist data (config, logs, history) between restarts
# # ──────────────────────────────────────────────────────────────────────
# # volumes:
# # - ./redisinsight_data:/data
# volumes:
# - redisinsight_data:/data # <- persist data in named volume
# # ──────────────────────────────────────────────────────────────────────
# # Preconfigure Redis connection(s) via env vars
# # ──────────────────────────────────────────────────────────────────────
# environment:
# # Single connection (omit "*" since only one):
# - RI_REDIS_HOST=redis # <- your Redis hostname
# - RI_REDIS_PORT=6379 # <- your Redis port
# - RI_REDIS_USERNAME=default # <- ACL/username (Redis 6+)
# #- RI_REDIS_PASSWORD=changeme # <- Redis AUTH password
# #- RI_REDIS_TLS=true # <- enable TLS
# # Optional: validate self-signed CA instead of trusting all:
# # - RI_REDIS_TLS_CA_PATH=/certs/selfsigned.crt
# # - RI_REDIS_TLS_CERT_PATH=/certs/client.crt
# # - RI_REDIS_TLS_KEY_PATH=/certs/client.key
# # - RI_REDIS_TLS=true # (already set above)
# # ──────────────────────────────────────────────────────────────────
# # Core Redis Insight settings
# # ──────────────────────────────────────────────────────────────────
# - RI_APP_HOST=0.0.0.0 # <- listen on all interfaces
# - RI_APP_PORT=5540 # <- UI port (container-side)
###############################################################################
# OPTIONAL MCP SERVERS - drop-in helpers the Gateway can call
###############################################################################
###############################################################################
# Fast Time Server - High-performance time/timezone service for MCP
# Uses pre-built image by default. On ARM64, build locally:
# FAST_TIME_IMAGE=mcpgateway/fast-time-server:local docker compose build fast_time_server
###############################################################################
fast_time_server:
image: ${FAST_TIME_IMAGE:-ghcr.io/ibm/fast-time-server:latest}
build:
context: ./mcp-servers/go/fast-time-server
dockerfile: Dockerfile
restart: unless-stopped
networks: [mcpnet]
ports:
- "8888:8080" # Map host port 8888 to container port 8080
# Use dual mode for both SSE (/sse) and Streamable HTTP (/http) endpoints
command: ["-transport=dual", "-listen=0.0.0.0", "-port=8080", "-log-level=info"]
###############################################################################
# Auto-registration service - registers fast_time_server with gateway
###############################################################################
register_fast_time:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
networks: [mcpnet]
depends_on:
gateway:
condition: service_healthy
fast_time_server:
condition: service_started
environment:
- JWT_SECRET_KEY=my-test-key
# This is a one-shot container that exits after registration
restart: "no"
entrypoint: ["/bin/sh", "-c"]
command:
- |
echo "Using latest gateway image with current JWT utility..."
echo "Waiting for services to be ready..."
# Wait for gateway to be ready using Python
python3 -c "
import time
import urllib.request
import urllib.error
for i in range(1, 61):
try:
with urllib.request.urlopen('http://gateway:4444/health', timeout=2) as response:
if response.status == 200:
print('✅ gateway is healthy')
break
except:
pass
print(f'Waiting for gateway... ({i}/60)')
time.sleep(2)
else:
print('❌ Gateway failed to become healthy')
exit(1)
"
# Wait for fast_time_server to be ready using Python
python3 -c "
import time
import urllib.request
import urllib.error
for i in range(1, 31):
try:
with urllib.request.urlopen('http://fast_time_server:8080/health', timeout=2) as response:
if response.status == 200:
print('✅ fast_time_server is healthy')
break
except:
pass
print(f'Waiting for fast_time_server... ({i}/30)')
time.sleep(2)
else:
print('❌ Fast time server failed to become healthy')
exit(1)
"
echo "Generating JWT token..."
echo "Environment: JWT_SECRET_KEY=$$JWT_SECRET_KEY"
echo "Running: python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256"
# Only capture stdout (the token), let warnings go to stderr
export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null)
echo "Generated token: $$MCPGATEWAY_BEARER_TOKEN"
# Decode the token to verify it has expiration
echo "Decoding token to verify claims..."
python3 -m mcpgateway.utils.create_jwt_token --decode "$$MCPGATEWAY_BEARER_TOKEN" 2>/dev/null || echo "Failed to decode token"
# Test authentication first
echo "Testing authentication..."
# Use Python to make HTTP requests
python3 -c "
import urllib.request
import urllib.error
import json
import sys
import os
import time
token = os.environ.get('MCPGATEWAY_BEARER_TOKEN', '')
def api_request(method, path, data=None):
'''Helper to make authenticated API requests.'''
url = f'http://gateway:4444{path}'
req = urllib.request.Request(url, method=method)
req.add_header('Authorization', f'Bearer {token}')
req.add_header('Content-Type', 'application/json')
if data:
req.data = json.dumps(data).encode('utf-8')
with urllib.request.urlopen(req) as response:
return json.loads(response.read().decode('utf-8'))
# Test version endpoint without auth
print('Checking gateway config...')
try:
with urllib.request.urlopen('http://gateway:4444/version') as response:
data = response.read().decode('utf-8')
print(f'Gateway version response (no auth): {data[:200]}')
except Exception as e:
print(f'Version check failed: {e}')
# Test version endpoint with auth
print('Testing authentication...')
try:
req = urllib.request.Request('http://gateway:4444/version')
req.add_header('Authorization', f'Bearer {token}')
with urllib.request.urlopen(req) as response:
data = response.read().decode('utf-8')
print(f'Auth test response: SUCCESS')
auth_success = True
except Exception as e:
print(f'Auth test response: FAILED - {e}')
auth_success = False
# Register fast_time_server with gateway using Streamable HTTP transport
print('Registering fast_time_server with gateway (Streamable HTTP)...')
# First check if gateway already exists and delete it
gateway_id = None
try:
gateways = api_request('GET', '/gateways')
for gw in gateways:
if gw.get('name') == 'fast_time':
print(f'Found existing gateway {gw[\"id\"]}, deleting...')
api_request('DELETE', f'/gateways/{gw[\"id\"]}')
print('Deleted existing gateway')
except Exception as e:
print(f'Note: Could not check/delete existing gateway: {e}')
# Delete existing virtual server if present (using fixed ID)
VIRTUAL_SERVER_ID = '9779b6698cbd4b4995ee04a4fab38737'
try:
api_request('DELETE', f'/servers/{VIRTUAL_SERVER_ID}')
print(f'Deleted existing virtual server {VIRTUAL_SERVER_ID}')
except Exception as e:
print(f'Note: No existing virtual server to delete (or error: {e})')
# Register the gateway
try:
result = api_request('POST', '/gateways', {
'name': 'fast_time',
'url': 'http://fast_time_server:8080/http',
'transport': 'STREAMABLEHTTP'
})
print(f'Registration response: {result}')
if 'id' in result:
gateway_id = result['id']
print(f'✅ Successfully registered fast_time_server (gateway_id: {gateway_id})')
else:
print('❌ Registration failed - no ID in response')
sys.exit(1)
except Exception as e:
print(f'❌ Registration failed: {e}')
sys.exit(1)
# Wait for tools to be synced from the gateway
print('Waiting for tools/resources/prompts to sync...')
for i in range(30):
time.sleep(1)
try:
tools = api_request('GET', '/tools')
# Filter tools from fast_time gateway (note: camelCase gatewayId)
fast_time_tools = [t for t in tools if t.get('gatewayId') == gateway_id]
if fast_time_tools:
print(f'Found {len(fast_time_tools)} tools from fast_time gateway')
break
except Exception as e:
pass
print(f'Waiting for sync... ({i+1}/30)')
else:
print('⚠️ No tools synced, continuing anyway...')
# Fetch all tools, resources, and prompts
# Note: Tools use gatewayId (camelCase), resources/prompts from catalog have no gatewayId
tool_ids = []
resource_ids = []
prompt_ids = []
try:
tools = api_request('GET', '/tools')
# Get tools from the fast_time gateway
tool_ids = [t['id'] for t in tools if t.get('gatewayId') == gateway_id]
print(f'Found tools: {[t[\"name\"] for t in tools if t.get(\"gatewayId\") == gateway_id]}')
except Exception as e:
print(f'Failed to fetch tools: {e}')
try:
resources = api_request('GET', '/resources')
# Include all resources (from catalog)
resource_ids = [r['id'] for r in resources]
print(f'Found resources: {[r[\"name\"] for r in resources]}')
except Exception as e:
print(f'Failed to fetch resources: {e}')
try:
prompts = api_request('GET', '/prompts')
# Include all prompts (from catalog)
prompt_ids = [p['id'] for p in prompts]
print(f'Found prompts: {[p[\"name\"] for p in prompts]}')
except Exception as e:
print(f'Failed to fetch prompts: {e}')
# Create virtual server with all tools, resources, and prompts
print('Creating virtual server...')
try:
# API expects payload wrapped in 'server' key
# Use fixed UUID for consistent server ID across restarts
server_payload = {
'server': {
'id': '9779b6698cbd4b4995ee04a4fab38737',
'name': 'Fast Time Server',
'description': 'Virtual server exposing Fast Time MCP tools, resources, and prompts',
'associated_tools': tool_ids,
'associated_resources': resource_ids,
'associated_prompts': prompt_ids
}
}
result = api_request('POST', '/servers', server_payload)
print(f'Virtual server created: {result}')
print(f'✅ Successfully created virtual server with {len(tool_ids)} tools, {len(resource_ids)} resources, {len(prompt_ids)} prompts')
except Exception as e:
print(f'❌ Failed to create virtual server: {e}')
sys.exit(1)
"
# Write the bearer token to a file for load testing
echo "Writing bearer token to /tmp/gateway-token.txt..."
echo "$$MCPGATEWAY_BEARER_TOKEN" > /tmp/gateway-token.txt
echo "Token written to /tmp/gateway-token.txt"
echo "✅ Setup complete!"
###############################################################################
# Fast Test Server - Ultra-fast Rust MCP server for performance testing
# Provides: echo, get_system_time, get_stats tools via MCP Streamable HTTP
# Also exposes REST API endpoints for baseline comparison
# Usage: docker compose --profile testing up -d
###############################################################################
fast_test_server:
build:
context: ./mcp-servers/rust/fast-test-server
dockerfile: Containerfile
image: mcpgateway/fast-test-server:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "8880:8880" # Port 8880 (avoids conflict with benchmark servers on 9000+)
environment:
- BIND_ADDRESS=0.0.0.0:8880
- RUST_LOG=info
# TCP kernel tuning for high-concurrency load testing
sysctls:
- net.ipv4.tcp_fin_timeout=15 # Faster cleanup of FIN_WAIT2 sockets
- net.ipv4.ip_local_port_range=1024 65535 # More ephemeral ports
- net.core.somaxconn=65535 # Max listen backlog
ulimits:
nofile:
soft: 65535
hard: 65535
healthcheck:
test: ["CMD", "curl", "-sf", "http://localhost:8880/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
deploy:
resources:
limits:
cpus: '2'
memory: 1G
reservations:
cpus: '0.5'
memory: 128M
profiles: ["testing", "monitoring"]
###############################################################################
# Auto-registration service - registers fast_test_server with gateway
###############################################################################
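# To confirm the registration afterwards (illustrative sketch; assumes nginx
# proxies the gateway API on host port 8080 and the shared JWT secret "my-test-key"):
#   TOKEN=$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256)
#   curl -s -H "Authorization: Bearer $TOKEN" http://localhost:8080/gateways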
register_fast_test:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
networks: [mcpnet]
depends_on:
gateway:
condition: service_healthy
fast_test_server:
condition: service_healthy
environment:
- JWT_SECRET_KEY=my-test-key
restart: "no"
entrypoint: ["/bin/sh", "-c"]
command:
- |
echo "Registering fast_test_server with gateway..."
# Generate JWT token
export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null)
# Register using Python
python3 -c "
import urllib.request
import json
import os
import time
token = os.environ.get('MCPGATEWAY_BEARER_TOKEN', '')
def api_request(method, path, data=None):
url = f'http://gateway:4444{path}'
req = urllib.request.Request(url, method=method)
req.add_header('Authorization', f'Bearer {token}')
req.add_header('Content-Type', 'application/json')
if data:
req.data = json.dumps(data).encode('utf-8')
with urllib.request.urlopen(req) as response:
return json.loads(response.read().decode('utf-8'))
# Delete existing gateway if present
try:
gateways = api_request('GET', '/gateways')
for gw in gateways:
if gw.get('name') == 'fast_test':
print(f'Deleting existing gateway {gw[\"id\"]}...')
api_request('DELETE', f'/gateways/{gw[\"id\"]}')
except Exception as e:
print(f'Note: {e}')
# Register the gateway
try:
result = api_request('POST', '/gateways', {
'name': 'fast_test',
'url': 'http://fast_test_server:8880/mcp',
'transport': 'STREAMABLEHTTP'
})
print(f'✅ Registered fast_test_server: {result.get(\"id\", \"unknown\")}')
except Exception as e:
print(f'❌ Registration failed: {e}')
exit(1)
"
echo "✅ Registration complete!"
profiles: ["testing", "monitoring"]
###############################################################################
# A2A Echo Agent - Lightweight A2A-compatible agent for end-to-end testing
# Provides a simple JSON-RPC endpoint and A2A discovery card (no LLM dependency)
# Usage: docker compose --profile testing up -d
###############################################################################
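# Quick check (illustrative; assumes the default 9100:9100 mapping below - the
# health path matches this service's healthcheck, and the JSON-RPC endpoint is
# served at "/", as used in the registration payload further down):
#   curl -s http://localhost:9100/health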
a2a_echo_agent:
build:
context: ./a2a-agents/go/a2a-echo-agent
dockerfile: Dockerfile
image: mcpgateway/a2a-echo-agent:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "9100:9100"
environment:
- A2A_ECHO_ADDR=0.0.0.0:9100
- A2A_ECHO_NAME=a2a-echo-agent
- A2A_ECHO_LOG_LEVEL=info
healthcheck:
test: ["CMD", "wget", "-qO-", "http://localhost:9100/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
deploy:
resources:
limits:
cpus: '1'
memory: 256M
reservations:
cpus: '0.25'
memory: 64M
profiles: ["testing"]
###############################################################################
# Auto-registration service - registers a2a_echo_agent with gateway
###############################################################################
register_a2a_echo:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
networks: [mcpnet]
depends_on:
gateway:
condition: service_healthy
a2a_echo_agent:
condition: service_healthy
environment:
- JWT_SECRET_KEY=my-test-key
restart: "no"
entrypoint: ["/bin/sh", "-c"]
command:
- |
echo "Registering a2a_echo_agent with gateway..."
# Generate JWT token (stdout only)
export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null)
python3 -c "
import json
import os
import urllib.request
token = os.environ.get('MCPGATEWAY_BEARER_TOKEN', '')
def api_request(method, path, data=None):
url = f'http://gateway:4444{path}'
body = json.dumps(data).encode('utf-8') if data is not None else None
req = urllib.request.Request(url, data=body, method=method)
req.add_header('Authorization', f'Bearer {token}')
req.add_header('Content-Type', 'application/json')
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode('utf-8'))
# Delete existing agent if present
try:
agents = api_request('GET', '/a2a')
items = agents if isinstance(agents, list) else agents.get('agents', agents.get('items', []))
for a in items:
if a.get('name') == 'a2a-echo-agent':
print(f'Deleting existing A2A agent {a.get(\"id\")}...')
api_request('DELETE', f'/a2a/{a.get(\"id\")}')
except Exception as e:
print(f'Note: {e}')
# Register agent (JSON-RPC endpoint at /)
payload = {
'agent': {
'name': 'a2a-echo-agent',
'description': 'Lightweight A2A echo agent for docker-compose testing',
'endpoint_url': 'http://a2a_echo_agent:9100/',
'agent_type': 'jsonrpc',
'protocol_version': '0.3.0',
'capabilities': {'echo': True, 'preferredTransport': 'JSONRPC'},
'tags': ['testing', 'a2a', 'echo']
},
'visibility': 'public'
}
result = api_request('POST', '/a2a', payload)
print(f'✅ Registered a2a_echo_agent: {result.get(\"id\", \"unknown\")}')
"
echo "✅ Registration complete!"
profiles: ["testing"]
###############################################################################
# Locust JWT token generator - produces a gateway JWT for containerized load tests
###############################################################################
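# One way to (re)generate or inspect the token (illustrative, untested invocations
# that override this one-shot service's entrypoint):
#   docker compose --profile testing run --rm locust_token
#   docker compose --profile testing run --rm --entrypoint cat locust_token /tokens/gateway.jwt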
locust_token:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
networks: [mcpnet]
restart: "no"
# The gateway image runs as non-root (uid 1001). Docker named volumes are
# root-owned by default, so writing /tokens/gateway.jwt can fail unless we
# run this one-shot init container as root.
user: "0"
volumes:
- locust_token:/tokens
entrypoint: ["/bin/sh", "-c"]
command:
- |
set -eu
echo "Generating JWT token for Locust..."
TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null)
printf "%s" "$$TOKEN" > /tokens/gateway.jwt
echo ""
echo "✅ Token written to /tokens/gateway.jwt"
profiles: ["testing"]
###############################################################################
# Locust - Containerized load testing with web UI (master + optional workers)
# Web UI: http://localhost:8089
# Target: http://nginx:80 (internal network)
###############################################################################
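# Example invocations (illustrative; the variables are the ones defined in this
# service's environment section):
#   # Headless run writing HTML/CSV reports to ./reports
#   LOCUST_MODE=headless LOCUST_USERS=500 LOCUST_SPAWN_RATE=50 LOCUST_RUN_TIME=10m \
#     docker compose --profile testing up locust
#   # Web UI master with 4 workers (keep LOCUST_EXPECT_WORKERS in sync with --scale)
#   LOCUST_EXPECT_WORKERS=4 docker compose --profile testing up -d --scale locust_worker=4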
locust:
image: locustio/locust:latest
restart: unless-stopped
networks: [mcpnet]
# Run as the host UID/GID (set by Makefile) so reports written to ./reports
# don't end up root-owned and so non-1000 host users don't hit EACCES.
user: "${HOST_UID:-1000}:${HOST_GID:-1000}"
ports:
- "8089:8089"
working_dir: /mnt/locust
volumes:
- ./tests/loadtest:/mnt/locust:ro
- locust_token:/tokens:ro
- ./reports:/mnt/reports
depends_on:
gateway:
condition: service_healthy
nginx:
condition: service_healthy
locust_token:
condition: service_completed_successfully
entrypoint: ["/bin/sh", "-c"]
command:
- |
set -eu
while [ ! -s /tokens/gateway.jwt ]; do echo "Waiting for gateway JWT..."; sleep 0.5; done
export MCPGATEWAY_BEARER_TOKEN="$$(cat /tokens/gateway.jwt)"
MODE="$${LOCUST_MODE:-master}"
if [ "$$MODE" = "headless" ]; then
exec locust -f /mnt/locust/locustfile.py \
--host=http://nginx:80 \
--users="$${LOCUST_USERS:-100}" \
--spawn-rate="$${LOCUST_SPAWN_RATE:-10}" \
--run-time="$${LOCUST_RUN_TIME:-5m}" \
--headless \
--html=/mnt/reports/locust_report.html \
--csv=/mnt/reports/locust \
--only-summary
fi
exec locust -f /mnt/locust/locustfile.py \
--host=http://nginx:80 \
--web-host=0.0.0.0 --web-port=8089 \
--master --expect-workers="$${LOCUST_EXPECT_WORKERS:-1}" \
--class-picker
environment:
- HOME=/tmp
- LOCUST_EXPECT_WORKERS=${LOCUST_EXPECT_WORKERS:-1}
- LOCUST_MODE=${LOCUST_MODE:-master} # master (default) or headless
- LOCUST_USERS=${LOCUST_USERS:-100}
- LOCUST_SPAWN_RATE=${LOCUST_SPAWN_RATE:-10}
- LOCUST_RUN_TIME=${LOCUST_RUN_TIME:-5m}
deploy:
resources:
limits:
cpus: '2'
memory: 1G
reservations:
cpus: '0.5'
memory: 128M
profiles: ["testing"]
locust_worker:
image: locustio/locust:latest
restart: unless-stopped
networks: [mcpnet]
user: "${HOST_UID:-1000}:${HOST_GID:-1000}"
working_dir: /mnt/locust
volumes:
- ./tests/loadtest:/mnt/locust:ro
- locust_token:/tokens:ro
depends_on:
locust:
condition: service_started
locust_token:
condition: service_completed_successfully
entrypoint: ["/bin/sh", "-c"]
command:
- |
set -eu
while [ ! -s /tokens/gateway.jwt ]; do echo "Waiting for gateway JWT..."; sleep 0.5; done
export MCPGATEWAY_BEARER_TOKEN="$$(cat /tokens/gateway.jwt)"
exec locust -f /mnt/locust/locustfile.py \
--host=http://nginx:80 \
--worker --master-host=locust
environment:
- HOME=/tmp
deploy:
resources:
limits:
cpus: '2'
memory: 1G
reservations:
cpus: '0.5'
memory: 128M
profiles: ["testing"]
###############################################################################
# Benchmark Server - Multi-server MCP benchmark tool
# Spawns multiple lightweight MCP servers for load testing
# Usage: make benchmark-up (or: docker compose --profile benchmark up -d)
#
# Environment variables:
# BENCHMARK_SERVER_COUNT - Number of servers to spawn (default: 10)
# BENCHMARK_START_PORT - Starting port number (default: 9000)
###############################################################################
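# Example (illustrative): spawn 20 servers starting at port 9000
#   BENCHMARK_SERVER_COUNT=20 BENCHMARK_START_PORT=9000 docker compose --profile benchmark up -d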
benchmark_server:
build:
context: ./mcp-servers/go/benchmark-server
dockerfile: Dockerfile
image: mcpgateway/benchmark-server:latest
restart: unless-stopped
networks: [mcpnet]
command:
- "-transport=http"
- "-server-count=${BENCHMARK_SERVER_COUNT:-10}"
- "-start-port=${BENCHMARK_START_PORT:-9000}"
- "-tools=50"
- "-resources=20"
- "-prompts=10"
ports:
# Port range supports up to 100 servers (9000-9099)
# Actual servers spawned controlled by BENCHMARK_SERVER_COUNT
- "9000-9099:9000-9099"
# Note: No healthcheck - scratch-based Go image has no shell
# Verify health via: curl http://localhost:9000/health
deploy:
resources:
limits:
cpus: '2'
memory: 1G
reservations:
cpus: '0.5'
memory: 256M
profiles: ["benchmark"]
###############################################################################
# Auto-registration service - registers benchmark servers with gateway
# Uses BENCHMARK_SERVER_COUNT and BENCHMARK_START_PORT environment variables
###############################################################################
register_benchmark:
image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
networks: [mcpnet]
depends_on:
gateway:
condition: service_healthy
benchmark_server:
condition: service_started
environment:
- JWT_SECRET_KEY=my-test-key
- BENCHMARK_SERVER_COUNT=${BENCHMARK_SERVER_COUNT:-10}
- BENCHMARK_START_PORT=${BENCHMARK_START_PORT:-9000}
restart: "no"
entrypoint: ["/bin/sh", "-c"]
command:
- |
echo "Registering benchmark servers with gateway..."
# Wait for benchmark servers to start (no healthcheck available)
echo "Waiting for benchmark servers to start..."
sleep 5
# Generate JWT token
export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null)
# Register benchmark servers using environment variables
python3 -c "
import urllib.request
import urllib.error
import json
import os
token = os.environ.get('MCPGATEWAY_BEARER_TOKEN', '')
server_count = int(os.environ.get('BENCHMARK_SERVER_COUNT', '10'))
start_port = int(os.environ.get('BENCHMARK_START_PORT', '9000'))
headers = {
'Authorization': f'Bearer {token}',
'Content-Type': 'application/json'
}
def api_request(method, path, data=None):
url = f'http://gateway:4444{path}'
body = json.dumps(data).encode() if data else None
req = urllib.request.Request(url, data=body, headers=headers, method=method)
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode())
# Register benchmark servers
print(f'Registering {server_count} benchmark servers (ports {start_port}-{start_port + server_count - 1})...')
registered = 0
for port in range(start_port, start_port + server_count):
name = f'benchmark-{port}'
try:
result = api_request('POST', '/gateways', {
'name': name,
'url': f'http://benchmark_server:{port}/mcp',
'transport': 'STREAMABLEHTTP'
})
print(f'✅ Registered {name}: {result.get(\"id\", \"unknown\")}')
registered += 1
except urllib.error.HTTPError as e:
if e.code == 409:
print(f'⏭️ {name} already registered')
registered += 1
else:
print(f'❌ Failed to register {name}: HTTP {e.code}')
except Exception as e:
print(f'❌ Failed to register {name}: {e}')
print(f'✅ Registration complete: {registered}/{server_count} benchmark servers')
"
profiles: ["benchmark"]
###############################################################################
# TLS PROFILE - Zero-config HTTPS via Nginx (enabled with --profile tls)
# Usage: make compose-tls (or: docker compose --profile tls up -d)
#
# Features:
# - Auto-generates self-signed certificates on first run
# - Supports custom certificates (CA-signed or your own)
# - Supports passphrase-protected keys (auto-decrypted for nginx)
# - HTTPS on port 8443, HTTP on port 8080 (both available)
# - Compatible with other profiles: --profile tls --profile monitoring
#
# ═══════════════════════════════════════════════════════════════════════════
# Bringing Your Own Certificates
# ═══════════════════════════════════════════════════════════════════════════
#
# Option 1: Unencrypted Private Key (no passphrase)
# ───────────────────────────────────────────────────────────────────────────
# mkdir -p certs
# cp /path/to/your/certificate.pem certs/cert.pem
# cp /path/to/your/private-key.pem certs/key.pem
# make compose-tls
#
# Option 2: Passphrase-Protected Private Key
# ───────────────────────────────────────────────────────────────────────────
# mkdir -p certs
# cp /path/to/your/certificate.pem certs/cert.pem
# cp /path/to/your/encrypted-key.pem certs/key-encrypted.pem
# echo "KEY_FILE_PASSWORD=your-passphrase" >> .env
# make compose-tls
#
# The cert_init service will automatically decrypt key-encrypted.pem to
# key.pem for nginx (nginx doesn't support passphrase-protected keys).
#
# Option 3: Generate Self-Signed with Passphrase
# ───────────────────────────────────────────────────────────────────────────
# make certs-passphrase # Generates cert + key-encrypted.pem
# echo "KEY_FILE_PASSWORD=your-passphrase" >> .env
# make compose-tls # Auto-decrypts for nginx
###############################################################################
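# Sanity check for custom RSA certificates (plain openssl, nothing repo-specific):
# the certificate and the (decrypted) key must share the same modulus, otherwise
# nginx will reject the pair at startup.
#   openssl x509 -noout -modulus -in certs/cert.pem | openssl md5
#   openssl rsa  -noout -modulus -in certs/key.pem  | openssl md5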
# ──────────────────────────────────────────────────────────────────────
# Certificate Initialization - Auto-generates self-signed certs if missing
# Supports passphrase-protected keys via KEY_FILE_PASSWORD
# ──────────────────────────────────────────────────────────────────────
cert_init:
image: alpine/openssl:latest
volumes:
- ./certs:/certs
environment:
- KEY_FILE_PASSWORD=${KEY_FILE_PASSWORD:-}
entrypoint: ["/bin/sh", "-c"]
command:
- |
# Check if we have an encrypted key that needs decryption
if [ -f /certs/key-encrypted.pem ] && [ -n "$${KEY_FILE_PASSWORD}" ]; then
# Validate: encrypted key requires matching certificate
if [ ! -f /certs/cert.pem ]; then
echo "❌ Found key-encrypted.pem but cert.pem is missing"
echo " Please provide both files: cert.pem and key-encrypted.pem"
exit 1
fi
echo "🔓 Decrypting passphrase-protected key for nginx..."
if [ -f /certs/key.pem ]; then
echo "⚠️ Overwriting existing key.pem with decrypted version"
fi
# Decrypt the key for nginx (nginx doesn't support passphrase-protected keys)
# Using env: prefix to avoid exposing password in process listing
openssl rsa -in /certs/key-encrypted.pem -out /certs/key.pem -passin env:KEY_FILE_PASSWORD
if [ $? -eq 0 ]; then
chmod 640 /certs/key.pem
echo "✅ Successfully decrypted key-encrypted.pem to key.pem"
else
echo "❌ Failed to decrypt key-encrypted.pem - check KEY_FILE_PASSWORD"
exit 1
fi
fi
# Check if we already have unencrypted certs
if [ -f /certs/cert.pem ] && [ -f /certs/key.pem ]; then
echo "✅ Certificates found in ./certs - using existing"
exit 0
fi
# Generate new self-signed certificate (without passphrase for nginx)
echo "🔏 Generating self-signed TLS certificate..."
mkdir -p /certs
openssl req -x509 -newkey rsa:4096 -sha256 -days 365 -nodes \
-keyout /certs/key.pem -out /certs/cert.pem \
-subj "/CN=localhost" \
-addext "subjectAltName=DNS:localhost,DNS:gateway,DNS:nginx,IP:127.0.0.1"
chmod 644 /certs/cert.pem
chmod 640 /certs/key.pem
echo "✅ TLS certificate generated in ./certs"
profiles: ["tls"]
# ──────────────────────────────────────────────────────────────────────
# Nginx TLS - HTTPS-enabled reverse proxy (overrides default nginx)
# ──────────────────────────────────────────────────────────────────────
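# Handshake check once the stack is up (illustrative; assumes the 8443:443 mapping below):
#   openssl s_client -connect localhost:8443 -servername localhost </dev/null 2>/dev/null \
#     | openssl x509 -noout -subject -dates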
nginx_tls:
build:
context: ./infra/nginx
dockerfile: Dockerfile
image: mcpgateway/nginx-cache:latest
restart: unless-stopped
ports:
- "8080:80" # HTTP caching proxy (public-facing)
- "8443:443" # HTTPS caching proxy (public-facing)
networks: [mcpnet]
environment:
# Set to "true" to force all HTTP requests to redirect to HTTPS
- NGINX_FORCE_HTTPS=${NGINX_FORCE_HTTPS:-false}
depends_on:
gateway:
condition: service_healthy
cert_init:
condition: service_completed_successfully
volumes:
- nginx_cache:/var/cache/nginx # Persistent cache storage
- ./infra/nginx/nginx-tls.conf:/etc/nginx/nginx.conf:ro # TLS-enabled config
- ./certs:/app/certs:ro # Mount SSL certs
# TCP kernel tuning for 3000 concurrent connections
sysctls:
- net.ipv4.tcp_fin_timeout=15
- net.ipv4.ip_local_port_range=1024 65535
ulimits:
nofile:
soft: 65535
hard: 65535
healthcheck:
test: ["CMD", "curl", "-fk", "https://localhost/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
deploy:
resources:
limits:
cpus: '4'
memory: 1G
reservations:
cpus: '2'
memory: 512M
profiles: ["tls"]
###############################################################################
# MCP INSPECTOR - Interactive MCP client for debugging and testing
# Usage: make inspector-up (or: docker compose --profile inspector up -d)
# Access: http://localhost:6274
#
# Connect to the gateway's virtual server from the Inspector UI:
# 1. Transport: Streamable HTTP
# 2. URL: http://nginx:80/servers/9779b6698cbd4b4995ee04a4fab38737/mcp
# 3. Add header: Authorization: Bearer <your-jwt-token>
#
# Generate a JWT token:
# python -m mcpgateway.utils.create_jwt_token \
# --username admin@example.com --exp 10080 --secret my-test-key --algo HS256
###############################################################################
mcp_inspector:
image: ghcr.io/modelcontextprotocol/inspector:latest
restart: unless-stopped
networks: [mcpnet]
ports:
- "6274:6274" # Inspector web UI
- "6277:6277" # Inspector MCP proxy server
environment:
- HOST=0.0.0.0 # Bind to all interfaces (required in Docker)
- MCP_AUTO_OPEN_ENABLED=false # Don't attempt to open browser in container
- DANGEROUSLY_OMIT_AUTH=true # Skip proxy token (safe: local dev only)
depends_on:
gateway:
condition: service_healthy
deploy:
resources:
limits:
cpus: '1'
memory: 512M
reservations:
cpus: '0.25'
memory: 128M
profiles: ["inspector"]