# docker-compose.yaml
# Compose file format version declared for this stack.
# NOTE(review): recent Docker Compose releases reportedly treat the top-level
# `version` key as obsolete and only warn about it; confirm before removing.
version: "3.8"
# Service definitions for the stack follow.
services:
# Redis: backing store for task queuing and caching.
  redis:
    container_name: mcp-redis
    image: redis:7-alpine
    # Exec-form equivalent of `redis-server --appendonly yes`.
    # "yes" is quoted so YAML keeps it a string instead of a boolean.
    command:
      - "redis-server"
      - "--appendonly"
      - "yes"
    ports:
      - "6379:6379"
    volumes:
      # Named volume mounted at /data inside the container.
      - redis-data:/data
    networks:
      - mcp-network
    healthcheck:
      # Probe the server with `redis-cli ping`.
      test:
        - "CMD"
        - "redis-cli"
        - "ping"
      interval: 10s
      timeout: 3s
      retries: 3
# ClickHouse: storage for telemetry and analytics.
  clickhouse:
    # NOTE(review): `latest` is a floating tag; consider pinning a release so
    # that image pulls are reproducible.
    image: clickhouse/clickhouse-server:latest
    container_name: mcp-clickhouse
    ports:
      - "8123:8123"  # HTTP interface
      - "9000:9000"  # Native protocol
    volumes:
      - clickhouse-data:/var/lib/clickhouse
      # Init SQL mounted read-only into the image's initdb directory.
      - ./scripts/init_clickhouse.sql:/docker-entrypoint-initdb.d/init.sql:ro
    environment:
      CLICKHOUSE_DB: mcp
      CLICKHOUSE_USER: mcp_user
      # Falls back to a built-in default when CLICKHOUSE_PASSWORD is unset;
      # override it through the environment or an .env file.
      CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-mcp_secure_pass}
      # Quoted so the value reaches the container as the string "1".
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: "1"
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    healthcheck:
      # Authenticate the probe as the configured user. A bare
      # `clickhouse-client --query` connects as `default` with no password,
      # which the official image is documented to replace once CLICKHOUSE_USER
      # is set, so the check would fail and every `service_healthy` dependent
      # would be blocked. `$$` stops Compose from interpolating the variables
      # so the container's shell expands them at probe time.
      test:
        - CMD-SHELL
        - >-
          clickhouse-client
          --user "$$CLICKHOUSE_USER"
          --password "$$CLICKHOUSE_PASSWORD"
          --query "SELECT 1"
      interval: 15s
      timeout: 5s
      retries: 5
    networks:
      - mcp-network
# AI Envoy Gateway - official Envoy proxy image (AI Gateway extensions).
# Project reference: https://github.com/envoyproxy/ai-gateway
  envoy:
    container_name: mcp-envoy
    image: envoyproxy/envoy:v1.28-latest
    # Load the mounted config and log at info level.
    command:
      - "-c"
      - "/etc/envoy/envoy.yaml"
      - "--log-level"
      - "info"
    # NOTE(review): the port roles below are taken from the original comments;
    # envoy.yaml is not visible here, so confirm which port really serves the
    # admin interface (the healthcheck expects /ready on 10000).
    ports:
      - "10000:10000"  # Admin interface
      - "8080:8080"  # Proxy port
      - "9901:9901"  # Prometheus metrics
    volumes:
      - ./envoy.yaml:/etc/envoy/envoy.yaml:ro
      - envoy-logs:/var/log/envoy
    networks:
      - mcp-network
    # Start only after the API and ClickHouse report healthy.
    depends_on:
      mcp-api:
        condition: service_healthy
      clickhouse:
        condition: service_healthy
    healthcheck:
      # NOTE(review): requires `curl` inside the Envoy image - confirm it is
      # available there.
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:10000/ready"
      interval: 10s
      timeout: 5s
      retries: 3
# MCP API service, built from the Dockerfile in this directory.
  mcp-api:
    container_name: mcp-api
    build:
      context: .
      dockerfile: Dockerfile
    restart: unless-stopped
    ports:
      - "8000:8000"
    networks:
      - mcp-network
    # Start only after both data stores report healthy.
    depends_on:
      redis:
        condition: service_healthy
      clickhouse:
        condition: service_healthy
    environment:
      # --- Redis ---
      REDIS_URL: redis://redis:6379/0
      # --- ClickHouse (native protocol port) ---
      CLICKHOUSE_HOST: clickhouse
      CLICKHOUSE_PORT: "9000"
      CLICKHOUSE_DB: mcp
      CLICKHOUSE_USER: mcp_user
      CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-mcp_secure_pass}
      # --- Provider API keys (set in .env file) ---
      RUNPOD_API_KEY: ${RUNPOD_API_KEY}
      VASTAI_API_KEY: ${VASTAI_API_KEY}
      # --- Envoy gateway ---
      ENVOY_GATEWAY_URL: http://envoy:8080
      # --- Telemetry ---
      # NOTE(review): the OTLP default targets `otel-collector`, which is only
      # started with the `observability` profile.
      PHOENIX_ENDPOINT: ${PHOENIX_ENDPOINT:-}
      OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://otel-collector:4318}
      # --- Service configuration ---
      LOG_LEVEL: ${LOG_LEVEL:-INFO}
      WORKERS: ${WORKERS:-4}
      # --- SkyPilot paths (match the volume mounts below) ---
      SKY_LOGS: /app/logs/skypilot
      SKY_USER_FILE_CACHE: /app/.sky/cache
    volumes:
      # Source and scripts are bind-mounted read-only.
      - ./src:/app/src:ro
      - ./scripts:/app/scripts:ro
      - skypilot-data:/root/.sky  # SkyPilot state and config
      - skypilot-cache:/app/.sky/cache  # SkyPilot file cache
      - skypilot-logs:/app/logs/skypilot  # SkyPilot logs
    healthcheck:
      # HTTP probe against the service's /health endpoint.
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8000/health"
      interval: 30s
      timeout: 10s
      retries: 3
# Heartbeat worker: runs scripts/heartbeat.py (periodic heartbeat script).
  heartbeat-worker:
    container_name: mcp-heartbeat
    build:
      context: .
      dockerfile: Dockerfile
    command: python scripts/heartbeat.py
    restart: unless-stopped
    networks:
      - mcp-network
    # Long-form spelling of the plain dependency list: each dependency only
    # has to be started, not healthy.
    # NOTE(review): the other services wait on `service_healthy`; consider
    # whether this worker should as well.
    depends_on:
      redis:
        condition: service_started
      clickhouse:
        condition: service_started
      mcp-api:
        condition: service_started
    environment:
      REDIS_URL: redis://redis:6379/0
      CLICKHOUSE_HOST: clickhouse
      CLICKHOUSE_PORT: "9000"
      CLICKHOUSE_DB: mcp
      CLICKHOUSE_USER: mcp_user
      CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-mcp_secure_pass}
      # Defaults to 60 when HEARTBEAT_INTERVAL is not provided.
      HEARTBEAT_INTERVAL: ${HEARTBEAT_INTERVAL:-60}
      RUNPOD_API_KEY: ${RUNPOD_API_KEY}
      VASTAI_API_KEY: ${VASTAI_API_KEY}
    volumes:
      # Same read-only code mounts and SkyPilot state volume as mcp-api.
      - ./src:/app/src:ro
      - ./scripts:/app/scripts:ro
      - skypilot-data:/root/.sky
# Optional: OpenTelemetry Collector for observability (profile-gated).
  otel-collector:
    container_name: mcp-otel-collector
    image: otel/opentelemetry-collector:latest
    # Only started when the `observability` profile is enabled.
    profiles:
      - observability
    command:
      - "--config=/etc/otel-collector-config.yaml"
    volumes:
      # Collector configuration, mounted read-only.
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
    ports:
      - "4317:4317"  # OTLP gRPC
      - "4318:4318"  # OTLP HTTP
      - "8888:8888"  # Prometheus metrics
    networks:
      - mcp-network
# Optional: Prometheus for metrics (profile-gated).
  prometheus:
    container_name: mcp-prometheus
    image: prom/prometheus:latest
    # Only started when the `observability` profile is enabled.
    profiles:
      - observability
    # Point Prometheus at the mounted config and the persistent TSDB path.
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    networks:
      - mcp-network
# Optional: Grafana for visualization (profile-gated).
  grafana:
    container_name: mcp-grafana
    image: grafana/grafana:latest
    # Only started when the `observability` profile is enabled.
    profiles:
      - observability
    ports:
      - "3000:3000"
    environment:
      # Falls back to `admin` when GRAFANA_PASSWORD is unset; override it for
      # anything beyond local use.
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD:-admin}
      # Requests the ClickHouse datasource plugin.
      GF_INSTALL_PLUGINS: grafana-clickhouse-datasource
    volumes:
      - grafana-data:/var/lib/grafana
    networks:
      - mcp-network
# Optional: ClickHouse UI (Tabix web client) for debugging.
  clickhouse-ui:
    container_name: mcp-clickhouse-ui
    image: spoonest/clickhouse-tabix-web-client:latest
    # Only started when the `debug` profile is enabled.
    profiles:
      - debug
    # Container port 80 is published on host port 8124.
    ports:
      - "8124:80"
    networks:
      - mcp-network
# Named volumes used by the services above; all use the `local` driver.
volumes:
  # Data stores
  redis-data:
    driver: local
  clickhouse-data:
    driver: local

  # SkyPilot state, file cache and logs
  skypilot-data:
    driver: local
  skypilot-cache:
    driver: local
  skypilot-logs:
    driver: local

  # Gateway logs
  envoy-logs:
    driver: local

  # Observability stack
  prometheus-data:
    driver: local
  grafana-data:
    driver: local
# Shared bridge network that every service in this file joins.
networks:
  mcp-network:
    driver: bridge