# Docker Compose configuration for Crawl4AI MCP Server
# Note: The 'version' field is intentionally omitted as it's obsolete in modern Docker Compose
# See: https://docs.docker.com/compose/compose-file/
name: crawl4ai_mcp # Optional: Explicitly set project name
# ============================================
# Networks
# ============================================
networks:
crawl4ai-network:
driver: bridge
# ============================================
# Volumes
# ============================================
volumes:
qdrant-data:
neo4j-data:
valkey-data:
searxng-cache:
neo4j-logs:
# ============================================
# Services
# ============================================
services:
# ------------------------------------------
# Core Services (always required)
# ------------------------------------------
mcp-crawl4ai:
image: ${REGISTRY:-docker.io}/krashnicov/crawl4ai-mcp:${VERSION:-latest}
build:
context: .
dockerfile: Dockerfile
target: production
cache_from:
- ${REGISTRY:-docker.io}/krashnicov/crawl4ai-mcp:buildcache
container_name: mcp-crawl4ai
profiles: ["core", "full", "dev"]
restart: unless-stopped
ports:
- "${PORT:-8051}:8051"
environment:
- TRANSPORT=${TRANSPORT:-http}
- HOST=0.0.0.0
- PORT=${PORT:-8051}
- SEARXNG_URL=${SEARXNG_URL:-http://searxng:8080}
- QDRANT_URL=http://qdrant:6333
- NEO4J_URI=bolt://neo4j:7687
- VALKEY_URL=redis://valkey:6379
- VECTOR_DATABASE=${VECTOR_DATABASE:-qdrant}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- USE_RERANKING=${USE_RERANKING:-true}
- ENHANCED_CONTEXT=${ENHANCED_CONTEXT:-true}
- USE_AGENTIC_RAG=${USE_AGENTIC_RAG:-false}
# Agentic Search Configuration
- AGENTIC_SEARCH_ENABLED=${AGENTIC_SEARCH_ENABLED:-false}
- AGENTIC_SEARCH_COMPLETENESS_THRESHOLD=${AGENTIC_SEARCH_COMPLETENESS_THRESHOLD:-0.95}
- AGENTIC_SEARCH_MAX_ITERATIONS=${AGENTIC_SEARCH_MAX_ITERATIONS:-3}
- AGENTIC_SEARCH_MAX_URLS_PER_ITERATION=${AGENTIC_SEARCH_MAX_URLS_PER_ITERATION:-5}
- AGENTIC_SEARCH_MAX_PAGES_PER_ITERATION=${AGENTIC_SEARCH_MAX_PAGES_PER_ITERATION:-50}
- AGENTIC_SEARCH_URL_SCORE_THRESHOLD=${AGENTIC_SEARCH_URL_SCORE_THRESHOLD:-0.7}
- AGENTIC_SEARCH_USE_SEARCH_HINTS=${AGENTIC_SEARCH_USE_SEARCH_HINTS:-false}
- AGENTIC_SEARCH_ENABLE_URL_FILTERING=${AGENTIC_SEARCH_ENABLE_URL_FILTERING:-true}
- AGENTIC_SEARCH_MAX_URLS_TO_RANK=${AGENTIC_SEARCH_MAX_URLS_TO_RANK:-20}
- AGENTIC_SEARCH_LLM_TEMPERATURE=${AGENTIC_SEARCH_LLM_TEMPERATURE:-0.3}
- AGENTIC_SEARCH_MAX_QDRANT_RESULTS=${AGENTIC_SEARCH_MAX_QDRANT_RESULTS:-10}
# Test Configuration
- TEST_MODEL_CHOICE=${TEST_MODEL_CHOICE:-gpt-4.1-nano}
- TEST_OPENAI_API_KEY=${TEST_OPENAI_API_KEY:-}
- ALLOW_OPENAI_TESTS=${ALLOW_OPENAI_TESTS:-false}
env_file: .env
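    # Note: values set under 'environment' take precedence over entries loaded from .env.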
volumes:
- ./data:/app/data:rw
- ./logs:/app/logs:rw
- ./analysis_scripts:/app/analysis_scripts:rw
- /tmp:/app/tmp_scripts:rw
networks:
- crawl4ai-network
depends_on:
qdrant:
condition: service_healthy
valkey:
condition: service_healthy
searxng:
condition: service_started
healthcheck:
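      # Plain TCP connect via the Python stdlib, so the image needs no curl/wget.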
test: ["CMD", "python", "-c", "import socket; s = socket.socket(); s.settimeout(1); s.connect(('localhost', ${PORT:-8051})); s.close()"]
interval: 30s
timeout: 3s
start_period: 10s
retries: 3
user: "1000:1000" # Non-root user
security_opt:
- no-new-privileges:true
cap_drop:
- ALL
cap_add:
- NET_BIND_SERVICE
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
develop:
watch:
- action: sync
path: ./src
target: /app/src
- action: sync
path: ./knowledge_graphs
target: /app/knowledge_graphs
- action: rebuild
path: ./pyproject.toml
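    # The watch rules above take effect with 'docker compose watch' (or
    # 'docker compose up --watch'): sync copies changed files into the running
    # container; rebuild re-creates the image when pyproject.toml changes.
  #
  # Minimal .env sketch -- names come from the environment block above; values
  # are placeholders, and only variables without defaults are strictly required:
  #   OPENAI_API_KEY=sk-...
  #   ANTHROPIC_API_KEY=sk-ant-...
  #   QDRANT_API_KEY=choose-a-key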
# ------------------------------------------
# Vector Database
# ------------------------------------------
qdrant:
image: qdrant/qdrant:v1.15.1
container_name: qdrant
profiles: ["core", "full", "dev"]
restart: unless-stopped
ports:
- "${QDRANT_PORT:-6333}:6333"
- "${QDRANT_GRPC_PORT:-6334}:6334"
volumes:
- qdrant-data:/qdrant/storage:rw
# - ./docker/qdrant/config.yaml:/qdrant/config/config.yaml:ro
environment:
- QDRANT__SERVICE__API_KEY=${QDRANT_API_KEY}
- QDRANT__SERVICE__GRPC_PORT=6334
- QDRANT__LOG_LEVEL=INFO
- QDRANT__SERVICE__ENABLE_TLS=false
networks:
- crawl4ai-network
healthcheck:
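      # The Qdrant image ships without curl/wget, so this issues a raw HTTP
      # request to /readyz over bash's built-in /dev/tcp.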
test: ["CMD", "bash", "-c", "exec 3<>/dev/tcp/127.0.0.1/6333 && echo -e 'GET /readyz HTTP/1.1\\r\\nHost: localhost\\r\\nConnection: close\\r\\n\\r\\n' >&3 && grep -q 'HTTP/1.1 200' <&3"]
interval: 30s
timeout: 5s
retries: 3
# user: "1000:1000" # Would be nice to implement this but it causes all sorts of permissions issues.
security_opt:
- no-new-privileges:true
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
# ------------------------------------------
# Cache Layer
# ------------------------------------------
valkey:
image: valkey/valkey:8-alpine
container_name: valkey
profiles: ["core", "full", "dev"]
restart: unless-stopped
    # RDB snapshot thresholds follow the stock redis.conf defaults:
    # after 900s if >=1 change, 300s if >=10, or 60s if >=10000.
    command: >
      valkey-server
      --maxmemory 256mb
      --maxmemory-policy allkeys-lru
      --save 900 1
      --save 300 10
      --save 60 10000
ports:
- "${VALKEY_PORT:-6379}:6379"
volumes:
- valkey-data:/data:rw
networks:
- crawl4ai-network
healthcheck:
test: ["CMD", "valkey-cli", "ping"]
interval: 30s
timeout: 5s
retries: 3
user: "999:999"
security_opt:
- no-new-privileges:true
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
# ------------------------------------------
# Search Engine
# ------------------------------------------
searxng:
image: searxng/searxng:latest
container_name: searxng
profiles: ["core", "full", "dev"]
restart: unless-stopped
ports:
- "${SEARXNG_PORT:-8080}:8080"
volumes:
- ./docker/searxng:/etc/searxng:ro
- searxng-cache:/var/cache/searxng:rw
environment:
- SEARXNG_BASE_URL=${SEARXNG_BASE_URL:-http://localhost:8080/}
      - SEARXNG_SECRET_KEY=${SEARXNG_SECRET_KEY:-ultrasecretkey} # Insecure fallback; set a real secret in .env for any non-local deployment
networks:
- crawl4ai-network
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/healthz"]
interval: 30s
timeout: 5s
retries: 3
cap_drop:
- ALL
cap_add:
- CHOWN
- SETGID
- SETUID
- DAC_OVERRIDE
security_opt:
- no-new-privileges:true
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
# ------------------------------------------
# Graph Database (Advanced Features)
# ------------------------------------------
neo4j:
image: neo4j:5.25-community
container_name: neo4j
profiles: ["core", "full", "dev"] # Only in full/dev profiles
restart: unless-stopped
ports:
- "${NEO4J_HTTP_PORT:-7474}:7474"
- "${NEO4J_BOLT_PORT:-7687}:7687"
environment:
      - NEO4J_AUTH=${NEO4J_USERNAME:-neo4j}/${NEO4J_PASSWORD:-password} # Override NEO4J_PASSWORD in .env; Neo4j 5 rejects passwords shorter than 8 characters
- NEO4J_server_memory_heap_initial__size=512M
- NEO4J_server_memory_heap_max__size=1G
- NEO4J_server_memory_pagecache_size=512M
- NEO4J_dbms_security_procedures_unrestricted=apoc.*
- NEO4J_dbms_security_procedures_allowlist=apoc.*
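      # Note: these two settings only allow APOC procedures; the community
      # image may also need NEO4J_PLUGINS='["apoc"]' to install the plugin itself.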
volumes:
- neo4j-data:/data:rw
- ./docker/neo4j/import:/import:ro
- neo4j-logs:/logs:rw
networks:
- crawl4ai-network
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://localhost:7474"]
interval: 30s
timeout: 10s
start_period: 60s
retries: 5
user: "7474:7474"
security_opt:
- no-new-privileges:true
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
# ------------------------------------------
# Development Tools (dev profile only)
# ------------------------------------------
mailhog:
image: mailhog/mailhog:latest
container_name: mailhog
profiles: ["dev"] # Only in development
restart: "no" # Don't restart development tools
ports:
- "1025:1025" # SMTP
- "8025:8025" # Web UI
networks:
- crawl4ai-network
security_opt:
- no-new-privileges:true
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
# Jupyter notebook for development
jupyter:
image: jupyter/datascience-notebook:2024-07-29 # Pinned version for stability
container_name: jupyter
profiles: ["dev"]
restart: "no" # Don't restart development tools
ports:
- "8888:8888"
environment:
- JUPYTER_ENABLE_LAB=yes
- JUPYTER_TOKEN=${JUPYTER_TOKEN:-crawl4ai}
volumes:
- ./notebooks:/home/jovyan/work:rw
- ./data:/home/jovyan/data:ro
networks:
- crawl4ai-network
security_opt:
- no-new-privileges:true
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
# ============================================
# Profile Usage:
# - core: Minimal setup (MCP, Qdrant, Valkey, SearXNG)
# - full: Complete setup (core + Neo4j)
# - dev: Development setup (full + Mailhog, Jupyter)
#
# Examples:
# docker compose --profile core up -d # Production minimal
# docker compose --profile full up -d # Production complete
# docker compose --profile dev up -d # Development environment
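#   docker compose --profile dev watch  # Live-reload loop (uses develop.watch above)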
# ============================================