# .env.example
# Open Search MCP Configuration
# Copy this file to .env and fill in your API keys (all optional)
# =============================================================================
# AI MODEL CONFIGURATION (Deep Research Features)
# =============================================================================
# Ollama Configuration (Local AI Models)
OLLAMA_HOST=http://localhost:11434
DEFAULT_AI_MODEL=llama3.2
AI_TEMPERATURE=0.7
AI_MAX_TOKENS=4096
AI_TIMEOUT=30000
# Hugging Face Fallback (Free API)
# Get your API key: https://huggingface.co/settings/tokens
# HUGGINGFACE_API_KEY=your_huggingface_api_key_here
# AI Processing Options
ENABLE_AI_OPTIMIZATION=true
ENABLE_SEMANTIC_SEARCH=true
ENABLE_QUERY_EXPANSION=true
ENABLE_CONTENT_ANALYSIS=true
# =============================================================================
# DEEP RESEARCH CONFIGURATION
# =============================================================================
# Search Scope (Claude 4 / Gemini 2.5 Pro level)
MAX_SOURCES_PER_SEARCH=200
MAX_CONCURRENT_SEARCHES=50
SEARCH_TIMEOUT=30000
ENABLE_MULTI_ROUND_SEARCH=true
# Interactive Deep Research Engine
ENABLE_INTERACTIVE_RESEARCH=true
ENABLE_THINKING_VISUALIZATION=true
ENABLE_USER_COLLABORATION=true
ENABLE_EVIDENCE_COLLECTION=true
RESEARCH_STEP_TIMEOUT=60000
MAX_THINKING_NODES=1000
MERMAID_DIAGRAM_MAX_SIZE=50000
# Content Processing
ENABLE_PDF_PROCESSING=true
ENABLE_OCR=true
OCR_LANGUAGES=eng,chi_sim,chi_tra
MAX_PDF_SIZE_MB=50
# Quality Control
MINIMUM_RELEVANCE_SCORE=0.6
MINIMUM_QUALITY_SCORE=0.7
ENABLE_FACT_CHECKING=true
ENABLE_CREDIBILITY_ASSESSMENT=true
# Vector Database (for semantic search)
ENABLE_VECTOR_CACHE=true
VECTOR_CACHE_SIZE=10000
CHROMA_HOST=localhost
CHROMA_PORT=8000
# =============================================================================
# SEARCH ENGINE APIs (Optional - for backup/enhanced search)
# =============================================================================
# Google Custom Search API (100 free queries per day)
# Get your API key: https://developers.google.com/custom-search/v1/introduction
# GOOGLE_API_KEY=your_google_api_key_here
# GOOGLE_SEARCH_ENGINE_ID=your_search_engine_id_here
# Bing Search API (Free tier available)
# Get your API key: https://www.microsoft.com/en-us/bing/apis/bing-web-search-api
# BING_API_KEY=your_bing_api_key_here
# =============================================================================
# SOCIAL & PROFESSIONAL APIs (Optional - for enhanced features)
# =============================================================================
# GitHub API Token (Higher rate limits, 5000 requests/hour)
# Generate token: https://github.com/settings/tokens
# GITHUB_TOKEN=your_github_token_here
# =============================================================================
# LOGGING & DEBUGGING
# =============================================================================
# Log level: debug, info, warn, error
LOG_LEVEL=info
# Enable detailed request logging
DEBUG_REQUESTS=false
# Enable performance monitoring
ENABLE_METRICS=true
# =============================================================================
# CACHING CONFIGURATION
# =============================================================================
# Cache time-to-live in seconds
CACHE_TTL=3600
# Maximum memory cache size
MAX_MEMORY_CACHE_SIZE=100MB
# Maximum file cache size
MAX_FILE_CACHE_SIZE=1GB
# Cache directory (relative to project root)
CACHE_DIR=./cache
# =============================================================================
# CRAWLER CONFIGURATION
# =============================================================================
# Request timeout in milliseconds
CRAWLER_TIMEOUT=30000
# Maximum retries for failed requests
CRAWLER_RETRIES=3
# Delay between requests in milliseconds
CRAWLER_DELAY=1000
# Enable stealth mode (anti-detection)
CRAWLER_STEALTH=true
# Custom User-Agent (leave unset or empty for random rotation)
# CUSTOM_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
# =============================================================================
# RATE LIMITING
# =============================================================================
# Global rate limit (requests per minute)
RATE_LIMIT_GLOBAL=60
# Per-engine rate limit (requests per minute)
RATE_LIMIT_PER_ENGINE=30
# Per-user rate limit (requests per minute)
RATE_LIMIT_PER_USER=10
# =============================================================================
# SEARCH ENGINE PREFERENCES
# =============================================================================
# Primary search engine: duckduckgo, google, bing
PRIMARY_SEARCH_ENGINE=duckduckgo
# Enable search engine fallback
ENABLE_FALLBACK=true
# Maximum results per search
MAX_SEARCH_RESULTS=20
# Default number of results
DEFAULT_SEARCH_RESULTS=5
# =============================================================================
# CONTENT EXTRACTION
# =============================================================================
# Maximum content length in characters
MAX_CONTENT_LENGTH=10000
# Default content length
DEFAULT_CONTENT_LENGTH=5000
# Enable content summarization
ENABLE_SUMMARIZATION=true
# Enable image extraction
ENABLE_IMAGE_EXTRACTION=false
# =============================================================================
# ACADEMIC SEARCH CONFIGURATION
# =============================================================================
# Maximum academic results per search
MAX_ACADEMIC_RESULTS=50
# Default number of academic results
DEFAULT_ACADEMIC_RESULTS=10
# Enable full-text extraction for papers
ENABLE_FULLTEXT_EXTRACTION=true
# =============================================================================
# PROXY CONFIGURATION (Optional)
# =============================================================================
# HTTP proxy for web requests
# HTTP_PROXY=http://proxy.example.com:8080
# HTTPS proxy for secure requests
# HTTPS_PROXY=https://proxy.example.com:8080
# Proxy authentication
# PROXY_USERNAME=your_username
# PROXY_PASSWORD=your_password
# =============================================================================
# SECURITY SETTINGS
# =============================================================================
# Enable request sanitization
ENABLE_REQUEST_SANITIZATION=true
# Enable response filtering
ENABLE_RESPONSE_FILTERING=true
# Maximum request size in bytes
MAX_REQUEST_SIZE=1048576
# Allowed domains for crawling (comma-separated, empty = all allowed)
# ALLOWED_DOMAINS=example.com,trusted-site.org
# Blocked domains for crawling (comma-separated)
# BLOCKED_DOMAINS=malicious-site.com,spam-site.org
# =============================================================================
# OPEN SEARCH MCP v2.0 CONFIGURATION
# =============================================================================
# v2.0 Phase 1: Multi-API Aggregation & Intelligent Scheduling
# =============================================================================
# API AGGREGATOR CONFIGURATION
# =============================================================================
# Load balancing strategy (round-robin, weighted, least-connections, health-based)
V2_LOAD_BALANCING_STRATEGY=health-based
# Health check interval in milliseconds
V2_HEALTH_CHECK_INTERVAL=30000
# Failover threshold (consecutive failures before marking unhealthy)
V2_FAILOVER_THRESHOLD=3
# Maximum fallback attempts
V2_MAX_FALLBACK_ATTEMPTS=2
# Fallback delay in milliseconds
V2_FALLBACK_DELAY=1000
# =============================================================================
# SEARX CLUSTER CONFIGURATION
# =============================================================================
# Searx cluster nodes (comma-separated URLs)
V2_SEARX_NODES=http://localhost:8080,http://localhost:8081,http://localhost:8082
# Searx cluster load balancing strategy (round-robin, least-connections, response-time)
V2_SEARX_LOAD_BALANCING_STRATEGY=response-time
# Searx health check configuration
V2_SEARX_HEALTH_CHECK_INTERVAL=30000
V2_SEARX_HEALTH_CHECK_TIMEOUT=5000
V2_SEARX_HEALTH_CHECK_RETRIES=3
# Searx cluster secret key (CHANGE THIS IN PRODUCTION!)
V2_SEARXNG_SECRET=your-very-long-secret-key-here-please-change-this-to-something-secure
# =============================================================================
# INTELLIGENT CACHE CONFIGURATION
# =============================================================================
# Multi-layer cache configuration
V2_CACHE_ENABLED=true
V2_CACHE_TTL=3600
V2_CACHE_MAX_SIZE=1000
# Local cache configuration
V2_LOCAL_CACHE_ENABLED=true
V2_LOCAL_CACHE_MAX_KEYS=1000
V2_LOCAL_CACHE_TTL=300
# Redis cache configuration (optional)
V2_REDIS_ENABLED=false
V2_REDIS_HOST=localhost
V2_REDIS_PORT=6379
V2_REDIS_PASSWORD=
V2_REDIS_DB=0
V2_REDIS_TTL=7200
# =============================================================================
# MONITORING & METRICS CONFIGURATION
# =============================================================================
# Monitoring configuration
V2_MONITORING_ENABLED=true
V2_MONITORING_PORT=9090
V2_MONITORING_PATH=/metrics
V2_MONITORING_COLLECT_INTERVAL=10000
V2_MONITORING_RETENTION_DAYS=7
# Grafana password (example default only - CHANGE THIS IN PRODUCTION!)
V2_GRAFANA_PASSWORD=admin
# =============================================================================
# v2.0 DEVELOPMENT CONFIGURATION
# =============================================================================
# Enable v2.0 features
V2_FEATURES_ENABLED=true
# Enable v2.0 debug mode (development setting; set to false in production)
V2_DEBUG_MODE=true
# Enable v2.0 verbose logging (development setting; set to false in production)
V2_VERBOSE_LOGGING=true
# Enable v2.0 performance tracking
V2_PERFORMANCE_TRACKING=true