# EX MCP Server - Example Environment Variables
# Copy this file to .env and fill in the values you plan to use.
# At least ONE of the following is required: KIMI_API_KEY, GLM_API_KEY, OPENROUTER_API_KEY, or CUSTOM_API_URL
# Remote server settings (optional for network access)
MCP_REMOTE_HOST=0.0.0.0
MCP_REMOTE_PORT=7800
MCP_BASE_PATH=/mcp
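# e.g., with the defaults above, clients would reach the server at http://<host>:7800/mcp (illustrative; exact URL depends on your deployment)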
# Set a strong shared secret for remote access auth
MCP_AUTH_TOKEN=
# CORS origins: comma-separated list or *
CORS_ORIGINS=*
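# Example restricted list (hypothetical origins): CORS_ORIGINS=https://app.example.com,https://admin.example.com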
# Moonshot (Kimi) - Recommended
KIMI_API_KEY=your_kimi_api_key_here
# ZhipuAI GLM
GLM_API_KEY=your_glm_api_key_here
# OpenRouter (optional catch-all provider)
OPENROUTER_API_KEY=your_openrouter_api_key_here
# Optional OpenRouter metadata & allowlist
OPENROUTER_ALLOWED_MODELS=o3-mini,pro,flash,o4-mini,o3
OPENROUTER_REFERER=https://github.com/BeehiveInnovations/zen-mcp-server
OPENROUTER_TITLE=Zen MCP Server
# Note: This deployment does not use OpenRouter by default; leave OPENROUTER_API_KEY empty unless you opt in.
# Tests: Set OPENROUTER_TESTS_ENABLED=true to enable optional OpenRouter-related tests.
# Custom OpenAI-compatible endpoint (local/self-hosted)
# Example: http://localhost:11434/v1 for Ollama (with OpenAI-compatible plugin)
CUSTOM_API_URL=
# Optional: default model name for custom endpoint, e.g., llama3.2
CUSTOM_MODEL_NAME=
# Provider gating (comma-separated). Allowed: KIMI, GLM, OPENROUTER, CUSTOM, GOOGLE, OPENAI, XAI, DIAL
# Example: disable GOOGLE, OPENAI, XAI, DIAL (kept disabled in this deployment by default)
DISABLED_PROVIDERS=GOOGLE,OPENAI,XAI,DIAL
# Optional settings
# Enable startup config validation (recommended)
ENABLE_CONFIG_VALIDATOR=true
# Prefer free-tier models when available (if supported by providers)
PREFER_FREE_TIER=false
# Metadata-informed model selection (experimental) is enabled in the metadata-aware ordering section below
# Direct API keys for Kimi and GLM are configured above (KIMI_API_KEY / GLM_API_KEY); never commit real values
# Custom API endpoints configuration
KIMI_API_URL=https://api.moonshot.ai/v1
GLM_API_URL=https://api.z.ai/api/paas/v4
# Server Configuration
DEFAULT_MODEL=auto
LOCALE=en-AU
# AU date example override (default remains %Y-%m-%d)
DATE_FORMAT=%d/%m/%Y
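# e.g., an injected date of 2025-12-31 renders as 31/12/2025 under this strftime pattern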
INJECT_CURRENT_DATE=true
ENABLE_INTELLIGENT_SELECTION=true
ENABLE_CONSENSUS_AUTOMODE=true
THINK_ROUTING_ENABLED=true
MIN_CONSENSUS_MODELS=2
MAX_CONSENSUS_MODELS=3
# Provider-native web browsing (env-gated)
# Kimi requires an OpenAI function tool named "web_search" with a string "query" parameter
KIMI_ENABLE_INTERNET_TOOL=false
# Example minimal schema (override with your own):
# KIMI_INTERNET_TOOL_SPEC={"type":"function","function":{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string"}},"required":["query"]}}}
# Legacy flag (kept for backward compatibility)
KIMI_ENABLE_INTERNET_SEARCH=true
# GLM requires tools=[{"type":"web_search","web_search":{}}] when browsing is enabled
GLM_ENABLE_WEB_BROWSING=true
# EX unified web search controls
EX_WEBSEARCH_ENABLED=true
EX_WEBSEARCH_MAX_RESULTS=5
EX_WEBSEARCH_LOCALE=en-US
EX_WEBSEARCH_SAFETY_LEVEL=standard
EX_WEBSEARCH_QUERY_TIMEOUT_MS=8000
EX_WEBSEARCH_TOTAL_TIMEOUT_MS=15000
EX_WEBSEARCH_CACHE_TTL_S=300
# Optional domain controls (comma-separated)
EX_WEBSEARCH_ALLOWED_DOMAINS=
EX_WEBSEARCH_BLOCKED_DOMAINS=
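# Example (hypothetical domains): EX_WEBSEARCH_ALLOWED_DOMAINS=docs.python.org,developer.mozilla.org
# Example (hypothetical domains): EX_WEBSEARCH_BLOCKED_DOMAINS=example-content-farm.com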
# Tool-call visibility and logging
EX_TOOLCALL_LOG_LEVEL=info
# Set a path to enable JSONL logging; empty disables file writes
EX_TOOLCALL_LOG_PATH=.logs/toolcalls.jsonl
# Redact query params and PII-like data in tool args (recommended)
EX_TOOLCALL_REDACTION=true
# Default websearch opt-in behavior
EX_WEBSEARCH_DEFAULT_ON=true
# Triage-only toggles if the Auggie UI seems slow listing tools:
DISABLE_TOOL_ANNOTATIONS=true
SLIM_SCHEMAS=true
# Default thinking mode for the thinkdeep tool
DEFAULT_THINKING_MODE_THINKDEEP=high
# --- Core runtime ---
# LOG_LEVEL and LOG_FORMAT are set in the sections below (UX/validation toggles; DEBUG override under health)
# --- Timeouts and watchdog (stability) ---
# Default HTTP timeout (seconds) for provider calls
EX_HTTP_TIMEOUT_SECONDS=60
# Max duration (seconds) for a single tool execution before timeout
EX_TOOL_TIMEOUT_SECONDS=120
# Heartbeat interval (seconds) to emit progress during long calls
EX_HEARTBEAT_SECONDS=10
# Watchdog warn and error thresholds (seconds) without completion
EX_WATCHDOG_WARN_SECONDS=30
EX_WATCHDOG_ERROR_SECONDS=90
# Mirror boundary tool-call start/end to JSONL (requires EX_TOOLCALL_LOG_PATH)
EX_MIRROR_ACTIVITY_TO_JSONL=false
# --- WebSocket daemon/shim timeouts (align client with server) ---
# Per-call timeout enforced by WS daemon (seconds)
EXAI_WS_CALL_TIMEOUT=180
# Shim RPC timeout (seconds). The shim extends this by the daemon's ACK timeout, then allows an extra grace window.
EXAI_SHIM_RPC_TIMEOUT=150
# Extra time granted after daemon ACKs a call (seconds)
EXAI_SHIM_ACK_GRACE_SECS=120
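# Illustrative budget with the values above (per the note on the shim timeout): after the daemon ACKs,
# the shim waits up to roughly 150s (RPC) + 120s (grace) = 270s, which stays above the daemon's 180s per-call timeout.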
# Workflow step timeout (seconds)
WORKFLOW_STEP_TIMEOUT_SECS=120
# Expert analysis timeout (seconds)
EXPERT_ANALYSIS_TIMEOUT_SECS=90
# Heartbeat interval for expert analysis (seconds)
EXPERT_HEARTBEAT_INTERVAL_SECS=5
# Expert fallback behavior during final analysis (disable to avoid concurrent provider fan-out)
EXPERT_FALLBACK_ENABLED=false
# Time before considering fallback (kept for documentation; ignored when disabled)
EXPERT_FALLBACK_AFTER_SECS=12
# --- Expert-phase server-side mitigation toggles (feature-gated; default OFF) ---
# 1) Optional high-frequency keepalive specific to expert-phase (milliseconds). If set (>0),
# overrides EXPERT_HEARTBEAT_INTERVAL_SECS during expert analysis only.
# Use 1000–2000 to satisfy idle-sensitive clients that cancel after less than 10s of silence.
# Empty or 0 disables this override.
EXAI_WS_EXPERT_KEEPALIVE_MS=
# 2) Soft deadline for expert analysis in seconds. If >0 and elapsed >= this value,
# the server returns a safe partial result early (status=analysis_partial) so the
# client doesn't cancel the call. The final detailed analysis can be resumed via
# continuation on the next call.
# Set to 0 to disable; recommended 60–150 when clients have shorter timeouts.
EXAI_WS_EXPERT_SOFT_DEADLINE_SECS=0
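# Example: with EXAI_WS_EXPERT_SOFT_DEADLINE_SECS=90, an expert call still running at 90s
# returns status=analysis_partial early, and the detailed analysis resumes via continuation.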
# 3) Micro-step expert mode. When true, the server returns a quick "draft" partial
# result for the expert phase (status=analysis_partial, microstep=draft) and defers
# the heavy validation to a follow-up call. This avoids long blocking expert calls
# on clients with strict timeouts. Default false for safety.
EXAI_WS_EXPERT_MICROSTEP=false
# Daemon progress heartbeat interval (seconds) - keep <=10s to satisfy idle-sensitive clients
EXAI_WS_PROGRESS_INTERVAL_SECS=5.0
# Optional: inflight duplicate TTL and capacity retry hint
EXAI_WS_INFLIGHT_TTL_SECS=180
EXAI_WS_RETRY_AFTER_SECS=1
# Disable semantic coalescing for specific tools (comma-separated, case-insensitive).
# When set, the daemon appends a UUID to the semantic call_key to avoid coalescing parallel calls.
EXAI_WS_DISABLE_COALESCE_FOR_TOOLS=kimi_chat_with_tools,analyze,codereview,testgen,debug,thinkdeep
# --- Intelligent selection (quality vs speed) ---
# Enable quality/speed tiering by tool category (default true)
# ENABLE_INTELLIGENT_SELECTION=true (already set above)
KIMI_QUALITY_MODEL=kimi-k2-0711-preview
GLM_QUALITY_MODEL=glm-4.5
KIMI_SPEED_MODEL=kimi-k2-turbo-preview
GLM_SPEED_MODEL=glm-4.5-flash
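# e.g., under this tiering a speed-sensitive tool category would resolve to kimi-k2-turbo-preview or
# glm-4.5-flash, while a quality-sensitive one would prefer kimi-k2-0711-preview or glm-4.5 (illustrative behavior)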
# --- Metadata-aware ordering (opt-in; non-destructive) ---
ENABLE_METADATA_SELECTION=true
MODEL_METADATA_JSON=./docs/ex-mcp/MODEL_METADATA.example.json
# --- UX and validation toggles ---
VALIDATE_DEFAULT_MODEL=true
SUGGEST_TOOL_ALIASES=true
# Real-time progress visibility (recommended)
STREAM_PROGRESS=true
ACTIVITY_LOG=true
LOG_LEVEL=INFO
LOG_FORMAT=plain
# --- Tool surface & diagnostics (optional) ---
LEAN_MODE=false
# LEAN_TOOLS=thinkdeep,analyze,consensus,version
# DISABLED_TOOLS=
# DISABLED_PROVIDERS=
# DIAGNOSTICS=false (overridden by DIAGNOSTICS=true below)
# --- Metrics (optional; requires prometheus_client in your env) ---
PROMETHEUS_ENABLED=false
METRICS_PORT=9108
# --- Auggie integration (optional) ---
AUGGIE_CLI=true
ALLOW_AUGGIE=true
# AUGGIE_CONFIG=./ex-mcp-server/examples/auggie-config.example.json
# Optional: Model restrictions if you want to limit usage
# KIMI_ALLOWED_MODELS=kimi-k2,kimi-k2-turbo,kimi-k2-thinking
# GLM_ALLOWED_MODELS=glm-4.5,glm-4.5-air,glm-4.5-flash
KIMI_ALLOWED_MODELS=kimi-k2-0905-preview,kimi-k2-0905,kimi-k2-0711-preview,moonshot-v1-8k,moonshot-v1-32k
GLM_ALLOWED_MODELS=glm-4.5-flash,glm-4.5-air,glm-4.5
# Conversation settings
CONVERSATION_TIMEOUT_HOURS=3
MAX_CONVERSATION_TURNS=20
# Cost-aware and free-tier preferences (optional)
COST_AWARE_ROUTING_ENABLED=true
# Relative cost ordering for intra-provider sorting (lower = cheaper). Adjust as you learn.
MODEL_COSTS_JSON={"glm-4.5-flash":0.0,"glm-4.5-air":1.1,"glm-4.5":2.2,"glm-4.5-airx":4.5,"glm-4.5v":1.8,"glm-4.5-x":8.9,"kimi-k2-turbo-preview":2.0,"kimi-k2-0711-preview":2.5,"kimi-k2-thinking":2.5}
MAX_COST_PER_REQUEST=5.0
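# e.g., with the relative costs above and MAX_COST_PER_REQUEST=5.0, glm-4.5-x (8.9) should be
# skipped by cost-aware routing while glm-4.5-flash (0.0) sorts first (illustrative of intent)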
FREE_TIER_PREFERENCE_ENABLED=true
FREE_MODEL_LIST=glm-4.5-flash
# Health/observability (optional)
HEALTH_CHECKS_ENABLED=true
HEALTH_LOG_ONLY=true
# Verbose logging for current troubleshooting (overrides LOG_LEVEL=INFO above)
LOG_LEVEL=DEBUG
# CIRCUIT_BREAKER_ENABLED=false
# LOG_FORMAT=json
LOG_MAX_SIZE=10MB
# Auggie CLI credentials (replace with your own; never commit real tokens)
ACCESSTOKEN="your_auggie_access_token_here"
TENANTURL="https://d18.api.augmentcode.com/"
SCOPE="read write"
# Enable diagnostics-only self-check tool for validation
DIAGNOSTICS=true
# --- EX full functionality additions ---
CACHE_BACKEND=memory
CACHE_TTL_SEC=10800
CACHE_MAX_ITEMS=1000
ENABLE_SMART_WEBSEARCH=false
# --- MCP client/server identity & discovery ---
# ID used by MCP clients to identify this server (VS Code/Claude can override)
MCP_SERVER_ID=ex-server
# Friendly name advertised to clients
MCP_SERVER_NAME=exai
# Optional: explicit path to .env for bootstrap
ENV_FILE=
# Gate stderr breadcrumbs to avoid noisy strict clients (set true only for debugging)
STDERR_BREADCRUMBS=false
# Alternative SSE base paths accepted by remote server (comma-separated)
MCP_ALT_PATHS=/sse,/v1/sse
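# e.g., remote clients could then connect at /mcp (MCP_BASE_PATH) as well as /sse or /v1/sse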
# --- Claude client defaults & tool visibility ---
# Allow/deny lists apply ONLY when the client is Claude/Anthropic
CLAUDE_TOOL_ALLOWLIST=thinkdeep,chat,version,listmodels
CLAUDE_TOOL_DENYLIST=
# Claude default behavior
CLAUDE_DEFAULTS_USE_WEBSEARCH=true
CLAUDE_DEFAULT_THINKING_MODE=medium
CLAUDE_MAX_WORKFLOW_STEPS=3
# --- Workflow auto-continue (server orchestrated) ---
EX_AUTOCONTINUE_WORKFLOWS=true
EX_AUTOCONTINUE_ONLY_THINKDEEP=true
EX_AUTOCONTINUE_MAX_STEPS=3
# --- Path safety & convenience ---
# Allow relative paths from clients; they will be resolved within the project root
EX_ALLOW_RELATIVE_PATHS=true
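# e.g., a client-supplied path like src/app.py (hypothetical) resolves to <project_root>/src/app.py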
# --- Long-context preference & defaults ---
EX_PREFER_LONG_CONTEXT=true
# Default Kimi model when locale/content indicates CJK and Kimi is present
KIMI_DEFAULT_MODEL=kimi-k2-0711-preview
# --- Custom provider auth (used with CUSTOM_API_URL when required) ---
CUSTOM_API_KEY=
# --- Policy toggles ---
POLICY_EXACT_TOOLSET=true
# --- Additional provider keys (optional; disabled by policy by default) ---
GEMINI_API_KEY=
OPENAI_API_KEY=
XAI_API_KEY=
DIAL_API_KEY=
# --- GLM Agent API ---
# Base URL for GLM Agent endpoints (agent chat, async result, conversation)
GLM_AGENT_API_URL=https://api.z.ai/api/v1
# Default Deep Thinking mode for GLM chat family (enabled|disabled)
GLM_THINKING_MODE=enabled
# --- Kimi Tool-Use (OpenAI-compatible) ---
# Automatic injection of an internet/browsing tool via tool_calls is gated by KIMI_ENABLE_INTERNET_TOOL (set above)
# JSON for a default tool spec injected when KIMI_ENABLE_INTERNET_TOOL=true
# Example: {"type": "web_search"} or a full OpenAI tools spec
KIMI_INTERNET_TOOL_SPEC=
# Default tool_choice ('auto'|'none'|'required' or provider-specific structure)
KIMI_DEFAULT_TOOL_CHOICE=auto
# Advisory client-side max upload size checks (MB); provider enforces hard limits
KIMI_FILES_MAX_SIZE_MB=
GLM_FILES_MAX_SIZE_MB=