# MCP Evaluation Server Environment Configuration
# Copy this file to .env and configure your settings
# ═══════════════════════════════════════════════════════════════════════════════
# LLM Provider Configuration
# ═══════════════════════════════════════════════════════════════════════════════
# OpenAI Configuration
OPENAI_API_KEY=sk-your-openai-api-key-here
# OPENAI_ORGANIZATION=org-your-organization-id # Optional
# OPENAI_BASE_URL=https://api.openai.com/v1 # Optional custom endpoint
# Azure OpenAI Configuration
# AZURE_OPENAI_API_KEY=your-azure-openai-key
# AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
# AZURE_OPENAI_API_VERSION=2024-02-15-preview
# AZURE_DEPLOYMENT_NAME=your-gpt-4-deployment
# Anthropic Configuration
# ANTHROPIC_API_KEY=sk-ant-your-anthropic-api-key
# AWS Bedrock Configuration
# AWS_ACCESS_KEY_ID=AKIA...
# AWS_SECRET_ACCESS_KEY=...
# AWS_REGION=us-east-1
# OLLAMA Configuration
# OLLAMA_BASE_URL=http://localhost:11434
# Google Gemini Configuration
# GOOGLE_API_KEY=your-google-api-key
# IBM Watsonx.ai Configuration
# WATSONX_API_KEY=your-watsonx-api-key
# WATSONX_PROJECT_ID=your-project-id
# WATSONX_URL=https://us-south.ml.cloud.ibm.com
# Default Judge Model Selection
DEFAULT_JUDGE_MODEL=gpt-4o-mini
# Alternative options: claude-4-1-bedrock, gemini-1-5-pro, gemini-1-5-flash, gpt-4, gpt-3.5-turbo,
# gpt-4-turbo, claude-3-sonnet, claude-3-haiku, claude-3-opus, gpt-4-azure,
# claude-3-sonnet-bedrock, llama-3-1-70b-watsonx, granite-3-0-8b-watsonx,
# mixtral-8x7b-watsonx, llama3-8b, mistral-7b, rule-based
# ═══════════════════════════════════════════════════════════════════════════════
# Custom Configuration Paths
# ═══════════════════════════════════════════════════════════════════════════════
# Custom model configuration (defaults to built-in config/models.yaml)
# MCP_EVAL_MODELS_CONFIG=/path/to/custom/models.yaml
# Custom configuration directory (for all config files)
# MCP_EVAL_CONFIG_DIR=/path/to/custom/config/
# Custom rubrics, benchmarks, prompts (future enhancement)
# MCP_EVAL_RUBRICS_CONFIG=/path/to/custom/rubrics.yaml
# MCP_EVAL_BENCHMARKS_CONFIG=/path/to/custom/benchmarks.yaml
# MCP_EVAL_PROMPTS_CONFIG=/path/to/custom/prompts.yaml
# Cache Configuration
MCP_EVAL_CACHE_DIR=/app/data/cache
# Cache TTL in seconds (1 hour)
MCP_EVAL_CACHE_TTL=3600
# Maximum cached items
MCP_EVAL_CACHE_SIZE=1000
# Database Configuration
MCP_EVAL_RESULTS_DB=/app/data/results/evaluation_results.db
# Logging Configuration
LOG_LEVEL=INFO
PYTHONUNBUFFERED=1
# Performance Configuration
MAX_CONCURRENT_EVALUATIONS=3
# Evaluation timeout in seconds
EVALUATION_TIMEOUT=300
# Development Configuration
# DEVELOPMENT_MODE=true # Enable for development features
# Model-specific settings
GPT4_TEMPERATURE=0.3
GPT4_MAX_TOKENS=2000
GPT35_TEMPERATURE=0.2
GPT35_MAX_TOKENS=2000
# Evaluation defaults
DEFAULT_CONSISTENCY_RUNS=3
DEFAULT_TEMPERATURE_RANGE=0.1,0.5,0.9
DEFAULT_RELEVANCE_THRESHOLD=0.7
DEFAULT_CONFIDENCE_THRESHOLD=0.8
# Security settings
# Requests allowed per hour
RATE_LIMIT_REQUESTS=100
# Tokens allowed per hour
RATE_LIMIT_TOKENS=50000
# ═══════════════════════════════════════════════════════════════════════════════
# Provider Setup Examples & Installation Notes
# ═══════════════════════════════════════════════════════════════════════════════
# 🔧 Installation for Additional Providers:
#
# For Anthropic support:
# pip install anthropic
#
# For AWS Bedrock support:
# pip install boto3 botocore
#
# For OLLAMA support:
# pip install aiohttp
# # Also ensure OLLAMA is running: ollama serve
#
# For Google Gemini support:
# pip install google-generativeai
#
# For IBM Watsonx.ai support:
# pip install ibm-watsonx-ai
#
# For all providers:
# pip install -e ".[all]"
# 📝 Provider Configuration Examples:
#
# OpenAI (Default - always available):
# OPENAI_API_KEY=sk-proj-...
#
# Azure OpenAI (Enterprise):
# AZURE_OPENAI_API_KEY=...
# AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
# AZURE_DEPLOYMENT_NAME=gpt-4-deployment
#
# Anthropic (Claude models):
# ANTHROPIC_API_KEY=sk-ant-api...
#
# AWS Bedrock (Claude via AWS):
# AWS_ACCESS_KEY_ID=AKIA...
# AWS_SECRET_ACCESS_KEY=...
# AWS_REGION=us-east-1
#
# Google Gemini (Google AI Studio):
# GOOGLE_API_KEY=your-google-api-key
#
# IBM Watsonx.ai (Enterprise AI):
# WATSONX_API_KEY=your-watsonx-api-key
# WATSONX_PROJECT_ID=your-project-id
# WATSONX_URL=https://us-south.ml.cloud.ibm.com
#
# OLLAMA (Local/Self-hosted):
# OLLAMA_BASE_URL=http://localhost:11434
# # Ensure models are pulled: ollama pull llama3:8b
# 🧪 Validation:
# Run: make validate-models
# This will test connectivity and show which judges are available