# Makefile for MCP Evaluation Server

# Every target in this file is a command rather than a file to build.
# Declaring all of them phony stops a file or directory with the same name
# (for example a docs/ directory) from making make report "up to date"
# and skip the recipe.
.PHONY: help install dev-install format lint test test-fast \
        build build-docker run run-docker \
        compose-up compose-down compose-logs \
        dev test-mcp clean clean-docker clean-volumes scan example \
        docs docs-serve release check-env validate-models test-all-providers \
        validate-config copy-config show-config mcp-info \
        serve-http serve-http-public serve-rest serve-rest-public \
        serve-hybrid serve-dual test-http test-rest test-all-apis \
        generate-token http-info rest-info

# Variables
# All use ?= so a value from the environment or the make command line wins,
# e.g. `make build IMAGE_TAG=dev`.
IMAGE_NAME ?= mcp-eval-server
IMAGE_TAG ?= latest
CONTAINER_NAME ?= mcp-eval-server
PYTHON ?= python3
# Help target
# Prints a quick-start banner, then one line per target that carries a
# trailing `## description` comment on its rule header.
#
# The awk field separator spans from the colon after the target name through
# the "## " marker, so $1 is the target and $2 is its description ($$ is the
# make escape for a literal $). The expression uses plain `.*` because awk
# regexes are POSIX EREs with no lazy `*?` quantifier, and the target pattern
# accepts digits so names such as `build-py3` would be listed too.
help: ## Show this help message
	@echo "๐ฏ MCP Evaluation Server - Development Commands"
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	@echo ""
	@echo "๐ Quick Start:"
	@echo " make dev Start MCP server (stdio) with connection info"
	@echo " make serve-http Start HTTP server (JSON-RPC over HTTP)"
	@echo " make example Run evaluation example"
	@echo " make mcp-info Show MCP connection guide"
	@echo " make http-info Show HTTP server connection guide"
	@echo ""
	@echo "๐ Available Commands:"
	@echo ""
	@awk 'BEGIN {FS = ":.*## "} /^[a-zA-Z0-9_-]+:.*## / {printf " %-20s %s\n", $$1, $$2}' $(MAKEFILE_LIST)
	@echo ""
	@echo "๐ For detailed usage, see README.md or run 'make mcp-info'"
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
# Development setup
# Both targets run pip through the interpreter named by $(PYTHON) and perform
# an editable install of the current directory; dev-install additionally
# requests the "dev" extra.
install: ## Install package in development mode
	$(PYTHON) -m pip install --editable .

dev-install: ## Install with development dependencies
	$(PYTHON) -m pip install --editable ".[dev]"
# Code quality
# format rewrites files in place: black first, then isort, both over the
# whole working directory.
format: ## Format code with black and isort
	black .
	isort .

# lint is read-only: flake8 checks the package and the tests, mypy checks the
# package only. Each command is its own recipe line, so make stops at the
# first one that exits non-zero.
lint: ## Run linting checks
	flake8 mcp_eval_server tests
	mypy mcp_eval_server
# Testing
# test runs the suite under tests/ verbosely and reports coverage for the
# mcp_eval_server package, listing uncovered lines in the terminal.
test: ## Run all tests
	pytest tests/ --verbose --cov=mcp_eval_server --cov-report=term-missing

# test-fast runs the same suite verbosely with no coverage options.
test-fast: ## Run tests without coverage
	pytest tests/ --verbose
# Container operations
# The podman and docker variants are deliberately parallel: same Containerfile,
# same image reference $(IMAGE_NAME):$(IMAGE_TAG), same named volumes.

# Build the image from ./Containerfile with the current directory as context.
build: ## Build container image
	podman build --file Containerfile --tag $(IMAGE_NAME):$(IMAGE_TAG) .

build-docker: ## Build container image with Docker
	docker build --file Containerfile --tag $(IMAGE_NAME):$(IMAGE_TAG) .

# Run the image interactively with a TTY. The container is removed on exit
# (--rm), reads its environment from ./.env, and mounts the named volumes
# eval-cache and eval-results under /app/data.
run: ## Run container with environment file
	podman run --rm --interactive --tty \
		--name $(CONTAINER_NAME) \
		--env-file .env \
		--volume eval-cache:/app/data/cache \
		--volume eval-results:/app/data/results \
		$(IMAGE_NAME):$(IMAGE_TAG)

run-docker: ## Run container with Docker
	docker run --rm --interactive --tty \
		--name $(CONTAINER_NAME) \
		--env-file .env \
		--volume eval-cache:/app/data/cache \
		--volume eval-results:/app/data/results \
		$(IMAGE_NAME):$(IMAGE_TAG)
# Compose command used by the compose-* targets. The default keeps the
# standalone docker-compose binary; where only the Compose plugin is
# installed, run e.g. `make compose-up COMPOSE="docker compose"`.
COMPOSE ?= docker-compose

# Start the services in the background (-d).
compose-up: ## Start services with docker-compose
	$(COMPOSE) up -d

compose-down: ## Stop services with docker-compose
	$(COMPOSE) down

# Follow the logs (-f) until interrupted.
compose-logs: ## View container logs
	$(COMPOSE) logs -f
# Development server
# Prints connection hints, then runs the MCP server module in the foreground
# with the interpreter named by $(PYTHON).
#
# $(CURDIR) is used for the working directory because make keeps it accurate
# even under `make -C <dir>`, whereas $(PWD) is only whatever the calling
# environment exported. The Azure key hint uses AZURE_OPENAI_API_KEY, the
# same name that check-env, show-config and mcp-info use.
dev: ## Run development server locally
	@echo "๐ Starting MCP Evaluation Server..."
	@echo "๐ก Protocol: stdio (Model Context Protocol)"
	@echo "๐ง Mode: Development"
	@echo "๐ Available Tools: 29 evaluation tools"
	@echo ""
	@echo "๐ก How to connect:"
	@echo " 1. MCP Client (Claude Desktop, etc.):"
	@echo " - Server command: python -m mcp_eval_server.server"
	@echo " - Working directory: $(CURDIR)"
	@echo " 2. Direct testing:"
	@echo " - Run: make test-mcp"
	@echo " - Or: make example"
	@echo ""
	@echo "๐ API Keys (optional for LLM judges):"
	@echo " export OPENAI_API_KEY='sk-...'"
	@echo " export AZURE_OPENAI_API_KEY='...'"
	@echo ""
	@echo "โก Starting server (Ctrl+C to stop)..."
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	$(PYTHON) -m mcp_eval_server.server
# Testing with MCP client
# Feeds one JSON-RPC "tools/list" request, newline-terminated, to the server
# module's standard input.
test-mcp: ## Test MCP server functionality
	@echo "๐งช Testing MCP server with list_tools..."
	@echo ""
	printf '%s\n' '{"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}' | $(PYTHON) -m mcp_eval_server.server
# Cleanup
# Every command here is best-effort: stderr is discarded and `|| true` forces
# a zero exit status, so a missing container, image or volume never fails the
# target.

# Force-remove the container, then the image.
clean: ## Clean up containers and volumes
	podman rm --force $(CONTAINER_NAME) 2>/dev/null || true
	podman rmi --force $(IMAGE_NAME):$(IMAGE_TAG) 2>/dev/null || true

clean-docker: ## Clean up with Docker
	docker rm --force $(CONTAINER_NAME) 2>/dev/null || true
	docker rmi --force $(IMAGE_NAME):$(IMAGE_TAG) 2>/dev/null || true

# Removes the two named volumes that the run targets mount (podman only).
clean-volumes: ## Remove data volumes
	podman volume rm eval-cache eval-results 2>/dev/null || true
# Security scanning
# Runs trivy against the image reference $(IMAGE_NAME):$(IMAGE_TAG), i.e. the
# same reference the build targets tag. The target has no prerequisites, so
# it does not build the image itself.
scan: ## Scan container for vulnerabilities
	trivy image $(IMAGE_NAME):$(IMAGE_TAG)
# Example usage
# Runs a single evaluate_response call through JudgeTools with the
# "rule-based" judge and prints the result as indented JSON.
#
# The coroutine is defined via exec() because `async def` is a compound
# statement and cannot follow `;` in a `python -c` one-liner. Inside the
# shell's double quotes, \" yields a literal quote and \n reaches Python
# unchanged, where it becomes a newline in the exec'd source.
example: ## Run example evaluation
	@echo "๐ฏ Running example evaluation with rule-based judge..."
	@echo "๐ Evaluating: 'Paris is the capital of France.'"
	@echo "๐ Criteria: Factual accuracy (1-5 scale)"
	@echo ""
	@$(PYTHON) -c "import asyncio, json; from mcp_eval_server.tools.judge_tools import JudgeTools; exec('async def main():\n jt = JudgeTools()\n result = await jt.evaluate_response(response=\"Paris is the capital of France.\", criteria=[{\"name\": \"accuracy\", \"description\": \"Factual accuracy\", \"scale\": \"1-5\", \"weight\": 1.0}], rubric={\"criteria\": [], \"scale_description\": {\"1\": \"Wrong\", \"5\": \"Correct\"}}, judge_model=\"rule-based\")\n print(json.dumps(result, indent=2))\nasyncio.run(main())')"
	@echo ""
	@echo "✅ Example completed! This shows rule-based evaluation without API keys."
# Documentation
# Declared phony here because a directory named docs next to this Makefile
# would otherwise make `make docs` report "up to date" and never run mkdocs.
.PHONY: docs docs-serve

docs: ## Generate documentation
	mkdocs build

# Runs the mkdocs preview server in the foreground until interrupted.
docs-serve: ## Serve documentation locally
	mkdocs serve
# Release
# Aggregate target: the real work is in the prerequisites test, lint and
# build. The recipe only prints a confirmation, which make reaches only after
# all three prerequisites have succeeded.
release: test lint build ## Run tests, lint, and build for release
	@echo "Release build completed successfully"
# Check environment
# Warns when neither OPENAI_API_KEY nor AZURE_OPENAI_API_KEY is set in the
# environment, then prints the list of tool categories. It only reports and
# never fails. The if/else is one logical shell command joined with
# backslashes; $$ passes a literal $ through make to the shell.
check-env: ## Check required environment variables
	@echo "๐ Checking environment configuration..."
	@echo ""
	@if [ -z "$$OPENAI_API_KEY" ] && [ -z "$$AZURE_OPENAI_API_KEY" ]; then \
		echo "โ ๏ธ WARNING: No API keys found for LLM judges"; \
		echo "๐ To use OpenAI judges: export OPENAI_API_KEY='sk-...'"; \
		echo "๐ To use Azure judges: export AZURE_OPENAI_API_KEY='...'"; \
		echo ""; \
		echo "✅ Rule-based judge available (no API key needed)"; \
	else \
		echo "✅ API keys configured for LLM judges"; \
	fi
	@echo ""
	@echo "๐ Available evaluation capabilities:"
	@echo " โข 4 Judge tools (evaluate, compare, rank, reference)"
	@echo " โข 4 Prompt tools (clarity, consistency, completeness, relevance)"
	@echo " โข 4 Agent tools (tool usage, task completion, reasoning, benchmarks)"
	@echo " โข 3 Quality tools (factuality, coherence, toxicity)"
	@echo " โข 3 Workflow tools (suites, execution, comparison)"
	@echo " โข 2 Calibration tools (agreement, optimization)"
	@echo " โข 9 Server tools (management, statistics, health)"
	@echo ""
	@echo "✅ Environment check complete"
# Both targets run a script from the current directory with $(PYTHON); the
# scripts themselves are not part of this file, so what they check is
# described only by the ## help text.
validate-models: ## Run comprehensive model validation and connectivity tests
	@echo "๐ Running model validation and connectivity tests..."
	$(PYTHON) validate_models.py

test-all-providers: ## Test all LLM providers (OpenAI, Azure, Anthropic, Bedrock, Gemini, Watsonx, OLLAMA)
	@echo "๐งช Testing all LLM provider implementations..."
	$(PYTHON) test_all_providers.py
# Parses the models config with yaml.safe_load to confirm it is syntactically
# valid YAML: the file named by MCP_EVAL_MODELS_CONFIG when that is set,
# otherwise the packaged default. Only the models config is checked.
#
# The path reaches Python as sys.argv[1] rather than being spliced into the
# -c source text, so a path containing quotes or other special characters
# cannot break or alter the Python code.
validate-config: ## Validate custom configuration files
	@echo "๐ Validating configuration files..."
	@if [ -n "$$MCP_EVAL_MODELS_CONFIG" ]; then \
		echo "๐ Validating custom models config: $$MCP_EVAL_MODELS_CONFIG"; \
		$(PYTHON) -c "import sys, yaml; yaml.safe_load(open(sys.argv[1])); print('✅ Configuration syntax valid')" "$$MCP_EVAL_MODELS_CONFIG"; \
	else \
		echo "๐ Validating default models config"; \
		$(PYTHON) -c "import sys, yaml; yaml.safe_load(open(sys.argv[1])); print('✅ Configuration syntax valid')" mcp_eval_server/config/models.yaml; \
	fi
# Copies the four packaged YAML configs into ./custom-config/, creating the
# directory if needed and overwriting any previous copies. The loop exits at
# the first failed copy so the success message is never printed after an
# error.
copy-config: ## Copy default configuration for customization
	@echo "๐ Copying default configuration files for customization..."
	@mkdir -p ./custom-config
	@for name in models rubrics benchmarks judge_prompts; do \
		cp "mcp_eval_server/config/$$name.yaml" "./custom-config/$$name.yaml" || exit 1; \
	done
	@echo "✅ Configuration files copied to ./custom-config/"
	@echo "๐ก To use custom config: export MCP_EVAL_MODELS_CONFIG='./custom-config/models.yaml'"
# Reports which configuration variables are set in the environment.
#
# API keys are secrets, so only their presence is shown. Each key is first
# reduced to a fixed status string with ${VAR:+...}, which is empty when the
# key is unset, and ${status:-...} then supplies the "not set" text. Using
# ${VAR:-...} on the key itself would print the key's value whenever it is
# set. The non-secret settings (paths, model name, base URL) are shown as-is.
show-config: ## Show current configuration status
	@echo "๐ง Current Configuration Status:"
	@echo " Models config: $${MCP_EVAL_MODELS_CONFIG:-default (mcp_eval_server/config/models.yaml)}"
	@echo " Default judge: $${DEFAULT_JUDGE_MODEL:-gpt-4o-mini}"
	@echo " Config dir: $${MCP_EVAL_CONFIG_DIR:-default (mcp_eval_server/config/)}"
	@echo ""
	@echo "๐ Environment Variables:"
	@status="$${OPENAI_API_KEY:+✅ configured}"; echo " OPENAI_API_KEY: $${status:-โ not set}"
	@status="$${AZURE_OPENAI_API_KEY:+✅ configured}"; echo " AZURE_OPENAI_API_KEY: $${status:-โ not set}"
	@status="$${ANTHROPIC_API_KEY:+✅ configured}"; echo " ANTHROPIC_API_KEY: $${status:-โ not set}"
	@echo " OLLAMA_BASE_URL: $${OLLAMA_BASE_URL:-โ not set}"
# Prints a static connection guide for MCP clients; it starts nothing.
# The working directory is taken from $(CURDIR), which make keeps accurate
# even under `make -C <dir>`, unlike the inherited $(PWD) environment value.
mcp-info: ## Show MCP connection information
	@echo "๐ก MCP Evaluation Server Connection Guide"
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	@echo ""
	@echo "๐ง Server Command:"
	@echo " python -m mcp_eval_server.server"
	@echo ""
	@echo "๐ Working Directory:"
	@echo " $(CURDIR)"
	@echo ""
	@echo "๐ก Protocol: stdio (Model Context Protocol)"
	@echo "๐ Transport: Standard input/output (no HTTP port)"
	@echo ""
	@echo "๐ MCP Client Configuration:"
	@echo " {"
	@echo " \"command\": \"python\","
	@echo " \"args\": [\"-m\", \"mcp_eval_server.server\"],"
	@echo " \"cwd\": \"$(CURDIR)\""
	@echo " }"
	@echo ""
	@echo "๐ ๏ธ Available Tools: 29 evaluation tools"
	@echo " โข judge.evaluate_response"
	@echo " โข judge.pairwise_comparison"
	@echo " โข prompt.evaluate_clarity"
	@echo " โข agent.evaluate_tool_use"
	@echo " โข quality.assess_toxicity"
	@echo " โข workflow.create_evaluation_suite"
	@echo " โข calibration.test_judge_agreement"
	@echo " โข server.get_available_judges"
	@echo " โข ...and 21 more tools"
	@echo ""
	@echo "๐ Optional API Keys:"
	@echo " export OPENAI_API_KEY='sk-...' # For GPT-4, GPT-3.5 judges"
	@echo " export AZURE_OPENAI_API_KEY='...' # For Azure OpenAI judges"
	@echo ""
	@echo "โก Quick Test:"
	@echo " make example # Run evaluation example"
	@echo " make test-mcp # Test MCP protocol"
	@echo ""
	@echo "๐ Documentation:"
	@echo " See README.md for comprehensive usage examples"
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
# HTTP/REST API Server Configuration
# All values use ?= so the environment or the make command line can override
# them (e.g. `make serve-rest REST_PORT=3000`).
# Port passed to mcpgateway.translate by serve-http and serve-http-public.
HTTP_PORT ?= 9000
# Port passed to mcp_eval_server.rest_server by the serve-rest* targets.
REST_PORT ?= 8080
# Bind host for serve-http, serve-rest and serve-dual, and the host the
# test-* targets connect to.
HTTP_HOST ?= localhost
# NOTE(review): no recipe visible in this file passes BEARER_TOKEN to a
# server, and the serve-* targets print "Authentication: None". Confirm
# whether this is still used; if it is, a fixed default token is guessable.
BEARER_TOKEN ?= eval-server-token-123
# HTTP Server via mcpgateway.translate (MCP over HTTP)
# Prints usage hints, then runs mcpgateway.translate in the foreground, which
# is given the stdio server command to wrap plus the host and port to listen
# on. Both the wrapper and the wrapped server use $(PYTHON), so an
# interpreter override (e.g. a virtualenv python) applies to this target the
# same way it does to dev and serve-rest.
serve-http: ## Run as HTTP server (MCP over HTTP with SSE)
	@echo "๐ Starting MCP Evaluation Server as HTTP service..."
	@echo "๐ก Protocol: HTTP with Server-Sent Events (SSE)"
	@echo "๐ Authentication: None (open access)"
	@echo "๐ URL: http://$(HTTP_HOST):$(HTTP_PORT)"
	@echo ""
	@echo "๐ HTTP Endpoints:"
	@echo " GET / # Server info and available tools"
	@echo " POST / # MCP JSON-RPC endpoint"
	@echo " GET /sse # Server-sent events stream"
	@echo ""
	@echo "๐ Example usage:"
	@echo " curl http://$(HTTP_HOST):$(HTTP_PORT)/"
	@echo ""
	@echo "๐ก MCP JSON-RPC call:"
	@echo " curl -X POST -H 'Content-Type: application/json' \\"
	@echo " -d '{\"jsonrpc\": \"2.0\", \"id\": 1, \"method\": \"tools/list\", \"params\": {}}' \\"
	@echo " http://$(HTTP_HOST):$(HTTP_PORT)/"
	@echo ""
	@echo "โก Starting HTTP server (Ctrl+C to stop)..."
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	$(PYTHON) -m mcpgateway.translate \
		--stdio "$(PYTHON) -m mcp_eval_server.server" \
		--port $(HTTP_PORT) \
		--host $(HTTP_HOST) \
		--expose-sse
# REST API Server (Direct FastAPI)
# Prints an endpoint overview, then runs the mcp_eval_server.rest_server
# module in the foreground on $(HTTP_HOST):$(REST_PORT). The endpoint list is
# static text kept in this recipe, not something queried from the server.
serve-rest: ## Run native REST API server with FastAPI
	@echo "๐ Starting MCP Evaluation Server as REST API service..."
	@echo "๐ก Protocol: HTTP REST API"
	@echo "๐ Authentication: None (open access)"
	@echo "๐ URL: http://$(HTTP_HOST):$(REST_PORT)"
	@echo "๐ Interactive API docs: http://$(HTTP_HOST):$(REST_PORT)/docs"
	@echo "๐ OpenAPI schema: http://$(HTTP_HOST):$(REST_PORT)/openapi.json"
	@echo ""
	@echo "๐ REST API Endpoints:"
	@echo " GET / # Server info and health"
	@echo " GET /health # Health check"
	@echo " GET /tools # List all tools by category"
	@echo " GET /tools/categories # Get tool categories"
	@echo " GET /tools/{category} # List tools in category"
	@echo ""
	@echo " ๐ Judge Tools:"
	@echo " POST /judge/evaluate # Evaluate single response"
	@echo " POST /judge/compare # Pairwise comparison"
	@echo " POST /judge/rank # Rank multiple responses"
	@echo " POST /judge/reference # Evaluate vs reference"
	@echo ""
	@echo " ๐ฏ Quality & Analysis Tools:"
	@echo " POST /quality/factuality # Check factual accuracy"
	@echo " POST /quality/coherence # Analyze coherence"
	@echo " POST /quality/toxicity # Detect toxicity"
	@echo " POST /prompt/clarity # Evaluate prompt clarity"
	@echo " POST /prompt/consistency # Test prompt consistency"
	@echo ""
	@echo " ๐ค Agent & RAG Tools:"
	@echo " POST /agent/tool-use # Evaluate tool usage"
	@echo " POST /agent/task-completion # Measure task success"
	@echo " POST /rag/retrieval-relevance # Assess retrieval quality"
	@echo " POST /rag/answer-groundedness # Verify answer grounding"
	@echo ""
	@echo " ๐ก๏ธ Safety & Privacy Tools:"
	@echo " POST /safety/harmful-content # Detect harmful content"
	@echo " POST /privacy/pii-detection # Detect PII exposure"
	@echo " POST /bias/demographic # Check demographic bias"
	@echo " POST /robustness/adversarial # Test adversarial inputs"
	@echo ""
	@echo " โก Performance & Workflow:"
	@echo " POST /performance/latency # Measure response latency"
	@echo " POST /workflow/create-suite # Create evaluation suite"
	@echo " POST /workflow/run-evaluation # Execute evaluation suite"
	@echo ""
	@echo "โก Starting REST API server (Ctrl+C to stop)..."
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	$(PYTHON) -m mcp_eval_server.rest_server --port $(REST_PORT) --host $(HTTP_HOST)
# Same server as serve-rest, but the bind host is fixed to 0.0.0.0 (all
# interfaces) and $(HTTP_HOST) is ignored. The banner states there is no
# authentication, so this exposes the API to anything that can reach the
# machine on $(REST_PORT).
serve-rest-public: ## Run REST API server accessible from any IP
	@echo "๐ Starting MCP Evaluation Server as PUBLIC REST API service..."
	@echo "โ ๏ธ WARNING: Server will be accessible from ANY IP address!"
	@echo "๐ก Protocol: HTTP REST API"
	@echo "๐ Authentication: None (open access)"
	@echo "๐ URL: http://0.0.0.0:$(REST_PORT) (accessible from any IP)"
	@echo "๐ Interactive API docs: http://0.0.0.0:$(REST_PORT)/docs"
	@echo ""
	@echo "โก Starting PUBLIC REST API server (Ctrl+C to stop)..."
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	$(PYTHON) -m mcp_eval_server.rest_server --port $(REST_PORT) --host 0.0.0.0
# Hybrid Server (runs both MCP and REST)
# Despite the section title, this target starts nothing: it only prints
# instructions for running `make dev` and `make serve-rest` in two terminals.
# NOTE(review): the printed REST URLs hard-code localhost instead of
# $(HTTP_HOST) — confirm that is intended.
serve-hybrid: ## Show guide for running both MCP and REST simultaneously
	@echo "๐ฏ MCP Evaluation Server - Dual Protocol Guide"
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	@echo ""
	@echo "๐ก To run both MCP and REST simultaneously, use two terminals:"
	@echo ""
	@echo "๐ฅ๏ธ Terminal 1 - MCP Server (stdio):"
	@echo " make dev"
	@echo " # or: python -m mcp_eval_server.server"
	@echo ""
	@echo "๐ฅ๏ธ Terminal 2 - REST API Server:"
	@echo " make serve-rest"
	@echo " # or: python -m mcp_eval_server.rest_server"
	@echo ""
	@echo "๐ก Access Methods:"
	@echo " ๐ MCP Protocol: Configure in Claude Desktop or MCP clients"
	@echo " ๐ REST API: http://localhost:$(REST_PORT)"
	@echo " ๐ API Docs: http://localhost:$(REST_PORT)/docs"
	@echo ""
	@echo "๐งช Testing:"
	@echo " make test-mcp # Test MCP protocol"
	@echo " make test-rest # Test REST API"
	@echo " make test-all-apis # Test both protocols"
	@echo ""
	@echo "๐ก Both servers share the same evaluation tools and judges!"
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
# Starts only the REST server, with the same command line as serve-rest. The
# MCP stdio server is not started here; the banner tells the user to run
# `make dev` in another terminal.
serve-dual: ## Start both MCP and REST servers (requires two terminals)
	@echo "๐ Starting MCP Evaluation Server in Dual Mode..."
	@echo "๐ก This will start the REST API server"
	@echo "๐ก To start MCP server simultaneously, run in another terminal:"
	@echo " make dev"
	@echo ""
	@echo "โก Starting REST API server (Ctrl+C to stop)..."
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	$(PYTHON) -m mcp_eval_server.rest_server --port $(REST_PORT) --host $(HTTP_HOST)
# Same as serve-http, but the bind host is fixed to 0.0.0.0 (all interfaces)
# and $(HTTP_HOST) is ignored. The banner states there is no authentication,
# so anything that can reach the machine on $(HTTP_PORT) can call the tools.
# Both interpreter invocations use $(PYTHON) so an override applies here too.
serve-http-public: ## Run HTTP server accessible from any IP
	@echo "๐ Starting MCP Evaluation Server as PUBLIC HTTP service..."
	@echo "โ ๏ธ WARNING: Server will be accessible from ANY IP address!"
	@echo "๐ก Protocol: HTTP with Server-Sent Events (SSE)"
	@echo "๐ Authentication: None (open access)"
	@echo "๐ URL: http://0.0.0.0:$(HTTP_PORT) (accessible from any IP)"
	@echo ""
	@echo "โก Starting PUBLIC HTTP server (Ctrl+C to stop)..."
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	$(PYTHON) -m mcpgateway.translate \
		--stdio "$(PYTHON) -m mcp_eval_server.server" \
		--port $(HTTP_PORT) \
		--host 0.0.0.0 \
		--expose-sse
# Smoke-tests a server started with `make serve-http` (it must already be
# running on $(HTTP_HOST):$(HTTP_PORT)). Each step prints a failure message
# instead of failing the target, so all three steps always run.
#
# The response is captured into a shell variable before it is piped to head.
# In a plain `curl | head || echo`, the pipeline's exit status is head's, so
# a refused connection would never reach the `||` branch. curl -f turns HTTP
# error statuses into a non-zero exit, and -sS hides the progress meter but
# still reports the error on stderr.
test-http: ## Test HTTP server endpoints (MCP over HTTP)
	@echo "๐งช Testing HTTP server endpoints..."
	@echo "๐ Server URL: http://$(HTTP_HOST):$(HTTP_PORT)"
	@echo ""
	@echo "1๏ธโฃ Testing server info..."
	@out=$$(curl -fsS "http://$(HTTP_HOST):$(HTTP_PORT)/") \
		&& printf '%s\n' "$$out" | head -10 \
		|| echo "โ Server info failed"
	@echo ""
	@echo ""
	@echo "2๏ธโฃ Testing tools list via JSON-RPC..."
	@out=$$(curl -fsS -X POST \
		-H "Content-Type: application/json" \
		-d '{"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}' \
		"http://$(HTTP_HOST):$(HTTP_PORT)/") \
		&& printf '%s\n' "$$out" | head -20 \
		|| echo "โ Tools list failed"
	@echo ""
	@echo ""
	@echo "3๏ธโฃ Testing evaluation via JSON-RPC..."
	@curl -fsS -X POST \
		-H "Content-Type: application/json" \
		-d '{"jsonrpc": "2.0", "id": 2, "method": "tools/call", "params": {"name": "judge.evaluate_response", "arguments": {"response": "Test response", "criteria": [{"name": "quality", "description": "Overall quality", "scale": "1-5", "weight": 1.0}], "rubric": {"criteria": [], "scale_description": {"1": "Poor", "5": "Good"}}, "judge_model": "rule-based"}}}' \
		"http://$(HTTP_HOST):$(HTTP_PORT)/" || echo "โ Evaluation failed"
	@echo ""
	@echo "✅ HTTP testing complete!"
# Smoke-tests a server started with `make serve-rest` (it must already be
# running on $(HTTP_HOST):$(REST_PORT)); requires curl and jq. Each step
# prints a failure message instead of failing the target, so all steps run.
#
# Each response is captured before being handed to jq. In `curl | jq || echo`
# the exit status is jq's, and jq exits 0 on empty input, so a refused
# connection would go unreported. curl -f turns HTTP error statuses into a
# non-zero exit, and -sS hides the progress meter but keeps error messages.
test-rest: ## Test REST API endpoints
	@echo "๐งช Testing REST API endpoints..."
	@echo "๐ Server URL: http://$(HTTP_HOST):$(REST_PORT)"
	@echo ""
	@echo "1๏ธโฃ Testing server info and health..."
	@out=$$(curl -fsS "http://$(HTTP_HOST):$(REST_PORT)/") \
		&& printf '%s\n' "$$out" | jq . \
		|| echo "โ Server info failed"
	@echo ""
	@out=$$(curl -fsS "http://$(HTTP_HOST):$(REST_PORT)/health") \
		&& printf '%s\n' "$$out" | jq . \
		|| echo "โ Health check failed"
	@echo ""
	@echo "2๏ธโฃ Testing tools discovery..."
	@out=$$(curl -fsS "http://$(HTTP_HOST):$(REST_PORT)/tools/categories") \
		&& printf '%s\n' "$$out" | jq . \
		|| echo "โ Categories failed"
	@echo ""
	@out=$$(curl -fsS "http://$(HTTP_HOST):$(REST_PORT)/tools") \
		&& printf '%s\n' "$$out" | jq '.judge | keys' \
		|| echo "โ Tools list failed"
	@echo ""
	@echo "3๏ธโฃ Testing judge evaluation..."
	@out=$$(curl -fsS -X POST \
		-H "Content-Type: application/json" \
		-d '{"response": "Paris is the capital of France", "criteria": [{"name": "accuracy", "description": "Factual accuracy", "scale": "1-5", "weight": 1.0}], "rubric": {"criteria": [], "scale_description": {"1": "Wrong", "5": "Correct"}}, "judge_model": "rule-based"}' \
		"http://$(HTTP_HOST):$(REST_PORT)/judge/evaluate") \
		&& printf '%s\n' "$$out" | jq .overall_score \
		|| echo "โ Judge evaluation failed"
	@echo ""
	@echo "4๏ธโฃ Testing quality assessment..."
	@out=$$(curl -fsS -X POST \
		-H "Content-Type: application/json" \
		-d '{"content": "This is a test message", "toxicity_categories": ["profanity", "hate_speech"], "sensitivity_level": "moderate", "judge_model": "rule-based"}' \
		"http://$(HTTP_HOST):$(REST_PORT)/quality/toxicity") \
		&& printf '%s\n' "$$out" | jq .toxicity_detected \
		|| echo "โ Quality assessment failed"
	@echo ""
	@echo "5๏ธโฃ Testing prompt evaluation..."
	@out=$$(curl -fsS -X POST \
		-H "Content-Type: application/json" \
		-d '{"prompt_text": "Write a summary of the following text", "target_model": "general", "judge_model": "rule-based"}' \
		"http://$(HTTP_HOST):$(REST_PORT)/prompt/clarity") \
		&& printf '%s\n' "$$out" | jq .overall_clarity_score \
		|| echo "โ Prompt evaluation failed"
	@echo ""
	@echo "✅ REST API testing complete!"
# Runs test-http and then test-rest as sub-makes. $(MAKE) is used instead of
# a literal `make` so flags and command-line variable overrides propagate,
# and `|| true` keeps the second suite running even if the first sub-make
# exits non-zero.
test-all-apis: ## Test both HTTP and REST API endpoints
	@echo "๐งช Testing all API endpoints..."
	@echo ""
	@echo "โโโโโโ HTTP/MCP API Testing โโโโโโ"
	@$(MAKE) test-http || true
	@echo ""
	@echo "โโโโโโ REST API Testing โโโโโโ"
	@$(MAKE) test-rest || true
	@echo ""
	@echo "✅ All API testing complete!"
# Prints a random 32-character token drawn from letters, digits, '-' and '_'
# using Python's `secrets` module. Nothing is stored; the token is only
# echoed together with usage hints. The whole recipe is one shell command so
# that $$TOKEN stays in scope for every echo. The interpreter comes from
# $(PYTHON), like the other Python-invoking targets.
# NOTE(review): the hint suggests `make serve-http BEARER_TOKEN=...`, but the
# serve-http recipe visible in this file does not pass BEARER_TOKEN on —
# confirm where the token is consumed.
generate-token: ## Generate a secure bearer token
	@echo "๐ Generating secure bearer token..."
	@TOKEN=$$($(PYTHON) -c "import secrets, string; print(''.join(secrets.choice(string.ascii_letters + string.digits + '-_') for _ in range(32)))"); \
	echo "๐ Generated token: $$TOKEN"; \
	echo ""; \
	echo "๐ก To use this token:"; \
	echo " export BEARER_TOKEN=$$TOKEN"; \
	echo " make serve-http BEARER_TOKEN=$$TOKEN"; \
	echo ""; \
	echo "๐ Keep this token secure and don't commit it to version control!"
# Prints a static connection guide for the HTTP (mcpgateway.translate) mode;
# it starts nothing. Host and port in the text follow $(HTTP_HOST) and
# $(HTTP_PORT).
#
# The "Evaluate response" example is printed as ONE -d argument whose
# single-quoted JSON spans several lines. curl joins repeated -d options
# with '&', so splitting the JSON across several -d flags would send an
# invalid body when the example is pasted into a shell.
http-info: ## Show HTTP server connection information
	@echo "๐ก MCP Evaluation Server - HTTP Mode Connection Guide"
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	@echo ""
	@echo "๐ HTTP Server Configuration:"
	@echo " Host: $(HTTP_HOST)"
	@echo " Port: $(HTTP_PORT)"
	@echo " URL: http://$(HTTP_HOST):$(HTTP_PORT)"
	@echo ""
	@echo "๐ Authentication: None (open access for now)"
	@echo ""
	@echo "๐ก Available Endpoints:"
	@echo " GET / # Server info and tool discovery"
	@echo " POST / # MCP JSON-RPC endpoint (all tools)"
	@echo " GET /sse # Server-sent events stream"
	@echo ""
	@echo "๐งช Testing Commands:"
	@echo " make serve-http # Start local HTTP server"
	@echo " make serve-http-public # Start server accessible from any IP"
	@echo " make test-http # Test HTTP endpoints"
	@echo ""
	@echo "๐ง Custom Configuration:"
	@echo " make serve-http HTTP_PORT=8080 # Custom port"
	@echo " make serve-http HTTP_HOST=0.0.0.0 # Public access"
	@echo ""
	@echo "๐ก Example JSON-RPC Requests:"
	@echo ""
	@echo " # List tools"
	@echo " curl -X POST -H 'Content-Type: application/json' \\"
	@echo " -d '{\"jsonrpc\": \"2.0\", \"id\": 1, \"method\": \"tools/list\", \"params\": {}}' \\"
	@echo " http://$(HTTP_HOST):$(HTTP_PORT)/"
	@echo ""
	@echo " # Evaluate response"
	@echo " curl -X POST -H 'Content-Type: application/json' \\"
	@echo " -d '{\"jsonrpc\": \"2.0\", \"id\": 2, \"method\": \"tools/call\","
	@echo "      \"params\": {\"name\": \"judge.evaluate_response\","
	@echo "      \"arguments\": {\"response\": \"Test\", \"criteria\": [...]}}}' \\"
	@echo " http://$(HTTP_HOST):$(HTTP_PORT)/"
	@echo ""
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
# Prints a static connection guide for the REST API mode; it starts nothing.
# Host and port in the text follow $(HTTP_HOST) and $(REST_PORT). The
# endpoint catalogue is fixed text kept in this recipe, not something queried
# from a running server.
#
# Each example request is printed as ONE -d argument whose single-quoted JSON
# spans several lines. curl joins repeated -d options with '&', so splitting
# the JSON across several -d flags would send an invalid body when the
# example is pasted into a shell.
rest-info: ## Show REST API connection information
	@echo "๐ก MCP Evaluation Server - REST API Connection Guide"
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
	@echo ""
	@echo "๐ REST API Server Configuration:"
	@echo " Host: $(HTTP_HOST)"
	@echo " Port: $(REST_PORT)"
	@echo " URL: http://$(HTTP_HOST):$(REST_PORT)"
	@echo " Docs: http://$(HTTP_HOST):$(REST_PORT)/docs"
	@echo ""
	@echo "๐ Authentication: None (open access)"
	@echo ""
	@echo "๐ก Core Endpoints:"
	@echo " GET / # Server info and health"
	@echo " GET /health # Health check"
	@echo " GET /tools # List all tools by category"
	@echo " GET /tools/categories # Get tool categories"
	@echo " GET /tools/{category} # List tools in specific category"
	@echo ""
	@echo "๐ Judge & Evaluation Endpoints:"
	@echo " POST /judge/evaluate # Single response evaluation"
	@echo " POST /judge/compare # Pairwise comparison"
	@echo " POST /judge/rank # Rank multiple responses"
	@echo " POST /judge/reference # Evaluate vs reference"
	@echo ""
	@echo "๐ฏ Quality & Analysis Endpoints:"
	@echo " POST /quality/factuality # Check factual accuracy"
	@echo " POST /quality/coherence # Analyze logical coherence"
	@echo " POST /quality/toxicity # Detect harmful content"
	@echo " POST /prompt/clarity # Evaluate prompt clarity"
	@echo " POST /prompt/consistency # Test prompt consistency"
	@echo " POST /prompt/completeness # Measure completeness"
	@echo " POST /prompt/relevance # Assess relevance"
	@echo ""
	@echo "๐ค Agent & RAG Endpoints:"
	@echo " POST /agent/tool-use # Evaluate tool usage"
	@echo " POST /agent/task-completion # Measure task success"
	@echo " POST /agent/reasoning # Analyze reasoning quality"
	@echo " POST /agent/benchmark # Run agent benchmarks"
	@echo " POST /rag/retrieval-relevance # Assess retrieval quality"
	@echo " POST /rag/context-utilization # Check context usage"
	@echo " POST /rag/answer-groundedness # Verify answer grounding"
	@echo " POST /rag/hallucination-detection # Detect hallucinations"
	@echo ""
	@echo "๐ก๏ธ Safety & Privacy Endpoints:"
	@echo " POST /safety/harmful-content # Detect harmful content"
	@echo " POST /safety/instruction-following # Check instruction adherence"
	@echo " POST /safety/refusal-appropriateness # Evaluate refusal behavior"
	@echo " POST /safety/value-alignment # Assess value alignment"
	@echo " POST /privacy/pii-detection # Detect PII exposure"
	@echo " POST /privacy/data-minimization # Assess data minimization"
	@echo " POST /privacy/consent-compliance # Check consent compliance"
	@echo " POST /bias/demographic # Check demographic bias"
	@echo " POST /bias/representation-fairness # Measure representation fairness"
	@echo " POST /bias/cultural-sensitivity # Assess cultural sensitivity"
	@echo " POST /robustness/adversarial # Test adversarial inputs"
	@echo " POST /robustness/input-sensitivity # Measure input sensitivity"
	@echo " POST /robustness/prompt-injection # Test injection resistance"
	@echo ""
	@echo "๐ Multilingual & Performance Endpoints:"
	@echo " POST /multilingual/translation-quality # Assess translation quality"
	@echo " POST /multilingual/cross-lingual-consistency # Check consistency across languages"
	@echo " POST /multilingual/cultural-adaptation # Evaluate cultural adaptation"
	@echo " POST /multilingual/language-mixing # Detect language mixing"
	@echo " POST /performance/latency # Measure response latency"
	@echo " POST /performance/computational-efficiency # Assess efficiency"
	@echo " POST /performance/throughput-scaling # Test throughput scaling"
	@echo " POST /performance/memory-usage # Monitor memory usage"
	@echo ""
	@echo "โก Workflow & Calibration Endpoints:"
	@echo " POST /workflow/create-suite # Create evaluation suite"
	@echo " POST /workflow/run-evaluation # Execute evaluation suite"
	@echo " POST /workflow/compare-evaluations # Compare evaluation results"
	@echo " POST /calibration/judge-agreement # Test judge agreement"
	@echo " POST /calibration/optimize-rubrics # Optimize evaluation rubrics"
	@echo ""
	@echo "๐งช Testing Commands:"
	@echo " make serve-rest # Start local REST API server"
	@echo " make serve-rest-public # Start server accessible from any IP"
	@echo " make test-rest # Test REST API endpoints"
	@echo " make test-all-apis # Test both HTTP and REST APIs"
	@echo ""
	@echo "๐ง Custom Configuration:"
	@echo " make serve-rest REST_PORT=3000 # Custom port"
	@echo " make serve-rest HTTP_HOST=0.0.0.0 # Public access"
	@echo ""
	@echo "๐ก Example REST API Calls:"
	@echo ""
	@echo " # Evaluate response quality"
	@echo " curl -X POST -H 'Content-Type: application/json' \\"
	@echo " -d '{\"response\": \"Paris is the capital of France\","
	@echo "      \"criteria\": [{\"name\": \"accuracy\", \"description\": \"Factual accuracy\", \"scale\": \"1-5\", \"weight\": 1.0}],"
	@echo "      \"rubric\": {\"criteria\": [], \"scale_description\": {\"1\": \"Wrong\", \"5\": \"Correct\"}},"
	@echo "      \"judge_model\": \"rule-based\"}' \\"
	@echo " http://$(HTTP_HOST):$(REST_PORT)/judge/evaluate"
	@echo ""
	@echo " # Check content toxicity"
	@echo " curl -X POST -H 'Content-Type: application/json' \\"
	@echo " -d '{\"content\": \"This is a test message\","
	@echo "      \"toxicity_categories\": [\"profanity\", \"hate_speech\"],"
	@echo "      \"sensitivity_level\": \"moderate\"}' \\"
	@echo " http://$(HTTP_HOST):$(REST_PORT)/quality/toxicity"
	@echo ""
	@echo " # Evaluate prompt clarity"
	@echo " curl -X POST -H 'Content-Type: application/json' \\"
	@echo " -d '{\"prompt_text\": \"Write a summary of the following text\","
	@echo "      \"target_model\": \"general\"}' \\"
	@echo " http://$(HTTP_HOST):$(REST_PORT)/prompt/clarity"
	@echo ""
	@echo "๐ Interactive Documentation:"
	@echo " Visit http://$(HTTP_HOST):$(REST_PORT)/docs for Swagger UI"
	@echo " OpenAPI schema: http://$(HTTP_HOST):$(REST_PORT)/openapi.json"
	@echo ""
	@echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"