#!/usr/bin/env python3
"""
Comprehensive Performance Test Orchestrator for MCP vs Native Retrieval
This script orchestrates the full testing process including:
1. Repository validation
2. Query generation
3. Test execution using Task agents
4. Result collection and analysis
"""
import argparse
import json
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List

# Import test suite components
sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.standardized_query_test_suite import StandardizedQueryTestSuite, QueryCategory
# Test repositories that passed validation
VALID_TEST_REPOS = {
"go_gin": {
"path": "/workspaces/Code-Index-MCP/test_indexes/go_gin",
"language": "go",
"file_count": 93,
"main_class": "Engine",
"main_function": "New",
"config_variable": "defaultTrustedProxies"
},
"python_django": {
"path": "/workspaces/Code-Index-MCP/test_indexes/python_django",
"language": "python",
"file_count": 5497,
"main_class": "Model",
"main_function": "save",
"config_variable": "SECRET_KEY"
},
"javascript_react": {
"path": "/workspaces/Code-Index-MCP/test_indexes/javascript_react",
"language": "javascript",
"file_count": 2405,
"main_class": "Component",
"main_function": "render",
"config_variable": "PropTypes"
},
"rust_tokio": {
"path": "/workspaces/Code-Index-MCP/test_indexes/rust_tokio",
"language": "rust",
"file_count": 759,
"main_class": "Runtime",
"main_function": "spawn",
"config_variable": "TOKIO_WORKER_THREADS"
}
}
class PerformanceTestOrchestrator:
"""Orchestrates the full performance testing process"""
def __init__(self, output_dir: Path):
self.output_dir = output_dir
self.output_dir.mkdir(parents=True, exist_ok=True)
self.test_suite = StandardizedQueryTestSuite()
self.results = []
self.start_time = None
def generate_test_queries(self, repo_name: str, repo_info: Dict,
categories: List[QueryCategory],
queries_per_category: int = 3) -> List[Dict]:
"""Generate test queries for a repository"""
queries = []
# Get queries from test suite
query_counts = {cat.value: queries_per_category for cat in categories}
test_queries = self.test_suite.get_queries_for_repository(
repo_name,
repo_info["language"],
query_counts
)
        # Convert to our format, keeping only queries from the requested categories
        requested_categories = {cat.value for cat in categories}
        for query_text, category in test_queries:
            if category in requested_categories:
queries.append({
"query": query_text,
"category": category,
"complexity": "medium", # Default since we don't have this info
"expected_result_type": "location", # Default
"repository": repo_name,
"language": repo_info["language"]
})
return queries
def create_mcp_test_prompt(self, query_info: Dict) -> str:
"""Create prompt for MCP-enabled agent"""
return f"""You are testing MCP tools for performance analysis.
Repository: {query_info['repository']} ({query_info['language']})
Query: {query_info['query']}
Category: {query_info['category']}
Instructions:
1. Use ONLY MCP tools (mcp__code-index-mcp__symbol_lookup, mcp__code-index-mcp__search_code)
2. Time your query execution
3. Count results found
4. Track tool calls made
5. Estimate tokens used
Output JSON only:
```json
{{
"query": "{query_info['query']}",
"mode": "mcp",
"tools_used": ["list", "of", "tools"],
"tool_calls": {{"tool_name": count}},
"results_found": number,
"execution_time_ms": milliseconds,
"token_estimate": number,
"success": true/false,
"error": null_or_string
}}
```"""
def create_native_test_prompt(self, query_info: Dict) -> str:
"""Create prompt for native-only agent"""
return f"""You are testing native tools for performance analysis.
Repository: {query_info['repository']} ({query_info['language']})
Working Directory: {VALID_TEST_REPOS[query_info['repository']]['path']}
Query: {query_info['query']}
Category: {query_info['category']}
Instructions:
1. Use ONLY native tools (grep, find, glob, ls, read)
2. NO MCP tools allowed
3. Time your query execution
4. Count results found
5. Track tool calls made
6. Estimate tokens used
Output JSON only:
```json
{{
"query": "{query_info['query']}",
"mode": "native",
"tools_used": ["list", "of", "tools"],
"tool_calls": {{"tool_name": count}},
"results_found": number,
"execution_time_ms": milliseconds,
"token_estimate": number,
"success": true/false,
"error": null_or_string
}}
```"""
    def save_test_batch(self, batch_name: str, queries: List[Dict]) -> Path:
        """Save a batch of test configurations and return the path to the config file"""
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
# Create test configurations
test_configs = []
for i, query_info in enumerate(queries):
# MCP test
test_configs.append({
"test_id": f"{batch_name}_{i}_mcp",
"mode": "mcp",
"query_info": query_info,
"prompt": self.create_mcp_test_prompt(query_info)
})
# Native test
test_configs.append({
"test_id": f"{batch_name}_{i}_native",
"mode": "native",
"query_info": query_info,
"prompt": self.create_native_test_prompt(query_info)
})
# Save configurations
config_file = self.output_dir / f"test_batch_{batch_name}_{timestamp}.json"
with open(config_file, 'w') as f:
json.dump({
"batch_name": batch_name,
"timestamp": timestamp,
"test_count": len(test_configs),
"tests": test_configs
}, f, indent=2)
print(f"Saved {len(test_configs)} test configurations to: {config_file}")
return config_file
def run_validation(self) -> bool:
"""Run repository validation"""
print("\n=== Running Repository Validation ===")
result = subprocess.run(
[sys.executable, "scripts/validate_test_repositories.py"],
capture_output=True,
text=True
)
print(result.stdout)
if result.stderr:
print("Errors:", result.stderr)
return result.returncode == 0
def generate_all_tests(self) -> List[Path]:
"""Generate all test configurations"""
print("\n=== Generating Test Configurations ===")
test_files = []
categories = [QueryCategory.SYMBOL, QueryCategory.CONTENT, QueryCategory.NAVIGATION]
for repo_name, repo_info in VALID_TEST_REPOS.items():
print(f"\nGenerating tests for {repo_name}...")
queries = self.generate_test_queries(
repo_name, repo_info, categories, queries_per_category=2
)
config_file = self.save_test_batch(repo_name, queries)
test_files.append(config_file)
return test_files
def create_analysis_prompt(self, results: List[Dict]) -> str:
"""Create prompt for analyzing results"""
return f"""Analyze these MCP vs Native performance test results:
```json
{json.dumps(results, indent=2)}
```
Provide a comprehensive analysis including:
1. Performance comparison (speed, tokens, accuracy)
2. Success rates by category and mode
3. Tool usage patterns
4. Recommendations for each use case
5. Statistical summary
Format as a detailed report with clear sections and data tables."""
    def generate_summary_report(self) -> Path:
        """Generate a summary report from all collected results and return its path"""
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
# Collect all result files
result_files = list(self.output_dir.glob("test_results_*.json"))
all_results = []
for result_file in result_files:
with open(result_file) as f:
data = json.load(f)
all_results.extend(data.get("results", []))
        # Calculate statistics, splitting results by retrieval mode
        mcp_results = [r for r in all_results if r.get("mode") == "mcp"]
        native_results = [r for r in all_results if r.get("mode") == "native"]

        def success_rate(results: List[Dict]) -> float:
            return sum(1 for r in results if r.get("success", False)) / max(1, len(results))

        def average(results: List[Dict], key: str) -> float:
            return sum(r.get(key, 0) for r in results) / max(1, len(results))

        stats = {
            "total_tests": len(all_results),
            "by_mode": {
                "mcp": len(mcp_results),
                "native": len(native_results)
            },
            "success_rates": {
                "mcp": success_rate(mcp_results),
                "native": success_rate(native_results)
            },
            "avg_execution_time_ms": {
                "mcp": average(mcp_results, "execution_time_ms"),
                "native": average(native_results, "execution_time_ms")
            },
            "avg_tokens": {
                "mcp": average(mcp_results, "token_estimate"),
                "native": average(native_results, "token_estimate")
            }
        }
# Save summary
summary_file = self.output_dir / f"performance_summary_{timestamp}.json"
with open(summary_file, 'w') as f:
json.dump({
"timestamp": timestamp,
"statistics": stats,
"test_files": [str(f) for f in result_files],
"repositories_tested": list(VALID_TEST_REPOS.keys())
}, f, indent=2)
print(f"\nSummary saved to: {summary_file}")
print(f"\nTest Statistics:")
print(f" Total tests: {stats['total_tests']}")
print(f" MCP success rate: {stats['success_rates']['mcp']:.1%}")
print(f" Native success rate: {stats['success_rates']['native']:.1%}")
print(f" MCP avg time: {stats['avg_execution_time_ms']['mcp']:.0f}ms")
print(f" Native avg time: {stats['avg_execution_time_ms']['native']:.0f}ms")
return summary_file
def run_full_test(self):
"""Run the complete test orchestration"""
self.start_time = time.time()
print("=== MCP vs Native Performance Test Orchestrator ===")
print(f"Output directory: {self.output_dir}")
        # Step 1: Validate repositories (optional; uncomment the lines below to enforce validation)
# if not self.run_validation():
# print("\n❌ Repository validation failed. Please fix issues before continuing.")
# return False
# Step 2: Generate test configurations
test_files = self.generate_all_tests()
print(f"\n✅ Generated {len(test_files)} test batch files")
# Step 3: Display test execution instructions
print("\n=== Test Execution Instructions ===")
print("\n1. For each test batch file, run the tests using the Task tool:")
print(" - Copy each prompt from the test configuration")
print(" - Use the Task tool to execute it")
print(" - Collect the JSON output")
print("\n2. Save results to files named:")
        for test_file in test_files:
result_file = test_file.name.replace("test_batch_", "test_results_")
print(f" - {result_file}")
print("\n3. After collecting all results, run:")
print(f" python {__file__} --analyze")
# Save instructions
instructions_file = self.output_dir / "test_execution_instructions.txt"
with open(instructions_file, 'w') as f:
f.write("MCP vs Native Performance Test Instructions\n")
f.write("==========================================\n\n")
for test_file in test_files:
f.write(f"Test batch: {test_file.name}\n")
f.write(f"Execute each test prompt using the Task tool\n")
f.write(f"Save results to: {test_file.name.replace('test_batch_', 'test_results_')}\n\n")
print(f"\nInstructions saved to: {instructions_file}")
elapsed = time.time() - self.start_time
print(f"\nTest generation completed in {elapsed:.1f} seconds")
return True
def main():
parser = argparse.ArgumentParser(description='MCP vs Native Performance Test Orchestrator')
parser.add_argument('--output', type=Path,
default=Path('/workspaces/Code-Index-MCP/test_results/performance_tests'),
help='Output directory for test results')
parser.add_argument('--analyze', action='store_true',
help='Analyze existing results instead of generating new tests')
args = parser.parse_args()
orchestrator = PerformanceTestOrchestrator(args.output)
    if args.analyze:
        print("=== Analyzing Test Results ===")
        orchestrator.generate_summary_report()
        print("\n✅ Analysis complete!")
else:
success = orchestrator.run_full_test()
if success:
print("\n✅ Test orchestration complete!")
else:
print("\n❌ Test orchestration failed")
sys.exit(1)
if __name__ == "__main__":
main()