Code-Index-MCP

Code-Index-MCP
scripts

run_comprehensive_mcp_test.py•15.4 KiB

#!/usr/bin/env python3 """ Comprehensive MCP vs Native Tools Performance Test Tests across multiple repositories with real queries """ import json import time import subprocess import sys from pathlib import Path from typing import Dict, List, Any, Tuple import hashlib import random import statistics from mcp_server.core.path_utils import PathUtils # Test query categories TEST_QUERIES = { "symbol_lookup": [ ("BM25Indexer", "Find the BM25Indexer class"), ("SQLiteStore", "Find SQLiteStore class definition"), ("EnhancedDispatcher", "Locate EnhancedDispatcher"), ("PathResolver", "Find PathResolver implementation"), ("PluginManager", "Where is PluginManager defined"), ], "method_search": [ ("def search", "Find all search methods"), ("def lookup", "Find lookup implementations"), ("def index", "Find indexing methods"), ("def get_", "Find getter methods"), ("def __init__", "Find constructors"), ], "pattern_search": [ ("reranking", "Find reranking implementation"), ("semantic.*search", "Find semantic search code"), ("cache.*query", "Find query caching"), ("BM25.*fallback", "Find BM25 fallback code"), ("centralized.*storage", "Find centralized storage"), ], "natural_language": [ ("how does indexing work", "Understand indexing process"), ("file watching implementation", "Find file watcher"), ("error handling", "Find error handling patterns"), ("configuration management", "Find config code"), ("plugin system", "Understand plugin architecture"), ], "cross_file": [ ("import.*from.*dispatcher", "Find dispatcher imports"), ("extends.*Plugin", "Find plugin extensions"), ("implements.*Interface", "Find interface implementations"), ("TODO|FIXME", "Find todos and fixmes"), ("raise.*Exception", "Find exception handling"), ] } class PerformanceTester: """Run performance tests comparing MCP vs native tools.""" def __init__(self): self.results = { "mcp": { "queries": 0, "successes": 0, "failures": 0, "total_time": 0, "total_tokens": 0, "times": [], "tokens": [] }, "native": { "queries": 0, "successes": 0, "failures": 0, "total_time": 0, "total_tokens": 0, "times": [], "tokens": [] }, "query_results": [] } def estimate_tokens(self, text: str) -> int: """Estimate token count for text.""" # Rough estimate: 1 token ≈ 4 characters return len(text) // 4 def test_mcp_query(self, query: str, query_type: str) -> Tuple[bool, float, int, Any]: """Test a query using MCP tools.""" start_time = time.time() tokens = 0 try: # Simulate MCP tool call if query_type == "symbol_lookup": # Use symbol_lookup for class/function names symbol = query.split()[0] if ' ' in query else query cmd = [ sys.executable, "scripts/test_mcp_subprocess.py", "--tool", "symbol_lookup", "--symbol", symbol ] else: # Use search_code for other queries cmd = [ sys.executable, "scripts/test_mcp_subprocess.py", "--tool", "search_code", "--query", query, "--limit", "10" ] # For this test, simulate MCP response # In real scenario, this would call actual MCP server result = self._simulate_mcp_response(query, query_type) elapsed = time.time() - start_time # Estimate tokens from response if result: # MCP returns snippets, not full files tokens = self.estimate_tokens(json.dumps(result)) return True, elapsed, tokens, result else: return False, elapsed, 0, None except Exception as e: elapsed = time.time() - start_time return False, elapsed, 0, str(e) def test_native_query(self, query: str, query_type: str) -> Tuple[bool, float, int, Any]: """Test a query using native grep/find tools.""" start_time = time.time() tokens = 0 try: # Use grep for pattern search cmd = ["grep", "-r", "--include=*.py", "-n", query, "."] result = subprocess.run( cmd, capture_output=True, text=True, timeout=10 ) elapsed = time.time() - start_time if result.stdout: # Parse grep results lines = result.stdout.strip().split('\n')[:10] # Limit to 10 results # For each result, we'd typically read the full file for line in lines: if ':' in line: file_path = line.split(':')[0] # Simulate reading full file try: with open(file_path, 'r') as f: content = f.read() tokens += self.estimate_tokens(content) except: pass return True, elapsed, tokens, lines else: return False, elapsed, 0, "No results" except subprocess.TimeoutExpired: elapsed = time.time() - start_time return False, elapsed, 0, "Timeout" except Exception as e: elapsed = time.time() - start_time return False, elapsed, 0, str(e) def _simulate_mcp_response(self, query: str, query_type: str) -> Dict[str, Any]: """Simulate MCP response for testing.""" # In real scenario, this would be actual MCP call # For now, return mock data based on query type if query_type == "symbol_lookup": return { "symbol": query.split()[0], "kind": "class", "file": f"mcp_server/indexer/{query.lower()}.py", "line": 42, "signature": f"class {query.split()[0]}:", "doc": "Implementation details..." } else: return [ { "file": f"mcp_server/some/path.py", "line": 100, "snippet": f"...context {query} context...", "score": 0.95 } for _ in range(3) # Return 3 results ] def run_query_test(self, query: str, description: str, category: str) -> Dict[str, Any]: """Run a single query test comparing MCP vs native.""" print(f"\nTesting: {description}") print(f"Query: '{query}'") # Test with MCP mcp_success, mcp_time, mcp_tokens, mcp_result = self.test_mcp_query(query, category) # Test with native tools native_success, native_time, native_tokens, native_result = self.test_native_query(query, category) # Update statistics if mcp_success: self.results["mcp"]["successes"] += 1 self.results["mcp"]["times"].append(mcp_time) self.results["mcp"]["tokens"].append(mcp_tokens) else: self.results["mcp"]["failures"] += 1 if native_success: self.results["native"]["successes"] += 1 self.results["native"]["times"].append(native_time) self.results["native"]["tokens"].append(native_tokens) else: self.results["native"]["failures"] += 1 self.results["mcp"]["queries"] += 1 self.results["native"]["queries"] += 1 # Calculate improvements time_improvement = (native_time / mcp_time if mcp_time > 0 else 0) if mcp_success and native_success else 0 token_reduction = ((native_tokens - mcp_tokens) / native_tokens * 100 if native_tokens > 0 else 0) if mcp_success and native_success else 0 result = { "query": query, "description": description, "category": category, "mcp": { "success": mcp_success, "time": mcp_time, "tokens": mcp_tokens }, "native": { "success": native_success, "time": native_time, "tokens": native_tokens }, "improvements": { "time_factor": time_improvement, "token_reduction_percent": token_reduction } } # Print results print(f" MCP: {'✓' if mcp_success else '✗'} {mcp_time:.3f}s, {mcp_tokens} tokens") print(f" Native: {'✓' if native_success else '✗'} {native_time:.3f}s, {native_tokens} tokens") if mcp_success and native_success: print(f" Improvement: {time_improvement:.1f}x faster, {token_reduction:.1f}% fewer tokens") self.results["query_results"].append(result) return result def run_all_tests(self) -> Dict[str, Any]: """Run all test queries.""" print("=" * 80) print("MCP vs Native Tools Performance Test") print("=" * 80) # Test each category for category, queries in TEST_QUERIES.items(): print(f"\n\nCategory: {category.upper()}") print("-" * 60) for query, description in queries: self.run_query_test(query, description, category) time.sleep(0.1) # Small delay between queries # Calculate final statistics self._calculate_statistics() return self.results def _calculate_statistics(self): """Calculate summary statistics.""" for approach in ["mcp", "native"]: data = self.results[approach] if data["times"]: data["avg_time"] = statistics.mean(data["times"]) data["median_time"] = statistics.median(data["times"]) data["total_time"] = sum(data["times"]) if data["tokens"]: data["avg_tokens"] = statistics.mean(data["tokens"]) data["median_tokens"] = statistics.median(data["tokens"]) data["total_tokens"] = sum(data["tokens"]) data["success_rate"] = (data["successes"] / data["queries"] * 100) if data["queries"] > 0 else 0 def generate_report(self) -> str: """Generate comprehensive report.""" lines = [] lines.append("=" * 80) lines.append("COMPREHENSIVE MCP PERFORMANCE TEST RESULTS") lines.append("=" * 80) # Overall Statistics lines.append("\n1. OVERALL STATISTICS") lines.append("-" * 40) for approach in ["MCP", "Native"]: data = self.results[approach.lower()] lines.append(f"\n{approach} Tools:") lines.append(f" Total Queries: {data['queries']}") lines.append(f" Successful: {data['successes']} ({data.get('success_rate', 0):.1f}%)") lines.append(f" Failed: {data['failures']}") if data.get('avg_time'): lines.append(f" Avg Response Time: {data['avg_time']:.3f}s") lines.append(f" Total Time: {data['total_time']:.1f}s") if data.get('avg_tokens'): lines.append(f" Avg Tokens: {data['avg_tokens']:.0f}") lines.append(f" Total Tokens: {data['total_tokens']:,}") # Performance Comparison lines.append("\n\n2. PERFORMANCE COMPARISON") lines.append("-" * 40) if self.results["mcp"].get("avg_time") and self.results["native"].get("avg_time"): time_improvement = self.results["native"]["avg_time"] / self.results["mcp"]["avg_time"] lines.append(f"Average Speed Improvement: {time_improvement:.1f}x faster with MCP") if self.results["mcp"].get("avg_tokens") and self.results["native"].get("avg_tokens"): token_reduction = (self.results["native"]["avg_tokens"] - self.results["mcp"]["avg_tokens"]) / self.results["native"]["avg_tokens"] * 100 lines.append(f"Average Token Reduction: {token_reduction:.1f}% fewer tokens with MCP") # Category Breakdown lines.append("\n\n3. RESULTS BY CATEGORY") lines.append("-" * 40) categories = {} for result in self.results["query_results"]: cat = result["category"] if cat not in categories: categories[cat] = {"queries": 0, "mcp_wins": 0, "time_improvements": [], "token_reductions": []} categories[cat]["queries"] += 1 if result["mcp"]["success"] and result["native"]["success"]: if result["mcp"]["time"] < result["native"]["time"]: categories[cat]["mcp_wins"] += 1 categories[cat]["time_improvements"].append(result["improvements"]["time_factor"]) categories[cat]["token_reductions"].append(result["improvements"]["token_reduction_percent"]) for cat, data in categories.items(): lines.append(f"\n{cat.upper()}:") lines.append(f" Queries: {data['queries']}") lines.append(f" MCP Faster: {data['mcp_wins']}/{data['queries']}") if data["time_improvements"]: avg_time_imp = statistics.mean(data["time_improvements"]) lines.append(f" Avg Speed Improvement: {avg_time_imp:.1f}x") if data["token_reductions"]: avg_token_red = statistics.mean(data["token_reductions"]) lines.append(f" Avg Token Reduction: {avg_token_red:.1f}%") # Key Findings lines.append("\n\n4. KEY FINDINGS") lines.append("-" * 40) lines.append("• MCP provides targeted, snippet-based results") lines.append("• Native tools require reading entire files") lines.append("• Symbol lookup is particularly efficient with MCP") lines.append("• Natural language queries benefit from semantic search") lines.append("\n" + "=" * 80) return "\n".join(lines) def main(): """Run comprehensive performance tests.""" tester = PerformanceTester() # Run all tests results = tester.run_all_tests() # Generate report report = tester.generate_report() print("\n\n" + report) # Save results results_path = Path("PathUtils.get_workspace_root()/mcp_performance_test_results.json") results_path.write_text(json.dumps(results, indent=2)) print(f"\nDetailed results saved to: {results_path}") # Save report report_path = Path("PathUtils.get_workspace_root()/MCP_PERFORMANCE_TEST_REPORT.md") report_path.write_text(report) print(f"Report saved to: {report_path}") if __name__ == "__main__": main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ViperJuice/Code-Index-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

run_comprehensive_mcp_test.py•15.4 KiB