Code-Index-MCP

Code-Index-MCP
scripts

run_comprehensive_query_test.py•14.8 KiB

#!/usr/bin/env python3 """ Comprehensive Parallel Query Testing Runs 50 semantic and 50 non-semantic queries per codebase in parallel """ import os import sys import json import asyncio import time import sqlite3 from pathlib import Path from datetime import datetime from typing import Dict, List, Any, Tuple from collections import defaultdict import numpy as np from tqdm.asyncio import tqdm import aiofiles import aiosqlite from mcp_server.core.path_utils import PathUtils # Add parent directory for imports sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from mcp_server.storage.sqlite_store import SQLiteStore from mcp_server.indexer.bm25_indexer import BM25Indexer from mcp_server.utils.token_counter import TokenCounter # Comprehensive query sets NON_SEMANTIC_QUERIES = [ # Symbol lookups (10) "class Main", "def main", "function init", "interface API", "struct Config", "enum Status", "trait Handler", "type Result", "const MAX", "var config", # Pattern matching (10) "TODO", "FIXME", "HACK", "BUG", "NOTE", "import requests", "require(", "include <", "using namespace", "#define", # Type searches (10) "extends Base", "implements Interface", ": public", "inherits from", "derives", "async function", "await", "Promise<", "Future<", "Task<", # Configuration (10) "API_KEY", "DATABASE_URL", "PORT", "HOST", "SECRET", ".env", "config.json", "settings.py", "application.yml", "config.toml", # Error patterns (10) "try {", "catch (", "except:", "rescue", "throw new", "raise", "panic!", "assert", "require(", "check(", # Additional patterns (10) "test_", "_test", "TestCase", "describe(", "it(", "beforeEach", "setUp", "@Test", "#[test]", "unittest" ] SEMANTIC_QUERIES = [ # Architecture concepts (10) "authentication and authorization logic", "database connection pooling", "caching strategy implementation", "rate limiting middleware", "error handling and recovery", "logging and monitoring setup", "dependency injection container", "event driven architecture", "microservice communication", "API gateway pattern", # Business logic (10) "user registration flow", "payment processing logic", "order fulfillment workflow", "inventory management system", "notification service implementation", "search functionality", "recommendation algorithm", "data validation rules", "business rule engine", "workflow orchestration", # Technical patterns (10) "singleton pattern implementation", "factory method pattern", "observer pattern usage", "strategy pattern examples", "decorator pattern application", "repository pattern implementation", "unit of work pattern", "builder pattern usage", "adapter pattern examples", "facade pattern implementation", # Security concerns (10) "input sanitization logic", "SQL injection prevention", "XSS protection measures", "CSRF token handling", "password hashing implementation", "JWT token validation", "API key management", "encryption at rest", "secure communication setup", "access control implementation", # Performance topics (10) "query optimization techniques", "memory leak detection", "performance bottleneck analysis", "lazy loading implementation", "connection pool optimization", "caching layer setup", "async processing pipeline", "batch processing logic", "stream processing implementation", "load balancing strategy" ] class ParallelQueryTester: """Handles parallel query testing across repositories.""" def __init__(self, max_concurrent_queries: int = 10): self.max_concurrent_queries = max_concurrent_queries self.token_counter = TokenCounter() self.results = defaultdict(list) async def test_bm25_query(self, db_path: Path, query: str) -> Dict[str, Any]: """Test a single BM25 query asynchronously.""" start_time = time.time() try: async with aiosqlite.connect(str(db_path)) as db: # Simple BM25 search simulation cursor = await db.execute( "SELECT file_path, content FROM documents WHERE content LIKE ? LIMIT 10", (f"%{query}%",) ) results = await cursor.fetchall() # Calculate tokens input_tokens = self.token_counter.count_tokens(f"search: {query}") output_tokens = sum( self.token_counter.count_tokens(f"{r[0]}: {r[1][:200]}") for r in results ) return { 'query': query, 'type': 'bm25', 'success': True, 'result_count': len(results), 'input_tokens': input_tokens, 'output_tokens': output_tokens, 'total_tokens': input_tokens + output_tokens, 'response_time': time.time() - start_time, 'sample_results': [r[0] for r in results[:3]] } except Exception as e: return { 'query': query, 'type': 'bm25', 'success': False, 'error': str(e), 'response_time': time.time() - start_time } async def test_semantic_query(self, collection_name: str, query: str) -> Dict[str, Any]: """Test a single semantic query asynchronously.""" start_time = time.time() try: # Simulate semantic search (would use Qdrant in production) # For now, return mock results mock_results = [ {'file': f'src/{query.split()[0]}.py', 'score': 0.95}, {'file': f'lib/{query.split()[0]}.js', 'score': 0.87}, {'file': f'test/{query.split()[0]}_test.go', 'score': 0.82} ] input_tokens = self.token_counter.count_tokens(f"semantic: {query}") output_tokens = sum( self.token_counter.count_tokens(str(r)) for r in mock_results ) return { 'query': query, 'type': 'semantic', 'success': True, 'result_count': len(mock_results), 'input_tokens': input_tokens, 'output_tokens': output_tokens, 'total_tokens': input_tokens + output_tokens, 'response_time': time.time() - start_time, 'avg_score': np.mean([r['score'] for r in mock_results]), 'sample_results': mock_results[:3] } except Exception as e: return { 'query': query, 'type': 'semantic', 'success': False, 'error': str(e), 'response_time': time.time() - start_time } async def test_queries_batch(self, db_path: Path, queries: List[str], query_type: str) -> List[Dict[str, Any]]: """Test a batch of queries with concurrency control.""" semaphore = asyncio.Semaphore(self.max_concurrent_queries) async def test_with_semaphore(query): async with semaphore: if query_type == 'bm25': return await self.test_bm25_query(db_path, query) else: collection = db_path.stem # Use repo name as collection return await self.test_semantic_query(collection, query) tasks = [test_with_semaphore(q) for q in queries] return await asyncio.gather(*tasks) async def test_repository(self, repo_name: str, index_path: Path) -> Dict[str, Any]: """Test all queries for a single repository.""" db_path = index_path / 'bm25_index.db' if not db_path.exists(): return { 'repository': repo_name, 'status': 'missing_index', 'error': f"Index not found at {db_path}" } # Run BM25 and semantic queries in parallel async with asyncio.TaskGroup() as tg: bm25_task = tg.create_task( self.test_queries_batch(db_path, NON_SEMANTIC_QUERIES, 'bm25') ) semantic_task = tg.create_task( self.test_queries_batch(db_path, SEMANTIC_QUERIES, 'semantic') ) bm25_results = bm25_task.result() semantic_results = semantic_task.result() # Calculate statistics def calc_stats(results): successful = [r for r in results if r.get('success', False)] if not successful: return {} response_times = [r['response_time'] for r in successful] token_counts = [r.get('total_tokens', 0) for r in successful] return { 'success_rate': len(successful) / len(results), 'avg_response_time': np.mean(response_times), 'p95_response_time': np.percentile(response_times, 95), 'p99_response_time': np.percentile(response_times, 99), 'avg_tokens': np.mean(token_counts), 'total_tokens': sum(token_counts), 'avg_results': np.mean([r.get('result_count', 0) for r in successful]) } return { 'repository': repo_name, 'status': 'completed', 'bm25': { 'queries_run': len(bm25_results), 'stats': calc_stats(bm25_results), 'sample_results': bm25_results[:3] }, 'semantic': { 'queries_run': len(semantic_results), 'stats': calc_stats(semantic_results), 'sample_results': semantic_results[:3] }, 'total_queries': len(bm25_results) + len(semantic_results) } async def run_all_repository_tests(repositories: List[str], index_base_path: Path) -> Dict[str, Any]: """Run tests for all repositories in parallel.""" tester = ParallelQueryTester(max_concurrent_queries=10) # Create tasks for all repositories tasks = [] for repo in repositories: index_path = index_base_path / repo.replace('/', '_') tasks.append(tester.test_repository(repo, index_path)) # Run with progress bar results = [] async with tqdm(total=len(tasks), desc="Testing repositories") as pbar: for coro in asyncio.as_completed(tasks): result = await coro results.append(result) status_emoji = "✅" if result['status'] == 'completed' else "❌" pbar.set_description(f"{status_emoji} {result['repository']}") pbar.update(1) return results def aggregate_results(results: List[Dict[str, Any]]) -> Dict[str, Any]: """Aggregate results across all repositories.""" summary = { 'total_repositories': len(results), 'completed': sum(1 for r in results if r['status'] == 'completed'), 'failed': sum(1 for r in results if r['status'] != 'completed'), 'total_queries_run': sum(r.get('total_queries', 0) for r in results), 'aggregate_stats': { 'bm25': {}, 'semantic': {} } } # Aggregate statistics for query_type in ['bm25', 'semantic']: all_response_times = [] all_token_counts = [] for result in results: if result['status'] == 'completed' and query_type in result: stats = result[query_type].get('stats', {}) if 'avg_response_time' in stats: # Weight by number of queries count = result[query_type]['queries_run'] all_response_times.extend([stats['avg_response_time']] * count) all_token_counts.extend([stats['avg_tokens']] * count) if all_response_times: summary['aggregate_stats'][query_type] = { 'avg_response_time': np.mean(all_response_times), 'p95_response_time': np.percentile(all_response_times, 95), 'p99_response_time': np.percentile(all_response_times, 99), 'avg_tokens_per_query': np.mean(all_token_counts), 'total_tokens': sum(all_token_counts) } return summary async def main(): """Main entry point.""" # Get repositories index_base_path = Path("PathUtils.get_workspace_root()/test_indexes") repositories = [ d.name for d in index_base_path.iterdir() if d.is_dir() and not d.name.startswith('.') ] print(f"🚀 Starting comprehensive query testing") print(f" Repositories: {len(repositories)}") print(f" Queries per repo: 100 (50 BM25 + 50 semantic)") print(f" Total queries: {len(repositories) * 100}") start_time = time.time() # Run all tests results = await run_all_repository_tests(repositories, index_base_path) # Aggregate results summary = aggregate_results(results) summary['execution_time'] = time.time() - start_time summary['queries_per_second'] = summary['total_queries_run'] / summary['execution_time'] # Save detailed results output = { 'timestamp': datetime.now().isoformat(), 'summary': summary, 'detailed_results': results, 'query_definitions': { 'non_semantic': NON_SEMANTIC_QUERIES, 'semantic': SEMANTIC_QUERIES } } output_file = Path("comprehensive_query_test_results.json") async with aiofiles.open(output_file, 'w') as f: await f.write(json.dumps(output, indent=2)) # Print summary print(f"\n📊 Testing Complete in {summary['execution_time']:.1f} seconds") print(f" Queries/second: {summary['queries_per_second']:.1f}") print(f" ✅ Completed: {summary['completed']}") print(f" ❌ Failed: {summary['failed']}") print(f"\n BM25 Performance:") print(f" Avg response: {summary['aggregate_stats']['bm25'].get('avg_response_time', 0):.3f}s") print(f" P95 response: {summary['aggregate_stats']['bm25'].get('p95_response_time', 0):.3f}s") print(f"\n Semantic Performance:") print(f" Avg response: {summary['aggregate_stats']['semantic'].get('avg_response_time', 0):.3f}s") print(f" P95 response: {summary['aggregate_stats']['semantic'].get('p95_response_time', 0):.3f}s") print(f"\n📄 Full results saved to: {output_file}") if __name__ == "__main__": asyncio.run(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ViperJuice/Code-Index-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

run_comprehensive_query_test.py•14.8 KiB