
MCP Wikipedia Server

by kaman05010
test_performance.py (12.9 kB)
""" Performance and load testing for MCP Wikipedia Server. This module contains performance benchmarks and load tests to ensure the server can handle production workloads efficiently. """ import asyncio import time import statistics from concurrent.futures import ThreadPoolExecutor from typing import List, Dict, Any import sys import os # Add src to path for imports sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) try: from mcp_server.mcp_server import WikipediaServer except ImportError: sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src', 'mcp_server')) from mcp_server import WikipediaServer class PerformanceTester: """Performance testing utilities for Wikipedia server.""" def __init__(self): self.server = WikipediaServer() self.results = [] async def measure_response_time(self, func, *args, **kwargs) -> Dict[str, Any]: """Measure response time for a function call.""" start_time = time.time() try: result = await func(*args, **kwargs) end_time = time.time() response_time = end_time - start_time return { "success": result.get("success", False), "response_time": response_time, "error": result.get("error") if not result.get("success") else None } except Exception as e: end_time = time.time() response_time = end_time - start_time return { "success": False, "response_time": response_time, "error": str(e) } async def benchmark_search_tool(self, queries: List[str], iterations: int = 3) -> Dict[str, Any]: """Benchmark the fetch_wikipedia_info tool.""" print(f"\n🔍 Benchmarking fetch_wikipedia_info with {len(queries)} queries, {iterations} iterations each...") all_times = [] success_count = 0 total_requests = len(queries) * iterations for iteration in range(iterations): print(f" Iteration {iteration + 1}/{iterations}") for i, query in enumerate(queries): result = await self.measure_response_time( self.server.fetch_wikipedia_info, query ) all_times.append(result["response_time"]) if result["success"]: success_count += 1 print(f" Query {i+1}: {result['response_time']:.3f}s {'✅' if result['success'] else '❌'}") return { "tool": "fetch_wikipedia_info", "total_requests": total_requests, "successful_requests": success_count, "success_rate": success_count / total_requests, "response_times": { "min": min(all_times), "max": max(all_times), "mean": statistics.mean(all_times), "median": statistics.median(all_times), "stdev": statistics.stdev(all_times) if len(all_times) > 1 else 0 } } async def benchmark_sections_tool(self, topics: List[str], iterations: int = 3) -> Dict[str, Any]: """Benchmark the list_wikipedia_sections tool.""" print(f"\n📋 Benchmarking list_wikipedia_sections with {len(topics)} topics, {iterations} iterations each...") all_times = [] success_count = 0 total_requests = len(topics) * iterations for iteration in range(iterations): print(f" Iteration {iteration + 1}/{iterations}") for i, topic in enumerate(topics): result = await self.measure_response_time( self.server.list_wikipedia_sections, topic ) all_times.append(result["response_time"]) if result["success"]: success_count += 1 print(f" Topic {i+1}: {result['response_time']:.3f}s {'✅' if result['success'] else '❌'}") return { "tool": "list_wikipedia_sections", "total_requests": total_requests, "successful_requests": success_count, "success_rate": success_count / total_requests, "response_times": { "min": min(all_times), "max": max(all_times), "mean": statistics.mean(all_times), "median": statistics.median(all_times), "stdev": statistics.stdev(all_times) if len(all_times) > 1 else 0 } } async def 
benchmark_content_tool(self, topic_sections: List[tuple], iterations: int = 2) -> Dict[str, Any]: """Benchmark the get_section_content tool.""" print(f"\n📄 Benchmarking get_section_content with {len(topic_sections)} topic-section pairs, {iterations} iterations each...") all_times = [] success_count = 0 total_requests = len(topic_sections) * iterations for iteration in range(iterations): print(f" Iteration {iteration + 1}/{iterations}") for i, (topic, section) in enumerate(topic_sections): result = await self.measure_response_time( self.server.get_section_content, topic, section ) all_times.append(result["response_time"]) if result["success"]: success_count += 1 print(f" Pair {i+1}: {result['response_time']:.3f}s {'✅' if result['success'] else '❌'}") return { "tool": "get_section_content", "total_requests": total_requests, "successful_requests": success_count, "success_rate": success_count / total_requests, "response_times": { "min": min(all_times), "max": max(all_times), "mean": statistics.mean(all_times), "median": statistics.median(all_times), "stdev": statistics.stdev(all_times) if len(all_times) > 1 else 0 } } async def test_concurrent_load(self, query: str, concurrent_requests: int = 10) -> Dict[str, Any]: """Test server performance under concurrent load.""" print(f"\n⚡ Testing concurrent load: {concurrent_requests} simultaneous requests...") start_time = time.time() # Create concurrent tasks tasks = [ self.measure_response_time(self.server.fetch_wikipedia_info, f"{query} {i}") for i in range(concurrent_requests) ] # Execute all tasks concurrently results = await asyncio.gather(*tasks, return_exceptions=True) end_time = time.time() total_time = end_time - start_time # Process results successful_results = [] failed_results = [] response_times = [] for i, result in enumerate(results): if isinstance(result, Exception): failed_results.append(f"Request {i}: {str(result)}") elif result.get("success"): successful_results.append(result) response_times.append(result["response_time"]) else: failed_results.append(f"Request {i}: {result.get('error', 'Unknown error')}") success_rate = len(successful_results) / concurrent_requests print(f" Total time: {total_time:.3f}s") print(f" Successful requests: {len(successful_results)}/{concurrent_requests}") print(f" Success rate: {success_rate:.1%}") return { "test": "concurrent_load", "concurrent_requests": concurrent_requests, "total_time": total_time, "successful_requests": len(successful_results), "failed_requests": len(failed_results), "success_rate": success_rate, "response_times": { "min": min(response_times) if response_times else 0, "max": max(response_times) if response_times else 0, "mean": statistics.mean(response_times) if response_times else 0, "median": statistics.median(response_times) if response_times else 0 }, "failed_results": failed_results } def print_benchmark_summary(self, results: List[Dict[str, Any]]): """Print a summary of benchmark results.""" print("\n" + "="*60) print("📊 PERFORMANCE BENCHMARK SUMMARY") print("="*60) for result in results: if result.get("test") == "concurrent_load": print(f"\n⚡ Concurrent Load Test:") print(f" Requests: {result['concurrent_requests']}") print(f" Total Time: {result['total_time']:.3f}s") print(f" Success Rate: {result['success_rate']:.1%}") print(f" Avg Response Time: {result['response_times']['mean']:.3f}s") else: tool_name = result.get("tool", "Unknown") print(f"\n🛠️ {tool_name}:") print(f" Total Requests: {result['total_requests']}") print(f" Success Rate: {result['success_rate']:.1%}") times 
= result['response_times'] print(f" Response Times:") print(f" Min: {times['min']:.3f}s") print(f" Max: {times['max']:.3f}s") print(f" Mean: {times['mean']:.3f}s") print(f" Median: {times['median']:.3f}s") print(f" Std Dev: {times['stdev']:.3f}s") async def run_performance_benchmarks(): """Run comprehensive performance benchmarks.""" print("🚀 Starting MCP Wikipedia Server Performance Benchmarks") print("="*60) tester = PerformanceTester() results = [] # Test queries for different complexity levels simple_queries = [ "Python", "Java", "JavaScript", ] medium_queries = [ "Machine Learning", "Artificial Intelligence", "Data Science", ] complex_queries = [ "Quantum Computing Applications", "Climate Change Mitigation", ] # Common topics for section tests topics = [ "Python (programming language)", "Machine Learning", "Artificial Intelligence", ] # Topic-section pairs for content tests topic_sections = [ ("Python (programming language)", "History"), ("Machine Learning", "Applications"), ("Artificial Intelligence", "Ethics"), ] try: # Benchmark 1: Simple queries result = await tester.benchmark_search_tool(simple_queries) results.append(result) # Benchmark 2: Medium complexity queries result = await tester.benchmark_search_tool(medium_queries) results.append(result) # Benchmark 3: Complex queries result = await tester.benchmark_search_tool(complex_queries, iterations=1) results.append(result) # Benchmark 4: Section listing result = await tester.benchmark_sections_tool(topics) results.append(result) # Benchmark 5: Section content result = await tester.benchmark_content_tool(topic_sections) results.append(result) # Benchmark 6: Concurrent load test result = await tester.test_concurrent_load("Machine Learning", concurrent_requests=5) results.append(result) # Print summary tester.print_benchmark_summary(results) print("\n✅ Performance benchmarks completed successfully!") # Performance guidelines check print("\n📋 Performance Guidelines Check:") for result in results: if result.get("tool"): avg_time = result['response_times']['mean'] success_rate = result['success_rate'] time_ok = "✅" if avg_time < 3.0 else "⚠️" success_ok = "✅" if success_rate > 0.8 else "⚠️" print(f" {result['tool']}: {time_ok} Avg Time: {avg_time:.3f}s, {success_ok} Success: {success_rate:.1%}") except Exception as e: print(f"\n❌ Benchmark failed with error: {e}") raise if __name__ == "__main__": # Run benchmarks when script is executed directly asyncio.run(run_performance_benchmarks())

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kaman05010/MCPClientServer'
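
The same endpoint can be queried from Python. A minimal sketch, assuming the endpoint returns JSON (the exact response fields are not documented here, so the example just prints the payload):

# Minimal sketch: fetch this server's directory entry from the Glama MCP API.
# Assumes a JSON response; field names are not guaranteed by this example.
import json
import urllib.request

url = "https://glama.ai/api/mcp/v1/servers/kaman05010/MCPClientServer"
with urllib.request.urlopen(url) as resp:
    data = json.load(resp)

print(json.dumps(data, indent=2))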

If you have feedback or need assistance with the MCP directory API, please join our Discord server.