"""
Integration tests for MCP Wikipedia Server.
This module contains end-to-end integration tests that verify the complete
functionality of the Wikipedia server in realistic usage scenarios.
"""
import asyncio
import time
import sys
import os
from typing import Any, Dict
# Add src to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
try:
from mcp_server.mcp_server import WikipediaServer
from mcp_server.mcp_client import WikipediaClient
except ImportError:
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src', 'mcp_server'))
from mcp_server import WikipediaServer
try:
from mcp_client import WikipediaClient
except ImportError:
WikipediaClient = None
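# Note: WikipediaClient is imported for environments that also exercise the
# client side, but the tests below drive WikipediaServer directly; it may be
# None here without affecting this suite.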
class IntegrationTester:
"""Integration testing utilities for the Wikipedia server."""
def __init__(self):
self.server = WikipediaServer()
self.test_results = []
self.test_data = {
"articles": [
"Python (programming language)",
"Machine Learning",
"Artificial Intelligence",
"JavaScript",
"Data Science"
],
"search_queries": [
"Python programming",
"machine learning algorithms",
"AI ethics",
"web development JavaScript",
"data analysis techniques"
],
"known_sections": {
"Python (programming language)": ["History", "Features", "Syntax", "Libraries"],
"Machine Learning": ["Overview", "History", "Types", "Applications"],
"Artificial Intelligence": ["History", "Goals", "Approaches", "Applications"]
}
}
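        # Note: the "articles" and "known_sections" entries are reference
        # fixtures; the current tests only exercise "search_queries" plus
        # hardcoded article titles.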
async def test_article_research_workflow(self) -> Dict[str, Any]:
"""Test complete article research workflow."""
print("\nπ Testing Complete Article Research Workflow...")
workflow_results = []
for query in self.test_data["search_queries"][:3]: # Test first 3 queries
print(f"\n Researching: '{query}'")
workflow_start = time.time()
workflow_success = True
workflow_steps = []
# Step 1: Search for article
print(" Step 1: Searching for article...")
search_start = time.time()
search_result = await self.server.fetch_wikipedia_info(query)
search_time = time.time() - search_start
if search_result["success"]:
article_title = search_result["data"]["title"]
print(f" β
Found article: '{article_title}' ({search_time:.2f}s)")
workflow_steps.append({
"step": "search",
"success": True,
"time": search_time,
"result": article_title
})
# Step 2: Get article sections
print(" Step 2: Listing article sections...")
sections_start = time.time()
sections_result = await self.server.list_wikipedia_sections(article_title)
sections_time = time.time() - sections_start
if sections_result["success"]:
sections = sections_result["data"]["sections"]
print(f" β
Found {len(sections)} sections ({sections_time:.2f}s)")
workflow_steps.append({
"step": "sections",
"success": True,
"time": sections_time,
"result": len(sections)
})
# Step 3: Get content from first few sections
print(" Step 3: Retrieving section content...")
content_results = []
for section in sections[:2]: # Get first 2 sections
content_start = time.time()
content_result = await self.server.get_section_content(
article_title, section["title"]
)
content_time = time.time() - content_start
if content_result["success"]:
content_length = len(content_result["data"]["content"])
print(f" β
'{section['title']}': {content_length} chars ({content_time:.2f}s)")
content_results.append({
"section": section["title"],
"success": True,
"time": content_time,
"content_length": content_length
})
else:
print(f" β '{section['title']}': {content_result['error']} ({content_time:.2f}s)")
content_results.append({
"section": section["title"],
"success": False,
"time": content_time,
"error": content_result["error"]
})
workflow_steps.append({
"step": "content",
"success": len(content_results) > 0,
"time": sum(r["time"] for r in content_results),
"result": content_results
})
else:
print(f" β Failed to get sections: {sections_result['error']}")
workflow_success = False
workflow_steps.append({
"step": "sections",
"success": False,
"time": sections_time,
"error": sections_result["error"]
})
else:
print(f" β Search failed: {search_result['error']}")
workflow_success = False
workflow_steps.append({
"step": "search",
"success": False,
"time": search_time,
"error": search_result["error"]
})
workflow_total_time = time.time() - workflow_start
workflow_results.append({
"query": query,
"success": workflow_success,
"total_time": workflow_total_time,
"steps": workflow_steps
})
# Calculate summary statistics
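        # average_time is computed over successful workflows only, so failed
        # runs do not skew the timing baseline.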
successful_workflows = [r for r in workflow_results if r["success"]]
success_rate = len(successful_workflows) / len(workflow_results)
avg_time = sum(r["total_time"] for r in successful_workflows) / len(successful_workflows) if successful_workflows else 0
return {
"test": "article_research_workflow",
"total_workflows": len(workflow_results),
"successful_workflows": len(successful_workflows),
"success_rate": success_rate,
"average_time": avg_time,
"workflows": workflow_results
}
async def test_error_recovery(self) -> Dict[str, Any]:
"""Test error handling and recovery scenarios."""
print("\nπ¨ Testing Error Recovery Scenarios...")
error_scenarios = [
{
"name": "Non-existent article",
"test": lambda: self.server.fetch_wikipedia_info("ThisArticleDoesNotExist12345"),
"expect_success": False
},
{
"name": "Empty search query",
"test": lambda: self.server.fetch_wikipedia_info(""),
"expect_success": False
},
{
"name": "Non-existent section",
"test": lambda: self.server.get_section_content("Python", "NonExistentSection"),
"expect_success": False
},
{
"name": "Sections from non-existent article",
"test": lambda: self.server.list_wikipedia_sections("NonExistentArticle12345"),
"expect_success": False
},
{
"name": "Very long query",
"test": lambda: self.server.fetch_wikipedia_info("x" * 1000),
"expect_success": None # Could succeed or fail gracefully
}
]
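        # The checks below assume each server call resolves to a dict shaped
        # like {"success": bool, "data": ..., "error": str, "metadata": ...};
        # that response contract is what the rest of this suite relies on.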
recovery_results = []
for scenario in error_scenarios:
print(f" Testing: {scenario['name']}")
try:
start_time = time.time()
result = await scenario["test"]()
test_time = time.time() - start_time
success = result.get("success", False)
has_error_info = "error" in result
has_metadata = "metadata" in result
# Check if result meets expectations
expectation_met = True
if scenario["expect_success"] is not None:
expectation_met = (success == scenario["expect_success"])
# For failed requests, ensure proper error handling
proper_error_handling = True
if not success:
proper_error_handling = has_error_info and has_metadata
test_passed = expectation_met and proper_error_handling
print(f" {'β
' if test_passed else 'β'} Result: success={success}, proper_error={proper_error_handling}")
recovery_results.append({
"scenario": scenario["name"],
"success": success,
"test_passed": test_passed,
"time": test_time,
"has_error_info": has_error_info,
"has_metadata": has_metadata,
"error": result.get("error") if not success else None
})
except Exception as e:
print(f" β Exception: {e}")
recovery_results.append({
"scenario": scenario["name"],
"success": False,
"test_passed": False,
"time": 0,
"exception": str(e)
})
passed_tests = [r for r in recovery_results if r["test_passed"]]
return {
"test": "error_recovery",
"total_scenarios": len(error_scenarios),
"passed_scenarios": len(passed_tests),
"success_rate": len(passed_tests) / len(error_scenarios),
"scenarios": recovery_results
}
async def test_data_consistency(self) -> Dict[str, Any]:
"""Test data consistency across multiple requests."""
print("\nπ Testing Data Consistency...")
consistency_results = []
# Test same article multiple times
test_article = "Python (programming language)"
print(f" Testing consistency for: '{test_article}'")
# Make multiple requests for the same article
requests = []
        for _ in range(3):
result = await self.server.fetch_wikipedia_info(test_article)
requests.append(result)
# Check consistency
if all(r["success"] for r in requests):
titles = [r["data"]["title"] for r in requests]
urls = [r["data"]["url"] for r in requests]
page_ids = [r["data"]["page_id"] for r in requests]
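            # Identical values collapse to a single-element set, so a set of
            # size 1 means every request returned the same value for that field.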
title_consistent = len(set(titles)) == 1
url_consistent = len(set(urls)) == 1
page_id_consistent = len(set(page_ids)) == 1
consistency_score = sum([title_consistent, url_consistent, page_id_consistent]) / 3
print(f" β
Title consistent: {title_consistent}")
print(f" β
URL consistent: {url_consistent}")
print(f" β
Page ID consistent: {page_id_consistent}")
consistency_results.append({
"article": test_article,
"requests": len(requests),
"all_successful": True,
"consistency_score": consistency_score,
"details": {
"title_consistent": title_consistent,
"url_consistent": url_consistent,
"page_id_consistent": page_id_consistent
}
})
else:
failed_requests = [i for i, r in enumerate(requests) if not r["success"]]
print(f" β {len(failed_requests)} requests failed")
consistency_results.append({
"article": test_article,
"requests": len(requests),
"all_successful": False,
"failed_requests": failed_requests,
"consistency_score": 0
})
# Test section listing consistency
print(f" Testing section listing consistency...")
section_requests = []
        for _ in range(2):
result = await self.server.list_wikipedia_sections(test_article)
section_requests.append(result)
if all(r["success"] for r in section_requests):
section_lists = [r["data"]["sections"] for r in section_requests]
section_titles = [[s["title"] for s in sections] for sections in section_lists]
sections_consistent = all(titles == section_titles[0] for titles in section_titles)
print(f" β
Sections consistent: {sections_consistent}")
consistency_results.append({
"test": "section_listing",
"article": test_article,
"consistent": sections_consistent,
"section_counts": [len(titles) for titles in section_titles]
})
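        # A result counts as consistent when its consistency_score exceeds 0.8
        # (with three checks, that means all three matched) or, for the section
        # test, when its boolean "consistent" flag is True.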
overall_consistency = all(
r.get("consistency_score", 0) > 0.8 or r.get("consistent", False)
for r in consistency_results
)
return {
"test": "data_consistency",
"overall_consistent": overall_consistency,
"results": consistency_results
}
async def test_performance_under_load(self) -> Dict[str, Any]:
"""Test performance characteristics under simulated load."""
print("\nβ‘ Testing Performance Under Load...")
# Test sequential vs concurrent performance
test_queries = [
"Machine Learning",
"Data Science",
"Python programming",
"Web Development",
"Database Design"
]
# Sequential execution
print(" Testing sequential execution...")
sequential_start = time.time()
sequential_results = []
for query in test_queries:
result = await self.server.fetch_wikipedia_info(query)
sequential_results.append(result)
sequential_time = time.time() - sequential_start
sequential_success = sum(1 for r in sequential_results if r["success"])
print(f" Sequential: {sequential_time:.2f}s, {sequential_success}/{len(test_queries)} successful")
# Concurrent execution
print(" Testing concurrent execution...")
concurrent_start = time.time()
tasks = [self.server.fetch_wikipedia_info(query) for query in test_queries]
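        # return_exceptions=True keeps one failed fetch from cancelling the
        # whole batch; exceptions come back as values and are filtered below.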
concurrent_results = await asyncio.gather(*tasks, return_exceptions=True)
concurrent_time = time.time() - concurrent_start
concurrent_success = sum(
1 for r in concurrent_results
if not isinstance(r, Exception) and r.get("success", False)
)
print(f" Concurrent: {concurrent_time:.2f}s, {concurrent_success}/{len(test_queries)} successful")
# Performance improvement ratio
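        # A ratio above 1.0 means the concurrent batch finished faster than
        # the sequential run of the same queries.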
performance_ratio = sequential_time / concurrent_time if concurrent_time > 0 else 1
return {
"test": "performance_under_load",
"sequential_time": sequential_time,
"concurrent_time": concurrent_time,
"performance_ratio": performance_ratio,
"sequential_success_rate": sequential_success / len(test_queries),
"concurrent_success_rate": concurrent_success / len(test_queries),
"performance_improvement": performance_ratio > 1.5 # At least 50% improvement
}
async def run_integration_tests(self) -> Dict[str, Any]:
"""Run all integration tests."""
print("π Starting Integration Tests")
print("="*40)
all_results = {}
try:
# Test 1: Article research workflow
workflow_result = await self.test_article_research_workflow()
all_results["workflow"] = workflow_result
# Test 2: Error recovery
error_result = await self.test_error_recovery()
all_results["error_recovery"] = error_result
# Test 3: Data consistency
consistency_result = await self.test_data_consistency()
all_results["consistency"] = consistency_result
# Test 4: Performance under load
performance_result = await self.test_performance_under_load()
all_results["performance"] = performance_result
# Calculate overall success
success_criteria = [
workflow_result["success_rate"] > 0.7,
error_result["success_rate"] > 0.8,
consistency_result["overall_consistent"],
performance_result["performance_improvement"]
]
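            # Overall success requires at least 3 of the 4 criteria to pass.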
all_results["overall_success"] = sum(success_criteria) >= 3
all_results["success_criteria_met"] = sum(success_criteria)
all_results["total_criteria"] = len(success_criteria)
except Exception as e:
print(f"\nβ Integration tests failed with exception: {e}")
all_results["overall_success"] = False
all_results["exception"] = str(e)
return all_results
def print_integration_summary(self, results: Dict[str, Any]):
"""Print summary of integration test results."""
print("\n" + "="*40)
print("π INTEGRATION TESTS SUMMARY")
print("="*40)
if "workflow" in results:
wf = results["workflow"]
print(f"π Article Research Workflow:")
print(f" Success Rate: {wf['success_rate']:.1%}")
print(f" Average Time: {wf['average_time']:.2f}s")
if "error_recovery" in results:
er = results["error_recovery"]
print(f"π¨ Error Recovery:")
print(f" Scenarios Passed: {er['passed_scenarios']}/{er['total_scenarios']}")
print(f" Success Rate: {er['success_rate']:.1%}")
if "consistency" in results:
cons = results["consistency"]
print(f"π Data Consistency:")
print(f" Overall Consistent: {'β
' if cons['overall_consistent'] else 'β'}")
if "performance" in results:
perf = results["performance"]
print(f"β‘ Performance Under Load:")
print(f" Performance Ratio: {perf['performance_ratio']:.1f}x")
print(f" Improvement: {'β
' if perf['performance_improvement'] else 'β'}")
overall = results.get("overall_success", False)
criteria_met = results.get("success_criteria_met", 0)
total_criteria = results.get("total_criteria", 4)
print(f"\nπ― Overall Success: {'β
' if overall else 'β'}")
print(f" Criteria Met: {criteria_met}/{total_criteria}")
if overall:
print("\nπ All integration tests passed successfully!")
else:
print("\nβ οΈ Some integration tests need attention.")
async def main():
    """Main entry point: run the integration test suite and print a summary."""
    tester = IntegrationTester()
    results = await tester.run_integration_tests()
    tester.print_integration_summary(results)
    return results
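# Optional pytest entry point (a sketch, not wired into this suite; assumes
# pytest and pytest-asyncio are installed, which running this file directly
# does not require):
#
#     import pytest
#
#     @pytest.mark.asyncio
#     async def test_integration_suite():
#         results = await main()
#         assert results.get("overall_success", False)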
if __name__ == "__main__":
    # Run integration tests when this script is executed directly
    asyncio.run(main())