"""
Integration tests for MCP Wikipedia Server.
This module contains end-to-end integration tests that verify the complete
functionality of the Wikipedia server in realistic usage scenarios.
"""
import asyncio
import time
import sys
import os
from typing import Any, Dict
# Add src to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
try:
from mcp_server.mcp_server import WikipediaServer
from mcp_server.mcp_client import WikipediaClient
except ImportError:
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src', 'mcp_server'))
from mcp_server import WikipediaServer
try:
from mcp_client import WikipediaClient
except ImportError:
WikipediaClient = None
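# Note: WikipediaClient is imported for environments that also exercise the
# client side, but the tests below drive WikipediaServer directly; it may be
# None here without affecting this suite.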
class IntegrationTester:
"""Integration testing utilities for the Wikipedia server."""
def __init__(self):
self.server = WikipediaServer()
self.test_results = []
self.test_data = {
"articles": [
"Python (programming language)",
"Machine Learning",
"Artificial Intelligence",
"JavaScript",
"Data Science"
],
"search_queries": [
"Python programming",
"machine learning algorithms",
"AI ethics",
"web development JavaScript",
"data analysis techniques"
],
"known_sections": {
"Python (programming language)": ["History", "Features", "Syntax", "Libraries"],
"Machine Learning": ["Overview", "History", "Types", "Applications"],
"Artificial Intelligence": ["History", "Goals", "Approaches", "Applications"]
}
}
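        # Note: the "articles" and "known_sections" entries are reference
        # fixtures; the current tests only exercise "search_queries" plus
        # hardcoded article titles.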
async def test_article_research_workflow(self) -> Dict[str, Any]:
"""Test complete article research workflow."""
print("\nπ Testing Complete Article Research Workflow...")
workflow_results = []
for query in self.test_data["search_queries"][:3]: # Test first 3 queries
print(f"\n Researching: '{query}'")
workflow_start = time.time()
workflow_success = True
workflow_steps = []
# Step 1: Search for article
print(" Step 1: Searching for article...")
search_start = time.time()
search_result = await self.server.fetch_wikipedia_info(query)
search_time = time.time() - search_start
if search_result["success"]:
article_title = search_result["data"]["title"]
print(f" β
Found article: '{article_title}' ({search_time:.2f}s)")
workflow_steps.append({
"step": "search",
"success": True,
"time": search_time,
"result": article_title
})
# Step 2: Get article sections
print(" Step 2: Listing article sections...")
sections_start = time.time()
sections_result = await self.server.list_wikipedia_sections(article_title)
sections_time = time.time() - sections_start
if sections_result["success"]:
sections = sections_result["data"]["sections"]
print(f" β
Found {len(sections)} sections ({sections_time:.2f}s)")
workflow_steps.append({
"step": "sections",
"success": True,
"time": sections_time,
"result": len(sections)
})
# Step 3: Get content from first few sections
print(" Step 3: Retrieving section content...")
content_results = []
for section in sections[:2]: # Get first 2 sections
content_start = time.time()
content_result = await self.server.get_section_content(
article_title, section["title"]
)
content_time = time.time() - content_start
if content_result["success"]:
content_length = len(content_result["data"]["content"])
print(f" β
'{section['title']}': {content_length} chars ({content_time:.2f}s)")
content_results.append({
"section": section["title"],
"success": True,
"time": content_time,
"content_length": content_length
})
else:
print(f" β '{section['title']}': {content_result['error']} ({content_time:.2f}s)")
content_results.append({
"section": section["title"],
"success": False,
"time": content_time,
"error": content_result["error"]
})
workflow_steps.append({
"step": "content",
"success": len(content_results) > 0,
"time": sum(r["time"] for r in content_results),
"result": content_results
})
else:
print(f" β Failed to get sections: {sections_result['error']}")
workflow_success = False
workflow_steps.append({
"step": "sections",
"success": False,
"time": sections_time,
"error": sections_result["error"]
})
else:
print(f" β Search failed: {search_result['error']}")
workflow_success = False
workflow_steps.append({
"step": "search",
"success": False,
"time": search_time,
"error": search_result["error"]
})
workflow_total_time = time.time() - workflow_start
workflow_results.append({
"query": query,
"success": workflow_success,
"total_time": workflow_total_time,
"steps": workflow_steps
})
# Calculate summary statistics
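        # average_time is computed over successful workflows only, so failed
        # runs do not skew the timing baseline.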
successful_workflows = [r for r in workflow_results if r["success"]]
success_rate = len(successful_workflows) / len(workflow_results)
avg_time = sum(r["total_time"] for r in successful_workflows) / len(successful_workflows) if successful_workflows else 0
return {
"test": "article_research_workflow",
"total_workflows": len(workflow_results),
"successful_workflows": len(successful_workflows),
"success_rate": success_rate,
"average_time": avg_time,
"workflows": workflow_results
}
async def test_error_recovery(self) -> Dict[str, Any]:
"""Test error handling and recovery scenarios."""
print("\nπ¨ Testing Error Recovery Scenarios...")
error_scenarios = [
{
"name": "Non-existent article",
"test": lambda: self.server.fetch_wikipedia_info("ThisArticleDoesNotExist12345"),
"expect_success": False
},
{
"name": "Empty search query",
"test": lambda: self.server.fetch_wikipedia_info(""),
"expect_success": False
},
{
"name": "Non-existent section",
"test": lambda: self.server.get_section_content("Python", "NonExistentSection"),
"expect_success": False
},
{
"name": "Sections from non-existent article",
"test": lambda: self.server.list_wikipedia_sections("NonExistentArticle12345"),
"expect_success": False
},
{
"name": "Very long query",
"test": lambda: self.server.fetch_wikipedia_info("x" * 1000),
"expect_success": None # Could succeed or fail gracefully
}
]
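        # The checks below assume each server call resolves to a dict shaped
        # like {"success": bool, "data": ..., "error": str, "metadata": ...};
        # that response contract is what the rest of this suite relies on.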
recovery_results = []
for scenario in error_scenarios:
print(f" Testing: {scenario['name']}")
try:
start_time = time.time()
result = await scenario["test"]()
test_time = time.time() - start_time
success = result.get("success", False)
has_error_info = "error" in result
has_metadata = "metadata" in result
# Check if result meets expectations
expectation_met = True
if scenario["expect_success"] is not None:
expectation_met = (success == scenario["expect_success"])
# For failed requests, ensure proper error handling
proper_error_handling = True
if not success:
proper_error_handling = has_error_info and has_metadata
test_passed = expectation_met and proper_error_handling
print(f" {'β
' if test_passed else 'β'} Result: success={success}, proper_error={proper_error_handling}")
recovery_results.append({
"scenario": scenario["name"],
"success": success,
"test_passed": test_passed,
"time": test_time,
"has_error_info": has_error_info,
"has_metadata": has_metadata,
"error": result.get("error") if not success else None
})
except Exception as e:
print(f" β Exception: {e}")
recovery_results.append({
"scenario": scenario["name"],
"success": False,
"test_passed": False,
"time": 0,
"exception": str(e)
})
passed_tests = [r for r in recovery_results if r["test_passed"]]
return {
"test": "error_recovery",
"total_scenarios": len(error_scenarios),
"passed_scenarios": len(passed_tests),
"success_rate": len(passed_tests) / len(error_scenarios),
"scenarios": recovery_results
}
async def test_data_consistency(self) -> Dict[str, Any]:
"""Test data consistency across multiple requests."""
print("\nπ Testing Data Consistency...")
consistency_results = []
# Test same article multiple times
test_article = "Python (programming language)"
print(f" Testing consistency for: '{test_article}'")
# Make multiple requests for the same article
requests = []
        for _ in range(3):
result = await self.server.fetch_wikipedia_info(test_article)
requests.append(result)
# Check consistency
if all(r["success"] for r in requests):
titles = [r["data"]["title"] for r in requests]
urls = [r["data"]["url"] for r in requests]
page_ids = [r["data"]["page_id"] for r in requests]
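            # Identical values collapse to a single-element set, so a set of
            # size 1 means every request returned the same value for that field.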
title_consistent = len(set(titles)) == 1
url_consistent = len(set(urls)) == 1
page_id_consistent = len(set(page_ids)) == 1
consistency_score = sum([title_consistent, url_consistent, page_id_consistent]) / 3
print(f" β
Title consistent: {title_consistent}")
print(f" β
URL consistent: {url_consistent}")
print(f" β
Page ID consistent: {page_id_consistent}")
consistency_results.append({
"article": test_article,
"requests": len(requests),
"all_successful": True,
"consistency_score": consistency_score,
"details": {
"title_consistent": title_consistent,
"url_consistent": url_consistent,
"page_id_consistent": page_id_consistent
}
})
else:
failed_requests = [i for i, r in enumerate(requests) if not r["success"]]
print(f" β {len(failed_requests)} requests failed")
consistency_results.append({
"article": test_article,
"requests": len(requests),
"all_successful": False,
"failed_requests": failed_requests,
"consistency_score": 0
})
# Test section listing consistency
print(f" Testing section listing consistency...")
section_requests = []
        for _ in range(2):
result = await self.server.list_wikipedia_sections(test_article)
section_requests.append(result)
if all(r["success"] for r in section_requests):
section_lists = [r["data"]["sections"] for r in section_requests]
section_titles = [[s["title"] for s in sections] for sections in section_lists]
sections_consistent = all(titles == section_titles[0] for titles in section_titles)
print(f" β
Sections consistent: {sections_consistent}")
consistency_results.append({
"test": "section_listing",
"article": test_article,
"consistent": sections_consistent,
"section_counts": [len(titles) for titles in section_titles]
})
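        # A result counts as consistent when its consistency_score exceeds 0.8
        # (with three checks, that means all three matched) or, for the section
        # test, when its boolean "consistent" flag is True.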
overall_consistency = all(
r.get("consistency_score", 0) > 0.8 or r.get("consistent", False)
for r in consistency_results
)
return {
"test": "data_consistency",
"overall_consistent": overall_consistency,
"results": consistency_results
}
async def test_performance_under_load(self) -> Dict[str, Any]:
"""Test performance characteristics under simulated load."""
print("\nβ‘ Testing Performance Under Load...")
# Test sequential vs concurrent performance
test_queries = [
"Machine Learning",
"Data Science",
"Python programming",
"Web Development",
"Database Design"
]
# Sequential execution
print(" Testing sequential execution...")
sequential_start = time.time()
sequential_results = []
for query in test_queries:
result = await self.server.fetch_wikipedia_info(query)
sequential_results.append(result)
sequential_time = time.time() - sequential_start
sequential_success = sum(1 for r in sequential_results if r["success"])
print(f" Sequential: {sequential_time:.2f}s, {sequential_success}/{len(test_queries)} successful")
# Concurrent execution
print(" Testing concurrent execution...")
concurrent_start = time.time()
tasks = [self.server.fetch_wikipedia_info(query) for query in test_queries]
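        # return_exceptions=True keeps one failed fetch from cancelling the
        # whole batch; exceptions come back as values and are filtered below.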
concurrent_results = await asyncio.gather(*tasks, return_exceptions=True)
concurrent_time = time.time() - concurrent_start
concurrent_success = sum(
1 for r in concurrent_results
if not isinstance(r, Exception) and r.get("success", False)
)
print(f" Concurrent: {concurrent_time:.2f}s, {concurrent_success}/{len(test_queries)} successful")
# Performance improvement ratio
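        # A ratio above 1.0 means the concurrent batch finished faster than
        # the sequential run of the same queries.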
performance_ratio = sequential_time / concurrent_time if concurrent_time > 0 else 1
return {
"test": "performance_under_load",
"sequential_time": sequential_time,
"concurrent_time": concurrent_time,
"performance_ratio": performance_ratio,
"sequential_success_rate": sequential_success / len(test_queries),
"concurrent_success_rate": concurrent_success / len(test_queries),
"performance_improvement": performance_ratio > 1.5 # At least 50% improvement
}
async def run_integration_tests(self) -> Dict[str, Any]:
"""Run all integration tests."""
print("π Starting Integration Tests")
print("="*40)
all_results = {}
try:
# Test 1: Article research workflow
workflow_result = await self.test_article_research_workflow()
all_results["workflow"] = workflow_result
# Test 2: Error recovery
error_result = await self.test_error_recovery()
all_results["error_recovery"] = error_result
# Test 3: Data consistency
consistency_result = await self.test_data_consistency()
all_results["consistency"] = consistency_result
# Test 4: Performance under load
performance_result = await self.test_performance_under_load()
all_results["performance"] = performance_result
# Calculate overall success
success_criteria = [
workflow_result["success_rate"] > 0.7,
error_result["success_rate"] > 0.8,
consistency_result["overall_consistent"],
performance_result["performance_improvement"]
]
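            # Overall success requires at least 3 of the 4 criteria to pass.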
all_results["overall_success"] = sum(success_criteria) >= 3
all_results["success_criteria_met"] = sum(success_criteria)
all_results["total_criteria"] = len(success_criteria)
except Exception as e:
print(f"\nβ Integration tests failed with exception: {e}")
all_results["overall_success"] = False
all_results["exception"] = str(e)
return all_results
def print_integration_summary(self, results: Dict[str, Any]):
"""Print summary of integration test results."""
print("\n" + "="*40)
print("π INTEGRATION TESTS SUMMARY")
print("="*40)
if "workflow" in results:
wf = results["workflow"]
print(f"π Article Research Workflow:")
print(f" Success Rate: {wf['success_rate']:.1%}")
print(f" Average Time: {wf['average_time']:.2f}s")
if "error_recovery" in results:
er = results["error_recovery"]
print(f"π¨ Error Recovery:")
print(f" Scenarios Passed: {er['passed_scenarios']}/{er['total_scenarios']}")
print(f" Success Rate: {er['success_rate']:.1%}")
if "consistency" in results:
cons = results["consistency"]
print(f"π Data Consistency:")
print(f" Overall Consistent: {'β
' if cons['overall_consistent'] else 'β'}")
if "performance" in results:
perf = results["performance"]
print(f"β‘ Performance Under Load:")
print(f" Performance Ratio: {perf['performance_ratio']:.1f}x")
print(f" Improvement: {'β
' if perf['performance_improvement'] else 'β'}")
overall = results.get("overall_success", False)
criteria_met = results.get("success_criteria_met", 0)
total_criteria = results.get("total_criteria", 4)
print(f"\nπ― Overall Success: {'β
' if overall else 'β'}")
print(f" Criteria Met: {criteria_met}/{total_criteria}")
if overall:
print("\nπ All integration tests passed successfully!")
else:
print("\nβ οΈ Some integration tests need attention.")
async def main():
    """Main entry point: run the integration test suite and print a summary."""
    tester = IntegrationTester()
    results = await tester.run_integration_tests()
    tester.print_integration_summary(results)
    return results
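# Optional pytest entry point (a sketch, not wired into this suite; assumes
# pytest and pytest-asyncio are installed, which running this file directly
# does not require):
#
#     import pytest
#
#     @pytest.mark.asyncio
#     async def test_integration_suite():
#         results = await main()
#         assert results.get("overall_success", False)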
if __name__ == "__main__":
    # Run integration tests when this script is executed directly
    asyncio.run(main())