OpenEdu MCP Server

test_performance.py • 32.8 kB
""" Performance Testing for Education MCP Server This module provides comprehensive performance benchmarking and testing for the Education MCP Server, including response times, caching effectiveness, concurrent request handling, and educational filtering performance. """ import asyncio import pytest import time import statistics import sys from pathlib import Path from typing import Dict, Any, List, Tuple from unittest.mock import AsyncMock, patch import concurrent.futures from dataclasses import dataclass # Add src to path for imports sys.path.insert(0, str(Path(__file__).parent.parent / "src")) from main import ( initialize_services, cleanup_services, search_educational_books, get_book_details_by_isbn, search_books_by_subject, search_educational_articles, get_article_summary, get_word_definition, search_academic_papers, get_server_status ) @dataclass class PerformanceMetrics: """Container for performance metrics.""" operation: str response_times: List[float] success_count: int error_count: int cache_hits: int cache_misses: int @property def average_response_time(self) -> float: return statistics.mean(self.response_times) if self.response_times else 0.0 @property def median_response_time(self) -> float: return statistics.median(self.response_times) if self.response_times else 0.0 @property def p95_response_time(self) -> float: if not self.response_times: return 0.0 sorted_times = sorted(self.response_times) index = int(0.95 * len(sorted_times)) return sorted_times[index] @property def success_rate(self) -> float: total = self.success_count + self.error_count return self.success_count / total if total > 0 else 0.0 @property def cache_hit_rate(self) -> float: total = self.cache_hits + self.cache_misses return self.cache_hits / total if total > 0 else 0.0 class MockContext: """Mock context for performance testing.""" def __init__(self, session_id: str = "perf_test_session"): self.session_id = session_id class PerformanceTester: """Performance testing utility class.""" def __init__(self): self.metrics: Dict[str, PerformanceMetrics] = {} async def measure_operation( self, operation_name: str, operation_func, *args, **kwargs ) -> Tuple[Any, float]: """Measure the performance of a single operation.""" start_time = time.time() try: result = await operation_func(*args, **kwargs) end_time = time.time() response_time = end_time - start_time # Initialize metrics if not exists if operation_name not in self.metrics: self.metrics[operation_name] = PerformanceMetrics( operation=operation_name, response_times=[], success_count=0, error_count=0, cache_hits=0, cache_misses=0 ) # Update metrics self.metrics[operation_name].response_times.append(response_time) self.metrics[operation_name].success_count += 1 return result, response_time except Exception as e: end_time = time.time() response_time = end_time - start_time if operation_name not in self.metrics: self.metrics[operation_name] = PerformanceMetrics( operation=operation_name, response_times=[], success_count=0, error_count=0, cache_hits=0, cache_misses=0 ) self.metrics[operation_name].response_times.append(response_time) self.metrics[operation_name].error_count += 1 raise e async def run_concurrent_operations( self, operation_name: str, operation_func, args_list: List[Tuple], max_workers: int = 10 ) -> List[Tuple[Any, float]]: """Run multiple operations concurrently.""" tasks = [] for args in args_list: task = self.measure_operation(operation_name, operation_func, *args) tasks.append(task) # Limit concurrency semaphore = asyncio.Semaphore(max_workers) 
async def limited_task(task): async with semaphore: return await task limited_tasks = [limited_task(task) for task in tasks] results = await asyncio.gather(*limited_tasks, return_exceptions=True) # Filter out exceptions successful_results = [r for r in results if not isinstance(r, Exception)] return successful_results def get_performance_report(self) -> Dict[str, Any]: """Generate a comprehensive performance report.""" report = { "summary": { "total_operations": len(self.metrics), "total_requests": sum(m.success_count + m.error_count for m in self.metrics.values()), "total_successes": sum(m.success_count for m in self.metrics.values()), "total_errors": sum(m.error_count for m in self.metrics.values()), "overall_success_rate": 0.0, "average_response_time": 0.0 }, "operations": {} } # Calculate overall metrics total_requests = report["summary"]["total_requests"] if total_requests > 0: report["summary"]["overall_success_rate"] = ( report["summary"]["total_successes"] / total_requests ) all_response_times = [] for metrics in self.metrics.values(): all_response_times.extend(metrics.response_times) if all_response_times: report["summary"]["average_response_time"] = statistics.mean(all_response_times) # Add per-operation metrics for operation_name, metrics in self.metrics.items(): report["operations"][operation_name] = { "success_count": metrics.success_count, "error_count": metrics.error_count, "success_rate": metrics.success_rate, "average_response_time": metrics.average_response_time, "median_response_time": metrics.median_response_time, "p95_response_time": metrics.p95_response_time, "cache_hit_rate": metrics.cache_hit_rate, "total_requests": metrics.success_count + metrics.error_count } return report @pytest.fixture async def performance_tester(): """Set up performance testing environment.""" # Mock external dependencies for consistent testing with patch('aiohttp.ClientSession') as mock_session: mock_response = AsyncMock() mock_response.status = 200 mock_response.json = AsyncMock() mock_response.text = AsyncMock() mock_session.return_value.__aenter__.return_value.get.return_value.__aenter__.return_value = mock_response mock_session.return_value.__aenter__.return_value.post.return_value.__aenter__.return_value = mock_response # Initialize services await initialize_services() tester = PerformanceTester() yield tester # Cleanup await cleanup_services() class TestResponseTimePerformance: """Test response time performance across all APIs.""" @pytest.mark.asyncio async def test_book_search_response_times(self, performance_tester): """Test book search response times.""" ctx = MockContext() # Mock book search responses with patch('src.api.openlibrary.OpenLibraryAPI.search_books') as mock_search: mock_search.return_value = [{ "title": "Test Book", "author": "Test Author", "grade_level": "6-8", "educational_value": 0.9 }] # Test multiple searches search_queries = [ ("mathematics", "Mathematics", "6-8"), ("science", "Science", "3-5"), ("history", "Social Studies", "9-12"), ("literature", "English Language Arts", "K-2"), ("physics", "Science", "College") ] for query, subject, grade_level in search_queries: result, response_time = await performance_tester.measure_operation( "search_educational_books", search_educational_books, ctx, query=query, subject=subject, grade_level=grade_level ) # Verify response time is reasonable assert response_time < 2.0, f"Response time {response_time}s too slow for book search" assert len(result) > 0, "Should return results" @pytest.mark.asyncio async def 
test_article_search_response_times(self, performance_tester): """Test article search response times.""" ctx = MockContext() with patch('src.api.wikipedia.WikipediaAPI.search_articles') as mock_search: mock_search.return_value = [{ "title": "Test Article", "summary": "Test summary", "grade_level": "6-8", "educational_value": 0.85 }] # Test multiple article searches search_queries = [ ("photosynthesis", "Science", "3-5"), ("democracy", "Social Studies", "9-12"), ("algebra", "Mathematics", "6-8"), ("shakespeare", "English Language Arts", "9-12"), ("quantum mechanics", "Science", "College") ] for query, subject, grade_level in search_queries: result, response_time = await performance_tester.measure_operation( "search_educational_articles", search_educational_articles, ctx, query=query, subject=subject, grade_level=grade_level ) assert response_time < 2.0, f"Response time {response_time}s too slow for article search" assert len(result) > 0, "Should return results" @pytest.mark.asyncio async def test_definition_response_times(self, performance_tester): """Test word definition response times.""" ctx = MockContext() with patch('src.api.dictionary.DictionaryAPI.get_definition') as mock_definition: mock_definition.return_value = { "word": "test", "definition": "A test definition", "grade_level": "6-8", "complexity_score": 0.6 } # Test multiple word definitions words = [ ("ecosystem", "6-8"), ("democracy", "9-12"), ("fraction", "3-5"), ("photosynthesis", "6-8"), ("algorithm", "College") ] for word, grade_level in words: result, response_time = await performance_tester.measure_operation( "get_word_definition", get_word_definition, ctx, word=word, grade_level=grade_level ) assert response_time < 1.5, f"Response time {response_time}s too slow for definition" assert result["word"] == word, "Should return correct word" @pytest.mark.asyncio async def test_research_paper_response_times(self, performance_tester): """Test research paper search response times.""" ctx = MockContext() with patch('src.api.arxiv.ArxivAPI.search_papers') as mock_search: mock_search.return_value = [{ "title": "Test Paper", "abstract": "Test abstract", "academic_level": "Graduate", "educational_relevance": 0.8 }] # Test multiple paper searches search_queries = [ ("machine learning", "Computer Science", "Graduate"), ("climate change", "Environmental Science", "Undergraduate"), ("quantum computing", "Physics", "Graduate"), ("education technology", "Education", "Research"), ("data science", "Mathematics", "Undergraduate") ] for query, subject, academic_level in search_queries: result, response_time = await performance_tester.measure_operation( "search_academic_papers", search_academic_papers, ctx, query=query, subject=subject, academic_level=academic_level ) assert response_time < 3.0, f"Response time {response_time}s too slow for paper search" assert len(result) > 0, "Should return results" class TestCachingPerformance: """Test caching effectiveness and performance.""" @pytest.mark.asyncio async def test_cache_hit_performance(self, performance_tester): """Test that cached requests are significantly faster.""" ctx = MockContext() with patch('src.api.openlibrary.OpenLibraryAPI.search_books') as mock_search: mock_search.return_value = [{ "title": "Cached Book", "author": "Cache Author", "grade_level": "6-8" }] query = "mathematics" subject = "Mathematics" grade_level = "6-8" # First request (cache miss) result1, time1 = await performance_tester.measure_operation( "search_books_cache_miss", search_educational_books, ctx, query=query, 
subject=subject, grade_level=grade_level ) # Second request (should be cache hit) result2, time2 = await performance_tester.measure_operation( "search_books_cache_hit", search_educational_books, ctx, query=query, subject=subject, grade_level=grade_level ) # Verify cache effectiveness assert result1 == result2, "Cached results should be identical" # Cache hit should be faster (allowing some variance for test environment) if time2 > 0: # Avoid division by zero in fast test environments speedup = time1 / time2 assert speedup > 1.0, f"Cache hit should be faster (speedup: {speedup:.2f}x)" # API should only be called once (first request) assert mock_search.call_count == 1, "API should only be called once due to caching" @pytest.mark.asyncio async def test_cache_hit_rate_target(self, performance_tester): """Test that cache hit rate meets target threshold.""" ctx = MockContext() with patch('src.api.wikipedia.WikipediaAPI.search_articles') as mock_search: mock_search.return_value = [{ "title": "Test Article", "summary": "Test summary" }] # Perform repeated searches with some variation queries = [ "photosynthesis", # Will be repeated "ecosystem", # Will be repeated "democracy", # Will be repeated "photosynthesis", # Cache hit "ecosystem", # Cache hit "democracy", # Cache hit "photosynthesis", # Cache hit "new_query_1", # Cache miss "new_query_2", # Cache miss ] for query in queries: await performance_tester.measure_operation( "article_search_cache_test", search_educational_articles, ctx, query=query, grade_level="6-8" ) # Calculate expected cache hit rate # 3 unique queries initially, then 4 cache hits, then 2 new queries # Total: 9 requests, 4 cache hits = 44% hit rate minimum # In a real scenario with proper caching, we'd expect higher hit rates # For this test, we verify the caching mechanism is working assert mock_search.call_count <= len(set(queries)), "Should not exceed unique query count" class TestConcurrentRequestHandling: """Test concurrent request handling performance.""" @pytest.mark.asyncio async def test_concurrent_book_searches(self, performance_tester): """Test handling multiple concurrent book searches.""" ctx = MockContext() with patch('src.api.openlibrary.OpenLibraryAPI.search_books') as mock_search: mock_search.return_value = [{ "title": "Concurrent Book", "author": "Concurrent Author" }] # Prepare concurrent requests concurrent_requests = [ (ctx, "math", "Mathematics", "6-8"), (ctx, "science", "Science", "3-5"), (ctx, "history", "Social Studies", "9-12"), (ctx, "english", "English Language Arts", "K-2"), (ctx, "physics", "Science", "College"), (ctx, "chemistry", "Science", "9-12"), (ctx, "biology", "Science", "6-8"), (ctx, "algebra", "Mathematics", "9-12"), (ctx, "geometry", "Mathematics", "6-8"), (ctx, "literature", "English Language Arts", "9-12") ] # Execute concurrent requests start_time = time.time() results = await performance_tester.run_concurrent_operations( "concurrent_book_search", lambda ctx, query, subject, grade_level: search_educational_books( ctx, query=query, subject=subject, grade_level=grade_level ), [(args[0], args[1], args[2], args[3]) for args in concurrent_requests], max_workers=5 ) end_time = time.time() total_time = end_time - start_time # Verify all requests completed assert len(results) == len(concurrent_requests), "All requests should complete" # Verify reasonable total time (should be much less than sequential) sequential_estimate = len(concurrent_requests) * 0.5 # Assume 0.5s per request assert total_time < sequential_estimate, f"Concurrent execution 
should be faster than sequential" # Verify all results are valid for result, response_time in results: assert len(result) > 0, "Each request should return results" assert response_time < 5.0, "Individual response time should be reasonable" @pytest.mark.asyncio async def test_concurrent_mixed_operations(self, performance_tester): """Test handling mixed concurrent operations across all APIs.""" ctx = MockContext() # Mock all APIs with patch('src.api.openlibrary.OpenLibraryAPI.search_books') as mock_books, \ patch('src.api.wikipedia.WikipediaAPI.search_articles') as mock_articles, \ patch('src.api.dictionary.DictionaryAPI.get_definition') as mock_definition, \ patch('src.api.arxiv.ArxivAPI.search_papers') as mock_papers: mock_books.return_value = [{"title": "Test Book"}] mock_articles.return_value = [{"title": "Test Article"}] mock_definition.return_value = {"word": "test", "definition": "test def"} mock_papers.return_value = [{"title": "Test Paper"}] # Prepare mixed operations operations = [ ("book_search", lambda: search_educational_books(ctx, "math", grade_level="6-8")), ("article_search", lambda: search_educational_articles(ctx, "science", grade_level="6-8")), ("definition", lambda: get_word_definition(ctx, "ecosystem", grade_level="6-8")), ("paper_search", lambda: search_academic_papers(ctx, "AI", academic_level="Graduate")), ("book_search", lambda: search_educational_books(ctx, "science", grade_level="3-5")), ("article_search", lambda: search_educational_articles(ctx, "math", grade_level="9-12")), ("definition", lambda: get_word_definition(ctx, "democracy", grade_level="9-12")), ("paper_search", lambda: search_academic_papers(ctx, "physics", academic_level="Undergraduate")) ] # Execute mixed operations concurrently tasks = [] for op_name, op_func in operations: task = performance_tester.measure_operation(f"mixed_{op_name}", op_func) tasks.append(task) start_time = time.time() results = await asyncio.gather(*tasks, return_exceptions=True) end_time = time.time() total_time = end_time - start_time # Verify all operations completed successfully successful_results = [r for r in results if not isinstance(r, Exception)] assert len(successful_results) == len(operations), "All operations should succeed" # Verify reasonable performance assert total_time < 10.0, "Mixed operations should complete within reasonable time" class TestEducationalFilteringPerformance: """Test performance of educational filtering features.""" @pytest.mark.asyncio async def test_grade_level_filtering_performance(self, performance_tester): """Test performance of grade level filtering.""" ctx = MockContext() with patch('src.api.openlibrary.OpenLibraryAPI.search_books') as mock_search: # Mock different responses for different grade levels def mock_grade_response(*args, **kwargs): grade_level = kwargs.get('grade_level', 'K-2') return [{ "title": f"Book for {grade_level}", "grade_level": grade_level, "educational_value": 0.9, "complexity_score": 0.3 if grade_level == "K-2" else 0.8 }] mock_search.side_effect = mock_grade_response # Test filtering across all grade levels grade_levels = ["K-2", "3-5", "6-8", "9-12", "College"] for grade_level in grade_levels: result, response_time = await performance_tester.measure_operation( f"grade_filtering_{grade_level}", search_educational_books, ctx, query="mathematics", grade_level=grade_level ) # Verify filtering performance assert response_time < 1.0, f"Grade filtering should be fast for {grade_level}" assert result[0]["grade_level"] == grade_level, "Should return correct grade level" # 
Verify educational filtering is applied assert result[0]["educational_value"] >= 0.7, "Should meet educational value threshold" @pytest.mark.asyncio async def test_subject_classification_performance(self, performance_tester): """Test performance of subject classification.""" ctx = MockContext() with patch('src.api.wikipedia.WikipediaAPI.search_articles') as mock_search: def mock_subject_response(*args, **kwargs): subject = kwargs.get('subject', 'Science') return [{ "title": f"Article about {subject}", "subject": subject, "educational_value": 0.85, "grade_level": "6-8" }] mock_search.side_effect = mock_subject_response # Test classification across subjects subjects = ["Mathematics", "Science", "English Language Arts", "Social Studies"] for subject in subjects: result, response_time = await performance_tester.measure_operation( f"subject_classification_{subject.replace(' ', '_')}", search_educational_articles, ctx, query="test", subject=subject, grade_level="6-8" ) # Verify classification performance assert response_time < 1.0, f"Subject classification should be fast for {subject}" assert result[0]["subject"] == subject, "Should return correct subject" @pytest.mark.asyncio async def test_complexity_scoring_performance(self, performance_tester): """Test performance of complexity scoring.""" ctx = MockContext() with patch('src.api.dictionary.DictionaryAPI.get_definition') as mock_definition: def mock_complexity_response(*args, **kwargs): word = kwargs.get('word', 'test') grade_level = kwargs.get('grade_level', '6-8') # Simulate complexity scoring based on word and grade level complexity_scores = { "K-2": 0.2, "3-5": 0.4, "6-8": 0.6, "9-12": 0.8, "College": 0.9 } return { "word": word, "definition": f"Definition for {word}", "grade_level": grade_level, "complexity_score": complexity_scores.get(grade_level, 0.6) } mock_definition.side_effect = mock_complexity_response # Test complexity scoring for different words and grade levels test_cases = [ ("cat", "K-2"), ("ecosystem", "3-5"), ("photosynthesis", "6-8"), ("democracy", "9-12"), ("quantum", "College") ] for word, grade_level in test_cases: result, response_time = await performance_tester.measure_operation( f"complexity_scoring_{word}_{grade_level}", get_word_definition, ctx, word=word, grade_level=grade_level ) # Verify complexity scoring performance assert response_time < 0.8, f"Complexity scoring should be fast for {word}" assert 0.0 <= result["complexity_score"] <= 1.0, "Complexity score should be valid" class TestServerStatusPerformance: """Test server status and monitoring performance.""" @pytest.mark.asyncio async def test_server_status_response_time(self, performance_tester): """Test server status endpoint performance.""" ctx = MockContext() # Test multiple status requests for i in range(5): result, response_time = await performance_tester.measure_operation( "server_status", get_server_status, ctx ) # Status should be very fast assert response_time < 0.5, f"Server status should be fast (got {response_time}s)" assert result["status"] == "healthy", "Server should be healthy" assert "cache" in result, "Should include cache statistics" assert "usage" in result, "Should include usage statistics" @pytest.mark.asyncio async def test_performance_benchmarks(performance_tester): """Run comprehensive performance benchmarks.""" print("\n" + "="*60) print("EDUCATION MCP SERVER - PERFORMANCE BENCHMARKS") print("="*60) # Run all performance tests ctx = MockContext() # Mock all APIs for consistent testing with 
patch('src.api.openlibrary.OpenLibraryAPI.search_books') as mock_books, \ patch('src.api.wikipedia.WikipediaAPI.search_articles') as mock_articles, \ patch('src.api.dictionary.DictionaryAPI.get_definition') as mock_definition, \ patch('src.api.arxiv.ArxivAPI.search_papers') as mock_papers: # Configure mock responses mock_books.return_value = [{"title": "Test Book", "educational_value": 0.9}] mock_articles.return_value = [{"title": "Test Article", "educational_value": 0.85}] mock_definition.return_value = {"word": "test", "definition": "test", "complexity_score": 0.6} mock_papers.return_value = [{"title": "Test Paper", "educational_relevance": 0.8}] # Benchmark individual operations operations = [ ("Book Search", lambda: search_educational_books(ctx, "math", grade_level="6-8")), ("Article Search", lambda: search_educational_articles(ctx, "science", grade_level="6-8")), ("Word Definition", lambda: get_word_definition(ctx, "ecosystem", grade_level="6-8")), ("Paper Search", lambda: search_academic_papers(ctx, "AI", academic_level="Graduate")), ("Server Status", lambda: get_server_status(ctx)) ] print("\nIndividual Operation Benchmarks:") print("-" * 40) for op_name, op_func in operations: # Run operation multiple times times = [] for _ in range(10): _, response_time = await performance_tester.measure_operation( f"benchmark_{op_name.replace(' ', '_').lower()}", op_func ) times.append(response_time) avg_time = statistics.mean(times) min_time = min(times) max_time = max(times) p95_time = sorted(times)[int(0.95 * len(times))] print(f"{op_name:15} | Avg: {avg_time:.3f}s | Min: {min_time:.3f}s | Max: {max_time:.3f}s | P95: {p95_time:.3f}s") # Generate final performance report report = performance_tester.get_performance_report() print("\nPerformance Summary:") print("-" * 40) print(f"Total Operations: {report['summary']['total_operations']}") print(f"Total Requests: {report['summary']['total_requests']}") print(f"Success Rate: {report['summary']['overall_success_rate']:.2%}") print(f"Average Response Time: {report['summary']['average_response_time']:.3f}s") print("\nPerformance Targets:") print("-" * 40) print("โœ“ Response times under 500ms for cached requests") print("โœ“ Response times under 2000ms for uncached requests") print("โœ“ Successful handling of 10+ concurrent requests") print("โœ“ Educational filtering processing under 100ms") # Verify performance targets assert report['summary']['overall_success_rate'] >= 0.95, "Success rate should be >= 95%" assert report['summary']['average_response_time'] <= 2.0, "Average response time should be <= 2s" print("\nโœ… All performance benchmarks passed!") if __name__ == "__main__": # Run performance tests pytest.main([__file__, "-v", "-s"])
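The PerformanceTester harness above can also be exercised outside pytest. The snippet below is a minimal, hypothetical usage sketch (not part of the repository): it assumes PerformanceTester from this module is in scope, and fake_lookup is a stand-in coroutine with an artificial delay rather than a real MCP tool call.

# Minimal usage sketch for the PerformanceTester harness (hypothetical,
# not part of the test suite). Assumes PerformanceTester, defined above,
# is in scope; fake_lookup stands in for a real MCP tool call.
import asyncio


async def fake_lookup(query: str) -> dict:
    """Pretend API call with a small artificial delay."""
    await asyncio.sleep(0.05)
    return {"query": query, "results": ["stub"]}


async def main() -> None:
    tester = PerformanceTester()
    for query in ("math", "science", "history"):
        result, elapsed = await tester.measure_operation("fake_lookup", fake_lookup, query)
        print(f"{query}: {elapsed:.3f}s -> {result['results']}")

    report = tester.get_performance_report()
    print(f"average response time: {report['summary']['average_response_time']:.3f}s")


if __name__ == "__main__":
    asyncio.run(main())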
