Skip to main content
Glama

Prompt Auto-Optimizer MCP

by sloth-wq
gepa-e2e.test.ts • 22.9 kB
/**
 * GEPA End-to-End Integration Tests
 *
 * Comprehensive E2E test suite covering:
 * - Complete evolution workflows
 * - MCP tool integrations
 * - Performance and load testing
 * - Memory system validation
 * - Concurrent operations
 * - Error handling and recovery
 */

import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import { E2ETestRunner, type E2ETestConfig, type E2ETestResults } from './e2e-test-runner';

/**
 * Performance budgets (milliseconds) shared by the runner configuration and
 * by the assertions below. Kept as its own constant so tests can read the
 * values without non-null assertions on the `Partial` config.
 */
const performanceThresholds = {
  evolutionTime: 30000, // 30 seconds
  trajectoryRecording: 1000, // 1 second
  paretoFrontierQuery: 500, // 500ms
  memoryOperations: 100, // 100ms
};

// Global test configuration
const testConfig: Partial<E2ETestConfig> = {
  maxConcurrentTests: 3,
  defaultTimeout: 60000, // 60 seconds for E2E tests
  performanceThresholds,
  retryOptions: {
    maxRetries: 3,
    baseDelay: 1000,
  },
};

// Global test runner instance; assigned in `beforeAll`, cleared in `afterAll`.
let testRunner: E2ETestRunner | undefined;

/** Environment type exposed by an initialized runner. */
type E2EEnvironment = NonNullable<E2ETestRunner['environment']>;

/**
 * Returns the shared runner.
 *
 * @throws Error when called outside the lifetime established by the suite's
 *   `beforeAll`/`afterAll` hooks.
 */
function requireRunner(): E2ETestRunner {
  if (!testRunner) {
    throw new Error('E2E test runner is not initialized (beforeAll has not run or cleanup already happened)');
  }
  return testRunner;
}

/**
 * Returns the runner's environment, replacing the scattered
 * `testRunner.environment!` assertions with a single guarded access.
 *
 * @throws Error when the runner or its environment is unavailable.
 */
function requireEnvironment(): E2EEnvironment {
  const { environment } = requireRunner();
  if (!environment) {
    throw new Error('E2E test environment is not available on the test runner');
  }
  return environment;
}

/**
 * Test suite setup and configuration
 */
describe('GEPA End-to-End Integration Tests', () => {
  beforeAll(async () => {
    // eslint-disable-next-line no-console
    console.log('🚀 Initializing GEPA E2E test environment...');

    testRunner = new E2ETestRunner(testConfig);
    await testRunner.initialize();

    // eslint-disable-next-line no-console
    console.log('✅ E2E test environment ready');
  }, 120000); // 2 minute timeout for setup

  afterAll(async () => {
    // eslint-disable-next-line no-console
    console.log('🧹 Cleaning up E2E test environment...');

    if (testRunner) {
      await testRunner.cleanup();
      // Drop the reference so any late access fails loudly in requireRunner().
      testRunner = undefined;
    }

    // eslint-disable-next-line no-console
    console.log('✅ E2E test environment cleaned up');
  }, 30000); // 30 second timeout for cleanup

  beforeEach(() => {
    // Reset any test-specific state before each test
    // eslint-disable-next-line no-console
    console.log(`🔄 Preparing for test: ${expect.getState().currentTestName || 'unknown'}`);
  });

  /**
   * Core Workflow Tests
   */
  describe('Core Workflow Integration', () => {
    test('Complete Evolution Cycle - Start to Finish', async () => {
      // eslint-disable-next-line no-console
      console.log('🧬 Testing complete evolution cycle...');

      // This test verifies the entire evolution workflow:
      // 1. Initialize evolution with seed prompt
      // 2. Generate candidate mutations
      // 3. Evaluate candidates across multiple tasks
      // 4. Record execution trajectories
      // 5. Update Pareto frontier with results
      // 6. Select optimal candidate
      // 7. Perform reflection analysis on failures

      const testStartTime = Date.now();

      try {
        const { testHelpers } = requireEnvironment();

        // Step 1: Start evolution process
        const evolutionResult = await testHelpers.callMCPTool('gepa_start_evolution', {
          taskDescription: 'E2E Integration Test - Complete workflow validation',
          seedPrompt: 'You are an AI assistant optimized for comprehensive task execution and analysis.',
          config: {
            populationSize: 10,
            maxGenerations: 3,
            mutationRate: 0.2,
          },
        });

        expect(evolutionResult.success).toBe(true);
        expect(evolutionResult.content[0].text).toContain('Evolution Process Started');

        // Extract evolution ID from response (in real implementation).
        // Here the ID is synthesized locally rather than parsed from the response.
        const evolutionId = `e2e-evolution-${Date.now()}`;

        // Step 2: Record multiple trajectory executions (sequentially, one per candidate)
        const trajectoryResults = [];
        for (let i = 0; i < 5; i++) {
          const trajectoryResult = await testHelpers.callMCPTool('gepa_record_trajectory', {
            promptId: `${evolutionId}-candidate-${i}`,
            taskId: `e2e-task-${i}`,
            executionSteps: [
              {
                stepNumber: 1,
                action: 'initialize',
                timestamp: new Date().toISOString(),
                success: true,
              },
              {
                stepNumber: 2,
                action: 'process',
                timestamp: new Date().toISOString(),
                success: true,
              },
              {
                stepNumber: 3,
                action: 'finalize',
                timestamp: new Date().toISOString(),
                success: true,
              },
            ],
            result: {
              success: Math.random() > 0.2, // 80% success rate
              score: Math.random() * 0.5 + 0.5, // 0.5-1.0
              output: { taskCompleted: true, score: Math.random() },
            },
            metadata: {
              executionTime: Math.random() * 2000 + 500,
              tokenUsage: Math.random() * 1000 + 100,
            },
          });

          expect(trajectoryResult.success).toBe(true);
          trajectoryResults.push(trajectoryResult);
        }

        // Step 3: Evaluate prompt performance
        const evaluationResult = await testHelpers.callMCPTool('gepa_evaluate_prompt', {
          promptId: `${evolutionId}-best-candidate`,
          taskIds: ['e2e-task-1', 'e2e-task-2', 'e2e-task-3'],
          rolloutCount: 3,
          parallel: true,
        });

        expect(evaluationResult.success).toBe(true);
        expect(evaluationResult.content[0].text).toContain('Prompt Evaluation Complete');

        // Step 4: Get Pareto frontier results
        const frontierResult = await testHelpers.callMCPTool('gepa_get_pareto_frontier', {
          minPerformance: 0.6,
          limit: 5,
        });

        expect(frontierResult.success).toBe(true);
        expect(frontierResult.content[0].text).toContain('Pareto Frontier Results');

        // Step 5: Select optimal candidate
        const selectionResult = await testHelpers.callMCPTool('gepa_select_optimal', {
          taskContext: 'E2E integration test context',
          performanceWeight: 0.8,
          diversityWeight: 0.2,
        });

        expect(selectionResult.success).toBe(true);
        expect(selectionResult.content[0].text).toContain('Optimal Candidate Selected');

        // Step 6: Perform reflection analysis.
        // The trajectory IDs are positional placeholders for the first three recordings.
        const reflectionResult = await testHelpers.callMCPTool('gepa_reflect', {
          trajectoryIds: trajectoryResults.slice(0, 3).map((_, i) => `trajectory-${i}`),
          targetPromptId: `${evolutionId}-best-candidate`,
          analysisDepth: 'deep',
        });

        expect(reflectionResult.success).toBe(true);
        expect(reflectionResult.content[0].text).toContain('Reflection Analysis Complete');

        // Verify overall workflow timing
        const totalExecutionTime = Date.now() - testStartTime;
        expect(totalExecutionTime).toBeLessThan(60000); // Should complete within 60 seconds

        // eslint-disable-next-line no-console
        console.log(`✅ Complete evolution cycle completed in ${totalExecutionTime}ms`);
      } catch (error) {
        // Log for context, then rethrow so the test still fails.
        // eslint-disable-next-line no-console
        console.error(`❌ Evolution cycle failed: ${error}`);
        throw error;
      }
    }, 90000); // 90 second timeout

    test('Trajectory Recording and Analysis Workflow', async () => {
      // eslint-disable-next-line no-console
      console.log('📊 Testing trajectory recording and analysis...');

      const env = requireEnvironment();

      // Create test scenario for trajectory analysis
      const scenario = await env.scenarios.createCompleteEvolutionScenario();
      const result = await env.testHelpers.executeScenario(scenario);

      expect(result.success).toBe(true);
      expect(result.trajectories).toBeDefined();

      // A missing list degrades to an empty one so the length assertion reports the failure.
      const trajectories = result.trajectories ?? [];
      expect(trajectories.length).toBeGreaterThan(0);

      // Verify trajectory data integrity
      for (const trajectory of trajectories) {
        expect(trajectory.id).toBeDefined();
        expect(trajectory.promptId).toBeDefined();
        expect(trajectory.taskId).toBeDefined();
        expect(trajectory.steps.length).toBeGreaterThan(0);
        expect(trajectory.finalResult).toBeDefined();
        expect(typeof trajectory.finalResult.score).toBe('number');
      }

      // eslint-disable-next-line no-console
      console.log(`✅ Trajectory analysis completed for ${trajectories.length} trajectories`);
    }, 45000);

    test('Pareto Frontier Optimization Workflow', async () => {
      // eslint-disable-next-line no-console
      console.log('🎯 Testing Pareto frontier optimization...');

      const env = requireEnvironment();

      // Create optimization candidates
      const candidates = await env.scenarios.createOptimizationCandidates();
      expect(candidates.length).toBeGreaterThan(0);

      // Add candidates to frontier
      for (const candidate of candidates) {
        await env.paretoFrontier.addCandidate(candidate);
      }

      // Get frontier results
      const frontier = env.paretoFrontier.getFrontier();
      expect(frontier.length).toBeGreaterThan(0);
      expect(frontier.length).toBeLessThanOrEqual(candidates.length);

      // Verify Pareto optimality: no frontier member may be dominated by another
      for (let i = 0; i < frontier.length; i++) {
        for (let j = 0; j < frontier.length; j++) {
          if (i === j) {
            continue;
          }
          const isDominated = env.paretoFrontier.isDominated(
            frontier[i].candidate,
            frontier[j].candidate
          );
          expect(isDominated).toBe(false);
        }
      }

      // Test candidate sampling
      const sampledCandidate = await env.paretoFrontier.sampleCandidate();
      expect(sampledCandidate).toBeDefined();

      // eslint-disable-next-line no-console
      console.log(`✅ Pareto frontier optimization verified with ${frontier.length} optimal candidates`);
    }, 30000);
  });

  /**
   * Performance and Load Tests
   */
  describe('Performance and Load Testing', () => {
    test('Performance Benchmarks - All Operations', async () => {
      // eslint-disable-next-line no-console
      console.log('⚡ Running comprehensive performance benchmarks...');

      const benchmarks = await requireEnvironment().benchmarks.runComprehensiveBenchmarks();

      // Verify benchmark results meet performance thresholds
      expect(benchmarks.overallScore).toBeGreaterThan(70); // Minimum 70/100 score
      expect(benchmarks.trajectoryRecording.passesSLA).toBe(true);
      expect(benchmarks.paretoQuery.passesSLA).toBe(true);
      expect(benchmarks.memoryOperations.passesSLA).toBe(true);

      // Verify throughput requirements
      expect(benchmarks.trajectoryRecording.throughput).toBeGreaterThan(0.5); // At least 0.5 ops/sec
      expect(benchmarks.paretoQuery.throughput).toBeGreaterThan(1.0); // At least 1 ops/sec
      expect(benchmarks.memoryOperations.throughput).toBeGreaterThan(5.0); // At least 5 ops/sec

      // Verify resource utilization is reasonable
      expect(benchmarks.resourceUtilization.memory).toBeLessThan(90); // Less than 90% memory
      expect(benchmarks.resourceUtilization.cpu).toBeLessThan(95); // Less than 95% CPU

      // eslint-disable-next-line no-console
      console.log(`✅ Performance benchmarks completed - Overall Score: ${benchmarks.overallScore.toFixed(1)}/100`);
    }, 120000); // 2 minute timeout for comprehensive benchmarks

    test('Concurrent Operations Stress Test', async () => {
      // eslint-disable-next-line no-console
      console.log('🔀 Testing concurrent operations under load...');

      const env = requireEnvironment();
      const concurrentEvolutions = 5;

      // Launch multiple concurrent evolution scenarios
      const evolutionPromises = Array.from({ length: concurrentEvolutions }, (_, i) =>
        env.scenarios.createConcurrentEvolutionScenario(i)
      );

      const startTime = Date.now();
      const results = await Promise.all(evolutionPromises);
      const executionTime = Date.now() - startTime;

      // Verify all operations completed successfully
      expect(results.length).toBe(concurrentEvolutions);
      expect(results.every(r => r.success)).toBe(true);

      // Verify concurrent execution was efficient
      expect(executionTime).toBeLessThan(performanceThresholds.evolutionTime * 2);

      // Verify no resource contention issues
      for (const result of results) {
        expect(result.executionTime).toBeLessThan(10000); // Each should complete within 10 seconds
      }

      // eslint-disable-next-line no-console
      console.log(`✅ Concurrent stress test completed - ${concurrentEvolutions} operations in ${executionTime}ms`);
    }, 60000);

    test('Memory Usage Under Load', async () => {
      // eslint-disable-next-line no-console
      console.log('💾 Testing memory usage patterns under load...');

      const initialMemory = process.memoryUsage().heapUsed;

      // Create high memory load scenario
      await requireEnvironment().scenarios.createHighMemoryLoadScenario();

      // Heap usage sampled right after the scenario; used as the "peak" figure below.
      const peakMemory = process.memoryUsage().heapUsed;
      const memoryIncrease = peakMemory - initialMemory;

      // Force garbage collection if available (requires node --expose-gc)
      let garbageCollected = false;
      if (global.gc) {
        global.gc();
        garbageCollected = true;
      }

      const finalMemory = process.memoryUsage().heapUsed;
      const memoryRetained = finalMemory - initialMemory;

      // Verify memory usage is within acceptable limits
      expect(memoryIncrease).toBeLessThan(200 * 1024 * 1024); // Less than 200MB increase
      expect(memoryRetained).toBeLessThan(100 * 1024 * 1024); // Less than 100MB retained after GC

      // Check for memory leak indicators. The retained/increase ratio is only
      // meaningful when a collection was actually forced (otherwise retained is
      // roughly equal to the increase and the check would always trip) and when
      // the heap actually grew (a non-positive increase inverts the comparison).
      if (garbageCollected && memoryIncrease > 0) {
        const memoryLeakSuspected = memoryRetained > memoryIncrease * 0.5;
        expect(memoryLeakSuspected).toBe(false);
      }

      // eslint-disable-next-line no-console
      console.log(`✅ Memory test completed - Peak: ${(peakMemory / 1024 / 1024).toFixed(1)}MB, Retained: ${(memoryRetained / 1024 / 1024).toFixed(1)}MB`);
    }, 45000);
  });

  /**
   * Error Handling and Recovery Tests
   */
  describe('Error Handling and Recovery', () => {
    test('Component Failure Recovery', async () => {
      // eslint-disable-next-line no-console
      console.log('🛠️ Testing component failure recovery...');

      const env = requireEnvironment();

      // Simulate LLM adapter failure
      await env.scenarios.simulateComponentFailure('llmAdapter');

      // Attempt recovery
      const recoveryResult = await env.testHelpers.attemptComponentRecovery('llmAdapter');

      expect(recoveryResult.success).toBe(true);
      expect(recoveryResult.recoveryTime).toBeLessThan(5000); // Recovery within 5 seconds

      // Verify system functionality after recovery
      const testResult = await env.testHelpers.callMCPTool('gepa_evaluate_prompt', {
        promptId: 'recovery-test-prompt',
        taskIds: ['recovery-test-task'],
        rolloutCount: 1,
      });

      expect(testResult.success).toBe(true);

      // eslint-disable-next-line no-console
      console.log(`✅ Component recovery completed in ${recoveryResult.recoveryTime}ms`);
    }, 30000);

    test('Data Corruption Detection and Recovery', async () => {
      // eslint-disable-next-line no-console
      console.log('🔍 Testing data corruption detection and recovery...');

      const env = requireEnvironment();

      // Introduce data corruption
      await env.scenarios.introduceDataCorruption();

      // Run corruption detection
      const detectionResult = await env.testHelpers.runCorruptionDetection();

      expect(detectionResult.corruptionDetected).toBe(true);
      expect(detectionResult.corruptedEntities.length).toBeGreaterThan(0);

      // Attempt data recovery
      const recoveryResult = await env.testHelpers.attemptDataRecovery();

      expect(recoveryResult.success).toBe(true);
      expect(recoveryResult.dataIntegrityScore).toBeGreaterThan(0.7); // At least 70% data integrity

      // eslint-disable-next-line no-console
      console.log(`✅ Data recovery completed - ${recoveryResult.recoveredEntities} entities recovered`);
    }, 30000);

    test('Resource Exhaustion Handling', async () => {
      // eslint-disable-next-line no-console
      console.log('📈 Testing resource exhaustion handling...');

      // Create resource exhaustion scenario
      const exhaustionScenario = await requireEnvironment().scenarios.createResourceExhaustionScenario();

      // Should handle gracefully without crashing
      expect(exhaustionScenario.completed).toBe(true);

      if (!exhaustionScenario.success) {
        // If operation failed due to resource exhaustion, it should be handled gracefully
        expect(exhaustionScenario.errorHandled).toBe(true);
        expect(exhaustionScenario.metadata?.error).toBeDefined();
      }

      // eslint-disable-next-line no-console
      console.log(`✅ Resource exhaustion handling completed - Handled gracefully: ${exhaustionScenario.errorHandled || exhaustionScenario.success}`);
    }, 30000);
  });

  /**
   * Memory System Integration Tests
   */
  describe('Memory System Integration', () => {
    test('Automated Memory Updates and Optimization', async () => {
      // eslint-disable-next-line no-console
      console.log('🧠 Testing automated memory management...');

      const env = requireEnvironment();

      // Get initial memory stats
      const initialStats = await env.testHelpers.getMemoryStats();
      expect(initialStats).toBeDefined();

      // Create memory load to trigger optimization
      await env.scenarios.fillMemoryToCapacity();

      // Trigger memory optimization
      const optimizationResult = await env.testHelpers.triggerMemoryOptimization();

      expect(optimizationResult.success).toBe(true);
      expect(optimizationResult.spaceSaved).toBeGreaterThan(0);

      // Verify memory stats after optimization
      const finalStats = await env.testHelpers.getMemoryStats();
      expect(finalStats.optimizationCount).toBeGreaterThan(initialStats.optimizationCount);

      // eslint-disable-next-line no-console
      console.log(`✅ Memory optimization completed - ${(optimizationResult.spaceSaved / 1024 / 1024).toFixed(1)}MB saved`);
    }, 30000);

    test('Cross-System Memory Synchronization', async () => {
      // eslint-disable-next-line no-console
      console.log('🔄 Testing cross-system memory synchronization...');

      const env = requireEnvironment();

      // Create test data in multiple systems
      const trajectory = await env.scenarios.createSampleTrajectory();
      await env.trajectoryStore.save(trajectory);

      const candidates = await env.scenarios.createOptimizationCandidates();
      for (const candidate of candidates.slice(0, 3)) {
        await env.paretoFrontier.addCandidate(candidate);
      }

      // Perform cross-system sync
      const syncResult = await env.testHelpers.performCrossSystemSync();

      expect(syncResult.success).toBe(true);
      expect(syncResult.synchronizedEntities).toBeGreaterThan(0);

      // eslint-disable-next-line no-console
      console.log(`✅ Cross-system sync completed - ${syncResult.synchronizedEntities} entities synchronized`);
    }, 30000);
  });

  /**
   * End-to-End Test Suite Summary
   */
  test('Generate Comprehensive Test Report', async () => {
    // eslint-disable-next-line no-console
    console.log('📋 Generating comprehensive test report...');

    // Run the complete test suite
    const testResults: E2ETestResults = await requireRunner().runAllTests();

    // Verify overall test results
    expect(testResults.totalTests).toBeGreaterThan(0);
    expect(testResults.passedTests).toBeGreaterThan(0);
    expect(testResults.passedTests / testResults.totalTests).toBeGreaterThan(0.9); // 90% pass rate

    // Verify performance metrics
    expect(testResults.performanceMetrics.avgEvolutionTime).toBeLessThan(performanceThresholds.evolutionTime);
    expect(testResults.performanceMetrics.avgTrajectoryRecording).toBeLessThan(performanceThresholds.trajectoryRecording);

    // Verify memory usage
    expect(testResults.memoryUsage.leakDetected).toBe(false);

    // Generate execution report
    const executionReport = requireEnvironment().testHelpers.generateExecutionReport();
    expect(executionReport.successRate).toBeGreaterThan(0.9); // 90% success rate for MCP calls

    // eslint-disable-next-line no-console
    console.log(`✅ Test report generated - ${testResults.passedTests}/${testResults.totalTests} tests passed`);
    // eslint-disable-next-line no-console
    console.log(`📊 Performance Score: ${testResults.performanceMetrics.avgEvolutionTime}ms avg evolution time`);
    // eslint-disable-next-line no-console
    console.log(`💾 Memory Peak: ${(testResults.memoryUsage.peakUsage / 1024 / 1024).toFixed(1)}MB`);
  }, 300000); // 5 minute timeout for complete test suite

  /**
   * Performance regression detection (optional advanced test)
   *
   * Nested inside the main suite on purpose: it depends on the runner created
   * by this suite's `beforeAll`, and as a top-level sibling it would execute
   * only after this suite's `afterAll` had already cleaned the runner up.
   */
  describe('Performance Regression Detection', () => {
    test('Detect Performance Regressions', async () => {
      // eslint-disable-next-line no-console
      console.log('📈 Running performance regression detection...');

      // This test would compare current performance against baseline
      // For demonstration, we'll create mock baseline data.
      // NOTE(review): the baseline is not consumed yet; it is kept for the
      // commented-out detectRegressions() call below.
      const mockBaseline = {
        trajectoryRecording: { averageTime: 800, throughput: 1.2 },
        paretoQuery: { averageTime: 300, throughput: 3.3 },
        memoryOperations: { averageTime: 50, throughput: 20 },
        evolutionCycle: { averageTime: 25000, throughput: 0.04 },
        concurrentOperations: { averageTime: 2000, throughput: 0.5 },
        overallScore: 85,
        resourceUtilization: { cpu: 45, memory: 60, io: 30 },
      };

      // Run current benchmarks
      const currentBenchmarks = await requireEnvironment().benchmarks.runComprehensiveBenchmarks();

      // Minimal sanity check so the test asserts something until real
      // regression analysis is wired in.
      expect(Number.isFinite(currentBenchmarks.overallScore)).toBe(true);

      // In a real implementation, you would:
      // const regressionAnalysis = testRunner.environment!.benchmarks.detectRegressions(mockBaseline);
      // expect(regressionAnalysis.overallAssessment).not.toBe('degraded');

      // eslint-disable-next-line no-console
      console.log(`✅ Performance regression detection completed - Current score: ${currentBenchmarks.overallScore.toFixed(1)}`);
    }, 120000);
  });
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sloth-wq/prompt-auto-optimizer-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.