Skip to main content
Glama

Prompt Auto-Optimizer MCP

by sloth-wq
test-scenarios.ts (23.7 kB)
/**
 * Test Scenarios for GEPA E2E Testing
 *
 * This module provides comprehensive test scenarios covering:
 * - Complete evolution workflows
 * - Component integration patterns
 * - Failure simulation scenarios
 * - Performance stress tests
 * - Memory management scenarios
 * - Concurrent operation patterns
 */

import type {
  PromptCandidate,
  ExecutionTrajectory,
  ExecutionStep,
  EvolutionResult,
  TaskContext,
  ReflectionAnalysis,
} from '../../types/gepa';
import type { TrajectoryStore } from '../../core/trajectory-store';
import type { ParetoFrontier } from '../../core/pareto-frontier';
import type { ReflectionEngine } from '../../core/reflection-engine';
import type { LLMAdapter } from '../../services/llm-adapter';
import type { PromptMutator } from '../../services/prompt-mutator';

/**
 * Test Scenario Configuration
 *
 * The collaborating components every scenario is run against. They are used
 * as supplied; `simulateComponentFailure` patches methods on them in place.
 */
interface ScenarioConfig {
  trajectoryStore: TrajectoryStore;
  paretoFrontier: ParetoFrontier;
  reflectionEngine: ReflectionEngine;
  llmAdapter: LLMAdapter;
  promptMutator: PromptMutator;
}

/**
 * Scenario Execution Result
 *
 * `success` reports whether the scenario body ran without throwing;
 * `executionTime` is wall-clock milliseconds measured with `Date.now()`.
 * The optional fields are populated only by the scenarios that produce them.
 */
export interface ScenarioResult {
  success: boolean;
  completed: boolean;
  executionTime: number;
  evolutionResult?: EvolutionResult;
  trajectories?: ExecutionTrajectory[];
  candidates?: PromptCandidate[];
  analysisResult?: ReflectionAnalysis;
  errorHandled?: boolean;
  metadata?: Record<string, unknown>;
}

/**
 * Main Test Scenarios Class
 *
 * Builds synthetic candidates and trajectories and pushes them through the
 * configured components. Scores, token counts and timings are produced with
 * `Math.random()`, so results are not reproducible run to run.
 */
export class TestScenarios {
  /** Monotonic counter mixed into generated scenario IDs. */
  private scenarioCounter = 0;

  /**
   * Failure stubs installed by `simulateComponentFailure`. Consulted before
   * saving a component's "original" method so that a repeated simulation never
   * records one of our own stubs as the original.
   */
  private readonly failureStubs = new WeakSet<object>();

  constructor(private readonly config: ScenarioConfig) {}

  /**
   * Create a complete evolution scenario from start to finish.
   *
   * Seeds the frontier with one candidate, asks the mutator for adaptive
   * mutations, assigns random scores to at most five of them, stores one
   * trajectory per scored mutation and reports the highest-scoring frontier
   * entry as `bestPrompt`.
   *
   * @returns A result with `evolutionResult`, `trajectories` and `candidates`
   *   on success. Any thrown error is caught and reported as
   *   `success: false` with the message in `metadata.error`.
   */
  async createCompleteEvolutionScenario(): Promise<ScenarioResult> {
    const scenarioId = this.generateScenarioId('complete-evolution');
    const startTime = Date.now();

    try {
      // Step 1: Create initial population
      const seedPrompt = `You are a helpful assistant specialized in ${scenarioId}. \nPlease analyze the given task carefully and provide a comprehensive solution.`;

      const initialCandidate: PromptCandidate = {
        id: `${scenarioId}-seed`,
        content: seedPrompt,
        generation: 0,
        taskPerformance: new Map(),
        averageScore: 0,
        rolloutCount: 0,
        createdAt: new Date(),
        lastEvaluated: new Date(),
        mutationType: 'initial',
      };

      // Step 2: Add to Pareto frontier
      await this.config.paretoFrontier.addCandidate(initialCandidate);

      // Step 3: Generate mutations
      const taskContext: TaskContext = {
        taskId: `${scenarioId}-task`,
        description: `Test task for scenario ${scenarioId}`,
        category: 'integration-test',
        difficulty: 'medium',
        requiredCapabilities: ['reasoning', 'analysis'],
        expectedDuration: 30,
      };

      const mutations = await this.config.promptMutator.generateAdaptiveMutations(
        initialCandidate,
        taskContext
      );

      // Step 4: Evaluate mutations (the mutation objects are scored in place)
      const evaluatedCandidates: PromptCandidate[] = [];
      for (const mutation of mutations.slice(0, 5)) {
        // Limit for test performance
        mutation.averageScore = Math.random() * 0.5 + 0.5; // 0.5-1.0
        mutation.rolloutCount = 3;
        mutation.lastEvaluated = new Date();

        await this.config.paretoFrontier.addCandidate(mutation);
        evaluatedCandidates.push(mutation);
      }

      // Step 5: Create execution trajectories; only successfully saved ones are reported
      const trajectories: ExecutionTrajectory[] = [];
      for (const candidate of evaluatedCandidates) {
        const trajectory = await this.createTrajectoryForCandidate(candidate, taskContext);
        const saveResult = await this.config.trajectoryStore.save(trajectory);
        if (saveResult.success) {
          trajectories.push(trajectory);
        }
      }

      // Step 6: Get best candidate from frontier.
      // reduce() has no seed, so an empty frontier throws and is reported by the catch below.
      const frontier = this.config.paretoFrontier.getFrontier();
      const bestCandidate = frontier.reduce((best, current) =>
        current.candidate.averageScore > best.candidate.averageScore ? current : best
      ).candidate;

      const evolutionResult: EvolutionResult = {
        evolutionId: scenarioId,
        taskDescription: taskContext.description,
        generations: 1,
        bestPrompt: bestCandidate,
        convergenceAchieved: false,
        totalRollouts: evaluatedCandidates.reduce((total, c) => total + c.rolloutCount, 0),
        evolutionHistory: [evaluatedCandidates],
      };

      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        evolutionResult,
        trajectories,
        candidates: evaluatedCandidates,
      };
    } catch (error) {
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        metadata: { error: error instanceof Error ? error.message : 'Unknown error' },
      };
    }
  }

  /**
   * Create sample trajectory for testing.
   *
   * @returns A successful three-step trajectory (initialize, analyze, execute)
   *   with a fixed score of 0.85. Nothing is written to the trajectory store.
   */
  async createSampleTrajectory(): Promise<ExecutionTrajectory> {
    const trajectoryId = this.generateScenarioId('sample-trajectory');

    const steps: ExecutionStep[] = [
      {
        stepNumber: 1,
        action: 'initialize',
        reasoning: 'Initialize the execution context',
        timestamp: new Date(),
      },
      {
        stepNumber: 2,
        action: 'analyze',
        reasoning: 'Analyze the input parameters',
        toolName: 'analyzer',
        toolInput: { input: 'test data' },
        toolOutput: { analysis: 'completed' },
        timestamp: new Date(),
      },
      {
        stepNumber: 3,
        action: 'execute',
        reasoning: 'Execute the main task',
        toolName: 'executor',
        toolInput: { task: 'process' },
        toolOutput: { result: 'success' },
        timestamp: new Date(),
      },
    ];

    return {
      id: trajectoryId,
      promptId: `prompt-${trajectoryId}`,
      taskId: `task-${trajectoryId}`,
      timestamp: new Date(),
      steps,
      finalResult: {
        success: true,
        score: 0.85,
        output: { result: 'Sample trajectory executed successfully' },
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: 250,
      executionTime: 1500,
    };
  }

  /**
   * Create optimization candidates for Pareto frontier testing.
   *
   * @returns Ten candidates with random per-task scores for `task-1`..`task-3`;
   *   `averageScore` is the mean of those three scores. The candidates are
   *   not added to the frontier here.
   */
  async createOptimizationCandidates(): Promise<PromptCandidate[]> {
    const basePrompt = 'You are an AI assistant that helps with various tasks.';
    // Mutation types cycled through by index for every candidate except the first.
    const mutationCycle = ['reflection', 'crossover', 'random'] as const;
    const candidates: PromptCandidate[] = [];

    for (let i = 0; i < 10; i++) {
      const candidate: PromptCandidate = {
        id: `optimization-candidate-${i}`,
        content: `${basePrompt} Variant ${i} with specific focus on efficiency and accuracy.`,
        generation: i % 3,
        taskPerformance: new Map([
          ['task-1', Math.random() * 0.5 + 0.5], // 0.5-1.0
          ['task-2', Math.random() * 0.5 + 0.4], // 0.4-0.9
          ['task-3', Math.random() * 0.6 + 0.3], // 0.3-0.9
        ]),
        averageScore: 0,
        rolloutCount: Math.floor(Math.random() * 10) + 5, // 5-14
        createdAt: new Date(Date.now() - Math.random() * 86400000), // Last 24 hours
        lastEvaluated: new Date(),
        mutationType:
          i === 0 ? 'initial' : (mutationCycle[i % 3] as PromptCandidate['mutationType']),
      };

      // Calculate average score
      const scores = Array.from(candidate.taskPerformance.values());
      candidate.averageScore = scores.reduce((sum, score) => sum + score, 0) / scores.length;

      candidates.push(candidate);
    }

    return candidates;
  }

  /**
   * Create failure trajectories for reflection testing.
   *
   * Builds three fixed failing trajectories (timeout, invalid parameter,
   * resource exhaustion) and saves each one to the trajectory store.
   *
   * @returns The three trajectories, in that order.
   * @throws Whatever `trajectoryStore.save` throws; save errors are not caught here.
   */
  async createFailureTrajectories(): Promise<ExecutionTrajectory[]> {
    // Failure pattern 1: Timeout errors
    const timeoutTrajectory: ExecutionTrajectory = {
      id: 'trajectory-timeout-failure',
      promptId: 'prompt-timeout-test',
      taskId: 'task-timeout-test',
      timestamp: new Date(),
      steps: [
        {
          stepNumber: 1,
          action: 'initialize',
          timestamp: new Date(),
        },
        {
          stepNumber: 2,
          action: 'long_operation',
          error: 'Operation timeout after 30 seconds',
          timestamp: new Date(),
        },
      ],
      finalResult: {
        success: false,
        score: 0.1,
        output: null,
        error: 'Timeout during execution',
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: 0,
      executionTime: 30000,
    };

    // Failure pattern 2: Invalid parameter errors
    const parameterTrajectory: ExecutionTrajectory = {
      id: 'trajectory-parameter-failure',
      promptId: 'prompt-parameter-test',
      taskId: 'task-parameter-test',
      timestamp: new Date(),
      steps: [
        {
          stepNumber: 1,
          action: 'validate_input',
          timestamp: new Date(),
        },
        {
          stepNumber: 2,
          action: 'process',
          error: 'Invalid parameter format: expected object, got string',
          timestamp: new Date(),
        },
      ],
      finalResult: {
        success: false,
        score: 0.2,
        output: null,
        error: 'Parameter validation failed',
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: 0,
      executionTime: 500,
    };

    // Failure pattern 3: Resource exhaustion
    const resourceTrajectory: ExecutionTrajectory = {
      id: 'trajectory-resource-failure',
      promptId: 'prompt-resource-test',
      taskId: 'task-resource-test',
      timestamp: new Date(),
      steps: [
        {
          stepNumber: 1,
          action: 'allocate_memory',
          timestamp: new Date(),
        },
        {
          stepNumber: 2,
          action: 'process_data',
          error: 'Out of memory: unable to allocate additional resources',
          timestamp: new Date(),
        },
      ],
      finalResult: {
        success: false,
        score: 0.0,
        output: null,
        error: 'Resource exhaustion',
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: 0,
      executionTime: 2000,
    };

    const trajectories: ExecutionTrajectory[] = [
      timeoutTrajectory,
      parameterTrajectory,
      resourceTrajectory,
    ];

    // Save trajectories to store
    for (const trajectory of trajectories) {
      await this.config.trajectoryStore.save(trajectory);
    }

    return trajectories;
  }

  /**
   * Populate Pareto frontier with test data: adds every candidate produced by
   * `createOptimizationCandidates` to the frontier, one at a time.
   */
  async populateParetoFrontier(): Promise<void> {
    const candidates = await this.createOptimizationCandidates();

    for (const candidate of candidates) {
      await this.config.paretoFrontier.addCandidate(candidate);
    }
  }

  /**
   * Create concurrent evolution scenario.
   *
   * Adds a single randomly scored candidate to the frontier and then waits
   * 500-1500 ms to simulate processing time.
   *
   * @param index Caller-chosen number embedded in the scenario ID and prompt text.
   * @returns A result carrying the one candidate, or `success: false` with
   *   `metadata.error` if anything threw.
   */
  async createConcurrentEvolutionScenario(index: number): Promise<ScenarioResult> {
    const scenarioId = this.generateScenarioId(`concurrent-evolution-${index}`);
    const startTime = Date.now();

    try {
      const seedPrompt = `Concurrent evolution scenario ${index}: You are a specialized assistant for task ${index}.`;

      const candidate: PromptCandidate = {
        id: `${scenarioId}-candidate`,
        content: seedPrompt,
        generation: 0,
        taskPerformance: new Map([['concurrent-task', Math.random() * 0.5 + 0.5]]),
        averageScore: Math.random() * 0.5 + 0.5,
        rolloutCount: 5,
        createdAt: new Date(),
        lastEvaluated: new Date(),
        mutationType: 'initial',
      };

      await this.config.paretoFrontier.addCandidate(candidate);

      // Simulate some processing time
      await this.sleep(Math.random() * 1000 + 500); // 500-1500ms

      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        candidates: [candidate],
      };
    } catch (error) {
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        metadata: { error: error instanceof Error ? error.message : 'Unknown error' },
      };
    }
  }

  /**
   * Create large population scenario for performance testing.
   *
   * Generates `populationSize` candidates. Note that only every tenth
   * candidate (index 0, 10, 20, ...) is actually submitted to the frontier;
   * the returned `candidates` array contains all of them.
   *
   * @param populationSize Number of candidates to generate.
   * @returns A result carrying every generated candidate, or `success: false`
   *   with `metadata.error` if anything threw.
   */
  async createLargePopulationScenario(populationSize: number): Promise<ScenarioResult> {
    const scenarioId = this.generateScenarioId('large-population');
    const startTime = Date.now();

    try {
      const candidates: PromptCandidate[] = [];

      for (let i = 0; i < populationSize; i++) {
        const candidate: PromptCandidate = {
          id: `${scenarioId}-candidate-${i}`,
          content: `Large population test candidate ${i}: Specialized prompt for task ${i % 10}.`,
          generation: Math.floor(i / 20),
          taskPerformance: new Map([[`task-${i % 5}`, Math.random() * 0.5 + 0.5]]),
          averageScore: Math.random() * 0.5 + 0.5,
          rolloutCount: Math.floor(Math.random() * 5) + 1,
          createdAt: new Date(),
          lastEvaluated: new Date(),
          mutationType: i === 0 ? 'initial' : 'random',
        };

        candidates.push(candidate);

        // Add to frontier (batch processing would be better in real implementation).
        // Sampling one in ten keeps the frontier from being overwhelmed.
        if (i % 10 === 0) {
          await this.config.paretoFrontier.addCandidate(candidate);
        }
      }

      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        candidates,
      };
    } catch (error) {
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        metadata: { error: error instanceof Error ? error.message : 'Unknown error' },
      };
    }
  }

  /**
   * Create high memory load scenario.
   *
   * Saves 1000 sample trajectories whose every step carries a 1000-character
   * `toolOutput` payload. Every tenth trajectory is also serialized and held
   * in a local array until the method returns.
   *
   * @throws Whatever `trajectoryStore.save` throws; save errors are not caught here.
   */
  async createHighMemoryLoadScenario(): Promise<void> {
    const largeData: string[] = [];

    // Create large amounts of data to stress memory
    for (let i = 0; i < 1000; i++) {
      const trajectory = await this.createSampleTrajectory();
      trajectory.id = `memory-load-trajectory-${i}`;

      // Add large payload to steps
      for (const step of trajectory.steps) {
        step.toolOutput = {
          largeData: 'x'.repeat(1000), // 1KB per step
          index: i,
          timestamp: Date.now(),
        };
      }

      await this.config.trajectoryStore.save(trajectory);

      // Keep some in memory to test memory management
      if (i % 10 === 0) {
        largeData.push(JSON.stringify(trajectory));
      }
    }
  }

  /**
   * Create reflection analysis scenario.
   *
   * Runs the reflection engine's batch analysis over the three failure
   * trajectories and repackages the batch result as a `ReflectionAnalysis`
   * attributed to the first trajectory.
   *
   * @returns A result with `analysisResult` and the analysed `trajectories`,
   *   or `success: false` with `metadata.error` if anything threw.
   */
  async createReflectionAnalysisScenario(): Promise<ScenarioResult> {
    const startTime = Date.now();

    try {
      // Create failure trajectories
      const failureTrajectories = await this.createFailureTrajectories();

      // Perform batch analysis
      const analysisResult = await this.config.reflectionEngine.analyzeBatch(failureTrajectories);

      // Convert BatchAnalysisResult to ReflectionAnalysis format
      const reflectionAnalysis: ReflectionAnalysis = {
        trajectoryId: failureTrajectories[0]?.id ?? 'batch-analysis',
        promptId: failureTrajectories[0]?.promptId ?? 'batch-prompt',
        diagnosis: {
          failurePoint: 'Multiple trajectory failures',
          rootCause: analysisResult.commonPatterns.join(', '),
          moduleResponsibility: new Map<string, number>(),
          patterns: [],
        },
        suggestions: analysisResult.recommendations,
        confidence: 0.8,
        rationale: 'Batch analysis of failure trajectories to identify common patterns',
      };

      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        analysisResult: reflectionAnalysis,
        trajectories: failureTrajectories,
      };
    } catch (error) {
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        metadata: { error: error instanceof Error ? error.message : 'Unknown error' },
      };
    }
  }

  /**
   * Simulate component failure.
   *
   * Replaces a method on the configured component with a stub that always
   * rejects, after stashing the current implementation on the component
   * (`_originalEvaluatePrompt` / `_originalSave`). If the current
   * implementation is already one of this instance's failure stubs, the
   * stashed original is left untouched, so calling this twice in a row does
   * not lose the real implementation.
   *
   * @param componentName `'llmAdapter'` or `'trajectoryStore'`.
   * @throws Error `Unknown component: <name>` for any other name.
   */
  async simulateComponentFailure(componentName: string): Promise<void> {
    switch (componentName) {
      case 'llmAdapter': {
        // Simulate by temporarily replacing with failing implementation
        const adapter = this.config.llmAdapter as LLMAdapter & {
          _originalEvaluatePrompt?: LLMAdapter['evaluatePrompt'];
        };
        const currentEvaluate = adapter.evaluatePrompt;
        if (!this.failureStubs.has(currentEvaluate)) {
          adapter._originalEvaluatePrompt = currentEvaluate;
        }
        const failingEvaluate = async (): Promise<never> => {
          throw new Error('Simulated LLM adapter failure');
        };
        this.failureStubs.add(failingEvaluate);
        adapter.evaluatePrompt = failingEvaluate;
        break;
      }

      case 'trajectoryStore': {
        // Simulate storage failure
        const store = this.config.trajectoryStore as TrajectoryStore & {
          _originalSave?: TrajectoryStore['save'];
        };
        const currentSave = store.save;
        if (!this.failureStubs.has(currentSave)) {
          store._originalSave = currentSave;
        }
        const failingSave = async (): Promise<never> => {
          throw new Error('Simulated trajectory store failure');
        };
        this.failureStubs.add(failingSave);
        store.save = failingSave;
        break;
      }

      default:
        throw new Error(`Unknown component: ${componentName}`);
    }
  }

  /**
   * Fill memory to capacity for testing optimization triggers.
   *
   * Adds 200 candidates (`capacity-test-0` .. `capacity-test-199`) to the
   * frontier, each derived from one of the ten optimization candidates with a
   * fresh random `averageScore`. Every clone receives its own copy of the
   * template's `taskPerformance` map so that clones of the same template do
   * not share mutable state.
   */
  async fillMemoryToCapacity(): Promise<void> {
    const templates = await this.createOptimizationCandidates();

    // Add many candidates to fill frontier to capacity
    for (let i = 0; i < 200; i++) {
      // Defaults below only apply if the template list were ever empty.
      const template: Partial<PromptCandidate> = templates[i % templates.length] ?? {};

      const fullCandidate: PromptCandidate = {
        id: `capacity-test-${i}`,
        content: template.content ?? 'Test prompt content',
        generation: template.generation ?? 0,
        taskPerformance: new Map<string, number>(template.taskPerformance ?? []),
        averageScore: Math.random(),
        rolloutCount: template.rolloutCount ?? 0,
        createdAt: template.createdAt ?? new Date(),
        lastEvaluated: template.lastEvaluated ?? new Date(),
        mutationType: template.mutationType ?? 'initial',
        // Only set parentId when the template has one, rather than writing `undefined`.
        ...(template.parentId ? { parentId: template.parentId } : {}),
      };

      await this.config.paretoFrontier.addCandidate(fullCandidate);
    }
  }

  /**
   * Simulate memory corruption.
   *
   * Attempts to save one trajectory whose fields deliberately have the wrong
   * types. A rejection from the store is expected and is swallowed.
   */
  async simulateMemoryCorruption(): Promise<void> {
    // Create invalid trajectory data; the `as any` casts are the point of the scenario.
    const corruptTrajectory = {
      id: 'corrupt-trajectory',
      promptId: null as any, // Invalid: should be string
      taskId: 'valid-task',
      timestamp: 'invalid-date' as any, // Invalid: should be Date
      steps: 'not-an-array' as any, // Invalid: should be array
      finalResult: {
        success: 'maybe' as any, // Invalid: should be boolean
        score: 'high' as any, // Invalid: should be number
        output: undefined,
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: -1, // Invalid: should be positive
      executionTime: 'long' as any, // Invalid: should be number
    };

    try {
      await this.config.trajectoryStore.save(corruptTrajectory as any);
    } catch {
      // Expected to fail - this simulates corruption in storage
    }
  }

  /**
   * Create resource exhaustion scenario.
   *
   * Starts 100 large memory operations at once and waits for all of them.
   *
   * @returns `success: true, errorHandled: false` when every operation
   *   resolves; `success: false, errorHandled: true` with the error message
   *   when any rejects. `metadata.scenarioId` is set in both cases.
   */
  async createResourceExhaustionScenario(): Promise<ScenarioResult> {
    const scenarioId = this.generateScenarioId('resource-exhaustion');
    const startTime = Date.now();

    // Use scenarioId to track this scenario
    const metadata = { scenarioId };

    try {
      // Simulate resource-intensive operations; all are started before any is awaited
      const operations = Array.from({ length: 100 }, (_, i) => this.createLargeMemoryOperation(i));

      // Attempt to run all operations concurrently
      await Promise.all(operations);

      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        errorHandled: false,
        metadata,
      };
    } catch (error) {
      // Expected to fail due to resource exhaustion
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        errorHandled: true,
        metadata: {
          ...metadata,
          error: error instanceof Error ? error.message : 'Resource exhaustion',
        },
      };
    }
  }

  /**
   * Introduce data corruption for recovery testing.
   *
   * Saves four sample trajectories (`corrupt-0` .. `corrupt-3`), each with one
   * field overwritten by an invalid value. Rejections from the store are
   * swallowed so that every corruption type is attempted.
   */
  async introduceDataCorruption(): Promise<void> {
    // Create multiple corrupt trajectories with different corruption types
    const corruptionTypes = [
      { field: 'promptId', value: null },
      { field: 'timestamp', value: 'invalid-date' },
      { field: 'steps', value: 'not-array' },
      { field: 'finalResult.score', value: 'not-number' },
    ];

    for (const [i, corruption] of corruptionTypes.entries()) {
      const trajectory = await this.createSampleTrajectory();
      trajectory.id = `corrupt-${i}`;

      // Introduce specific corruption; the dotted path is the one nested field
      if (corruption.field === 'finalResult.score') {
        (trajectory.finalResult as any).score = corruption.value;
      } else {
        (trajectory as any)[corruption.field] = corruption.value;
      }

      try {
        await this.config.trajectoryStore.save(trajectory as any);
      } catch {
        // Some corruptions might be caught during save
      }
    }
  }

  /**
   * Create trajectory for a specific candidate.
   *
   * @param candidate Candidate whose `id`, `content` and `averageScore` are recorded.
   * @param taskContext Task the trajectory is attributed to.
   * @returns A two-step trajectory whose final score is the candidate's
   *   `averageScore` and which counts as successful when that score is above
   *   0.5. Token count and execution time are random.
   */
  private async createTrajectoryForCandidate(
    candidate: PromptCandidate,
    taskContext: TaskContext
  ): Promise<ExecutionTrajectory> {
    const steps: ExecutionStep[] = [
      {
        stepNumber: 1,
        action: 'prompt_initialization',
        reasoning: `Initialize with prompt: ${candidate.id}`,
        timestamp: new Date(),
      },
      {
        stepNumber: 2,
        action: 'task_execution',
        reasoning: `Execute task: ${taskContext.taskId}`,
        toolName: 'task_executor',
        toolInput: { prompt: candidate.content, task: taskContext },
        toolOutput: { result: 'success', score: candidate.averageScore },
        timestamp: new Date(),
      },
    ];

    return {
      id: `trajectory-${candidate.id}-${taskContext.taskId}`,
      promptId: candidate.id,
      taskId: taskContext.taskId,
      timestamp: new Date(),
      steps,
      finalResult: {
        success: candidate.averageScore > 0.5,
        score: candidate.averageScore,
        output: { candidateId: candidate.id, taskId: taskContext.taskId },
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: Math.floor(Math.random() * 500) + 100,
      executionTime: Math.floor(Math.random() * 2000) + 500,
    };
  }

  /**
   * Create large memory operation for testing.
   *
   * Builds a 10,000-item array, waits up to 100 ms, then saves a sample
   * trajectory with that array attached as `largeData`.
   *
   * @param index Number embedded in the item IDs and the trajectory ID.
   * @throws Whatever `trajectoryStore.save` throws.
   */
  private async createLargeMemoryOperation(index: number): Promise<void> {
    // Create memory-intensive operation
    const largeArray = Array.from({ length: 10000 }, (_, i) => ({
      id: `memory-item-${index}-${i}`,
      data: 'x'.repeat(100), // 100 bytes each
      timestamp: Date.now(),
    }));

    // Simulate processing
    await this.sleep(Math.random() * 100);

    // Try to store (might fail due to memory pressure)
    const trajectory = await this.createSampleTrajectory();
    trajectory.id = `memory-operation-${index}`;
    (trajectory as any).largeData = largeArray;

    await this.config.trajectoryStore.save(trajectory);
  }

  /**
   * Generate unique scenario ID.
   *
   * @param prefix Leading label for the ID.
   * @returns `<prefix>-<Date.now()>-<counter>`, where the counter is
   *   incremented on every call on this instance.
   */
  private generateScenarioId(prefix: string): string {
    return `${prefix}-${Date.now()}-${++this.scenarioCounter}`;
  }

  /**
   * Sleep utility for timing control.
   *
   * @param ms Delay in milliseconds.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

// Note: TestScenarios and ScenarioResult are already exported

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sloth-wq/prompt-auto-optimizer-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.