// test-scenarios.ts (23.7 kB)
/**
* Test Scenarios for GEPA E2E Testing
*
* This module provides comprehensive test scenarios covering:
* - Complete evolution workflows
* - Component integration patterns
* - Failure simulation scenarios
* - Performance stress tests
* - Memory management scenarios
* - Concurrent operation patterns
*/
import type {
PromptCandidate,
ExecutionTrajectory,
ExecutionStep,
EvolutionResult,
TaskContext,
ReflectionAnalysis,
} from '../../types/gepa';
import type { TrajectoryStore } from '../../core/trajectory-store';
import type { ParetoFrontier } from '../../core/pareto-frontier';
import type { ReflectionEngine } from '../../core/reflection-engine';
import type { LLMAdapter } from '../../services/llm-adapter';
import type { PromptMutator } from '../../services/prompt-mutator';
/**
* Test Scenario Configuration
*
* The collaborators that TestScenarios drives. The instances are supplied by
* the caller through the TestScenarios constructor.
*/
interface ScenarioConfig {
/** Store whose `save()` is called to persist every trajectory a scenario produces. */
trajectoryStore: TrajectoryStore;
/** Frontier that receives candidates via `addCandidate()` and is read via `getFrontier()`. */
paretoFrontier: ParetoFrontier;
/** Engine whose `analyzeBatch()` is run over the failure trajectories. */
reflectionEngine: ReflectionEngine;
/** Adapter whose `evaluatePrompt` is swapped for a throwing stub when an LLM failure is simulated. */
llmAdapter: LLMAdapter;
/** Mutator whose `generateAdaptiveMutations()` produces candidates from the seed prompt. */
promptMutator: PromptMutator;
}
/**
* Scenario Execution Result
*
* Returned by the scenario methods of TestScenarios. Which optional fields
* are present depends on the scenario that produced the result.
*/
export interface ScenarioResult {
/** True when the scenario ran without throwing; false when an error was caught. */
success: boolean;
/** Set to true by every scenario in this module, on both the success and the error path. */
completed: boolean;
/** Elapsed time, computed as a difference of two `Date.now()` readings (milliseconds). */
executionTime: number;
/** Summary of the evolution run; set by the complete-evolution scenario. */
evolutionResult?: EvolutionResult;
/** Trajectories created (and, where applicable, saved) by the scenario. */
trajectories?: ExecutionTrajectory[];
/** Candidates created by the scenario. */
candidates?: PromptCandidate[];
/** Reflection analysis; set by the reflection-analysis scenario. */
analysisResult?: ReflectionAnalysis;
/** Set by the resource-exhaustion scenario: true when an error was caught, false otherwise. */
errorHandled?: boolean;
/** Free-form details; on failure it carries the caught error's message under `error`. */
metadata?: Record<string, unknown>;
}
/**
 * Builds the scenarios used by the GEPA end-to-end tests.
 *
 * Every public method drives the collaborators supplied through the
 * constructor (trajectory store, Pareto frontier, reflection engine, LLM
 * adapter, prompt mutator). Methods that return a `ScenarioResult` catch
 * their own errors and report them as `success: false` with the message in
 * `metadata.error` instead of rejecting.
 */
export class TestScenarios {
  private config: ScenarioConfig;
  /** Monotonic counter mixed into generated scenario ids. */
  private scenarioCounter = 0;

  /**
   * @param config Collaborators every scenario operates on.
   */
  constructor(config: ScenarioConfig) {
    this.config = config;
  }

  /**
   * Runs one full evolution generation: seeds the frontier, asks the mutator
   * for mutations, scores up to five of them with random values, stores one
   * trajectory per scored mutation and picks the best frontier candidate.
   *
   * @returns A result carrying the evolution summary, the trajectories whose
   *   save reported success, and the scored candidates. On any error the
   *   result has `success: false` and `metadata.error`.
   */
  async createCompleteEvolutionScenario(): Promise<ScenarioResult> {
    const scenarioId = this.generateScenarioId('complete-evolution');
    const startTime = Date.now();
    try {
      // Step 1: Create initial population
      const seedPrompt = `You are a helpful assistant specialized in ${scenarioId}.\nPlease analyze the given task carefully and provide a comprehensive solution.`;
      const initialCandidate: PromptCandidate = {
        id: `${scenarioId}-seed`,
        content: seedPrompt,
        generation: 0,
        taskPerformance: new Map(),
        averageScore: 0,
        rolloutCount: 0,
        createdAt: new Date(),
        lastEvaluated: new Date(),
        mutationType: 'initial',
      };

      // Step 2: Add to Pareto frontier
      await this.config.paretoFrontier.addCandidate(initialCandidate);

      // Step 3: Generate mutations
      const taskContext: TaskContext = {
        taskId: `${scenarioId}-task`,
        description: `Test task for scenario ${scenarioId}`,
        category: 'integration-test',
        difficulty: 'medium',
        requiredCapabilities: ['reasoning', 'analysis'],
        expectedDuration: 30,
      };
      const mutations = await this.config.promptMutator.generateAdaptiveMutations(
        initialCandidate,
        taskContext
      );

      // Step 4: Evaluate mutations (capped at five to keep the test fast)
      const evaluatedCandidates: PromptCandidate[] = [];
      for (const mutation of mutations.slice(0, 5)) {
        mutation.averageScore = Math.random() * 0.5 + 0.5; // 0.5-1.0
        mutation.rolloutCount = 3;
        mutation.lastEvaluated = new Date();
        await this.config.paretoFrontier.addCandidate(mutation);
        evaluatedCandidates.push(mutation);
      }

      // Step 5: Create execution trajectories; keep only those that were saved
      const trajectories: ExecutionTrajectory[] = [];
      for (const candidate of evaluatedCandidates) {
        const trajectory = await this.createTrajectoryForCandidate(candidate, taskContext);
        const saveResult = await this.config.trajectoryStore.save(trajectory);
        if (saveResult.success) {
          trajectories.push(trajectory);
        }
      }

      // Step 6: Get best candidate from frontier
      const frontier = this.config.paretoFrontier.getFrontier();
      if (frontier.length === 0) {
        // reduce() without an initial value would throw an opaque TypeError here.
        throw new Error('Pareto frontier is empty after adding candidates');
      }
      const bestCandidate = frontier.reduce((best, current) =>
        current.candidate.averageScore > best.candidate.averageScore ? current : best
      ).candidate;

      const evolutionResult: EvolutionResult = {
        evolutionId: scenarioId,
        taskDescription: taskContext.description,
        generations: 1,
        bestPrompt: bestCandidate,
        convergenceAchieved: false,
        totalRollouts: evaluatedCandidates.reduce((total, c) => total + c.rolloutCount, 0),
        evolutionHistory: [evaluatedCandidates],
      };

      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        evolutionResult,
        trajectories,
        candidates: evaluatedCandidates,
      };
    } catch (error) {
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        metadata: { error: error instanceof Error ? error.message : 'Unknown error' },
      };
    }
  }

  /**
   * Builds a successful three-step trajectory (initialize, analyze, execute)
   * with a fixed score of 0.85. The trajectory is returned, not saved.
   *
   * @returns A fresh trajectory with a generated, unique id.
   */
  async createSampleTrajectory(): Promise<ExecutionTrajectory> {
    const trajectoryId = this.generateScenarioId('sample-trajectory');
    const steps: ExecutionStep[] = [
      {
        stepNumber: 1,
        action: 'initialize',
        reasoning: 'Initialize the execution context',
        timestamp: new Date(),
      },
      {
        stepNumber: 2,
        action: 'analyze',
        reasoning: 'Analyze the input parameters',
        toolName: 'analyzer',
        toolInput: { input: 'test data' },
        toolOutput: { analysis: 'completed' },
        timestamp: new Date(),
      },
      {
        stepNumber: 3,
        action: 'execute',
        reasoning: 'Execute the main task',
        toolName: 'executor',
        toolInput: { task: 'process' },
        toolOutput: { result: 'success' },
        timestamp: new Date(),
      },
    ];
    return {
      id: trajectoryId,
      promptId: `prompt-${trajectoryId}`,
      taskId: `task-${trajectoryId}`,
      timestamp: new Date(),
      steps,
      finalResult: {
        success: true,
        score: 0.85,
        output: { result: 'Sample trajectory executed successfully' },
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: 250,
      executionTime: 1500,
    };
  }

  /**
   * Builds ten candidates with random per-task scores for Pareto frontier
   * testing. Each candidate's `averageScore` is the mean of its three task
   * scores. The candidates are returned, not added to the frontier.
   *
   * @returns Ten candidates with ids `optimization-candidate-0` .. `-9`.
   */
  async createOptimizationCandidates(): Promise<PromptCandidate[]> {
    const candidates: PromptCandidate[] = [];
    const basePrompt = 'You are an AI assistant that helps with various tasks.';
    const laterMutationTypes = ['reflection', 'crossover', 'random'] as const;
    for (let i = 0; i < 10; i++) {
      const taskPerformance = new Map([
        ['task-1', Math.random() * 0.5 + 0.5], // 0.5-1.0
        ['task-2', Math.random() * 0.5 + 0.4], // 0.4-0.9
        ['task-3', Math.random() * 0.6 + 0.3], // 0.3-0.9
      ]);
      const scores = Array.from(taskPerformance.values());
      const candidate: PromptCandidate = {
        id: `optimization-candidate-${i}`,
        content: `${basePrompt} Variant ${i} with specific focus on efficiency and accuracy.`,
        generation: i % 3,
        taskPerformance,
        averageScore: scores.reduce((sum, score) => sum + score, 0) / scores.length,
        rolloutCount: Math.floor(Math.random() * 10) + 5, // 5-14
        createdAt: new Date(Date.now() - Math.random() * 86400000), // Last 24 hours
        lastEvaluated: new Date(),
        mutationType:
          i === 0
            ? 'initial'
            : (laterMutationTypes[i % 3] as PromptCandidate['mutationType']),
      };
      candidates.push(candidate);
    }
    return candidates;
  }

  /**
   * Builds three failed trajectories (timeout, invalid parameter, resource
   * exhaustion) and saves each one to the trajectory store.
   *
   * The ids are fixed strings, so repeated calls save trajectories with the
   * same ids again. The result of each save is not inspected.
   *
   * @returns The three trajectories, in the order listed above.
   */
  async createFailureTrajectories(): Promise<ExecutionTrajectory[]> {
    const trajectories: ExecutionTrajectory[] = [
      // Failure pattern 1: Timeout errors
      this.buildFailureTrajectory({
        name: 'timeout',
        setupAction: 'initialize',
        failingAction: 'long_operation',
        stepError: 'Operation timeout after 30 seconds',
        finalError: 'Timeout during execution',
        score: 0.1,
        executionTime: 30000,
      }),
      // Failure pattern 2: Invalid parameter errors
      this.buildFailureTrajectory({
        name: 'parameter',
        setupAction: 'validate_input',
        failingAction: 'process',
        stepError: 'Invalid parameter format: expected object, got string',
        finalError: 'Parameter validation failed',
        score: 0.2,
        executionTime: 500,
      }),
      // Failure pattern 3: Resource exhaustion
      this.buildFailureTrajectory({
        name: 'resource',
        setupAction: 'allocate_memory',
        failingAction: 'process_data',
        stepError: 'Out of memory: unable to allocate additional resources',
        finalError: 'Resource exhaustion',
        score: 0.0,
        executionTime: 2000,
      }),
    ];

    // Save trajectories to store
    for (const trajectory of trajectories) {
      await this.config.trajectoryStore.save(trajectory);
    }
    return trajectories;
  }

  /**
   * Adds the ten candidates from `createOptimizationCandidates()` to the
   * Pareto frontier, one at a time.
   */
  async populateParetoFrontier(): Promise<void> {
    const candidates = await this.createOptimizationCandidates();
    for (const candidate of candidates) {
      await this.config.paretoFrontier.addCandidate(candidate);
    }
  }

  /**
   * Adds a single randomly scored candidate to the frontier and then waits
   * 500-1500 ms to simulate processing. Intended to be run several times in
   * parallel with different indexes.
   *
   * @param index Distinguishes this run; embedded in the scenario id and prompt.
   * @returns A result containing the one candidate, or `success: false` with
   *   `metadata.error` if anything threw.
   */
  async createConcurrentEvolutionScenario(index: number): Promise<ScenarioResult> {
    const scenarioId = this.generateScenarioId(`concurrent-evolution-${index}`);
    const startTime = Date.now();
    try {
      const seedPrompt = `Concurrent evolution scenario ${index}: You are a specialized assistant for task ${index}.`;
      // One draw feeds both fields so the average agrees with the only task score.
      const score = Math.random() * 0.5 + 0.5; // 0.5-1.0
      const candidate: PromptCandidate = {
        id: `${scenarioId}-candidate`,
        content: seedPrompt,
        generation: 0,
        taskPerformance: new Map([['concurrent-task', score]]),
        averageScore: score,
        rolloutCount: 5,
        createdAt: new Date(),
        lastEvaluated: new Date(),
        mutationType: 'initial',
      };
      await this.config.paretoFrontier.addCandidate(candidate);

      // Simulate some processing time
      await this.sleep(Math.random() * 1000 + 500); // 500-1500ms

      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        candidates: [candidate],
      };
    } catch (error) {
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        metadata: { error: error instanceof Error ? error.message : 'Unknown error' },
      };
    }
  }

  /**
   * Builds `populationSize` randomly scored candidates for performance
   * testing. Only every tenth candidate (indexes 0, 10, 20, ...) is added to
   * the Pareto frontier; all of them are returned in the result.
   *
   * @param populationSize Number of candidates to create.
   * @returns A result listing every created candidate, or `success: false`
   *   with `metadata.error` if anything threw.
   */
  async createLargePopulationScenario(populationSize: number): Promise<ScenarioResult> {
    const scenarioId = this.generateScenarioId('large-population');
    const startTime = Date.now();
    try {
      const candidates: PromptCandidate[] = [];
      for (let i = 0; i < populationSize; i++) {
        // One draw feeds both fields so the average agrees with the only task score.
        const score = Math.random() * 0.5 + 0.5; // 0.5-1.0
        const candidate: PromptCandidate = {
          id: `${scenarioId}-candidate-${i}`,
          content: `Large population test candidate ${i}: Specialized prompt for task ${i % 10}.`,
          generation: Math.floor(i / 20),
          taskPerformance: new Map([[`task-${i % 5}`, score]]),
          averageScore: score,
          rolloutCount: Math.floor(Math.random() * 5) + 1,
          createdAt: new Date(),
          lastEvaluated: new Date(),
          mutationType: i === 0 ? 'initial' : 'random',
        };
        candidates.push(candidate);

        // Only a one-in-ten sample reaches the frontier, to avoid overwhelming it.
        if (i % 10 === 0) {
          await this.config.paretoFrontier.addCandidate(candidate);
        }
      }
      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        candidates,
      };
    } catch (error) {
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        metadata: { error: error instanceof Error ? error.message : 'Unknown error' },
      };
    }
  }

  /**
   * Saves 1000 sample trajectories whose steps each carry a 1000-character
   * payload, to put memory pressure on the trajectory store.
   *
   * A serialized copy of every tenth trajectory is additionally held in a
   * local array for the duration of the call; it is released on return.
   * Errors from the store propagate to the caller.
   */
  async createHighMemoryLoadScenario(): Promise<void> {
    const retained: string[] = [];
    for (let i = 0; i < 1000; i++) {
      const trajectory = await this.createSampleTrajectory();
      trajectory.id = `memory-load-trajectory-${i}`;

      // Add large payload to steps
      for (const step of trajectory.steps) {
        step.toolOutput = {
          largeData: 'x'.repeat(1000), // 1000 characters per step
          index: i,
          timestamp: Date.now(),
        };
      }
      await this.config.trajectoryStore.save(trajectory);

      // Keep some in memory to test memory management
      if (i % 10 === 0) {
        retained.push(JSON.stringify(trajectory));
      }
    }
  }

  /**
   * Creates (and saves) the failure trajectories, runs the reflection
   * engine's batch analysis over them and reshapes the batch output into a
   * `ReflectionAnalysis`.
   *
   * @returns A result with the analysis and the analysed trajectories, or
   *   `success: false` with `metadata.error` if anything threw.
   */
  async createReflectionAnalysisScenario(): Promise<ScenarioResult> {
    const startTime = Date.now();
    try {
      // Create failure trajectories
      const failureTrajectories = await this.createFailureTrajectories();

      // Perform batch analysis
      const analysisResult = await this.config.reflectionEngine.analyzeBatch(failureTrajectories);

      // Convert the batch result to the ReflectionAnalysis format. The ids are
      // taken from the first trajectory; confidence is a fixed 0.8.
      const reflectionAnalysis: ReflectionAnalysis = {
        trajectoryId: failureTrajectories[0]?.id || 'batch-analysis',
        promptId: failureTrajectories[0]?.promptId || 'batch-prompt',
        diagnosis: {
          failurePoint: 'Multiple trajectory failures',
          rootCause: analysisResult.commonPatterns.join(', '),
          moduleResponsibility: new Map<string, number>(),
          patterns: [],
        },
        suggestions: analysisResult.recommendations,
        confidence: 0.8,
        rationale: 'Batch analysis of failure trajectories to identify common patterns',
      };

      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        analysisResult: reflectionAnalysis,
        trajectories: failureTrajectories,
      };
    } catch (error) {
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        metadata: { error: error instanceof Error ? error.message : 'Unknown error' },
      };
    }
  }

  /**
   * Replaces one method of a collaborator with a stub that always rejects.
   *
   * The replaced implementation is kept on the collaborator itself, as
   * `_originalEvaluatePrompt` (LLM adapter) or `_originalSave` (trajectory
   * store). The backup is written only when none exists yet, so calling this
   * repeatedly never overwrites the real implementation with the stub.
   *
   * @param componentName `'llmAdapter'` or `'trajectoryStore'`.
   * @throws Error if `componentName` is anything else.
   */
  async simulateComponentFailure(componentName: string): Promise<void> {
    switch (componentName) {
      case 'llmAdapter': {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- backup slot is not on the declared type
        const backup = this.config.llmAdapter as any;
        if (backup._originalEvaluatePrompt === undefined) {
          backup._originalEvaluatePrompt = this.config.llmAdapter.evaluatePrompt;
        }
        this.config.llmAdapter.evaluatePrompt = async () => {
          throw new Error('Simulated LLM adapter failure');
        };
        break;
      }
      case 'trajectoryStore': {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- backup slot is not on the declared type
        const backup = this.config.trajectoryStore as any;
        if (backup._originalSave === undefined) {
          backup._originalSave = this.config.trajectoryStore.save;
        }
        this.config.trajectoryStore.save = async () => {
          throw new Error('Simulated trajectory store failure');
        };
        break;
      }
      default:
        throw new Error(`Unknown component: ${componentName}`);
    }
  }

  /**
   * Adds 200 candidates (`capacity-test-0` .. `capacity-test-199`) to the
   * Pareto frontier to push it towards capacity.
   *
   * Candidates are derived round-robin from the ten optimization candidates
   * and given a fresh random `averageScore`. Each one receives its own copy
   * of the template's `taskPerformance` map, so changing one candidate's map
   * cannot affect the others built from the same template.
   */
  async fillMemoryToCapacity(): Promise<void> {
    const templates = await this.createOptimizationCandidates();
    for (let i = 0; i < 200; i++) {
      const template = templates[i % templates.length];
      const candidate: PromptCandidate = {
        id: `capacity-test-${i}`,
        content: template?.content ?? 'Test prompt content',
        generation: template?.generation ?? 0,
        taskPerformance: new Map(template?.taskPerformance),
        averageScore: Math.random(),
        rolloutCount: template?.rolloutCount ?? 0,
        createdAt: template?.createdAt ?? new Date(),
        lastEvaluated: template?.lastEvaluated ?? new Date(),
        mutationType: template?.mutationType ?? 'initial',
        ...(template?.parentId ? { parentId: template.parentId } : {}),
      };
      await this.config.paretoFrontier.addCandidate(candidate);
    }
  }

  /**
   * Tries to save one trajectory whose fields have the wrong types (null
   * prompt id, string timestamp, non-array steps, and so on).
   *
   * A rejection from the store is deliberately ignored: whether the store
   * accepts or rejects the record is what the calling test observes.
   */
  async simulateMemoryCorruption(): Promise<void> {
    const corruptTrajectory = {
      id: 'corrupt-trajectory',
      promptId: null, // Invalid: should be string
      taskId: 'valid-task',
      timestamp: 'invalid-date', // Invalid: should be Date
      steps: 'not-an-array', // Invalid: should be array
      finalResult: {
        success: 'maybe', // Invalid: should be boolean
        score: 'high', // Invalid: should be number
        output: undefined,
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: -1, // Invalid: should be positive
      executionTime: 'long', // Invalid: should be number
    };
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- intentionally ill-typed payload
      await this.config.trajectoryStore.save(corruptTrajectory as any);
    } catch {
      // Expected to fail - this simulates corruption in storage
    }
  }

  /**
   * Starts 100 memory-heavy store operations concurrently and waits for all
   * of them to settle, so none is still running when the result is returned.
   *
   * @returns `success: true, errorHandled: false` when every operation
   *   succeeded; otherwise `success: false, errorHandled: true` with the
   *   message of the first operation that rejected in `metadata.error`.
   *   `metadata.scenarioId` identifies the run in both cases.
   */
  async createResourceExhaustionScenario(): Promise<ScenarioResult> {
    const scenarioId = this.generateScenarioId('resource-exhaustion');
    const startTime = Date.now();
    // Use scenarioId to track this scenario
    const metadata = { scenarioId };
    try {
      // Rejections are recorded in the order they happen instead of
      // short-circuiting, so the remaining operations are awaited too.
      const failures: unknown[] = [];
      const operations: Promise<void>[] = [];
      for (let i = 0; i < 100; i++) {
        operations.push(
          this.createLargeMemoryOperation(i).catch((reason: unknown) => {
            failures.push(reason);
          })
        );
      }
      await Promise.all(operations);
      if (failures.length > 0) {
        throw failures[0];
      }
      return {
        success: true,
        completed: true,
        executionTime: Date.now() - startTime,
        errorHandled: false,
        metadata,
      };
    } catch (error) {
      // Expected to fail due to resource exhaustion
      return {
        success: false,
        completed: true,
        executionTime: Date.now() - startTime,
        errorHandled: true,
        metadata: { ...metadata, error: error instanceof Error ? error.message : 'Resource exhaustion' },
      };
    }
  }

  /**
   * Tries to save four sample trajectories (`corrupt-0` .. `corrupt-3`), each
   * with one field overwritten by a value of the wrong type.
   *
   * A rejection from the store is deliberately ignored, because some
   * corruptions may be caught during the save.
   */
  async introduceDataCorruption(): Promise<void> {
    const corruptionTypes = [
      { field: 'promptId', value: null },
      { field: 'timestamp', value: 'invalid-date' },
      { field: 'steps', value: 'not-array' },
      { field: 'finalResult.score', value: 'not-number' },
    ];
    for (let i = 0; i < corruptionTypes.length; i++) {
      const corruption = corruptionTypes[i];
      if (corruption === undefined) {
        continue;
      }
      const trajectory = await this.createSampleTrajectory();
      trajectory.id = `corrupt-${i}`;

      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- intentionally writing ill-typed values
      const target = trajectory as any;
      if (corruption.field === 'finalResult.score') {
        // The only nested field; every other entry names a top-level property.
        target.finalResult.score = corruption.value;
      } else {
        target[corruption.field] = corruption.value;
      }

      try {
        await this.config.trajectoryStore.save(target);
      } catch {
        // Some corruptions might be caught during save
      }
    }
  }

  /**
   * Builds a two-step trajectory describing `candidate` running `taskContext`.
   * The final score is the candidate's `averageScore`, and the run counts as
   * successful only when that score is above 0.5. Token count and execution
   * time are random.
   *
   * @param candidate Candidate the trajectory is attributed to.
   * @param taskContext Task the trajectory is attributed to.
   * @returns The trajectory; it is not saved here.
   */
  private async createTrajectoryForCandidate(
    candidate: PromptCandidate,
    taskContext: TaskContext
  ): Promise<ExecutionTrajectory> {
    const steps: ExecutionStep[] = [
      {
        stepNumber: 1,
        action: 'prompt_initialization',
        reasoning: `Initialize with prompt: ${candidate.id}`,
        timestamp: new Date(),
      },
      {
        stepNumber: 2,
        action: 'task_execution',
        reasoning: `Execute task: ${taskContext.taskId}`,
        toolName: 'task_executor',
        toolInput: { prompt: candidate.content, task: taskContext },
        toolOutput: { result: 'success', score: candidate.averageScore },
        timestamp: new Date(),
      },
    ];
    return {
      id: `trajectory-${candidate.id}-${taskContext.taskId}`,
      promptId: candidate.id,
      taskId: taskContext.taskId,
      timestamp: new Date(),
      steps,
      finalResult: {
        success: candidate.averageScore > 0.5,
        score: candidate.averageScore,
        output: { candidateId: candidate.id, taskId: taskContext.taskId },
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: Math.floor(Math.random() * 500) + 100,
      executionTime: Math.floor(Math.random() * 2000) + 500,
    };
  }

  /**
   * Builds one failed two-step trajectory: a setup step followed by a step
   * that carries an error. Token count is 0 and output is null.
   *
   * @param spec Values that differ between failure patterns. `name` is
   *   embedded in the trajectory, prompt and task ids.
   * @returns The trajectory; it is not saved here.
   */
  private buildFailureTrajectory(spec: {
    name: string;
    setupAction: string;
    failingAction: string;
    stepError: string;
    finalError: string;
    score: number;
    executionTime: number;
  }): ExecutionTrajectory {
    return {
      id: `trajectory-${spec.name}-failure`,
      promptId: `prompt-${spec.name}-test`,
      taskId: `task-${spec.name}-test`,
      timestamp: new Date(),
      steps: [
        {
          stepNumber: 1,
          action: spec.setupAction,
          timestamp: new Date(),
        },
        {
          stepNumber: 2,
          action: spec.failingAction,
          error: spec.stepError,
          timestamp: new Date(),
        },
      ],
      finalResult: {
        success: false,
        score: spec.score,
        output: null,
        error: spec.finalError,
      },
      llmCalls: [],
      toolCalls: [],
      totalTokens: 0,
      executionTime: spec.executionTime,
    };
  }

  /**
   * Allocates a 10,000-item array, waits up to 100 ms, then saves a sample
   * trajectory with that array attached as `largeData`.
   *
   * @param index Distinguishes this operation; embedded in the item and
   *   trajectory ids.
   */
  private async createLargeMemoryOperation(index: number): Promise<void> {
    // Create memory-intensive operation
    const largeArray = Array.from({ length: 10000 }, (_, i) => ({
      id: `memory-item-${index}-${i}`,
      data: 'x'.repeat(100), // 100 characters each
      timestamp: Date.now(),
    }));

    // Simulate processing
    await this.sleep(Math.random() * 100);

    // Try to store (might fail due to memory pressure)
    const trajectory = await this.createSampleTrajectory();
    trajectory.id = `memory-operation-${index}`;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- largeData is not part of the trajectory type
    (trajectory as any).largeData = largeArray;
    await this.config.trajectoryStore.save(trajectory);
  }

  /**
   * Generates a scenario id of the form `<prefix>-<epoch ms>-<counter>`. The
   * per-instance counter keeps ids distinct within the same millisecond.
   *
   * @param prefix Leading part of the id.
   */
  private generateScenarioId(prefix: string): string {
    return `${prefix}-${Date.now()}-${++this.scenarioCounter}`;
  }

  /**
   * Resolves after `ms` milliseconds.
   *
   * @param ms Delay passed to `setTimeout`.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
// Note: TestScenarios and ScenarioResult are already exported