// prompt-evolution.test.ts (10.4 kB)
// prompt-evolution.test.ts - Tests for Evolution Engine
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { EvolutionEngine } from './prompt-evolution';
import type {
EvolutionConfig,
EvolutionResult,
PromptCandidate,
ExecutionTrajectory,
TaskContext,
StartEvolutionParams
} from '../types/gepa';
import { ParetoFrontier, type ParetoFrontierConfig } from './pareto-frontier';
import { ReflectionEngine, type ReflectionEngineDependencies } from './reflection-engine';
import { PromptMutator, type PromptMutatorConfig } from '../services/prompt-mutator';
import { LLMAdapter } from '../services/llm-adapter';
import { TrajectoryStore } from './trajectory-store';
// Mock module-level dependencies so importing './prompt-evolution' never
// pulls in real implementations (the engine also receives injected mocks
// in beforeEach, so these module mocks are a safety net).
// NOTE: specifiers must match the import statements above (extensionless)
// so vitest maps the mock onto the same resolved module id.
vi.mock('../services/llm-adapter');
vi.mock('./pareto-frontier');
vi.mock('./reflection-engine');
vi.mock('../services/prompt-mutator');
vi.mock('./trajectory-store');
// Test suite for EvolutionEngine: verifies construction/validation,
// convergence detection, best-candidate selection, history management,
// and metric reporting. All collaborators are injected as mocks.
describe('EvolutionEngine', () => {
// System under test, rebuilt in beforeEach.
let evolutionEngine: EvolutionEngine;
// Hand-rolled mock collaborators. Typed `any` so individual vi.fn()
// members can be configured freely per test (the real interfaces are
// mocked at module level above).
let mockLLMAdapter: any;
let mockParetoFrontier: any;
let mockReflectionEngine: any;
let mockPromptMutator: any;
let mockTrajectoryStore: any;
// Shared fixtures, reset in beforeEach.
let config: EvolutionConfig;
let samplePromptCandidate: PromptCandidate;
let sampleTrajectory: ExecutionTrajectory;
let sampleTaskContext: TaskContext;
// Rebuild every mock and fixture from scratch so configuration done by
// one test cannot leak into the next.
beforeEach(() => {
// Create proper mocks with all required methods
// Each collaborator is a plain object of vi.fn() stubs; the engine gets
// them via constructor injection at the bottom of this hook.
mockLLMAdapter = {
evaluatePrompt: vi.fn(),
analyzeTrajectory: vi.fn(),
callLLM: vi.fn(),
generateMutation: vi.fn(),
shutdown: vi.fn()
};
mockTrajectoryStore = {
save: vi.fn(),
query: vi.fn(),
get: vi.fn(),
delete: vi.fn()
};
mockParetoFrontier = {
addCandidate: vi.fn(),
sampleCandidate: vi.fn(),
getFrontier: vi.fn(),
getConvergenceMetrics: vi.fn(),
isDominated: vi.fn(),
getDominatedCandidates: vi.fn(),
computeHypervolume: vi.fn(),
optimizeArchive: vi.fn(),
size: vi.fn(),
clear: vi.fn(),
getStatistics: vi.fn()
};
// The reflection engine mock also carries a config object because the
// engine presumably reads it directly — TODO confirm against
// ReflectionEngineDependencies.
mockReflectionEngine = {
analyzeTrajectory: vi.fn(),
analyzeBatch: vi.fn(),
findPatternsForPrompt: vi.fn(),
config: {
maxAnalysisDepth: 5,
confidenceThreshold: 0.8,
patternMinFrequency: 3,
batchSize: 10,
enableCaching: false,
cacheTimeout: 3600000
}
};
mockPromptMutator = {
generateReflectiveMutations: vi.fn(),
generateCrossoverMutations: vi.fn(),
generateAdaptiveMutations: vi.fn(),
validatePromptStructure: vi.fn(),
validatePromptLength: vi.fn(),
validateContentSafety: vi.fn(),
validateEssentialComponents: vi.fn(),
applyMutationConstraints: vi.fn(),
calculateMutationDivergence: vi.fn(),
validateMutationDivergence: vi.fn(),
calculateGeneticDiversity: vi.fn(),
calculateInbreedingCoefficient: vi.fn(),
shouldPreventCrossover: vi.fn(),
calculateGenerationStatistics: vi.fn(),
getResourceUsageMetrics: vi.fn(),
shutdown: vi.fn()
};
// Setup config
// Small run parameters keep evolution tests fast; the defaults test
// below expects maxGenerations 10 / populationSize 20 / mutationRate 0.4
// when these are omitted.
config = {
taskDescription: 'Test task',
seedPrompt: 'You are a helpful assistant',
maxGenerations: 5,
populationSize: 10,
mutationRate: 0.3
};
// Generation-0 candidate reused (shallow-copied with spread) across tests.
samplePromptCandidate = {
id: 'prompt-1',
content: 'You are a helpful assistant',
generation: 0,
taskPerformance: new Map([['test-task', 0.8]]),
averageScore: 0.8,
rolloutCount: 5,
createdAt: new Date(),
lastEvaluated: new Date(),
mutationType: 'initial'
};
// A completed, successful execution trace tied to samplePromptCandidate.
sampleTrajectory = {
id: 'trajectory-1',
promptId: 'prompt-1',
taskId: 'test-task',
context: { description: 'test', category: 'testing', difficulty: 'medium', requiredCapabilities: [], expectedDuration: 30 },
steps: [],
finalResult: { success: true, score: 0.8, output: 'success', error: null },
createdAt: new Date(),
completedAt: new Date()
};
sampleTaskContext = {
description: 'Test task',
category: 'testing',
difficulty: 'medium',
requiredCapabilities: ['reasoning'],
expectedDuration: 30
};
// Setup mock behaviors
// Happy-path defaults: every collaborator succeeds and returns the
// shared fixtures, so individual tests only override what they need.
mockParetoFrontier.addCandidate.mockResolvedValue(true);
mockParetoFrontier.sampleCandidate.mockResolvedValue(samplePromptCandidate);
mockParetoFrontier.getFrontier.mockReturnValue([{
candidate: samplePromptCandidate,
objectives: new Map([['performance', 0.8]]),
dominationCount: 0,
rank: 1
}]);
// Mid-range metrics: deliberately NOT converged (cf. detectConvergence tests).
mockParetoFrontier.getConvergenceMetrics.mockReturnValue({
diversity: 0.5,
spacing: 0.4,
spread: 0.6,
hypervolume: 0.7,
generationalDistance: 0.3
});
// Every mutation strategy yields the same single candidate by default.
mockPromptMutator.generateReflectiveMutations.mockResolvedValue([samplePromptCandidate]);
mockPromptMutator.generateCrossoverMutations.mockResolvedValue([samplePromptCandidate]);
mockPromptMutator.generateAdaptiveMutations.mockResolvedValue([samplePromptCandidate]);
mockPromptMutator.calculateGeneticDiversity.mockReturnValue(0.5);
mockReflectionEngine.analyzeBatch.mockResolvedValue({
trajectoryIds: ['trajectory-1'],
commonPatterns: [],
recommendations: [],
overallConfidence: 0.8
});
mockLLMAdapter.evaluatePrompt.mockResolvedValue({
score: 0.8,
trajectory: sampleTrajectory,
metrics: { performance: 0.8, efficiency: 0.7 }
});
mockTrajectoryStore.save.mockResolvedValue(undefined);
mockTrajectoryStore.query.mockResolvedValue([]);
// Create evolution engine with mocked dependencies
evolutionEngine = new EvolutionEngine({
llmAdapter: mockLLMAdapter,
paretoFrontier: mockParetoFrontier,
reflectionEngine: mockReflectionEngine,
promptMutator: mockPromptMutator,
trajectoryStore: mockTrajectoryStore,
config
});
});
describe('constructor', () => {
  // Builds the standard dependency bundle; callers override single
  // entries to exercise validation and defaulting paths.
  const buildDeps = (overrides: Record<string, unknown> = {}): any => ({
    llmAdapter: mockLLMAdapter,
    paretoFrontier: mockParetoFrontier,
    reflectionEngine: mockReflectionEngine,
    promptMutator: mockPromptMutator,
    trajectoryStore: mockTrajectoryStore,
    config,
    ...overrides
  });

  it('should initialize with valid dependencies', () => {
    expect(evolutionEngine).toBeDefined();
    expect(evolutionEngine.config).toEqual(expect.objectContaining(config));
  });

  it('should validate required dependencies', () => {
    const constructWithoutAdapter = () =>
      new EvolutionEngine(buildDeps({ llmAdapter: null as any }));
    expect(constructWithoutAdapter).toThrow('LLM adapter is required');
  });

  it('should set default configuration values', () => {
    const engineWithDefaults = new EvolutionEngine(
      buildDeps({ config: { taskDescription: 'test' } })
    );
    expect(engineWithDefaults.config.maxGenerations).toBe(10);
    expect(engineWithDefaults.config.populationSize).toBe(20);
    expect(engineWithDefaults.config.mutationRate).toBe(0.4);
  });
});
describe('detectConvergence', () => {
  // Shared metric fixtures: one stagnant population (should converge)
  // and one still-diverse population (should not).
  const stagnantMetrics = {
    diversity: 0.01,
    spacing: 0.02,
    spread: 0.01,
    hypervolume: 0.95,
    generationalDistance: 0.05
  };
  const diverseMetrics = {
    diversity: 0.5,
    spacing: 0.4,
    spread: 0.6,
    hypervolume: 0.7,
    generationalDistance: 0.3
  };

  it('should detect convergence based on diversity metrics', () => {
    expect(evolutionEngine.detectConvergence(stagnantMetrics, 3)).toBe(true);
  });

  it('should not detect convergence with high diversity', () => {
    expect(evolutionEngine.detectConvergence(diverseMetrics, 3)).toBe(false);
  });

  it('should require minimum generations before convergence', () => {
    // Same stagnant metrics, but at generation 1 it is too early to stop.
    expect(evolutionEngine.detectConvergence(stagnantMetrics, 1)).toBe(false);
  });
});
describe('getBestCandidate', () => {
  it('should return highest scoring candidate', () => {
    // Derive three candidates from a score table; c2 is the winner.
    const scoreTable: Array<[string, number]> = [
      ['c1', 0.7],
      ['c2', 0.9],
      ['c3', 0.8]
    ];
    const candidates = scoreTable.map(([id, averageScore]) => ({
      ...samplePromptCandidate,
      id,
      averageScore
    }));
    const winner = evolutionEngine.getBestCandidate(candidates);
    expect(winner.id).toBe('c2');
    expect(winner.averageScore).toBe(0.9);
  });

  it('should handle empty candidate list', () => {
    const selectFromNone = () => evolutionEngine.getBestCandidate([]);
    expect(selectFromNone).toThrow('No candidates available');
  });
});
describe('updateEvolutionHistory', () => {
  it('should track evolution progress over generations', () => {
    const history: PromptCandidate[][] = [];
    const latestGeneration = [samplePromptCandidate];
    evolutionEngine.updateEvolutionHistory(history, latestGeneration, 1);
    expect(history).toHaveLength(1);
    expect(history[0]).toEqual(latestGeneration);
  });

  it('should limit history size to prevent memory issues', () => {
    // Start from an already-oversized history and verify it gets capped.
    const oversizedHistory: PromptCandidate[][] = Array.from(
      { length: 100 },
      () => [samplePromptCandidate]
    );
    evolutionEngine.updateEvolutionHistory(oversizedHistory, [samplePromptCandidate], 101);
    expect(oversizedHistory.length).toBeLessThanOrEqual(50);
  });
});
describe('getEvolutionMetrics', () => {
  it('should return comprehensive evolution metrics', () => {
    const singleGenerationHistory = [[samplePromptCandidate]];
    const metrics = evolutionEngine.getEvolutionMetrics(singleGenerationHistory, 5);
    // Shape check only: exact values depend on engine internals.
    const expectedShape = {
      totalGenerations: expect.any(Number),
      totalRollouts: expect.any(Number),
      bestScore: expect.any(Number),
      averageScore: expect.any(Number),
      convergenceMetrics: expect.any(Object)
    };
    expect(metrics).toEqual(expect.objectContaining(expectedShape));
  });

  it('should handle empty history', () => {
    const metrics = evolutionEngine.getEvolutionMetrics([], 0);
    expect(metrics.totalGenerations).toBe(0);
    expect(metrics.totalRollouts).toBe(0);
  });
});
});