// prompt-mutator.test.ts
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { EventEmitter } from 'events';
// Mock the LLM adapter module so PromptMutator never calls a real model.
// NOTE(review): the mock targets './llm-adapter.js' while the import below
// uses the extensionless './llm-adapter' — these only resolve to the same
// module if the resolver maps the bare specifier onto the .js path; confirm
// the mock is actually applied under the project's vitest/resolver config.
vi.mock('./llm-adapter.js', () => ({
  LLMAdapter: vi.fn(),
}));
import { PromptMutator } from './prompt-mutator';
import { LLMAdapter } from './llm-adapter';
import {
PromptCandidate,
ReflectionAnalysis,
ExecutionTrajectory,
} from '../types/gepa';
// Mock LLM adapter class.
// Stand-in for LLMAdapter: an EventEmitter exposing the methods the mutator
// calls, each replaced by a vi.fn() stub so tests can script responses and
// inspect call arguments. The explicit empty constructor was removed — a
// derived class without one gets an implicit constructor forwarding to
// super(), so it was pure boilerplate (ESLint no-useless-constructor).
class MockLLMAdapter extends EventEmitter {
  public callLLM = vi.fn();            // raw completion calls (crossover/adaptive paths)
  public analyzeTrajectory = vi.fn();  // reflective-analysis step
  public generateMutation = vi.fn();   // reflective-generation step
  public generateResponse = vi.fn();   // not referenced by current tests; kept for interface parity
  public shutdown = vi.fn();           // asserted by the shutdown/cleanup test
}
describe('PromptMutator', () => {
  let mutator: PromptMutator;
  let mockLLM: MockLLMAdapter;
  // Extended PromptCandidate interface for testing.
  // Adds the bookkeeping fields these tests assert on (lineage, mutation
  // history, task context) that are not part of the base PromptCandidate.
  interface ExtendedPromptCandidate extends PromptCandidate {
    lineage?: string[];
    mutationHistory?: Array<{
      strategy: string;
      parents?: string[];
      timestamp: Date;
    }>;
    // any: shape varies per task — see the adaptive-mutation tests.
    taskContext?: any;
  }
  // Test data fixtures.
  // Baseline first-generation candidate; most tests derive variants via spread.
  const mockPromptCandidate: ExtendedPromptCandidate = {
    id: 'prompt-123',
    content: 'You are a helpful AI assistant. Solve the given task step by step.',
    generation: 1,
    taskPerformance: new Map([
      ['task1', 0.85],
      ['task2', 0.70],
    ]),
    averageScore: 0.775, // mean of the two task scores above
    rolloutCount: 5,
    createdAt: new Date('2024-01-01'),
    lastEvaluated: new Date('2024-01-02'),
    mutationType: 'initial',
    lineage: [],
    mutationHistory: [],
  };
  // Failed execution trace for mockPromptCandidate on task1: step 2 errors
  // with 'Missing key information' and the final score is 0.3. Drives the
  // reflective-mutation tests, which analyze failures to propose fixes.
  const mockExecutionTrajectory: ExecutionTrajectory = {
    id: 'traj-456',
    promptId: 'prompt-123',
    taskId: 'task1',
    timestamp: new Date('2024-01-02'),
    steps: [
      {
        stepNumber: 1,
        action: 'analyze_problem',
        reasoning: 'Breaking down the problem',
        timestamp: new Date('2024-01-02'),
      },
      {
        stepNumber: 2,
        action: 'generate_solution',
        error: 'Missing key information', // the failure the reflection should diagnose
        timestamp: new Date('2024-01-02'),
      },
    ],
    finalResult: {
      success: false,
      score: 0.3,
      output: 'Incomplete solution',
      error: 'Missing key information',
    },
    llmCalls: [],
    toolCalls: [],
    totalTokens: 250,
    executionTime: 15000,
  };
  // Canned reflection result for the trajectory above: diagnoses an
  // information-gathering failure and proposes one high-impact (0.8)
  // 'add_instruction' suggestion with confidence 0.85. Used both directly
  // and as a spread base for the confidence/impact filtering tests.
  const mockReflectionAnalysis: ReflectionAnalysis = {
    trajectoryId: 'traj-456',
    promptId: 'prompt-123',
    diagnosis: {
      failurePoint: 'Step 2: generate_solution',
      rootCause: 'Insufficient guidance for information gathering',
      moduleResponsibility: new Map([['reasoning', 0.7], ['execution', 0.3]]),
      patterns: [
        {
          type: 'missing_information',
          frequency: 3,
          description: 'Agent frequently fails to gather required information',
          examples: ['Missing key details', 'Incomplete analysis'],
        },
      ],
    },
    suggestions: [
      {
        type: 'add_instruction',
        targetSection: 'problem_analysis',
        proposedChange: 'Add explicit instruction to identify and gather all required information before proceeding',
        rationale: 'This will help prevent incomplete solutions due to missing information',
        expectedImpact: 0.8,
      },
    ],
    confidence: 0.85,
    rationale: 'Clear pattern of information gathering failures observed',
  };
  beforeEach(() => {
    vi.clearAllMocks();
    mockLLM = new MockLLMAdapter();
    // Mock the LLMAdapter constructor to return our mockLLM instance,
    // so the mutator under test talks to the stub, never a real adapter.
    (LLMAdapter as any).mockImplementation(() => mockLLM);
    mutator = new PromptMutator();
    // Set up default happy-path mock implementations so tests that don't
    // re-stub still get non-empty results instead of empty arrays.
    mockLLM.analyzeTrajectory.mockResolvedValue(mockReflectionAnalysis);
    mockLLM.generateMutation.mockResolvedValue('Generated mutation content');
    mockLLM.callLLM.mockResolvedValue({
      content: 'Generated LLM response',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date()
    });
  });
  afterEach(() => {
    // Optional chaining: mutator may be undefined if beforeEach threw.
    mutator?.shutdown();
  });
describe('Initialization', () => {
  // Defaults: 5 mutations per generation and all three strategies registered.
  it('should initialize with default configuration', () => {
    expect(mutator).toBeDefined();
    expect(mutator.config.maxMutationsPerGeneration).toBe(5);
    for (const strategy of ['reflective', 'crossover', 'adaptive']) {
      expect(mutator.mutationStrategies).toContain(strategy);
    }
  });
  // Every override passed to the constructor must land on .config verbatim.
  it('should accept custom configuration', () => {
    const custom = new PromptMutator({
      maxMutationsPerGeneration: 10,
      mutationRate: 0.7,
      crossoverRate: 0.4,
      adaptiveWeight: 0.8,
      fitnessThreshold: 0.9,
    });
    const { config } = custom;
    expect(config.maxMutationsPerGeneration).toBe(10);
    expect(config.mutationRate).toBe(0.7);
    expect(config.crossoverRate).toBe(0.4);
    expect(config.adaptiveWeight).toBe(0.8);
    expect(config.fitnessThreshold).toBe(0.9);
  });
  // Out-of-range values must be rejected with a descriptive error.
  it('should validate configuration parameters', () => {
    const invalidCases = [
      { overrides: { mutationRate: 1.5 }, message: 'Mutation rate must be between 0 and 1' },
      { overrides: { crossoverRate: -0.1 }, message: 'Crossover rate must be between 0 and 1' },
      { overrides: { maxMutationsPerGeneration: 0 }, message: 'Max mutations per generation must be positive' },
    ];
    for (const { overrides, message } of invalidCases) {
      expect(() => new PromptMutator(overrides)).toThrow(message);
    }
  });
});
// Reflection pipeline: analyzeTrajectory() yields a diagnosis + suggestions,
// then generateMutation() rewrites the prompt from a chosen suggestion.
describe('Reflective Mutation', () => {
  it('should perform reflective mutation based on trajectory analysis', async () => {
    // Mock LLM responses
    mockLLM.analyzeTrajectory.mockResolvedValue(mockReflectionAnalysis);
    mockLLM.generateMutation.mockResolvedValue(
      'You are a helpful AI assistant. Before solving any task, first identify and gather all required information. Then solve the task step by step.'
    );
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    // The mutation carries the suggested change plus parent/generation metadata.
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content).toContain('identify and gather all required information');
    expect(mutations[0].mutationType).toBe('reflection');
    expect(mutations[0].parentId).toBe(mockPromptCandidate.id);
    expect(mutations[0].generation).toBe(mockPromptCandidate.generation + 1);
    // Verify LLM was called correctly
    expect(mockLLM.analyzeTrajectory).toHaveBeenCalledWith(
      mockExecutionTrajectory,
      mockPromptCandidate.content
    );
    expect(mockLLM.generateMutation).toHaveBeenCalledWith(
      mockPromptCandidate.content,
      mockReflectionAnalysis.suggestions[0]
    );
  });
  // One analysis call is expected per trajectory supplied.
  it('should handle multiple trajectories in reflective analysis', async () => {
    const secondTrajectory = { ...mockExecutionTrajectory, id: 'traj-789' };
    const trajectories = [mockExecutionTrajectory, secondTrajectory];
    mockLLM.analyzeTrajectory.mockResolvedValue(mockReflectionAnalysis);
    mockLLM.generateMutation.mockResolvedValue('Improved prompt content');
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      trajectories
    );
    expect(mockLLM.analyzeTrajectory).toHaveBeenCalledTimes(2);
    expect(mutations.length).toBeGreaterThan(0);
  });
  it('should prioritize high-confidence improvements', async () => {
    const lowConfidenceAnalysis = {
      ...mockReflectionAnalysis,
      confidence: 0.3,
    };
    const highConfidenceAnalysis = {
      ...mockReflectionAnalysis,
      confidence: 0.95,
      suggestions: [
        {
          type: 'add_constraint' as const,
          targetSection: 'execution',
          proposedChange: 'Add validation step',
          rationale: 'Prevents errors',
          expectedImpact: 0.9,
        },
      ],
    };
    // Order matters: first trajectory maps to the low-confidence analysis,
    // second to the high-confidence one.
    mockLLM.analyzeTrajectory
      .mockResolvedValueOnce(lowConfidenceAnalysis)
      .mockResolvedValueOnce(highConfidenceAnalysis);
    mockLLM.generateMutation.mockResolvedValue('High confidence mutation');
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory, { ...mockExecutionTrajectory, id: 'traj-2' }]
    );
    // Should prioritize high confidence suggestion
    expect(mutations).toHaveLength(1);
    expect(mockLLM.generateMutation).toHaveBeenCalledWith(
      mockPromptCandidate.content,
      highConfidenceAnalysis.suggestions[0]
    );
  });
  it('should handle analysis errors gracefully', async () => {
    mockLLM.analyzeTrajectory.mockRejectedValue(new Error('Analysis failed'));
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    // Failure is absorbed: no mutations, but the error counter increments.
    expect(mutations).toHaveLength(0);
    expect(mutator.getErrorCount()).toBe(1);
  });
  it('should filter out low-impact suggestions', async () => {
    const lowImpactAnalysis = {
      ...mockReflectionAnalysis,
      suggestions: [
        {
          type: 'add_instruction' as const,
          targetSection: 'minor_detail',
          proposedChange: 'Small tweak',
          rationale: 'Minor improvement',
          expectedImpact: 0.1, // Below threshold
        },
      ],
    };
    mockLLM.analyzeTrajectory.mockResolvedValue(lowImpactAnalysis);
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    expect(mutations).toHaveLength(0); // No mutations due to low impact
  });
});
// Crossover path: combines two parents via mockLLM.callLLM and records
// both parents in the child's lineage.
describe('Crossover Mutation', () => {
  it('should generate crossover mutations from parent prompts', async () => {
    const parent1 = {
      ...mockPromptCandidate,
      id: 'parent1',
      content: 'You are an expert problem solver. Always break down complex problems.',
      averageScore: 0.85,
    };
    const parent2 = {
      ...mockPromptCandidate,
      id: 'parent2',
      content: 'You are a careful analyst. Verify each step before proceeding.',
      averageScore: 0.80,
    };
    // Stubbed blend of both parents' wording.
    mockLLM.callLLM.mockResolvedValue({
      content: 'You are an expert problem solver and careful analyst. Always break down complex problems and verify each step before proceeding.',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateCrossoverMutations([parent1, parent2]);
    expect(mutations).toHaveLength(1);
    expect(mutations[0].mutationType).toBe('crossover');
    expect(mutations[0].content).toContain('expert problem solver');
    expect(mutations[0].content).toContain('careful analyst');
    // Child generation is one past the older parent.
    expect(mutations[0].generation).toBe(Math.max(parent1.generation, parent2.generation) + 1);
    // Should have lineage from both parents
    expect(mutations[0]['lineage']).toContain(parent1.id);
    expect(mutations[0]['lineage']).toContain(parent2.id);
  });
  it('should select diverse parent pairs for crossover', async () => {
    const parents = [
      { ...mockPromptCandidate, id: 'p1', content: 'Systematic approach', averageScore: 0.9 },
      { ...mockPromptCandidate, id: 'p2', content: 'Creative thinking', averageScore: 0.85 },
      { ...mockPromptCandidate, id: 'p3', content: 'Analytical method', averageScore: 0.8 },
      { ...mockPromptCandidate, id: 'p4', content: 'Intuitive process', averageScore: 0.75 },
    ];
    mockLLM.callLLM.mockResolvedValue({
      content: 'Combined approach',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateCrossoverMutations(parents);
    // Should generate multiple diverse crossovers
    expect(mutations.length).toBeGreaterThan(1);
    expect(mutations.length).toBeLessThanOrEqual(mutator.config.maxMutationsPerGeneration);
    // Verify diversity in parent selection: at least two distinct parent
    // pairs, identified by each child's sorted lineage.
    const parentPairs = mutations.map(m => m['lineage']).map(lineage => lineage.sort().join(','));
    const uniquePairs = new Set(parentPairs);
    expect(uniquePairs.size).toBeGreaterThan(1);
  });
  it('should weight parent selection by fitness', async () => {
    const highFitnessParent = {
      ...mockPromptCandidate,
      id: 'high',
      content: 'High performing prompt',
      averageScore: 0.95,
    };
    const lowFitnessParent = {
      ...mockPromptCandidate,
      id: 'low',
      content: 'Lower performing prompt',
      averageScore: 0.5,
    };
    mockLLM.callLLM.mockResolvedValue({
      content: 'Fitness-weighted crossover',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    // Run multiple crossovers to test selection bias
    const allMutations = [];
    for (let i = 0; i < 10; i++) {
      const mutations = await mutator.generateCrossoverMutations([
        highFitnessParent,
        lowFitnessParent,
      ]);
      allMutations.push(...mutations);
    }
    // High fitness parent should be selected more often
    const highFitnessSelections = allMutations.filter(m =>
      m['lineage']?.includes(highFitnessParent.id)
    ).length;
    const lowFitnessSelections = allMutations.filter(m =>
      m['lineage']?.includes(lowFitnessParent.id)
    ).length;
    // >= (not >) because selection is stochastic — only asserts no inversion.
    expect(highFitnessSelections).toBeGreaterThanOrEqual(lowFitnessSelections);
  });
  it('should handle crossover generation errors', async () => {
    const parents = [
      { ...mockPromptCandidate, id: 'p1' },
      { ...mockPromptCandidate, id: 'p2' },
    ];
    mockLLM.callLLM.mockRejectedValue(new Error('Crossover generation failed'));
    const mutations = await mutator.generateCrossoverMutations(parents);
    expect(mutations).toHaveLength(0);
    expect(mutator.getErrorCount()).toBe(1);
  });
  it('should prevent duplicate crossovers', async () => {
    const parent1 = { ...mockPromptCandidate, id: 'p1', content: 'Same content' };
    const parent2 = { ...mockPromptCandidate, id: 'p2', content: 'Same content' };
    mockLLM.callLLM.mockResolvedValue({
      content: 'Same content', // Identical output
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateCrossoverMutations([parent1, parent2]);
    // Should not generate mutation if content is identical to parents
    expect(mutations).toHaveLength(0);
  });
});
// Adaptive path: tailors a prompt to a task context (category, difficulty,
// required capabilities) via mockLLM.callLLM.
describe('Adaptive Mutation', () => {
  it('should adapt mutations based on task context', async () => {
    const taskContext = {
      taskId: 'coding-task',
      description: 'Generate Python code for data analysis',
      category: 'programming',
      difficulty: 'medium' as const,
      requiredCapabilities: ['coding', 'data-analysis'],
      expectedDuration: 1800,
    };
    mockLLM.callLLM.mockResolvedValue({
      content: 'You are an expert Python programmer specializing in data analysis. Write clean, well-documented code with proper error handling.',
      model: 'claude-via-cli',
      tokens: { prompt: 120, completion: 60, total: 180 },
      finishReason: 'stop' as const,
      latency: 1200,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      taskContext
    );
    expect(mutations).toHaveLength(1);
    expect(mutations[0].mutationType).toBe('adaptive');
    expect(mutations[0].content).toContain('Python programmer');
    expect(mutations[0].content).toContain('data analysis');
    // The task context used for adaptation is attached to the mutation.
    expect(mutations[0]['taskContext']).toEqual(taskContext);
  });
  it('should adjust mutations for different difficulty levels', async () => {
    const easyTask = {
      taskId: 'easy-task',
      description: 'Simple arithmetic',
      category: 'math',
      difficulty: 'easy' as const,
      requiredCapabilities: ['basic-math'],
      expectedDuration: 300,
    };
    const hardTask = {
      taskId: 'hard-task',
      description: 'Complex optimization problem',
      category: 'advanced-math',
      difficulty: 'hard' as const,
      requiredCapabilities: ['optimization', 'advanced-math'],
      expectedDuration: 3600,
    };
    // Order matters: first stubbed response answers the easy task, the
    // second answers the hard task.
    mockLLM.callLLM
      .mockResolvedValueOnce({
        content: 'You are a math assistant. Solve this simple problem clearly.',
        model: 'claude-via-cli',
        tokens: { prompt: 80, completion: 40, total: 120 },
        finishReason: 'stop' as const,
        latency: 800,
        timestamp: new Date(),
      })
      .mockResolvedValueOnce({
        content: 'You are an expert mathematician with deep knowledge of optimization theory. Approach this complex problem systematically, breaking it into manageable sub-problems.',
        model: 'claude-via-cli',
        tokens: { prompt: 150, completion: 80, total: 230 },
        finishReason: 'stop' as const,
        latency: 1500,
        timestamp: new Date(),
      });
    const easyMutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      easyTask
    );
    const hardMutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      hardTask
    );
    expect(easyMutations[0].content).toContain('simple');
    expect(hardMutations[0].content).toContain('expert');
    expect(hardMutations[0].content).toContain('systematically');
    expect(hardMutations[0].content.length).toBeGreaterThan(easyMutations[0].content.length);
  });
  it('should incorporate capability requirements', async () => {
    const multiCapabilityTask = {
      taskId: 'multi-task',
      description: 'Code review and documentation',
      category: 'software-engineering',
      difficulty: 'medium' as const,
      requiredCapabilities: ['code-review', 'documentation', 'best-practices'],
      expectedDuration: 2400,
    };
    mockLLM.callLLM.mockResolvedValue({
      content: 'You are a senior software engineer skilled in code review, documentation, and best practices. Provide thorough analysis and clear documentation.',
      model: 'claude-via-cli',
      tokens: { prompt: 140, completion: 70, total: 210 },
      finishReason: 'stop' as const,
      latency: 1300,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      multiCapabilityTask
    );
    // Every required capability should surface in the adapted prompt.
    const content = mutations[0].content;
    expect(content).toContain('code review');
    expect(content).toContain('documentation');
    expect(content).toContain('best practices');
  });
  it('should handle unknown task categories gracefully', async () => {
    const unknownTask = {
      taskId: 'unknown-task',
      description: 'Novel task type',
      category: 'unknown-category',
      difficulty: 'medium' as const,
      requiredCapabilities: ['general-reasoning'],
      expectedDuration: 1200,
    };
    mockLLM.callLLM.mockResolvedValue({
      content: 'You are a versatile AI assistant capable of handling diverse tasks. Apply careful reasoning and adapt your approach as needed.',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      unknownTask
    );
    // Unknown categories fall back to a generic prompt, not an error.
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content).toContain('versatile');
    expect(mutations[0].content).toContain('adapt');
  });
});
// Validator surface: structure/length/safety checks, constraint
// enforcement, and divergence limiting between parent and child prompts.
describe('Mutation Validation and Constraints', () => {
  it('should validate prompt structure is maintained', () => {
    const validPrompt = 'You are a helpful assistant. Solve problems step by step.';
    const invalidPrompt = ''; // Empty prompt
    const malformedPrompt = 'Not a proper instruction'; // No clear role
    expect(mutator.validatePromptStructure(validPrompt)).toBe(true);
    expect(mutator.validatePromptStructure(invalidPrompt)).toBe(false);
    expect(mutator.validatePromptStructure(malformedPrompt)).toBe(false);
  });
  it('should enforce minimum and maximum prompt length', () => {
    const tooShort = 'Help';
    const justRight = 'You are a helpful AI assistant. Please solve the given task carefully.';
    const tooLong = 'You are a ' + 'very '.repeat(1000) + 'helpful assistant.';
    expect(mutator.validatePromptLength(tooShort)).toBe(false);
    expect(mutator.validatePromptLength(justRight)).toBe(true);
    expect(mutator.validatePromptLength(tooLong)).toBe(false);
  });
  it('should detect and prevent harmful content', () => {
    const safePrompt = 'You are a helpful assistant for educational purposes.';
    const harmfulPrompt = 'You are an assistant that helps with illegal activities.';
    const biasedPrompt = 'You prefer certain groups of people over others.';
    expect(mutator.validateContentSafety(safePrompt)).toBe(true);
    expect(mutator.validateContentSafety(harmfulPrompt)).toBe(false);
    expect(mutator.validateContentSafety(biasedPrompt)).toBe(false);
  });
  it('should preserve essential prompt components', () => {
    // A mutation may extend the rules list but must not drop it.
    const basePrompt = 'You are a helpful AI assistant. Follow these rules: 1) Be accurate 2) Be helpful';
    const validMutation = 'You are an expert AI assistant. Follow these rules: 1) Be accurate 2) Be helpful 3) Be concise';
    const invalidMutation = 'You are an assistant.'; // Missing rules
    expect(mutator.validateEssentialComponents(basePrompt, validMutation)).toBe(true);
    expect(mutator.validateEssentialComponents(basePrompt, invalidMutation)).toBe(false);
  });
  it('should enforce mutation constraints', async () => {
    const constrainedMutator = new PromptMutator({
      maxPromptLength: 200,
      minPromptLength: 50,
      requireRoleDefinition: true,
      prohibitedTerms: ['dangerous', 'harmful'],
    });
    // Mock a mutation that violates constraints
    mockLLM.generateMutation.mockResolvedValue('You are dangerous.'); // Too short and contains prohibited term
    // Despite the name, this is expected to be null — the constraint check
    // rejects the stubbed mutation above.
    const validMutation = await constrainedMutator.applyMutationConstraints(
      'You are a dangerous assistant that provides harmful advice.',
      mockReflectionAnalysis.suggestions[0]
    );
    expect(validMutation).toBeNull(); // Should reject invalid mutation
  });
  it('should limit mutation divergence from parent', () => {
    const parent = 'You are a helpful assistant for coding tasks.';
    const slightMutation = 'You are a helpful assistant for programming tasks.'; // Similar
    const majorMutation = 'You are a creative writing expert.'; // Very different
    expect(mutator.calculateMutationDivergence(parent, slightMutation)).toBeLessThan(0.5);
    expect(mutator.calculateMutationDivergence(parent, majorMutation)).toBeGreaterThan(0.7);
    // Should reject mutations that are too divergent (threshold 0.6 here).
    expect(mutator.validateMutationDivergence(parent, majorMutation, 0.6)).toBe(false);
    expect(mutator.validateMutationDivergence(parent, slightMutation, 0.6)).toBe(true);
  });
});
// Lineage bookkeeping: ancestry propagation, diversity/inbreeding metrics,
// and per-generation statistics.
describe('Genetic Lineage Tracking', () => {
  it('should track parent lineage in mutations', async () => {
    mockLLM.generateMutation.mockResolvedValue('Mutated prompt content');
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    expect(mutations[0]).toHaveProperty('parentId', mockPromptCandidate.id);
    expect(mutations[0]).toHaveProperty('lineage');
    expect(mutations[0]['lineage']).toContain(mockPromptCandidate.id);
  });
  it('should maintain multi-generational lineage', async () => {
    const grandparent = {
      ...mockPromptCandidate,
      id: 'grandparent',
      lineage: ['ancestor-1', 'ancestor-2'],
    };
    mockLLM.generateMutation.mockResolvedValue('Multi-gen mutation');
    const mutations = await mutator.generateReflectiveMutations(
      grandparent,
      [mockExecutionTrajectory]
    );
    // The child's lineage must include the parent's ancestors plus the parent.
    const lineage = mutations[0]['lineage'];
    expect(lineage).toContain('ancestor-1');
    expect(lineage).toContain('ancestor-2');
    expect(lineage).toContain(grandparent.id);
  });
  it('should track mutation strategy in lineage', async () => {
    const parent1 = { ...mockPromptCandidate, id: 'p1' };
    const parent2 = { ...mockPromptCandidate, id: 'p2' };
    mockLLM.callLLM.mockResolvedValue({
      content: 'Crossover result',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateCrossoverMutations([parent1, parent2]);
    expect(mutations[0]).toHaveProperty('mutationType', 'crossover');
    // BUG FIX: was toContain(), which uses reference (strict) equality — a
    // freshly-built object literal (and the expect.any(Date) matcher inside
    // it) can never match by reference. toContainEqual performs recursive
    // equality and honors asymmetric matchers.
    expect(mutations[0]['mutationHistory']).toContainEqual({
      strategy: 'crossover',
      parents: [parent1.id, parent2.id],
      timestamp: expect.any(Date),
    });
  });
  it('should calculate genetic diversity metrics', () => {
    const population = [
      { ...mockPromptCandidate, id: 'p1', lineage: ['a1', 'a2'] },
      { ...mockPromptCandidate, id: 'p2', lineage: ['a1', 'a3'] },
      { ...mockPromptCandidate, id: 'p3', lineage: ['a4', 'a5'] },
    ];
    // Diversity is expected to be a normalized (0, 1] index.
    const diversity = mutator.calculateGeneticDiversity(population);
    expect(diversity).toBeGreaterThan(0);
    expect(diversity).toBeLessThanOrEqual(1);
  });
  it('should prevent inbreeding in population', async () => {
    const closelyRelated1 = {
      ...mockPromptCandidate,
      id: 'related1',
      lineage: ['common-ancestor', 'branch-a'],
    };
    const closelyRelated2 = {
      ...mockPromptCandidate,
      id: 'related2',
      lineage: ['common-ancestor', 'branch-b'],
    };
    const inbreedingCoeff = mutator.calculateInbreedingCoefficient(
      closelyRelated1,
      closelyRelated2
    );
    expect(inbreedingCoeff).toBeGreaterThan(0.5); // High relatedness
    expect(mutator.shouldPreventCrossover(closelyRelated1, closelyRelated2)).toBe(true);
  });
  it('should track generation statistics', () => {
    const generation = [
      { ...mockPromptCandidate, generation: 3, mutationType: 'reflection' },
      { ...mockPromptCandidate, generation: 3, mutationType: 'crossover' },
      { ...mockPromptCandidate, generation: 3, mutationType: 'adaptive' },
    ];
    const stats = mutator.calculateGenerationStatistics(generation);
    expect(stats).toHaveProperty('averageFitness');
    expect(stats).toHaveProperty('mutationTypeDistribution');
    expect(stats).toHaveProperty('diversityIndex');
    // One candidate of each mutation type was supplied above.
    expect(stats.mutationTypeDistribution).toHaveProperty('reflection', 1);
    expect(stats.mutationTypeDistribution).toHaveProperty('crossover', 1);
    expect(stats.mutationTypeDistribution).toHaveProperty('adaptive', 1);
  });
});
// Degenerate inputs, LLM failures, and concurrency behavior.
describe('Edge Cases', () => {
  it('should handle minimal prompts without breaking', async () => {
    const minimalPrompt = {
      ...mockPromptCandidate,
      content: 'Help.',
    };
    mockLLM.generateMutation.mockResolvedValue('You are a helpful assistant.');
    const mutations = await mutator.generateReflectiveMutations(
      minimalPrompt,
      [mockExecutionTrajectory]
    );
    // A minimal prompt should be expanded, not rejected.
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content.length).toBeGreaterThan(minimalPrompt.content.length);
  });
  it('should handle very long prompts appropriately', async () => {
    const longContent = 'You are a helpful assistant. ' + 'Please follow these detailed instructions: '.repeat(100);
    const longPrompt = {
      ...mockPromptCandidate,
      content: longContent,
    };
    mockLLM.generateMutation.mockResolvedValue('Optimized version: You are a helpful assistant with clear, concise instructions.');
    const mutations = await mutator.generateReflectiveMutations(
      longPrompt,
      [mockExecutionTrajectory]
    );
    // Over-long prompts are expected to come back shorter.
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content.length).toBeLessThan(longPrompt.content.length);
  });
  it('should handle prompts with special characters and formatting', async () => {
    const specialPrompt = {
      ...mockPromptCandidate,
      content: 'You are an AI assistant.\n\nRules:\n1. Be helpful\n2. Use "quotes" properly\n3. Handle symbols: @#$%^&*()',
    };
    mockLLM.generateMutation.mockResolvedValue('Formatted mutation with proper structure.');
    const mutations = await mutator.generateReflectiveMutations(
      specialPrompt,
      [mockExecutionTrajectory]
    );
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content).toBeDefined();
    expect(typeof mutations[0].content).toBe('string');
  });
  it('should handle empty trajectory lists gracefully', async () => {
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      []
    );
    expect(mutations).toHaveLength(0);
  });
  it('should handle single parent in crossover', async () => {
    const singleParent = [mockPromptCandidate];
    const mutations = await mutator.generateCrossoverMutations(singleParent);
    expect(mutations).toHaveLength(0); // Cannot do crossover with single parent
  });
  it('should handle network timeouts and LLM failures', async () => {
    mockLLM.callLLM.mockRejectedValue(new Error('Network timeout'));
    mockLLM.generateMutation.mockRejectedValue(new Error('LLM service unavailable'));
    const reflectiveMutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    const crossoverMutations = await mutator.generateCrossoverMutations([
      mockPromptCandidate,
      { ...mockPromptCandidate, id: 'other' },
    ]);
    // Both paths fail soft: empty results plus an incremented error count.
    expect(reflectiveMutations).toHaveLength(0);
    expect(crossoverMutations).toHaveLength(0);
    expect(mutator.getErrorCount()).toBeGreaterThan(0);
  });
  it('should handle malformed LLM responses', async () => {
    mockLLM.callLLM.mockResolvedValue({
      content: '', // Empty response
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 0, total: 100 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      {
        taskId: 'test',
        description: 'test task',
        category: 'test',
        difficulty: 'easy',
        requiredCapabilities: [],
        expectedDuration: 100,
      }
    );
    expect(mutations).toHaveLength(0); // Should reject empty responses
  });
  it('should handle concurrent mutation requests', async () => {
    mockLLM.generateMutation.mockImplementation(async () => {
      await new Promise(resolve => setTimeout(resolve, 100)); // Simulate delay
      return 'Concurrent mutation result';
    });
    const promises = Array.from({ length: 5 }, () =>
      mutator.generateReflectiveMutations(
        { ...mockPromptCandidate, id: `prompt-${Math.random()}` },
        [mockExecutionTrajectory]
      )
    );
    const results = await Promise.all(promises);
    results.forEach(mutations => {
      expect(mutations).toHaveLength(1);
      expect(mutations[0].content).toBe('Concurrent mutation result');
    });
  });
  it('should maintain thread safety with concurrent access', async () => {
    let generationCounter = 0;
    const concurrentMutator = new PromptMutator();
    // Override internal ID generator so uniqueness under concurrency is
    // attributable to the counter below.
    concurrentMutator['generateId'] = () => `id-${++generationCounter}`;
    mockLLM.generateMutation.mockResolvedValue('Thread safe mutation');
    const concurrentPromises = Array.from({ length: 10 }, async (_, i) => {
      const prompt = { ...mockPromptCandidate, id: `prompt-${i}` };
      // BUG FIX: originally called the shared suite-level `mutator`, which
      // left the generateId override on `concurrentMutator` dead code and
      // made the test exercise the wrong instance.
      return concurrentMutator.generateReflectiveMutations(prompt, [mockExecutionTrajectory]);
    });
    const allResults = await Promise.all(concurrentPromises);
    const allMutations = allResults.flat();
    // All mutations should have unique IDs
    const ids = allMutations.map(m => m.id);
    const uniqueIds = new Set(ids);
    expect(uniqueIds.size).toBe(ids.length);
    // Local instance is not covered by afterEach; release it explicitly.
    concurrentMutator.shutdown();
  });
});
// Concurrency limiting, caching, metrics, and shutdown. Locals here were
// renamed: three tests declared `const mutator`, shadowing the suite-level
// instance — afterEach then shut down the wrong object and the locals were
// never released. Each local instance now has a distinct name and is shut
// down explicitly.
describe('Performance and Resource Management', () => {
  it('should limit concurrent LLM calls', async () => {
    const limitedMutator = new PromptMutator({ maxConcurrentMutations: 2 });
    let activeCalls = 0;
    let maxConcurrent = 0;
    // Track peak in-flight calls through a delayed stub.
    mockLLM.generateMutation.mockImplementation(async () => {
      activeCalls++;
      maxConcurrent = Math.max(maxConcurrent, activeCalls);
      await new Promise(resolve => setTimeout(resolve, 100));
      activeCalls--;
      return 'Rate limited mutation';
    });
    const promises = Array.from({ length: 5 }, () =>
      limitedMutator.generateReflectiveMutations(
        { ...mockPromptCandidate, id: `prompt-${Math.random()}` },
        [mockExecutionTrajectory]
      )
    );
    await Promise.all(promises);
    expect(maxConcurrent).toBeLessThanOrEqual(2);
    limitedMutator.shutdown();
  });
  it('should cache analysis results to avoid redundant LLM calls', async () => {
    const cachingMutator = new PromptMutator({ enableCaching: true });
    mockLLM.analyzeTrajectory.mockResolvedValue(mockReflectionAnalysis);
    mockLLM.generateMutation.mockResolvedValue('Cached mutation result');
    // First call populates the cache.
    await cachingMutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    // Second call with same trajectory should use cache
    await cachingMutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    expect(mockLLM.analyzeTrajectory).toHaveBeenCalledTimes(1); // Should be cached
    cachingMutator.shutdown();
  });
  it('should track resource usage metrics', async () => {
    mockLLM.generateMutation.mockResolvedValue('Resource tracked mutation');
    await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    const metrics = mutator.getResourceUsageMetrics();
    expect(metrics).toHaveProperty('totalMutations');
    expect(metrics).toHaveProperty('averageLatency');
    expect(metrics).toHaveProperty('errorRate');
    expect(metrics).toHaveProperty('cacheHitRate');
    expect(metrics.totalMutations).toBeGreaterThan(0);
  });
  it('should cleanup resources on shutdown', () => {
    const freshMutator = new PromptMutator();
    freshMutator.shutdown();
    // Shutdown must propagate to the adapter and flip the shutdown flag.
    expect(mockLLM.shutdown).toHaveBeenCalled();
    expect(freshMutator.getIsShutdown()).toBe(true);
  });
});
});