// prompt-mutator.test.ts
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { EventEmitter } from 'events';
// Mock the LLM adapter module so PromptMutator never calls a real model.
// NOTE(review): the mock targets './llm-adapter.js' while the import below
// uses the extensionless './llm-adapter' — these only resolve to the same
// module if the resolver maps the bare specifier onto the .js path; confirm
// the mock is actually applied under the project's vitest/resolver config.
vi.mock('./llm-adapter.js', () => ({
  LLMAdapter: vi.fn(),
}));
import { PromptMutator } from './prompt-mutator';
import { LLMAdapter } from './llm-adapter';
import {
PromptCandidate,
ReflectionAnalysis,
ExecutionTrajectory,
} from '../types/gepa';
// Mock LLM adapter class.
// Stand-in for LLMAdapter: an EventEmitter exposing the methods the mutator
// calls, each replaced by a vi.fn() stub so tests can script responses and
// inspect call arguments. The explicit empty constructor was removed — a
// derived class without one gets an implicit constructor forwarding to
// super(), so it was pure boilerplate (ESLint no-useless-constructor).
class MockLLMAdapter extends EventEmitter {
  public callLLM = vi.fn();            // raw completion calls (crossover/adaptive paths)
  public analyzeTrajectory = vi.fn();  // reflective-analysis step
  public generateMutation = vi.fn();   // reflective-generation step
  public generateResponse = vi.fn();   // not referenced by current tests; kept for interface parity
  public shutdown = vi.fn();           // asserted by the shutdown/cleanup test
}
describe('PromptMutator', () => {
  let mutator: PromptMutator;
  let mockLLM: MockLLMAdapter;
  // Extended PromptCandidate interface for testing.
  // Adds the bookkeeping fields these tests assert on (lineage, mutation
  // history, task context) that are not part of the base PromptCandidate.
  interface ExtendedPromptCandidate extends PromptCandidate {
    lineage?: string[];
    mutationHistory?: Array<{
      strategy: string;
      parents?: string[];
      timestamp: Date;
    }>;
    // any: shape varies per task — see the adaptive-mutation tests.
    taskContext?: any;
  }
  // Test data fixtures.
  // Baseline first-generation candidate; most tests derive variants via spread.
  const mockPromptCandidate: ExtendedPromptCandidate = {
    id: 'prompt-123',
    content: 'You are a helpful AI assistant. Solve the given task step by step.',
    generation: 1,
    taskPerformance: new Map([
      ['task1', 0.85],
      ['task2', 0.70],
    ]),
    averageScore: 0.775, // mean of the two task scores above
    rolloutCount: 5,
    createdAt: new Date('2024-01-01'),
    lastEvaluated: new Date('2024-01-02'),
    mutationType: 'initial',
    lineage: [],
    mutationHistory: [],
  };
  // Failed execution trace for mockPromptCandidate on task1: step 2 errors
  // with 'Missing key information' and the final score is 0.3. Drives the
  // reflective-mutation tests, which analyze failures to propose fixes.
  const mockExecutionTrajectory: ExecutionTrajectory = {
    id: 'traj-456',
    promptId: 'prompt-123',
    taskId: 'task1',
    timestamp: new Date('2024-01-02'),
    steps: [
      {
        stepNumber: 1,
        action: 'analyze_problem',
        reasoning: 'Breaking down the problem',
        timestamp: new Date('2024-01-02'),
      },
      {
        stepNumber: 2,
        action: 'generate_solution',
        error: 'Missing key information', // the failure the reflection should diagnose
        timestamp: new Date('2024-01-02'),
      },
    ],
    finalResult: {
      success: false,
      score: 0.3,
      output: 'Incomplete solution',
      error: 'Missing key information',
    },
    llmCalls: [],
    toolCalls: [],
    totalTokens: 250,
    executionTime: 15000,
  };
  // Canned reflection result for the trajectory above: diagnoses an
  // information-gathering failure and proposes one high-impact (0.8)
  // 'add_instruction' suggestion with confidence 0.85. Used both directly
  // and as a spread base for the confidence/impact filtering tests.
  const mockReflectionAnalysis: ReflectionAnalysis = {
    trajectoryId: 'traj-456',
    promptId: 'prompt-123',
    diagnosis: {
      failurePoint: 'Step 2: generate_solution',
      rootCause: 'Insufficient guidance for information gathering',
      moduleResponsibility: new Map([['reasoning', 0.7], ['execution', 0.3]]),
      patterns: [
        {
          type: 'missing_information',
          frequency: 3,
          description: 'Agent frequently fails to gather required information',
          examples: ['Missing key details', 'Incomplete analysis'],
        },
      ],
    },
    suggestions: [
      {
        type: 'add_instruction',
        targetSection: 'problem_analysis',
        proposedChange: 'Add explicit instruction to identify and gather all required information before proceeding',
        rationale: 'This will help prevent incomplete solutions due to missing information',
        expectedImpact: 0.8,
      },
    ],
    confidence: 0.85,
    rationale: 'Clear pattern of information gathering failures observed',
  };
  beforeEach(() => {
    vi.clearAllMocks();
    mockLLM = new MockLLMAdapter();
    // Mock the LLMAdapter constructor to return our mockLLM instance,
    // so the mutator under test talks to the stub, never a real adapter.
    (LLMAdapter as any).mockImplementation(() => mockLLM);
    mutator = new PromptMutator();
    // Set up default happy-path mock implementations so tests that don't
    // re-stub still get non-empty results instead of empty arrays.
    mockLLM.analyzeTrajectory.mockResolvedValue(mockReflectionAnalysis);
    mockLLM.generateMutation.mockResolvedValue('Generated mutation content');
    mockLLM.callLLM.mockResolvedValue({
      content: 'Generated LLM response',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date()
    });
  });
  afterEach(() => {
    // Optional chaining: mutator may be undefined if beforeEach threw.
    mutator?.shutdown();
  });
describe('Initialization', () => {
  // Defaults: 5 mutations per generation and all three strategies registered.
  it('should initialize with default configuration', () => {
    expect(mutator).toBeDefined();
    expect(mutator.config.maxMutationsPerGeneration).toBe(5);
    for (const strategy of ['reflective', 'crossover', 'adaptive']) {
      expect(mutator.mutationStrategies).toContain(strategy);
    }
  });
  // Every override passed to the constructor must land on .config verbatim.
  it('should accept custom configuration', () => {
    const custom = new PromptMutator({
      maxMutationsPerGeneration: 10,
      mutationRate: 0.7,
      crossoverRate: 0.4,
      adaptiveWeight: 0.8,
      fitnessThreshold: 0.9,
    });
    const { config } = custom;
    expect(config.maxMutationsPerGeneration).toBe(10);
    expect(config.mutationRate).toBe(0.7);
    expect(config.crossoverRate).toBe(0.4);
    expect(config.adaptiveWeight).toBe(0.8);
    expect(config.fitnessThreshold).toBe(0.9);
  });
  // Out-of-range values must be rejected with a descriptive error.
  it('should validate configuration parameters', () => {
    const invalidCases = [
      { overrides: { mutationRate: 1.5 }, message: 'Mutation rate must be between 0 and 1' },
      { overrides: { crossoverRate: -0.1 }, message: 'Crossover rate must be between 0 and 1' },
      { overrides: { maxMutationsPerGeneration: 0 }, message: 'Max mutations per generation must be positive' },
    ];
    for (const { overrides, message } of invalidCases) {
      expect(() => new PromptMutator(overrides)).toThrow(message);
    }
  });
});
// Reflection pipeline: analyzeTrajectory() yields a diagnosis + suggestions,
// then generateMutation() rewrites the prompt from a chosen suggestion.
describe('Reflective Mutation', () => {
  it('should perform reflective mutation based on trajectory analysis', async () => {
    // Mock LLM responses
    mockLLM.analyzeTrajectory.mockResolvedValue(mockReflectionAnalysis);
    mockLLM.generateMutation.mockResolvedValue(
      'You are a helpful AI assistant. Before solving any task, first identify and gather all required information. Then solve the task step by step.'
    );
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    // The mutation carries the suggested change plus parent/generation metadata.
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content).toContain('identify and gather all required information');
    expect(mutations[0].mutationType).toBe('reflection');
    expect(mutations[0].parentId).toBe(mockPromptCandidate.id);
    expect(mutations[0].generation).toBe(mockPromptCandidate.generation + 1);
    // Verify LLM was called correctly
    expect(mockLLM.analyzeTrajectory).toHaveBeenCalledWith(
      mockExecutionTrajectory,
      mockPromptCandidate.content
    );
    expect(mockLLM.generateMutation).toHaveBeenCalledWith(
      mockPromptCandidate.content,
      mockReflectionAnalysis.suggestions[0]
    );
  });
  // One analysis call is expected per trajectory supplied.
  it('should handle multiple trajectories in reflective analysis', async () => {
    const secondTrajectory = { ...mockExecutionTrajectory, id: 'traj-789' };
    const trajectories = [mockExecutionTrajectory, secondTrajectory];
    mockLLM.analyzeTrajectory.mockResolvedValue(mockReflectionAnalysis);
    mockLLM.generateMutation.mockResolvedValue('Improved prompt content');
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      trajectories
    );
    expect(mockLLM.analyzeTrajectory).toHaveBeenCalledTimes(2);
    expect(mutations.length).toBeGreaterThan(0);
  });
  it('should prioritize high-confidence improvements', async () => {
    const lowConfidenceAnalysis = {
      ...mockReflectionAnalysis,
      confidence: 0.3,
    };
    const highConfidenceAnalysis = {
      ...mockReflectionAnalysis,
      confidence: 0.95,
      suggestions: [
        {
          type: 'add_constraint' as const,
          targetSection: 'execution',
          proposedChange: 'Add validation step',
          rationale: 'Prevents errors',
          expectedImpact: 0.9,
        },
      ],
    };
    // Order matters: first trajectory maps to the low-confidence analysis,
    // second to the high-confidence one.
    mockLLM.analyzeTrajectory
      .mockResolvedValueOnce(lowConfidenceAnalysis)
      .mockResolvedValueOnce(highConfidenceAnalysis);
    mockLLM.generateMutation.mockResolvedValue('High confidence mutation');
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory, { ...mockExecutionTrajectory, id: 'traj-2' }]
    );
    // Should prioritize high confidence suggestion
    expect(mutations).toHaveLength(1);
    expect(mockLLM.generateMutation).toHaveBeenCalledWith(
      mockPromptCandidate.content,
      highConfidenceAnalysis.suggestions[0]
    );
  });
  it('should handle analysis errors gracefully', async () => {
    mockLLM.analyzeTrajectory.mockRejectedValue(new Error('Analysis failed'));
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    // Failure is absorbed: no mutations, but the error counter increments.
    expect(mutations).toHaveLength(0);
    expect(mutator.getErrorCount()).toBe(1);
  });
  it('should filter out low-impact suggestions', async () => {
    const lowImpactAnalysis = {
      ...mockReflectionAnalysis,
      suggestions: [
        {
          type: 'add_instruction' as const,
          targetSection: 'minor_detail',
          proposedChange: 'Small tweak',
          rationale: 'Minor improvement',
          expectedImpact: 0.1, // Below threshold
        },
      ],
    };
    mockLLM.analyzeTrajectory.mockResolvedValue(lowImpactAnalysis);
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    expect(mutations).toHaveLength(0); // No mutations due to low impact
  });
});
// Crossover path: combines two parents via mockLLM.callLLM and records
// both parents in the child's lineage.
describe('Crossover Mutation', () => {
  it('should generate crossover mutations from parent prompts', async () => {
    const parent1 = {
      ...mockPromptCandidate,
      id: 'parent1',
      content: 'You are an expert problem solver. Always break down complex problems.',
      averageScore: 0.85,
    };
    const parent2 = {
      ...mockPromptCandidate,
      id: 'parent2',
      content: 'You are a careful analyst. Verify each step before proceeding.',
      averageScore: 0.80,
    };
    // Stubbed blend of both parents' wording.
    mockLLM.callLLM.mockResolvedValue({
      content: 'You are an expert problem solver and careful analyst. Always break down complex problems and verify each step before proceeding.',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateCrossoverMutations([parent1, parent2]);
    expect(mutations).toHaveLength(1);
    expect(mutations[0].mutationType).toBe('crossover');
    expect(mutations[0].content).toContain('expert problem solver');
    expect(mutations[0].content).toContain('careful analyst');
    // Child generation is one past the older parent.
    expect(mutations[0].generation).toBe(Math.max(parent1.generation, parent2.generation) + 1);
    // Should have lineage from both parents
    expect(mutations[0]['lineage']).toContain(parent1.id);
    expect(mutations[0]['lineage']).toContain(parent2.id);
  });
  it('should select diverse parent pairs for crossover', async () => {
    const parents = [
      { ...mockPromptCandidate, id: 'p1', content: 'Systematic approach', averageScore: 0.9 },
      { ...mockPromptCandidate, id: 'p2', content: 'Creative thinking', averageScore: 0.85 },
      { ...mockPromptCandidate, id: 'p3', content: 'Analytical method', averageScore: 0.8 },
      { ...mockPromptCandidate, id: 'p4', content: 'Intuitive process', averageScore: 0.75 },
    ];
    mockLLM.callLLM.mockResolvedValue({
      content: 'Combined approach',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateCrossoverMutations(parents);
    // Should generate multiple diverse crossovers
    expect(mutations.length).toBeGreaterThan(1);
    expect(mutations.length).toBeLessThanOrEqual(mutator.config.maxMutationsPerGeneration);
    // Verify diversity in parent selection: at least two distinct parent
    // pairs, identified by each child's sorted lineage.
    const parentPairs = mutations.map(m => m['lineage']).map(lineage => lineage.sort().join(','));
    const uniquePairs = new Set(parentPairs);
    expect(uniquePairs.size).toBeGreaterThan(1);
  });
  it('should weight parent selection by fitness', async () => {
    const highFitnessParent = {
      ...mockPromptCandidate,
      id: 'high',
      content: 'High performing prompt',
      averageScore: 0.95,
    };
    const lowFitnessParent = {
      ...mockPromptCandidate,
      id: 'low',
      content: 'Lower performing prompt',
      averageScore: 0.5,
    };
    mockLLM.callLLM.mockResolvedValue({
      content: 'Fitness-weighted crossover',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    // Run multiple crossovers to test selection bias
    const allMutations = [];
    for (let i = 0; i < 10; i++) {
      const mutations = await mutator.generateCrossoverMutations([
        highFitnessParent,
        lowFitnessParent,
      ]);
      allMutations.push(...mutations);
    }
    // High fitness parent should be selected more often
    const highFitnessSelections = allMutations.filter(m =>
      m['lineage']?.includes(highFitnessParent.id)
    ).length;
    const lowFitnessSelections = allMutations.filter(m =>
      m['lineage']?.includes(lowFitnessParent.id)
    ).length;
    // >= (not >) because selection is stochastic — only asserts no inversion.
    expect(highFitnessSelections).toBeGreaterThanOrEqual(lowFitnessSelections);
  });
  it('should handle crossover generation errors', async () => {
    const parents = [
      { ...mockPromptCandidate, id: 'p1' },
      { ...mockPromptCandidate, id: 'p2' },
    ];
    mockLLM.callLLM.mockRejectedValue(new Error('Crossover generation failed'));
    const mutations = await mutator.generateCrossoverMutations(parents);
    expect(mutations).toHaveLength(0);
    expect(mutator.getErrorCount()).toBe(1);
  });
  it('should prevent duplicate crossovers', async () => {
    const parent1 = { ...mockPromptCandidate, id: 'p1', content: 'Same content' };
    const parent2 = { ...mockPromptCandidate, id: 'p2', content: 'Same content' };
    mockLLM.callLLM.mockResolvedValue({
      content: 'Same content', // Identical output
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateCrossoverMutations([parent1, parent2]);
    // Should not generate mutation if content is identical to parents
    expect(mutations).toHaveLength(0);
  });
});
// Adaptive path: tailors a prompt to a task context (category, difficulty,
// required capabilities) via mockLLM.callLLM.
describe('Adaptive Mutation', () => {
  it('should adapt mutations based on task context', async () => {
    const taskContext = {
      taskId: 'coding-task',
      description: 'Generate Python code for data analysis',
      category: 'programming',
      difficulty: 'medium' as const,
      requiredCapabilities: ['coding', 'data-analysis'],
      expectedDuration: 1800,
    };
    mockLLM.callLLM.mockResolvedValue({
      content: 'You are an expert Python programmer specializing in data analysis. Write clean, well-documented code with proper error handling.',
      model: 'claude-via-cli',
      tokens: { prompt: 120, completion: 60, total: 180 },
      finishReason: 'stop' as const,
      latency: 1200,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      taskContext
    );
    expect(mutations).toHaveLength(1);
    expect(mutations[0].mutationType).toBe('adaptive');
    expect(mutations[0].content).toContain('Python programmer');
    expect(mutations[0].content).toContain('data analysis');
    // The task context used for adaptation is attached to the mutation.
    expect(mutations[0]['taskContext']).toEqual(taskContext);
  });
  it('should adjust mutations for different difficulty levels', async () => {
    const easyTask = {
      taskId: 'easy-task',
      description: 'Simple arithmetic',
      category: 'math',
      difficulty: 'easy' as const,
      requiredCapabilities: ['basic-math'],
      expectedDuration: 300,
    };
    const hardTask = {
      taskId: 'hard-task',
      description: 'Complex optimization problem',
      category: 'advanced-math',
      difficulty: 'hard' as const,
      requiredCapabilities: ['optimization', 'advanced-math'],
      expectedDuration: 3600,
    };
    // Order matters: first stubbed response answers the easy task, the
    // second answers the hard task.
    mockLLM.callLLM
      .mockResolvedValueOnce({
        content: 'You are a math assistant. Solve this simple problem clearly.',
        model: 'claude-via-cli',
        tokens: { prompt: 80, completion: 40, total: 120 },
        finishReason: 'stop' as const,
        latency: 800,
        timestamp: new Date(),
      })
      .mockResolvedValueOnce({
        content: 'You are an expert mathematician with deep knowledge of optimization theory. Approach this complex problem systematically, breaking it into manageable sub-problems.',
        model: 'claude-via-cli',
        tokens: { prompt: 150, completion: 80, total: 230 },
        finishReason: 'stop' as const,
        latency: 1500,
        timestamp: new Date(),
      });
    const easyMutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      easyTask
    );
    const hardMutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      hardTask
    );
    expect(easyMutations[0].content).toContain('simple');
    expect(hardMutations[0].content).toContain('expert');
    expect(hardMutations[0].content).toContain('systematically');
    expect(hardMutations[0].content.length).toBeGreaterThan(easyMutations[0].content.length);
  });
  it('should incorporate capability requirements', async () => {
    const multiCapabilityTask = {
      taskId: 'multi-task',
      description: 'Code review and documentation',
      category: 'software-engineering',
      difficulty: 'medium' as const,
      requiredCapabilities: ['code-review', 'documentation', 'best-practices'],
      expectedDuration: 2400,
    };
    mockLLM.callLLM.mockResolvedValue({
      content: 'You are a senior software engineer skilled in code review, documentation, and best practices. Provide thorough analysis and clear documentation.',
      model: 'claude-via-cli',
      tokens: { prompt: 140, completion: 70, total: 210 },
      finishReason: 'stop' as const,
      latency: 1300,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      multiCapabilityTask
    );
    // Every required capability should surface in the adapted prompt.
    const content = mutations[0].content;
    expect(content).toContain('code review');
    expect(content).toContain('documentation');
    expect(content).toContain('best practices');
  });
  it('should handle unknown task categories gracefully', async () => {
    const unknownTask = {
      taskId: 'unknown-task',
      description: 'Novel task type',
      category: 'unknown-category',
      difficulty: 'medium' as const,
      requiredCapabilities: ['general-reasoning'],
      expectedDuration: 1200,
    };
    mockLLM.callLLM.mockResolvedValue({
      content: 'You are a versatile AI assistant capable of handling diverse tasks. Apply careful reasoning and adapt your approach as needed.',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      unknownTask
    );
    // Unknown categories fall back to a generic prompt, not an error.
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content).toContain('versatile');
    expect(mutations[0].content).toContain('adapt');
  });
});
// Validator surface: structure/length/safety checks, constraint
// enforcement, and divergence limiting between parent and child prompts.
describe('Mutation Validation and Constraints', () => {
  it('should validate prompt structure is maintained', () => {
    const validPrompt = 'You are a helpful assistant. Solve problems step by step.';
    const invalidPrompt = ''; // Empty prompt
    const malformedPrompt = 'Not a proper instruction'; // No clear role
    expect(mutator.validatePromptStructure(validPrompt)).toBe(true);
    expect(mutator.validatePromptStructure(invalidPrompt)).toBe(false);
    expect(mutator.validatePromptStructure(malformedPrompt)).toBe(false);
  });
  it('should enforce minimum and maximum prompt length', () => {
    const tooShort = 'Help';
    const justRight = 'You are a helpful AI assistant. Please solve the given task carefully.';
    const tooLong = 'You are a ' + 'very '.repeat(1000) + 'helpful assistant.';
    expect(mutator.validatePromptLength(tooShort)).toBe(false);
    expect(mutator.validatePromptLength(justRight)).toBe(true);
    expect(mutator.validatePromptLength(tooLong)).toBe(false);
  });
  it('should detect and prevent harmful content', () => {
    const safePrompt = 'You are a helpful assistant for educational purposes.';
    const harmfulPrompt = 'You are an assistant that helps with illegal activities.';
    const biasedPrompt = 'You prefer certain groups of people over others.';
    expect(mutator.validateContentSafety(safePrompt)).toBe(true);
    expect(mutator.validateContentSafety(harmfulPrompt)).toBe(false);
    expect(mutator.validateContentSafety(biasedPrompt)).toBe(false);
  });
  it('should preserve essential prompt components', () => {
    // A mutation may extend the rules list but must not drop it.
    const basePrompt = 'You are a helpful AI assistant. Follow these rules: 1) Be accurate 2) Be helpful';
    const validMutation = 'You are an expert AI assistant. Follow these rules: 1) Be accurate 2) Be helpful 3) Be concise';
    const invalidMutation = 'You are an assistant.'; // Missing rules
    expect(mutator.validateEssentialComponents(basePrompt, validMutation)).toBe(true);
    expect(mutator.validateEssentialComponents(basePrompt, invalidMutation)).toBe(false);
  });
  it('should enforce mutation constraints', async () => {
    const constrainedMutator = new PromptMutator({
      maxPromptLength: 200,
      minPromptLength: 50,
      requireRoleDefinition: true,
      prohibitedTerms: ['dangerous', 'harmful'],
    });
    // Mock a mutation that violates constraints
    mockLLM.generateMutation.mockResolvedValue('You are dangerous.'); // Too short and contains prohibited term
    // Despite the name, this is expected to be null — the constraint check
    // rejects the stubbed mutation above.
    const validMutation = await constrainedMutator.applyMutationConstraints(
      'You are a dangerous assistant that provides harmful advice.',
      mockReflectionAnalysis.suggestions[0]
    );
    expect(validMutation).toBeNull(); // Should reject invalid mutation
  });
  it('should limit mutation divergence from parent', () => {
    const parent = 'You are a helpful assistant for coding tasks.';
    const slightMutation = 'You are a helpful assistant for programming tasks.'; // Similar
    const majorMutation = 'You are a creative writing expert.'; // Very different
    expect(mutator.calculateMutationDivergence(parent, slightMutation)).toBeLessThan(0.5);
    expect(mutator.calculateMutationDivergence(parent, majorMutation)).toBeGreaterThan(0.7);
    // Should reject mutations that are too divergent (threshold 0.6 here).
    expect(mutator.validateMutationDivergence(parent, majorMutation, 0.6)).toBe(false);
    expect(mutator.validateMutationDivergence(parent, slightMutation, 0.6)).toBe(true);
  });
});
// Lineage bookkeeping: ancestry propagation, diversity/inbreeding metrics,
// and per-generation statistics.
describe('Genetic Lineage Tracking', () => {
  it('should track parent lineage in mutations', async () => {
    mockLLM.generateMutation.mockResolvedValue('Mutated prompt content');
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    expect(mutations[0]).toHaveProperty('parentId', mockPromptCandidate.id);
    expect(mutations[0]).toHaveProperty('lineage');
    expect(mutations[0]['lineage']).toContain(mockPromptCandidate.id);
  });
  it('should maintain multi-generational lineage', async () => {
    const grandparent = {
      ...mockPromptCandidate,
      id: 'grandparent',
      lineage: ['ancestor-1', 'ancestor-2'],
    };
    mockLLM.generateMutation.mockResolvedValue('Multi-gen mutation');
    const mutations = await mutator.generateReflectiveMutations(
      grandparent,
      [mockExecutionTrajectory]
    );
    // The child's lineage must include the parent's ancestors plus the parent.
    const lineage = mutations[0]['lineage'];
    expect(lineage).toContain('ancestor-1');
    expect(lineage).toContain('ancestor-2');
    expect(lineage).toContain(grandparent.id);
  });
  it('should track mutation strategy in lineage', async () => {
    const parent1 = { ...mockPromptCandidate, id: 'p1' };
    const parent2 = { ...mockPromptCandidate, id: 'p2' };
    mockLLM.callLLM.mockResolvedValue({
      content: 'Crossover result',
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 50, total: 150 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateCrossoverMutations([parent1, parent2]);
    expect(mutations[0]).toHaveProperty('mutationType', 'crossover');
    // BUG FIX: was toContain(), which uses reference (strict) equality — a
    // freshly-built object literal (and the expect.any(Date) matcher inside
    // it) can never match by reference. toContainEqual performs recursive
    // equality and honors asymmetric matchers.
    expect(mutations[0]['mutationHistory']).toContainEqual({
      strategy: 'crossover',
      parents: [parent1.id, parent2.id],
      timestamp: expect.any(Date),
    });
  });
  it('should calculate genetic diversity metrics', () => {
    const population = [
      { ...mockPromptCandidate, id: 'p1', lineage: ['a1', 'a2'] },
      { ...mockPromptCandidate, id: 'p2', lineage: ['a1', 'a3'] },
      { ...mockPromptCandidate, id: 'p3', lineage: ['a4', 'a5'] },
    ];
    // Diversity is expected to be a normalized (0, 1] index.
    const diversity = mutator.calculateGeneticDiversity(population);
    expect(diversity).toBeGreaterThan(0);
    expect(diversity).toBeLessThanOrEqual(1);
  });
  it('should prevent inbreeding in population', async () => {
    const closelyRelated1 = {
      ...mockPromptCandidate,
      id: 'related1',
      lineage: ['common-ancestor', 'branch-a'],
    };
    const closelyRelated2 = {
      ...mockPromptCandidate,
      id: 'related2',
      lineage: ['common-ancestor', 'branch-b'],
    };
    const inbreedingCoeff = mutator.calculateInbreedingCoefficient(
      closelyRelated1,
      closelyRelated2
    );
    expect(inbreedingCoeff).toBeGreaterThan(0.5); // High relatedness
    expect(mutator.shouldPreventCrossover(closelyRelated1, closelyRelated2)).toBe(true);
  });
  it('should track generation statistics', () => {
    const generation = [
      { ...mockPromptCandidate, generation: 3, mutationType: 'reflection' },
      { ...mockPromptCandidate, generation: 3, mutationType: 'crossover' },
      { ...mockPromptCandidate, generation: 3, mutationType: 'adaptive' },
    ];
    const stats = mutator.calculateGenerationStatistics(generation);
    expect(stats).toHaveProperty('averageFitness');
    expect(stats).toHaveProperty('mutationTypeDistribution');
    expect(stats).toHaveProperty('diversityIndex');
    // One candidate of each mutation type was supplied above.
    expect(stats.mutationTypeDistribution).toHaveProperty('reflection', 1);
    expect(stats.mutationTypeDistribution).toHaveProperty('crossover', 1);
    expect(stats.mutationTypeDistribution).toHaveProperty('adaptive', 1);
  });
});
// Degenerate inputs, LLM failures, and concurrency behavior.
describe('Edge Cases', () => {
  it('should handle minimal prompts without breaking', async () => {
    const minimalPrompt = {
      ...mockPromptCandidate,
      content: 'Help.',
    };
    mockLLM.generateMutation.mockResolvedValue('You are a helpful assistant.');
    const mutations = await mutator.generateReflectiveMutations(
      minimalPrompt,
      [mockExecutionTrajectory]
    );
    // A minimal prompt should be expanded, not rejected.
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content.length).toBeGreaterThan(minimalPrompt.content.length);
  });
  it('should handle very long prompts appropriately', async () => {
    const longContent = 'You are a helpful assistant. ' + 'Please follow these detailed instructions: '.repeat(100);
    const longPrompt = {
      ...mockPromptCandidate,
      content: longContent,
    };
    mockLLM.generateMutation.mockResolvedValue('Optimized version: You are a helpful assistant with clear, concise instructions.');
    const mutations = await mutator.generateReflectiveMutations(
      longPrompt,
      [mockExecutionTrajectory]
    );
    // Over-long prompts are expected to come back shorter.
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content.length).toBeLessThan(longPrompt.content.length);
  });
  it('should handle prompts with special characters and formatting', async () => {
    const specialPrompt = {
      ...mockPromptCandidate,
      content: 'You are an AI assistant.\n\nRules:\n1. Be helpful\n2. Use "quotes" properly\n3. Handle symbols: @#$%^&*()',
    };
    mockLLM.generateMutation.mockResolvedValue('Formatted mutation with proper structure.');
    const mutations = await mutator.generateReflectiveMutations(
      specialPrompt,
      [mockExecutionTrajectory]
    );
    expect(mutations).toHaveLength(1);
    expect(mutations[0].content).toBeDefined();
    expect(typeof mutations[0].content).toBe('string');
  });
  it('should handle empty trajectory lists gracefully', async () => {
    const mutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      []
    );
    expect(mutations).toHaveLength(0);
  });
  it('should handle single parent in crossover', async () => {
    const singleParent = [mockPromptCandidate];
    const mutations = await mutator.generateCrossoverMutations(singleParent);
    expect(mutations).toHaveLength(0); // Cannot do crossover with single parent
  });
  it('should handle network timeouts and LLM failures', async () => {
    mockLLM.callLLM.mockRejectedValue(new Error('Network timeout'));
    mockLLM.generateMutation.mockRejectedValue(new Error('LLM service unavailable'));
    const reflectiveMutations = await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    const crossoverMutations = await mutator.generateCrossoverMutations([
      mockPromptCandidate,
      { ...mockPromptCandidate, id: 'other' },
    ]);
    // Both paths fail soft: empty results plus an incremented error count.
    expect(reflectiveMutations).toHaveLength(0);
    expect(crossoverMutations).toHaveLength(0);
    expect(mutator.getErrorCount()).toBeGreaterThan(0);
  });
  it('should handle malformed LLM responses', async () => {
    mockLLM.callLLM.mockResolvedValue({
      content: '', // Empty response
      model: 'claude-via-cli',
      tokens: { prompt: 100, completion: 0, total: 100 },
      finishReason: 'stop' as const,
      latency: 1000,
      timestamp: new Date(),
    });
    const mutations = await mutator.generateAdaptiveMutations(
      mockPromptCandidate,
      {
        taskId: 'test',
        description: 'test task',
        category: 'test',
        difficulty: 'easy',
        requiredCapabilities: [],
        expectedDuration: 100,
      }
    );
    expect(mutations).toHaveLength(0); // Should reject empty responses
  });
  it('should handle concurrent mutation requests', async () => {
    mockLLM.generateMutation.mockImplementation(async () => {
      await new Promise(resolve => setTimeout(resolve, 100)); // Simulate delay
      return 'Concurrent mutation result';
    });
    const promises = Array.from({ length: 5 }, () =>
      mutator.generateReflectiveMutations(
        { ...mockPromptCandidate, id: `prompt-${Math.random()}` },
        [mockExecutionTrajectory]
      )
    );
    const results = await Promise.all(promises);
    results.forEach(mutations => {
      expect(mutations).toHaveLength(1);
      expect(mutations[0].content).toBe('Concurrent mutation result');
    });
  });
  it('should maintain thread safety with concurrent access', async () => {
    let generationCounter = 0;
    const concurrentMutator = new PromptMutator();
    // Override internal ID generator so uniqueness under concurrency is
    // attributable to the counter below.
    concurrentMutator['generateId'] = () => `id-${++generationCounter}`;
    mockLLM.generateMutation.mockResolvedValue('Thread safe mutation');
    const concurrentPromises = Array.from({ length: 10 }, async (_, i) => {
      const prompt = { ...mockPromptCandidate, id: `prompt-${i}` };
      // BUG FIX: originally called the shared suite-level `mutator`, which
      // left the generateId override on `concurrentMutator` dead code and
      // made the test exercise the wrong instance.
      return concurrentMutator.generateReflectiveMutations(prompt, [mockExecutionTrajectory]);
    });
    const allResults = await Promise.all(concurrentPromises);
    const allMutations = allResults.flat();
    // All mutations should have unique IDs
    const ids = allMutations.map(m => m.id);
    const uniqueIds = new Set(ids);
    expect(uniqueIds.size).toBe(ids.length);
    // Local instance is not covered by afterEach; release it explicitly.
    concurrentMutator.shutdown();
  });
});
// Concurrency limiting, caching, metrics, and shutdown. Locals here were
// renamed: three tests declared `const mutator`, shadowing the suite-level
// instance — afterEach then shut down the wrong object and the locals were
// never released. Each local instance now has a distinct name and is shut
// down explicitly.
describe('Performance and Resource Management', () => {
  it('should limit concurrent LLM calls', async () => {
    const limitedMutator = new PromptMutator({ maxConcurrentMutations: 2 });
    let activeCalls = 0;
    let maxConcurrent = 0;
    // Track peak in-flight calls through a delayed stub.
    mockLLM.generateMutation.mockImplementation(async () => {
      activeCalls++;
      maxConcurrent = Math.max(maxConcurrent, activeCalls);
      await new Promise(resolve => setTimeout(resolve, 100));
      activeCalls--;
      return 'Rate limited mutation';
    });
    const promises = Array.from({ length: 5 }, () =>
      limitedMutator.generateReflectiveMutations(
        { ...mockPromptCandidate, id: `prompt-${Math.random()}` },
        [mockExecutionTrajectory]
      )
    );
    await Promise.all(promises);
    expect(maxConcurrent).toBeLessThanOrEqual(2);
    limitedMutator.shutdown();
  });
  it('should cache analysis results to avoid redundant LLM calls', async () => {
    const cachingMutator = new PromptMutator({ enableCaching: true });
    mockLLM.analyzeTrajectory.mockResolvedValue(mockReflectionAnalysis);
    mockLLM.generateMutation.mockResolvedValue('Cached mutation result');
    // First call populates the cache.
    await cachingMutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    // Second call with same trajectory should use cache
    await cachingMutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    expect(mockLLM.analyzeTrajectory).toHaveBeenCalledTimes(1); // Should be cached
    cachingMutator.shutdown();
  });
  it('should track resource usage metrics', async () => {
    mockLLM.generateMutation.mockResolvedValue('Resource tracked mutation');
    await mutator.generateReflectiveMutations(
      mockPromptCandidate,
      [mockExecutionTrajectory]
    );
    const metrics = mutator.getResourceUsageMetrics();
    expect(metrics).toHaveProperty('totalMutations');
    expect(metrics).toHaveProperty('averageLatency');
    expect(metrics).toHaveProperty('errorRate');
    expect(metrics).toHaveProperty('cacheHitRate');
    expect(metrics.totalMutations).toBeGreaterThan(0);
  });
  it('should cleanup resources on shutdown', () => {
    const freshMutator = new PromptMutator();
    freshMutator.shutdown();
    // Shutdown must propagate to the adapter and flip the shutdown flag.
    expect(mockLLM.shutdown).toHaveBeenCalled();
    expect(freshMutator.getIsShutdown()).toBe(true);
  });
});
});