// helpers.ts
/**
* Test helpers and utilities for GEPA MCP Server
* Provides common testing utilities, mocks, and fixtures
*/
import { vi, type MockedFunction } from 'vitest';
import type {
PromptCandidate,
ExecutionTrajectory,
ReflectionAnalysis,
LLMResponse,
TaskContext,
} from '../types/gepa';
/**
* Mock factory for creating test data
*/
export class MockFactory {
  /**
   * Build a short pseudo-random id such as `prompt-k3j9x2abc`.
   *
   * Uses `slice(2, 11)` rather than the deprecated
   * `String.prototype.substr(2, 9)`; both yield the same 9 characters.
   *
   * @param prefix - Prefix for the generated id (e.g. 'prompt').
   */
  private static randomId(prefix: string): string {
    return `${prefix}-${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Create a mock PromptCandidate with sensible defaults.
   *
   * @param overrides - Fields to merge over the defaults.
   * @returns A fully-populated PromptCandidate fixture.
   */
  static createPromptCandidate(overrides: Partial<PromptCandidate> = {}): PromptCandidate {
    return {
      id: MockFactory.randomId('prompt'),
      content: 'You are a helpful assistant. Complete the given task step by step.',
      generation: 1,
      taskPerformance: new Map([
        ['task-1', 0.85],
        ['task-2', 0.78],
        ['task-3', 0.92],
      ]),
      averageScore: 0.85,
      rolloutCount: 5,
      createdAt: new Date('2024-01-01T00:00:00Z'),
      lastEvaluated: new Date('2024-01-01T01:00:00Z'),
      mutationType: 'initial',
      ...overrides,
    };
  }

  /**
   * Create a mock ExecutionTrajectory with two steps, one LLM call and
   * one tool call.
   *
   * @param overrides - Fields to merge over the defaults.
   * @returns A fully-populated ExecutionTrajectory fixture.
   */
  static createExecutionTrajectory(
    overrides: Partial<ExecutionTrajectory> = {}
  ): ExecutionTrajectory {
    return {
      id: MockFactory.randomId('trajectory'),
      promptId: 'prompt-test-123',
      taskId: 'task-test-456',
      timestamp: new Date('2024-01-01T00:00:00Z'),
      steps: [
        {
          stepNumber: 1,
          action: 'analyze_question',
          reasoning: 'Breaking down the question into components',
          timestamp: new Date('2024-01-01T00:00:01Z'),
        },
        {
          stepNumber: 2,
          action: 'search_information',
          reasoning: 'Looking for relevant information',
          toolName: 'web_search',
          toolInput: { query: 'test query' },
          toolOutput: { results: ['result1', 'result2'] },
          timestamp: new Date('2024-01-01T00:00:03Z'),
        },
      ],
      finalResult: {
        success: true,
        score: 0.88,
        output: 'Successfully completed the task with high accuracy.',
      },
      llmCalls: [
        {
          model: 'claude-3-sonnet',
          prompt: 'Test prompt for analysis',
          response: 'Test response with analysis',
          tokens: {
            prompt: 150,
            completion: 75,
          },
          latency: 1200,
          // Fixed timestamp keeps the fixture deterministic; the original
          // used `new Date()` here, which made snapshot comparisons flaky.
          timestamp: new Date('2024-01-01T00:00:02Z'),
        },
      ],
      toolCalls: [
        {
          toolName: 'web_search',
          input: { query: 'test query' },
          output: { results: ['result1', 'result2'] },
          success: true,
          latency: 800,
        },
      ],
      totalTokens: 225,
      executionTime: 5000,
      ...overrides,
    };
  }

  /**
   * Create a mock ReflectionAnalysis with one diagnosed pattern and two
   * improvement suggestions.
   *
   * @param overrides - Fields to merge over the defaults.
   */
  static createReflectionAnalysis(overrides: Partial<ReflectionAnalysis> = {}): ReflectionAnalysis {
    return {
      trajectoryId: 'trajectory-test-123',
      promptId: 'prompt-test-456',
      diagnosis: {
        failurePoint: 'Step 2: Information synthesis',
        rootCause: 'Insufficient context for complex reasoning',
        moduleResponsibility: new Map([
          ['reasoning', 0.6],
          ['information_gathering', 0.3],
          ['synthesis', 0.1],
        ]),
        patterns: [
          {
            type: 'incomplete_reasoning',
            frequency: 3,
            description: 'Failing to complete multi-step logical reasoning',
            examples: ['Missing intermediate steps', 'Jumping to conclusions'],
          },
        ],
      },
      suggestions: [
        {
          type: 'add_instruction',
          targetSection: 'reasoning_guidelines',
          proposedChange: 'Add explicit instruction to show all reasoning steps',
          rationale: 'Users need to see the complete logical chain',
          expectedImpact: 0.15,
        },
        {
          type: 'add_example',
          targetSection: 'examples',
          proposedChange: 'Include example of step-by-step reasoning',
          rationale: 'Concrete examples improve following instructions',
          expectedImpact: 0.12,
        },
      ],
      confidence: 0.82,
      rationale: 'Analysis based on 5 similar failure patterns in recent trajectories',
      ...overrides,
    };
  }

  /**
   * Create a mock LLMResponse.
   *
   * @param overrides - Fields to merge over the defaults.
   */
  static createLLMResponse(overrides: Partial<LLMResponse> = {}): LLMResponse {
    return {
      content: 'This is a mock LLM response for testing purposes.',
      model: 'claude-3-sonnet',
      tokens: {
        prompt: 100,
        completion: 50,
        total: 150,
      },
      finishReason: 'stop',
      latency: 1500,
      timestamp: new Date('2024-01-01T00:00:00Z'),
      ...overrides,
    };
  }

  /**
   * Create a mock TaskContext.
   *
   * @param overrides - Fields to merge over the defaults.
   */
  static createTaskContext(overrides: Partial<TaskContext> = {}): TaskContext {
    return {
      taskId: 'task-test-123',
      description: 'Answer the question using available tools and reasoning',
      category: 'question_answering',
      difficulty: 'medium',
      requiredCapabilities: ['reasoning', 'web_search', 'synthesis'],
      expectedDuration: 30000, // 30 seconds
      ...overrides,
    };
  }
}
/**
* Mock implementations for external dependencies
*/
export class MockImplementations {
  /**
   * Build a fake Claude Code child process whose stdio streams are
   * vi.fn() mocks, plus direct handles to each stream so tests can
   * assert on writes/reads without digging into the process object.
   */
  static createMockClaudeProcess(): {
    mockProcess: {
      stdin: { write: ReturnType<typeof vi.fn>; end: ReturnType<typeof vi.fn> };
      stdout: { on: ReturnType<typeof vi.fn>; read: ReturnType<typeof vi.fn> };
      stderr: { on: ReturnType<typeof vi.fn> };
      on: ReturnType<typeof vi.fn>;
      kill: ReturnType<typeof vi.fn>;
      pid: number;
    };
    mockStdin: { write: ReturnType<typeof vi.fn>; end: ReturnType<typeof vi.fn> };
    mockStdout: { on: ReturnType<typeof vi.fn>; read: ReturnType<typeof vi.fn> };
    mockStderr: { on: ReturnType<typeof vi.fn> };
  } {
    // Each stream is created once and shared between the process object
    // and the returned handles, so assertions see the same mocks.
    const stdin = {
      write: vi.fn().mockReturnValue(true),
      end: vi.fn().mockReturnValue(undefined),
    };
    const stdout = {
      on: vi.fn().mockReturnValue(undefined),
      read: vi.fn().mockReturnValue(null),
    };
    const stderr = {
      on: vi.fn().mockReturnValue(undefined),
    };
    const proc = {
      stdin,
      stdout,
      stderr,
      on: vi.fn().mockReturnValue(undefined),
      kill: vi.fn().mockReturnValue(true),
      pid: 12345,
    };
    return {
      mockProcess: proc,
      mockStdin: stdin,
      mockStdout: stdout,
      mockStderr: stderr,
    };
  }

  /**
   * Build a mock async file-system facade. Reads resolve to empty
   * content, writes/removals resolve to undefined, and `stat` reports
   * a plain file.
   */
  static createMockFileSystem(): {
    readFile: ReturnType<typeof vi.fn>;
    writeFile: ReturnType<typeof vi.fn>;
    mkdir: ReturnType<typeof vi.fn>;
    readdir: ReturnType<typeof vi.fn>;
    stat: ReturnType<typeof vi.fn>;
    rm: ReturnType<typeof vi.fn>;
    exists: ReturnType<typeof vi.fn>;
  } {
    // Small local helper: a vi.fn() that resolves with the given value.
    const resolveWith = (value: unknown): ReturnType<typeof vi.fn> =>
      vi.fn().mockResolvedValue(value);
    return {
      readFile: resolveWith(''),
      writeFile: resolveWith(undefined),
      mkdir: resolveWith(undefined),
      readdir: resolveWith([]),
      stat: resolveWith({ isFile: () => true, isDirectory: () => false }),
      rm: resolveWith(undefined),
      exists: resolveWith(true),
    };
  }

  /**
   * Build mock implementations of the GEPA MCP server tools, each
   * resolving with a minimal happy-path payload.
   */
  static createMockMCPTools(): {
    gepa_start_evolution: ReturnType<typeof vi.fn>;
    gepa_record_trajectory: ReturnType<typeof vi.fn>;
    gepa_evaluate_prompt: ReturnType<typeof vi.fn>;
    gepa_reflect: ReturnType<typeof vi.fn>;
    gepa_get_pareto_frontier: ReturnType<typeof vi.fn>;
    gepa_select_optimal: ReturnType<typeof vi.fn>;
  } {
    const resolveWith = (value: unknown): ReturnType<typeof vi.fn> =>
      vi.fn().mockResolvedValue(value);
    return {
      gepa_start_evolution: resolveWith({ success: true }),
      gepa_record_trajectory: resolveWith({ success: true }),
      gepa_evaluate_prompt: resolveWith({ score: 0.8 }),
      gepa_reflect: resolveWith({ suggestions: [] }),
      gepa_get_pareto_frontier: resolveWith({ candidates: [] }),
      gepa_select_optimal: resolveWith({ candidate: null }),
    };
  }
}
/**
 * Mock utility types and helpers
 */
export type MockedFn<T extends (...args: any[]) => any> = MockedFunction<T>;
/**
 * Build a vi.fn() mock cast to the given function signature, so call
 * sites keep full argument/return typing.
 */
export const createMockFn = <T extends (...args: any[]) => any>(): MockedFn<T> =>
  vi.fn() as MockedFn<T>;
/**
 * Wrap a value in an already-resolved promise, for stubbing async
 * happy paths.
 */
export const createMockPromise = <T>(value: T): Promise<T> => Promise.resolve(value);
/**
 * Wrap an error in an already-rejected promise, for stubbing async
 * failure paths.
 */
export const createMockRejection = (error: Error): Promise<never> => Promise.reject(error);
/**
* Test assertion helpers
*/
export class TestAssertions {
  /**
   * Assert that a value is a structurally valid PromptCandidate.
   *
   * Narrows the argument's type on success.
   *
   * @param candidate - Unknown value to validate.
   * @throws Error describing the first field that fails validation.
   */
  static assertValidPromptCandidate(candidate: unknown): asserts candidate is PromptCandidate {
    if (!candidate || typeof candidate !== 'object') {
      throw new Error('Expected PromptCandidate to be an object');
    }
    const candidateObj = candidate as Record<string, unknown>;
    if (typeof candidateObj.id !== 'string') {
      throw new Error('PromptCandidate.id must be a string');
    }
    if (typeof candidateObj.content !== 'string') {
      throw new Error('PromptCandidate.content must be a string');
    }
    if (typeof candidateObj.generation !== 'number') {
      throw new Error('PromptCandidate.generation must be a number');
    }
    if (!(candidateObj.taskPerformance instanceof Map)) {
      throw new Error('PromptCandidate.taskPerformance must be a Map');
    }
    if (typeof candidateObj.averageScore !== 'number') {
      throw new Error('PromptCandidate.averageScore must be a number');
    }
  }

  /**
   * Assert that a value is a structurally valid ExecutionTrajectory.
   *
   * Only checks the fields this suite relies on (id, steps, finalResult);
   * deeper step/call validation is left to type-level checks.
   *
   * @param trajectory - Unknown value to validate.
   * @throws Error describing the first field that fails validation.
   */
  static assertValidExecutionTrajectory(
    trajectory: unknown
  ): asserts trajectory is ExecutionTrajectory {
    if (!trajectory || typeof trajectory !== 'object') {
      throw new Error('Expected ExecutionTrajectory to be an object');
    }
    const trajectoryObj = trajectory as Record<string, unknown>;
    if (typeof trajectoryObj.id !== 'string') {
      throw new Error('ExecutionTrajectory.id must be a string');
    }
    if (!Array.isArray(trajectoryObj.steps)) {
      throw new Error('ExecutionTrajectory.steps must be an array');
    }
    if (!trajectoryObj.finalResult || typeof trajectoryObj.finalResult !== 'object') {
      throw new Error('ExecutionTrajectory.finalResult must be an object');
    }
    const finalResult = trajectoryObj.finalResult as Record<string, unknown>;
    if (typeof finalResult.success !== 'boolean') {
      throw new Error('ExecutionTrajectory.finalResult.success must be a boolean');
    }
  }

  /**
   * Assert that a performance score is a number (not NaN) in [0, 1].
   *
   * @param score - Score to validate.
   * @throws Error when the score is not a number, is NaN, or is out of range.
   */
  static assertValidPerformanceScore(score: number): void {
    if (typeof score !== 'number') {
      throw new Error('Performance score must be a number');
    }
    // Check NaN before the range comparison. The original checked it last
    // and relied on NaN failing both `< 0` and `> 1` to reach that branch;
    // Number.isNaN avoids the coercing global isNaN.
    if (Number.isNaN(score)) {
      throw new Error('Performance score cannot be NaN');
    }
    if (score < 0 || score > 1) {
      throw new Error('Performance score must be between 0 and 1');
    }
  }
}
/**
* Async test utilities
*/
export class AsyncTestUtils {
  /**
   * Poll `condition` until it returns true or `timeout` ms elapse.
   *
   * The condition is always evaluated at least once, even when
   * `timeout <= 0` (the original skipped the check entirely in that case
   * and immediately threw).
   *
   * @param condition - Sync or async predicate to poll.
   * @param timeout - Maximum total wait in milliseconds (default 5000).
   * @param interval - Delay between polls in milliseconds (default 100).
   * @throws Error when the condition is still false at the deadline.
   */
  static async waitFor(
    condition: () => boolean | Promise<boolean>,
    timeout = 5000,
    interval = 100
  ): Promise<void> {
    const deadline = Date.now() + timeout;
    do {
      if (await condition()) {
        return;
      }
      await new Promise(resolve => setTimeout(resolve, interval));
    } while (Date.now() < deadline);
    throw new Error(`Condition not met within ${timeout}ms`);
  }

  /**
   * Resolve after `ms` milliseconds; useful for sequencing async test steps.
   */
  static delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Race `promise` against a deadline of `ms` milliseconds.
   *
   * The watchdog timer is cleared once the race settles, so it no longer
   * keeps the Node event loop alive after the promise wins (the original
   * leaked the pending setTimeout for the full `ms`).
   *
   * @param promise - Operation under test.
   * @param ms - Deadline in milliseconds.
   * @throws Error(`Operation timed out after ...`) when the deadline wins.
   */
  static timeout<T>(promise: Promise<T>, ms: number): Promise<T> {
    let watchdog: ReturnType<typeof setTimeout> | undefined;
    const deadline = new Promise<never>((_, reject) => {
      watchdog = setTimeout(() => reject(new Error(`Operation timed out after ${ms}ms`)), ms);
    });
    return Promise.race([promise, deadline]).finally(() => {
      if (watchdog !== undefined) {
        clearTimeout(watchdog);
      }
    });
  }
}
/**
* Performance testing utilities
*/
export class PerformanceTestUtils {
  /**
   * Run an async function and report its result alongside the elapsed
   * wall-clock time in milliseconds (measured via the high-resolution
   * nanosecond clock).
   */
  static async measureTime<T>(fn: () => Promise<T>): Promise<{ result: T; duration: number }> {
    const NS_PER_MS = 1_000_000;
    const startedAt = process.hrtime.bigint();
    const result = await fn();
    const elapsedNs = process.hrtime.bigint() - startedAt;
    return { result, duration: Number(elapsedNs) / NS_PER_MS };
  }

  /**
   * Run an async function and fail if it exceeds `maxDuration` ms;
   * otherwise return its result.
   *
   * @throws Error naming `operation` when the limit is exceeded.
   */
  static async assertTimeLimit<T>(
    fn: () => Promise<T>,
    maxDuration: number,
    operation = 'Operation'
  ): Promise<T> {
    const measured = await this.measureTime(fn);
    if (measured.duration <= maxDuration) {
      return measured.result;
    }
    throw new Error(`${operation} took ${measured.duration}ms, expected < ${maxDuration}ms`);
  }
}
// Export everything for easy importing
// NOTE(review): the named exports above remain the preferred import path;
// this default aggregate is kept for callers that import the module whole.
export default {
  MockFactory,
  MockImplementations,
  TestAssertions,
  AsyncTestUtils,
  PerformanceTestUtils,
};