// performance-benchmarks.ts
/**
* Performance Benchmarks for GEPA E2E Testing
*
* This module provides comprehensive performance testing and benchmarking:
* - Response time measurements for all operations
* - Throughput testing under various loads
* - Memory usage profiling and leak detection
* - Concurrent operation performance analysis
* - Resource utilization monitoring
* - Performance regression detection
*/
import { performance } from 'node:perf_hooks'; // high-resolution timers used for all benchmark measurements
/**
* Performance Threshold Configuration
*/
interface PerformanceThresholds {
evolutionTime: number; // max average time per evolution cycle, in ms
trajectoryRecording: number; // max average time per trajectory recording, in ms
paretoFrontierQuery: number; // max average time per Pareto frontier query, in ms
memoryOperations: number; // max average time per memory operation, in ms
}
/**
* Benchmark Configuration
*/
interface BenchmarkConfig {
thresholds: PerformanceThresholds;
maxConcurrentTests: number;
}
/**
* Individual Benchmark Result
*/
interface BenchmarkResult {
operationName: string;
averageTime: number;
minTime: number;
maxTime: number;
standardDeviation: number;
throughput: number; // operations per second
successRate: number;
memoryDelta: number; // bytes
samples: number;
passesSLA: boolean;
}
/**
* Comprehensive Benchmark Suite Result
*/
export interface ComprehensiveBenchmarkResult {
trajectoryRecording: BenchmarkResult;
paretoQuery: BenchmarkResult;
memoryOperations: BenchmarkResult;
evolutionCycle: BenchmarkResult;
concurrentOperations: BenchmarkResult;
overallScore: number;
executionTime: number;
resourceUtilization: {
cpu: number; // percent
memory: number; // percent
io: number; // percent
};
}
/**
* Memory Profile Information
*/
interface MemoryProfile {
initialHeap: number;
peakHeap: number;
finalHeap: number;
totalAllocated: number;
gcCycles: number;
leakSuspected: boolean;
fragmentationLevel: number;
}
/**
* Concurrent Load Test Result
*/
interface ConcurrentLoadResult {
concurrencyLevel: number;
totalOperations: number;
successfulOperations: number;
failedOperations: number;
averageResponseTime: number;
throughput: number;
resourceContention: number;
errorRate: number;
}
/**
* Performance Regression Detection
*/
interface RegressionAnalysis {
baselineMetrics: Record<string, number>;
currentMetrics: Record<string, number>;
regressions: Array<{
metric: string;
baselineValue: number;
currentValue: number;
percentageChange: number;
severity: 'minor' | 'major' | 'critical';
}>;
improvements: Array<{
metric: string;
baselineValue: number;
currentValue: number;
percentageChange: number;
}>;
overallAssessment: 'improved' | 'degraded' | 'stable';
}
/**
* Main Performance Benchmarks Class
*/
export class PerformanceBenchmarks {
private config: BenchmarkConfig;
private memoryProfiler: MemoryProfiler;
constructor(config: BenchmarkConfig) {
this.config = config;
this.memoryProfiler = new MemoryProfiler();
}
/**
* Run comprehensive benchmark suite
*/
async runComprehensiveBenchmarks(): Promise<ComprehensiveBenchmarkResult> {
const startTime = Date.now();
const resourceMonitor = new ResourceMonitor();
// eslint-disable-next-line no-console
console.log('Starting comprehensive performance benchmarks...');
try {
resourceMonitor.start();
// Benchmark 1: Trajectory Recording Performance
const trajectoryRecording = await this.benchmarkTrajectoryRecording();
// Benchmark 2: Pareto Frontier Query Performance
const paretoQuery = await this.benchmarkParetoQueries();
// Benchmark 3: Memory Operations Performance
const memoryOperations = await this.benchmarkMemoryOperations();
// Benchmark 4: Evolution Cycle Performance
const evolutionCycle = await this.benchmarkEvolutionCycle();
// Benchmark 5: Concurrent Operations Performance
const concurrentOperations = await this.benchmarkConcurrentOperations();
const resourceUtilization = await resourceMonitor.stop();
// Calculate overall performance score
const overallScore = this.calculateOverallScore([
trajectoryRecording,
paretoQuery,
memoryOperations,
evolutionCycle,
concurrentOperations,
]);
const result: ComprehensiveBenchmarkResult = {
trajectoryRecording,
paretoQuery,
memoryOperations,
evolutionCycle,
concurrentOperations,
overallScore,
executionTime: Date.now() - startTime,
resourceUtilization,
};
this.storeBenchmarkResults(result);
this.generatePerformanceReport(result);
return result;
} catch (error) {
throw new Error(`Benchmark suite failed: ${error}`);
}
}
/**
* Benchmark trajectory recording operations
*/
async benchmarkTrajectoryRecording(): Promise<BenchmarkResult> {
const operationName = 'trajectory_recording';
const samples = 50;
const timings: number[] = [];
const memoryDeltas: number[] = [];
let successCount = 0;
// eslint-disable-next-line no-console
console.log(`Benchmarking ${operationName}...`);
for (let i = 0; i < samples; i++) {
const initialMemory = process.memoryUsage().heapUsed;
const startTime = performance.now();
try {
await this.simulateTrajectoryRecording();
const endTime = performance.now();
const finalMemory = process.memoryUsage().heapUsed;
timings.push(endTime - startTime);
memoryDeltas.push(finalMemory - initialMemory);
successCount++;
} catch (error) {
timings.push(this.config.thresholds.trajectoryRecording * 2); // Penalty for failure
memoryDeltas.push(0);
}
// Small delay between operations
await this.sleep(10);
}
return this.createBenchmarkResult(operationName, timings, memoryDeltas, successCount, samples);
}
/**
* Benchmark Pareto frontier query operations
*/
async benchmarkParetoQueries(): Promise<BenchmarkResult> {
const operationName = 'pareto_frontier_query';
const samples = 100;
const timings: number[] = [];
const memoryDeltas: number[] = [];
let successCount = 0;
// eslint-disable-next-line no-console
console.log(`Benchmarking ${operationName}...`);
// Prepare test data
await this.setupParetoTestData();
for (let i = 0; i < samples; i++) {
const initialMemory = process.memoryUsage().heapUsed;
const startTime = performance.now();
try {
await this.simulateParetoQuery();
const endTime = performance.now();
const finalMemory = process.memoryUsage().heapUsed;
timings.push(endTime - startTime);
memoryDeltas.push(finalMemory - initialMemory);
successCount++;
} catch (error) {
timings.push(this.config.thresholds.paretoFrontierQuery * 2);
memoryDeltas.push(0);
}
await this.sleep(5);
}
return this.createBenchmarkResult(operationName, timings, memoryDeltas, successCount, samples);
}
/**
* Benchmark memory operations
*/
async benchmarkMemoryOperations(): Promise<BenchmarkResult> {
const operationName = 'memory_operations';
const samples = 200;
const timings: number[] = [];
const memoryDeltas: number[] = [];
let successCount = 0;
// eslint-disable-next-line no-console
console.log(`Benchmarking ${operationName}...`);
for (let i = 0; i < samples; i++) {
const initialMemory = process.memoryUsage().heapUsed;
const startTime = performance.now();
try {
await this.simulateMemoryOperation();
const endTime = performance.now();
const finalMemory = process.memoryUsage().heapUsed;
timings.push(endTime - startTime);
memoryDeltas.push(finalMemory - initialMemory);
successCount++;
} catch (error) {
timings.push(this.config.thresholds.memoryOperations * 2);
memoryDeltas.push(0);
}
}
return this.createBenchmarkResult(operationName, timings, memoryDeltas, successCount, samples);
}
/**
* Benchmark evolution cycle operations
*/
async benchmarkEvolutionCycle(): Promise<BenchmarkResult> {
const operationName = 'evolution_cycle';
const samples = 10; // Fewer samples for heavy operations
const timings: number[] = [];
const memoryDeltas: number[] = [];
let successCount = 0;
// eslint-disable-next-line no-console
console.log(`Benchmarking ${operationName}...`);
for (let i = 0; i < samples; i++) {
const initialMemory = process.memoryUsage().heapUsed;
const startTime = performance.now();
try {
await this.simulateEvolutionCycle();
const endTime = performance.now();
const finalMemory = process.memoryUsage().heapUsed;
timings.push(endTime - startTime);
memoryDeltas.push(finalMemory - initialMemory);
successCount++;
} catch (error) {
timings.push(this.config.thresholds.evolutionTime * 2);
memoryDeltas.push(0);
}
// Longer delay for heavy operations
await this.sleep(100);
}
return this.createBenchmarkResult(operationName, timings, memoryDeltas, successCount, samples);
}
/**
* Benchmark concurrent operations
*/
async benchmarkConcurrentOperations(): Promise<BenchmarkResult> {
const operationName = 'concurrent_operations';
const concurrencyLevels = [1, 2, 4, 8];
const allTimings: number[] = [];
const allMemoryDeltas: number[] = [];
let totalSuccessCount = 0;
let totalSamples = 0;
// eslint-disable-next-line no-console
console.log(`Benchmarking ${operationName}...`);
for (const concurrency of concurrencyLevels) {
const loadResult = await this.runConcurrentLoadTest(concurrency, 20);
// Convert load test results to benchmark format
const operationTime = loadResult.averageResponseTime;
const operationsPerLevel = loadResult.totalOperations;
for (let i = 0; i < operationsPerLevel; i++) {
allTimings.push(operationTime);
allMemoryDeltas.push(1024); // Estimated memory delta per operation
}
totalSuccessCount += loadResult.successfulOperations;
totalSamples += loadResult.totalOperations;
}
return this.createBenchmarkResult(operationName, allTimings, allMemoryDeltas, totalSuccessCount, totalSamples);
}
/**
* Run concurrent load test
*/
async runConcurrentLoadTest(concurrencyLevel: number, operationsPerLevel: number): Promise<ConcurrentLoadResult> {
const startTime = Date.now();
const promises: Promise<boolean>[] = [];
const responseTimes: number[] = [];
// Create concurrent operations
for (let i = 0; i < operationsPerLevel; i++) {
const promise = this.runConcurrentOperation(i, responseTimes);
promises.push(promise);
// Stagger operations based on concurrency level
if (i % concurrencyLevel === 0 && i > 0) {
await this.sleep(10);
}
}
// Wait for all operations to complete
const results = await Promise.allSettled(promises);
// Use results for calculation
const successfulOperations = results.filter(r => r.status === 'fulfilled' && r.value).length;
const failedOperations = operationsPerLevel - successfulOperations;
const executionTime = Date.now() - startTime;
const averageResponseTime = responseTimes.length > 0
? responseTimes.reduce((sum, time) => sum + time, 0) / responseTimes.length
: 0;
const throughput = operationsPerLevel / (executionTime / 1000); // ops/sec
const errorRate = failedOperations / operationsPerLevel;
// Calculate resource contention (simplified metric)
const resourceContention = Math.max(0, (concurrencyLevel - 1) * 0.1);
return {
concurrencyLevel,
totalOperations: operationsPerLevel,
successfulOperations,
failedOperations,
averageResponseTime,
throughput,
resourceContention,
errorRate,
};
}
/**
* Profile memory usage during benchmark execution
*/
async profileMemoryUsage(operation: () => Promise<void>): Promise<MemoryProfile> {
return this.memoryProfiler.profile(operation);
}
/**
* Detect performance regressions compared to baseline
*/
detectRegressions(baselineResults: ComprehensiveBenchmarkResult): RegressionAnalysis {
const baselineMetrics = this.extractMetrics(baselineResults);
const currentResults = this.getLatestBenchmarkResults();
if (!currentResults) {
throw new Error('No current benchmark results available for regression analysis');
}
const currentMetrics = this.extractMetrics(currentResults);
const regressions: RegressionAnalysis['regressions'] = [];
const improvements: RegressionAnalysis['improvements'] = [];
for (const [metric, baselineValue] of Object.entries(baselineMetrics)) {
const currentValue = currentMetrics[metric];
if (currentValue !== undefined) {
const percentageChange = ((currentValue - baselineValue) / baselineValue) * 100;
if (percentageChange > 10) { // Regression threshold
regressions.push({
metric,
baselineValue,
currentValue,
percentageChange,
severity: percentageChange > 50 ? 'critical' : percentageChange > 25 ? 'major' : 'minor',
});
} else if (percentageChange < -5) { // Improvement threshold
improvements.push({
metric,
baselineValue,
currentValue,
percentageChange: Math.abs(percentageChange),
});
}
}
}
const overallAssessment: RegressionAnalysis['overallAssessment'] =
regressions.length > improvements.length ? 'degraded' :
improvements.length > regressions.length ? 'improved' : 'stable';
return {
baselineMetrics,
currentMetrics,
regressions,
improvements,
overallAssessment,
};
}
/**
* Generate comprehensive performance report
*/
generatePerformanceReport(results: ComprehensiveBenchmarkResult): void {
// eslint-disable-next-line no-console
console.log('\nPerformance Benchmark Results');
// eslint-disable-next-line no-console
console.log('='.repeat(68));
// eslint-disable-next-line no-console
console.log(`Overall Score: ${results.overallScore.toFixed(1)}/100`);
// eslint-disable-next-line no-console
console.log(`Total Execution Time: ${(results.executionTime / 1000).toFixed(2)}s`);
// eslint-disable-next-line no-console
console.log('\nIndividual Benchmark Results:');
const benchmarks = [
results.trajectoryRecording,
results.paretoQuery,
results.memoryOperations,
results.evolutionCycle,
results.concurrentOperations,
];
for (const benchmark of benchmarks) {
const status = benchmark.passesSLA ? 'PASS' : 'FAIL';
// eslint-disable-next-line no-console
console.log(`${status} ${benchmark.operationName}:`);
// eslint-disable-next-line no-console
console.log(` Average: ${benchmark.averageTime.toFixed(2)}ms`);
// eslint-disable-next-line no-console
console.log(` Throughput: ${benchmark.throughput.toFixed(1)} ops/sec`);
// eslint-disable-next-line no-console
console.log(` Success Rate: ${(benchmark.successRate * 100).toFixed(1)}%`);
// eslint-disable-next-line no-console
console.log(` Memory Delta: ${(benchmark.memoryDelta / 1024).toFixed(1)}KB avg`);
}
// eslint-disable-next-line no-console
console.log('\nResource Utilization:');
// eslint-disable-next-line no-console
console.log(`CPU: ${results.resourceUtilization.cpu.toFixed(1)}%`);
// eslint-disable-next-line no-console
console.log(`Memory: ${results.resourceUtilization.memory.toFixed(1)}%`);
// eslint-disable-next-line no-console
console.log(`I/O: ${results.resourceUtilization.io.toFixed(1)}%`);
}
// Private helper methods
/**
* Create benchmark result from measurements
*/
private createBenchmarkResult(
operationName: string,
timings: number[],
memoryDeltas: number[],
successCount: number,
totalSamples: number
): BenchmarkResult {
const averageTime = timings.reduce((sum, time) => sum + time, 0) / timings.length;
const minTime = Math.min(...timings);
const maxTime = Math.max(...timings);
const variance = timings.reduce((sum, time) => sum + Math.pow(time - averageTime, 2), 0) / timings.length;
const standardDeviation = Math.sqrt(variance);
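// Throughput is derived from mean latency (1000 ms / average), i.e. a sequential, single-worker estimate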
const throughput = 1000 / averageTime; // operations per second
const successRate = successCount / totalSamples;
const averageMemoryDelta = memoryDeltas.reduce((sum, delta) => sum + delta, 0) / memoryDeltas.length;
// Check whether the operation passes its SLA; map snake_case operation names to threshold keys
// (operations without a dedicated threshold fall back to memoryOperations)
const thresholdKeys: Partial<Record<string, keyof PerformanceThresholds>> = {
trajectory_recording: 'trajectoryRecording', pareto_frontier_query: 'paretoFrontierQuery',
memory_operations: 'memoryOperations', evolution_cycle: 'evolutionTime',
};
const threshold = this.config.thresholds[thresholdKeys[operationName] ?? 'memoryOperations'];
const passesSLA = averageTime <= threshold && successRate >= 0.95;
return {
operationName,
averageTime,
minTime,
maxTime,
standardDeviation,
throughput,
successRate,
memoryDelta: averageMemoryDelta,
samples: totalSamples,
passesSLA,
};
}
/**
* Calculate overall performance score
*/
private calculateOverallScore(benchmarks: BenchmarkResult[]): number {
let totalScore = 0;
let totalWeight = 0;
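// Relative weights: heavier, longer-running operations (e.g. full evolution cycles)
// contribute more to the aggregate score than cheap memory reads/writes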
const weights = {
trajectory_recording: 1.5,
pareto_frontier_query: 1.2,
memory_operations: 1.0,
evolution_cycle: 2.0,
concurrent_operations: 1.3,
};
for (const benchmark of benchmarks) {
const weight = weights[benchmark.operationName as keyof typeof weights] || 1.0;
let score = 0;
// Base score from SLA compliance
if (benchmark.passesSLA) {
score += 70;
}
// Additional score from success rate
score += benchmark.successRate * 20;
// Additional score from throughput (relative)
const throughputScore = Math.min(10, benchmark.throughput / 10);
score += throughputScore;
totalScore += score * weight;
totalWeight += weight;
}
return totalWeight > 0 ? totalScore / totalWeight : 0;
}
/**
* Extract metrics for regression analysis
*/
private extractMetrics(results: ComprehensiveBenchmarkResult): Record<string, number> {
return {
trajectoryRecordingTime: results.trajectoryRecording.averageTime,
paretoQueryTime: results.paretoQuery.averageTime,
memoryOperationTime: results.memoryOperations.averageTime,
evolutionCycleTime: results.evolutionCycle.averageTime,
concurrentOperationTime: results.concurrentOperations.averageTime,
overallScore: results.overallScore,
memoryUtilization: results.resourceUtilization.memory,
cpuUtilization: results.resourceUtilization.cpu,
};
}
/**
* Get latest benchmark results from history
*/
private getLatestBenchmarkResults(): ComprehensiveBenchmarkResult | null {
// In a real implementation, this would retrieve from persistent storage
return null; // Placeholder
}
/**
* Store benchmark results for future comparison
*/
private storeBenchmarkResults(_results: ComprehensiveBenchmarkResult): void {
// In a real implementation, this would store to persistent storage
// For now, just store in memory
const timestamp = new Date().toISOString();
// eslint-disable-next-line no-console
console.log(`Benchmark results stored with timestamp: ${timestamp}`);
}
/**
* Run single concurrent operation
*/
private async runConcurrentOperation(index: number, responseTimes: number[]): Promise<boolean> {
const startTime = performance.now();
try {
// Simulate various operation types
const operationType = index % 4;
switch (operationType) {
case 0:
await this.simulateTrajectoryRecording();
break;
case 1:
await this.simulateParetoQuery();
break;
case 2:
await this.simulateMemoryOperation();
break;
case 3:
await this.simulateEvolutionCycle();
break;
}
responseTimes.push(performance.now() - startTime);
return true;
} catch (error) {
responseTimes.push(performance.now() - startTime);
return false;
}
}
// Simulation methods for different operations
private async simulateTrajectoryRecording(): Promise<void> {
await this.sleep(Math.random() * 100 + 50); // 50-150ms
// Simulate memory allocation for trajectory data
new Array(1000).fill(0).map((_, i) => ({ step: i, data: 'test' }));
await this.sleep(Math.random() * 50); // Additional processing time
}
private async simulateParetoQuery(): Promise<void> {
await this.sleep(Math.random() * 50 + 20); // 20-70ms
// Simulate query processing
const results = new Array(100).fill(0).map((_, i) => ({ id: i, score: Math.random() }));
results.sort((a, b) => b.score - a.score); // Simulate sorting
}
private async simulateMemoryOperation(): Promise<void> {
await this.sleep(Math.random() * 20 + 5); // 5-25ms
// Simulate memory read/write
const data = { timestamp: Date.now(), data: Math.random() };
JSON.stringify(data); // Simulate serialization
}
private async simulateEvolutionCycle(): Promise<void> {
await this.sleep(Math.random() * 2000 + 1000); // 1-3 seconds
// Simulate complex evolution operations
for (let i = 0; i < 10; i++) {
await this.sleep(Math.random() * 100 + 50); // Multiple sub-operations
const candidates = new Array(20).fill(0).map(() => Math.random());
candidates.sort((a, b) => b - a); // Simulate selection
}
}
private async setupParetoTestData(): Promise<void> {
// Simulate setting up test data for Pareto queries
await this.sleep(100);
}
private sleep(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
}
/**
* Memory Profiler for tracking memory usage patterns
*/
class MemoryProfiler {
async profile(operation: () => Promise<void>): Promise<MemoryProfile> {
const initialMemory = process.memoryUsage();
let peakHeap = initialMemory.heapUsed;
// Monitor memory during operation
const memoryMonitor = setInterval(() => {
const currentMemory = process.memoryUsage().heapUsed;
if (currentMemory > peakHeap) {
peakHeap = currentMemory;
}
}, 10);
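// 10 ms sampling interval: short-lived allocation spikes between samples may be missed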
try {
await operation();
} finally {
clearInterval(memoryMonitor);
}
const finalMemory = process.memoryUsage();
return {
initialHeap: initialMemory.heapUsed,
peakHeap,
finalHeap: finalMemory.heapUsed,
totalAllocated: peakHeap - initialMemory.heapUsed,
gcCycles: 0, // Would need GC monitoring in real implementation
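// Heuristic: suspect a leak when more than half of the peak heap growth is still retained after the operation completes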
leakSuspected: (finalMemory.heapUsed - initialMemory.heapUsed) > (peakHeap - initialMemory.heapUsed) * 0.5,
fragmentationLevel: Math.random() * 0.2, // Simulated
};
}
}
/**
* Resource Monitor for tracking CPU, memory, and I/O usage
*/
class ResourceMonitor {
private startTime: number = 0;
private initialMemory: NodeJS.MemoryUsage | null = null;
start(): void {
this.startTime = Date.now();
this.initialMemory = process.memoryUsage();
}
async stop(): Promise<{ cpu: number; memory: number; io: number }> {
const finalMemory = process.memoryUsage();
const executionTimeMs = Date.now() - this.startTime;
// Calculate resource utilization metrics using actual measurements
const memoryGrowth = this.initialMemory
? (finalMemory.heapUsed - this.initialMemory.heapUsed) / (1024 * 1024)
: finalMemory.heapUsed / (1024 * 1024);
// Simulate CPU based on execution time and memory growth (more realistic simulation)
const cpu = Math.min(95, Math.max(10, (executionTimeMs / 1000) * 15 + (memoryGrowth * 2)));
// Memory percentage based on actual heap usage vs available
const memory = Math.min(95, (finalMemory.heapUsed / (1024 * 1024 * 500)) * 100); // Assume 500MB limit
// I/O based on execution pattern
const io = Math.min(70, Math.max(5, (executionTimeMs / 100) + Math.random() * 20));
return { cpu, memory, io };
}
}
// Export additional types and interfaces
export type {
BenchmarkResult,
ConcurrentLoadResult,
MemoryProfile,
RegressionAnalysis,
PerformanceThresholds,
BenchmarkConfig
};
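/**
 * Example usage (a minimal sketch; the threshold values and exit-code handling below
 * are illustrative assumptions, not SLAs defined by this module):
 *
 *   const benchmarks = new PerformanceBenchmarks({
 *     thresholds: {
 *       evolutionTime: 5000,       // ms per evolution cycle
 *       trajectoryRecording: 300,  // ms per trajectory recording
 *       paretoFrontierQuery: 150,  // ms per Pareto frontier query
 *       memoryOperations: 50,      // ms per memory read/write
 *     },
 *     maxConcurrentTests: 8,
 *   });
 *
 *   const result = await benchmarks.runComprehensiveBenchmarks();
 *   if (result.overallScore < 70) {
 *     process.exitCode = 1; // fail the CI job on a poor aggregate score
 *   }
 */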