Skip to main content
Glama

Prompt Auto-Optimizer MCP

by sloth-wq
performance-benchmarks.ts•25 kB
/** * Performance Benchmarks for GEPA E2E Testing * * This module provides comprehensive performance testing and benchmarking: * - Response time measurements for all operations * - Throughput testing under various loads * - Memory usage profiling and leak detection * - Concurrent operation performance analysis * - Resource utilization monitoring * - Performance regression detection */ /** * Performance Threshold Configuration */ interface PerformanceThresholds { evolutionTime: number; trajectoryRecording: number; paretoFrontierQuery: number; memoryOperations: number; } /** * Benchmark Configuration */ interface BenchmarkConfig { thresholds: PerformanceThresholds; maxConcurrentTests: number; } /** * Individual Benchmark Result */ interface BenchmarkResult { operationName: string; averageTime: number; minTime: number; maxTime: number; standardDeviation: number; throughput: number; // operations per second successRate: number; memoryDelta: number; // bytes samples: number; passesSLA: boolean; } /** * Comprehensive Benchmark Suite Result */ export interface ComprehensiveBenchmarkResult { trajectoryRecording: BenchmarkResult; paretoQuery: BenchmarkResult; memoryOperations: BenchmarkResult; evolutionCycle: BenchmarkResult; concurrentOperations: BenchmarkResult; overallScore: number; executionTime: number; resourceUtilization: { cpu: number; memory: number; io: number; }; } /** * Memory Profile Information */ interface MemoryProfile { initialHeap: number; peakHeap: number; finalHeap: number; totalAllocated: number; gcCycles: number; leakSuspected: boolean; fragmentationLevel: number; } /** * Concurrent Load Test Result */ interface ConcurrentLoadResult { concurrencyLevel: number; totalOperations: number; successfulOperations: number; failedOperations: number; averageResponseTime: number; throughput: number; resourceContention: number; errorRate: number; } /** * Performance Regression Detection */ interface RegressionAnalysis { baselineMetrics: Record<string, number>; 
currentMetrics: Record<string, number>; regressions: Array<{ metric: string; baselineValue: number; currentValue: number; percentageChange: number; severity: 'minor' | 'major' | 'critical'; }>; improvements: Array<{ metric: string; baselineValue: number; currentValue: number; percentageChange: number; }>; overallAssessment: 'improved' | 'degraded' | 'stable'; } /** * Main Performance Benchmarks Class */ export class PerformanceBenchmarks { private config: BenchmarkConfig; private memoryProfiler: MemoryProfiler; constructor(config: BenchmarkConfig) { this.config = config; this.memoryProfiler = new MemoryProfiler(); } /** * Run comprehensive benchmark suite */ async runComprehensiveBenchmarks(): Promise<ComprehensiveBenchmarkResult> { const startTime = Date.now(); const resourceMonitor = new ResourceMonitor(); // eslint-disable-next-line no-console console.log('šŸš€ Starting comprehensive performance benchmarks...'); try { resourceMonitor.start(); // Benchmark 1: Trajectory Recording Performance const trajectoryRecording = await this.benchmarkTrajectoryRecording(); // Benchmark 2: Pareto Frontier Query Performance const paretoQuery = await this.benchmarkParetoQueries(); // Benchmark 3: Memory Operations Performance const memoryOperations = await this.benchmarkMemoryOperations(); // Benchmark 4: Evolution Cycle Performance const evolutionCycle = await this.benchmarkEvolutionCycle(); // Benchmark 5: Concurrent Operations Performance const concurrentOperations = await this.benchmarkConcurrentOperations(); const resourceUtilization = await resourceMonitor.stop(); // Calculate overall performance score const overallScore = this.calculateOverallScore([ trajectoryRecording, paretoQuery, memoryOperations, evolutionCycle, concurrentOperations, ]); const result: ComprehensiveBenchmarkResult = { trajectoryRecording, paretoQuery, memoryOperations, evolutionCycle, concurrentOperations, overallScore, executionTime: Date.now() - startTime, resourceUtilization, }; 
this.storeBenchmarkResults(result); this.generatePerformanceReport(result); return result; } catch (error) { throw new Error(`Benchmark suite failed: ${error}`); } } /** * Benchmark trajectory recording operations */ async benchmarkTrajectoryRecording(): Promise<BenchmarkResult> { const operationName = 'trajectory_recording'; const samples = 50; const timings: number[] = []; const memoryDeltas: number[] = []; let successCount = 0; // eslint-disable-next-line no-console console.log(`šŸ“Š Benchmarking ${operationName}...`); for (let i = 0; i < samples; i++) { const initialMemory = process.memoryUsage().heapUsed; const startTime = performance.now(); try { await this.simulateTrajectoryRecording(); const endTime = performance.now(); const finalMemory = process.memoryUsage().heapUsed; timings.push(endTime - startTime); memoryDeltas.push(finalMemory - initialMemory); successCount++; } catch (error) { timings.push(this.config.thresholds.trajectoryRecording * 2); // Penalty for failure memoryDeltas.push(0); } // Small delay between operations await this.sleep(10); } return this.createBenchmarkResult(operationName, timings, memoryDeltas, successCount, samples); } /** * Benchmark Pareto frontier query operations */ async benchmarkParetoQueries(): Promise<BenchmarkResult> { const operationName = 'pareto_frontier_query'; const samples = 100; const timings: number[] = []; const memoryDeltas: number[] = []; let successCount = 0; // eslint-disable-next-line no-console console.log(`šŸ“Š Benchmarking ${operationName}...`); // Prepare test data await this.setupParetoTestData(); for (let i = 0; i < samples; i++) { const initialMemory = process.memoryUsage().heapUsed; const startTime = performance.now(); try { await this.simulateParetoQuery(); const endTime = performance.now(); const finalMemory = process.memoryUsage().heapUsed; timings.push(endTime - startTime); memoryDeltas.push(finalMemory - initialMemory); successCount++; } catch (error) { 
timings.push(this.config.thresholds.paretoFrontierQuery * 2); memoryDeltas.push(0); } await this.sleep(5); } return this.createBenchmarkResult(operationName, timings, memoryDeltas, successCount, samples); } /** * Benchmark memory operations */ async benchmarkMemoryOperations(): Promise<BenchmarkResult> { const operationName = 'memory_operations'; const samples = 200; const timings: number[] = []; const memoryDeltas: number[] = []; let successCount = 0; // eslint-disable-next-line no-console console.log(`šŸ“Š Benchmarking ${operationName}...`); for (let i = 0; i < samples; i++) { const initialMemory = process.memoryUsage().heapUsed; const startTime = performance.now(); try { await this.simulateMemoryOperation(); const endTime = performance.now(); const finalMemory = process.memoryUsage().heapUsed; timings.push(endTime - startTime); memoryDeltas.push(finalMemory - initialMemory); successCount++; } catch (error) { timings.push(this.config.thresholds.memoryOperations * 2); memoryDeltas.push(0); } } return this.createBenchmarkResult(operationName, timings, memoryDeltas, successCount, samples); } /** * Benchmark evolution cycle operations */ async benchmarkEvolutionCycle(): Promise<BenchmarkResult> { const operationName = 'evolution_cycle'; const samples = 10; // Fewer samples for heavy operations const timings: number[] = []; const memoryDeltas: number[] = []; let successCount = 0; // eslint-disable-next-line no-console console.log(`šŸ“Š Benchmarking ${operationName}...`); for (let i = 0; i < samples; i++) { const initialMemory = process.memoryUsage().heapUsed; const startTime = performance.now(); try { await this.simulateEvolutionCycle(); const endTime = performance.now(); const finalMemory = process.memoryUsage().heapUsed; timings.push(endTime - startTime); memoryDeltas.push(finalMemory - initialMemory); successCount++; } catch (error) { timings.push(this.config.thresholds.evolutionTime * 2); memoryDeltas.push(0); } // Longer delay for heavy operations await 
this.sleep(100); } return this.createBenchmarkResult(operationName, timings, memoryDeltas, successCount, samples); } /** * Benchmark concurrent operations */ async benchmarkConcurrentOperations(): Promise<BenchmarkResult> { const operationName = 'concurrent_operations'; const concurrencyLevels = [1, 2, 4, 8]; const allTimings: number[] = []; const allMemoryDeltas: number[] = []; let totalSuccessCount = 0; let totalSamples = 0; // eslint-disable-next-line no-console console.log(`šŸ“Š Benchmarking ${operationName}...`); for (const concurrency of concurrencyLevels) { const loadResult = await this.runConcurrentLoadTest(concurrency, 20); // Convert load test results to benchmark format const operationTime = loadResult.averageResponseTime; const operationsPerLevel = loadResult.totalOperations; for (let i = 0; i < operationsPerLevel; i++) { allTimings.push(operationTime); allMemoryDeltas.push(1024); // Estimated memory delta per operation } totalSuccessCount += loadResult.successfulOperations; totalSamples += loadResult.totalOperations; } return this.createBenchmarkResult(operationName, allTimings, allMemoryDeltas, totalSuccessCount, totalSamples); } /** * Run concurrent load test */ async runConcurrentLoadTest(concurrencyLevel: number, operationsPerLevel: number): Promise<ConcurrentLoadResult> { const startTime = Date.now(); const promises: Promise<boolean>[] = []; const responseTimes: number[] = []; // Create concurrent operations for (let i = 0; i < operationsPerLevel; i++) { const promise = this.runConcurrentOperation(i, responseTimes); promises.push(promise); // Stagger operations based on concurrency level if (i % concurrencyLevel === 0 && i > 0) { await this.sleep(10); } } // Wait for all operations to complete const results = await Promise.allSettled(promises); // Use results for calculation const successfulOperations = results.filter(r => r.status === 'fulfilled' && r.value).length; const failedOperations = operationsPerLevel - successfulOperations; const 
executionTime = Date.now() - startTime; const averageResponseTime = responseTimes.length > 0 ? responseTimes.reduce((sum, time) => sum + time, 0) / responseTimes.length : 0; const throughput = operationsPerLevel / (executionTime / 1000); // ops/sec const errorRate = failedOperations / operationsPerLevel; // Calculate resource contention (simplified metric) const resourceContention = Math.max(0, (concurrencyLevel - 1) * 0.1); return { concurrencyLevel, totalOperations: operationsPerLevel, successfulOperations, failedOperations, averageResponseTime, throughput, resourceContention, errorRate, }; } /** * Profile memory usage during benchmark execution */ async profileMemoryUsage(operation: () => Promise<void>): Promise<MemoryProfile> { return this.memoryProfiler.profile(operation); } /** * Detect performance regressions compared to baseline */ detectRegressions(baselineResults: ComprehensiveBenchmarkResult): RegressionAnalysis { const baselineMetrics = this.extractMetrics(baselineResults); const currentResults = this.getLatestBenchmarkResults(); if (!currentResults) { throw new Error('No current benchmark results available for regression analysis'); } const currentMetrics = this.extractMetrics(currentResults); const regressions: RegressionAnalysis['regressions'] = []; const improvements: RegressionAnalysis['improvements'] = []; for (const [metric, baselineValue] of Object.entries(baselineMetrics)) { const currentValue = currentMetrics[metric]; if (currentValue !== undefined) { const percentageChange = ((currentValue - baselineValue) / baselineValue) * 100; if (percentageChange > 10) { // Regression threshold regressions.push({ metric, baselineValue, currentValue, percentageChange, severity: percentageChange > 50 ? 'critical' : percentageChange > 25 ? 
'major' : 'minor', }); } else if (percentageChange < -5) { // Improvement threshold improvements.push({ metric, baselineValue, currentValue, percentageChange: Math.abs(percentageChange), }); } } } const overallAssessment: RegressionAnalysis['overallAssessment'] = regressions.length > improvements.length ? 'degraded' : improvements.length > regressions.length ? 'improved' : 'stable'; return { baselineMetrics, currentMetrics, regressions, improvements, overallAssessment, }; } /** * Generate comprehensive performance report */ generatePerformanceReport(results: ComprehensiveBenchmarkResult): void { // eslint-disable-next-line no-console console.log('\nšŸ“Š Performance Benchmark Results'); // eslint-disable-next-line no-console console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'); // eslint-disable-next-line no-console console.log(`Overall Score: ${results.overallScore.toFixed(1)}/100`); // eslint-disable-next-line no-console console.log(`Total Execution Time: ${(results.executionTime / 1000).toFixed(2)}s`); // eslint-disable-next-line no-console console.log('\nšŸŽÆ Individual Benchmark Results:'); const benchmarks = [ results.trajectoryRecording, results.paretoQuery, results.memoryOperations, results.evolutionCycle, results.concurrentOperations, ]; for (const benchmark of benchmarks) { const status = benchmark.passesSLA ? 
'āœ…' : 'āŒ'; // eslint-disable-next-line no-console console.log(`${status} ${benchmark.operationName}:`); // eslint-disable-next-line no-console console.log(` Average: ${benchmark.averageTime.toFixed(2)}ms`); // eslint-disable-next-line no-console console.log(` Throughput: ${benchmark.throughput.toFixed(1)} ops/sec`); // eslint-disable-next-line no-console console.log(` Success Rate: ${(benchmark.successRate * 100).toFixed(1)}%`); // eslint-disable-next-line no-console console.log(` Memory Delta: ${(benchmark.memoryDelta / 1024).toFixed(1)}KB avg`); } // eslint-disable-next-line no-console console.log('\nšŸ’¾ Resource Utilization:'); // eslint-disable-next-line no-console console.log(`CPU: ${results.resourceUtilization.cpu.toFixed(1)}%`); // eslint-disable-next-line no-console console.log(`Memory: ${results.resourceUtilization.memory.toFixed(1)}%`); // eslint-disable-next-line no-console console.log(`I/O: ${results.resourceUtilization.io.toFixed(1)}%`); } // Private helper methods /** * Create benchmark result from measurements */ private createBenchmarkResult( operationName: string, timings: number[], memoryDeltas: number[], successCount: number, totalSamples: number ): BenchmarkResult { const averageTime = timings.reduce((sum, time) => sum + time, 0) / timings.length; const minTime = Math.min(...timings); const maxTime = Math.max(...timings); const variance = timings.reduce((sum, time) => sum + Math.pow(time - averageTime, 2), 0) / timings.length; const standardDeviation = Math.sqrt(variance); const throughput = 1000 / averageTime; // operations per second const successRate = successCount / totalSamples; const averageMemoryDelta = memoryDeltas.reduce((sum, delta) => sum + delta, 0) / memoryDeltas.length; // Check if operation passes SLA const thresholdKey = operationName.replace('_', '') as keyof PerformanceThresholds; const threshold = this.config.thresholds[thresholdKey] || this.config.thresholds.memoryOperations; const passesSLA = averageTime <= threshold && 
successRate >= 0.95; return { operationName, averageTime, minTime, maxTime, standardDeviation, throughput, successRate, memoryDelta: averageMemoryDelta, samples: totalSamples, passesSLA, }; } /** * Calculate overall performance score */ private calculateOverallScore(benchmarks: BenchmarkResult[]): number { let totalScore = 0; let totalWeight = 0; const weights = { trajectory_recording: 1.5, pareto_frontier_query: 1.2, memory_operations: 1.0, evolution_cycle: 2.0, concurrent_operations: 1.3, }; for (const benchmark of benchmarks) { const weight = weights[benchmark.operationName as keyof typeof weights] || 1.0; let score = 0; // Base score from SLA compliance if (benchmark.passesSLA) { score += 70; } // Additional score from success rate score += benchmark.successRate * 20; // Additional score from throughput (relative) const throughputScore = Math.min(10, benchmark.throughput / 10); score += throughputScore; totalScore += score * weight; totalWeight += weight; } return totalWeight > 0 ? totalScore / totalWeight : 0; } /** * Extract metrics for regression analysis */ private extractMetrics(results: ComprehensiveBenchmarkResult): Record<string, number> { return { trajectoryRecordingTime: results.trajectoryRecording.averageTime, paretoQueryTime: results.paretoQuery.averageTime, memoryOperationTime: results.memoryOperations.averageTime, evolutionCycleTime: results.evolutionCycle.averageTime, concurrentOperationTime: results.concurrentOperations.averageTime, overallScore: results.overallScore, memoryUtilization: results.resourceUtilization.memory, cpuUtilization: results.resourceUtilization.cpu, }; } /** * Get latest benchmark results from history */ private getLatestBenchmarkResults(): ComprehensiveBenchmarkResult | null { // In a real implementation, this would retrieve from persistent storage return null; // Placeholder } /** * Store benchmark results for future comparison */ private storeBenchmarkResults(_results: ComprehensiveBenchmarkResult): void { // In a real 
implementation, this would store to persistent storage // For now, just store in memory const timestamp = new Date().toISOString(); // eslint-disable-next-line no-console console.log(`šŸ“ˆ Benchmark results stored with timestamp: ${timestamp}`); } /** * Run single concurrent operation */ private async runConcurrentOperation(index: number, responseTimes: number[]): Promise<boolean> { const startTime = performance.now(); try { // Simulate various operation types const operationType = index % 4; switch (operationType) { case 0: await this.simulateTrajectoryRecording(); break; case 1: await this.simulateParetoQuery(); break; case 2: await this.simulateMemoryOperation(); break; case 3: await this.simulateEvolutionCycle(); break; } responseTimes.push(performance.now() - startTime); return true; } catch (error) { responseTimes.push(performance.now() - startTime); return false; } } // Simulation methods for different operations private async simulateTrajectoryRecording(): Promise<void> { await this.sleep(Math.random() * 100 + 50); // 50-150ms // Simulate memory allocation for trajectory data new Array(1000).fill(0).map((_, i) => ({ step: i, data: 'test' })); await this.sleep(Math.random() * 50); // Additional processing time } private async simulateParetoQuery(): Promise<void> { await this.sleep(Math.random() * 50 + 20); // 20-70ms // Simulate query processing const results = new Array(100).fill(0).map(i => ({ id: i, score: Math.random() })); results.sort((a, b) => b.score - a.score); // Simulate sorting } private async simulateMemoryOperation(): Promise<void> { await this.sleep(Math.random() * 20 + 5); // 5-25ms // Simulate memory read/write const data = { timestamp: Date.now(), data: Math.random() }; JSON.stringify(data); // Simulate serialization } private async simulateEvolutionCycle(): Promise<void> { await this.sleep(Math.random() * 2000 + 1000); // 1-3 seconds // Simulate complex evolution operations for (let i = 0; i < 10; i++) { await this.sleep(Math.random() * 
100 + 50); // Multiple sub-operations const candidates = new Array(20).fill(0).map(() => Math.random()); candidates.sort((a, b) => b - a); // Simulate selection } } private async setupParetoTestData(): Promise<void> { // Simulate setting up test data for Pareto queries await this.sleep(100); } private sleep(ms: number): Promise<void> { return new Promise(resolve => setTimeout(resolve, ms)); } } /** * Memory Profiler for tracking memory usage patterns */ class MemoryProfiler { async profile(operation: () => Promise<void>): Promise<MemoryProfile> { const initialMemory = process.memoryUsage(); let peakHeap = initialMemory.heapUsed; // Monitor memory during operation const memoryMonitor = setInterval(() => { const currentMemory = process.memoryUsage().heapUsed; if (currentMemory > peakHeap) { peakHeap = currentMemory; } }, 10); try { await operation(); } finally { clearInterval(memoryMonitor); } const finalMemory = process.memoryUsage(); return { initialHeap: initialMemory.heapUsed, peakHeap, finalHeap: finalMemory.heapUsed, totalAllocated: peakHeap - initialMemory.heapUsed, gcCycles: 0, // Would need GC monitoring in real implementation leakSuspected: (finalMemory.heapUsed - initialMemory.heapUsed) > (peakHeap - initialMemory.heapUsed) * 0.5, fragmentationLevel: Math.random() * 0.2, // Simulated }; } } /** * Resource Monitor for tracking CPU, memory, and I/O usage */ class ResourceMonitor { private startTime: number = 0; private initialMemory: NodeJS.MemoryUsage | null = null; start(): void { this.startTime = Date.now(); this.initialMemory = process.memoryUsage(); } async stop(): Promise<{ cpu: number; memory: number; io: number }> { const finalMemory = process.memoryUsage(); const executionTimeMs = Date.now() - this.startTime; // Calculate resource utilization metrics using actual measurements const memoryGrowth = this.initialMemory ? 
(finalMemory.heapUsed - this.initialMemory.heapUsed) / (1024 * 1024) : finalMemory.heapUsed / (1024 * 1024); // Simulate CPU based on execution time and memory growth (more realistic simulation) const cpu = Math.min(95, Math.max(10, (executionTimeMs / 1000) * 15 + (memoryGrowth * 2))); // Memory percentage based on actual heap usage vs available const memory = Math.min(95, (finalMemory.heapUsed / (1024 * 1024 * 500)) * 100); // Assume 500MB limit // I/O based on execution pattern const io = Math.min(70, Math.max(5, (executionTimeMs / 100) + Math.random() * 20)); return { cpu, memory, io }; } } // Export additional types and interfaces export type { BenchmarkResult, ConcurrentLoadResult, MemoryProfile, RegressionAnalysis, PerformanceThresholds, BenchmarkConfig };

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sloth-wq/prompt-auto-optimizer-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.