# Performance Optimization Techniques

## Overview

This guide covers advanced performance optimization techniques for GEPA systems, focusing on memory management, computational efficiency, and scalability.

## Memory Optimization

### 1. Object Pooling

Reduce garbage collection pressure by reusing objects instead of allocating a fresh one for every evaluation:

```typescript
class CandidatePool {
  private pool: PromptCandidate[] = [];
  private maxPoolSize = 100;

  acquire(): PromptCandidate {
    if (this.pool.length > 0) {
      return this.pool.pop()!;
    }
    return this.createNew();
  }

  release(candidate: PromptCandidate): void {
    // Only retain candidates up to the pool cap; excess objects are
    // left for the garbage collector
    if (this.pool.length < this.maxPoolSize) {
      this.reset(candidate);
      this.pool.push(candidate);
    }
  }

  private reset(candidate: PromptCandidate): void {
    candidate.taskPerformance.clear();
    candidate.averageScore = 0;
    candidate.rolloutCount = 0;
    candidate.lastEvaluated = new Date();
  }

  private createNew(): PromptCandidate {
    return {
      id: this.generateId(),
      content: '',
      generation: 0,
      taskPerformance: new Map(),
      averageScore: 0,
      rolloutCount: 0,
      createdAt: new Date(),
      lastEvaluated: new Date(),
      mutationType: 'initial'
    };
  }

  private generateId(): string {
    // Any collision-resistant ID scheme works here
    return `candidate-${Date.now()}-${Math.random().toString(36).slice(2)}`;
  }
}
```

### 2. Memory-Efficient Collections

Use data structures that match each access pattern: a `Set` for membership tests, a `Map` for keyed lookups, and an array only where ordered iteration is required:

```typescript
class OptimizedParetoFrontier {
  // Use Set for fast membership testing
  private candidateIds = new Set<string>();

  // Use Map for fast lookups
  private candidateMap = new Map<string, ParetoPoint>();

  // Use Array for iteration (when needed)
  private sortedCandidates: ParetoPoint[] = [];
  private sortedDirty = true;

  // Spatial index for fast dominance checks (see "Algorithm Optimization" below)
  private spatialIndex: SpatialDominanceIndex;

  constructor(objectives: ParetoObjective[]) {
    this.spatialIndex = new SpatialDominanceIndex(objectives);
  }

  addCandidate(candidate: PromptCandidate): boolean {
    if (this.candidateIds.has(candidate.id)) {
      return false;
    }

    const point = this.createPoint(candidate);

    // Check dominance using optimized structures
    if (this.isDominatedFast(point)) {
      return false;
    }

    // Remove dominated candidates
    this.removeDominated(point);

    // Add new candidate
    this.candidateIds.add(candidate.id);
    this.candidateMap.set(candidate.id, point);
    this.spatialIndex.addPoint(point);
    this.sortedDirty = true;

    return true;
  }

  private isDominatedFast(point: ParetoPoint): boolean {
    // Delegate to the spatial index rather than scanning the whole frontier
    return this.spatialIndex.isDominated(point);
  }

  private removeDominated(point: ParetoPoint): void {
    // Removal from the spatial index itself is elided; many k-d tree
    // implementations handle deletion lazily
    for (const dominated of this.spatialIndex.findDominated(point)) {
      this.candidateIds.delete(dominated.candidateId);
      this.candidateMap.delete(dominated.candidateId);
    }
  }

  private createPoint(candidate: PromptCandidate): ParetoPoint {
    // Project per-task scores into objective space; the exact mapping
    // depends on the configured objectives
    return { candidateId: candidate.id, objectives: new Map(candidate.taskPerformance) };
  }
}
```

### 3. Weak References for Caches

Prevent memory leaks in caching systems by letting the garbage collector reclaim values that are no longer referenced elsewhere:

```typescript
// Values must be objects for WeakRef to apply; the FinalizationRegistry
// removes the map entry once a value has been collected
class MemoryEfficientCache<K, V extends object> {
  private cache = new Map<K, WeakRef<V>>();
  private registry = new FinalizationRegistry<K>((key) => {
    this.cache.delete(key);
  });

  set(key: K, value: V): void {
    const weakRef = new WeakRef(value);
    this.cache.set(key, weakRef);
    this.registry.register(value, key);
  }

  get(key: K): V | undefined {
    const weakRef = this.cache.get(key);
    if (!weakRef) return undefined;

    const value = weakRef.deref();
    if (!value) {
      // The value was collected; drop the stale entry eagerly
      this.cache.delete(key);
      return undefined;
    }

    return value;
  }
}
```
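`WeakRef` and `FinalizationRegistry` require Node.js 14.6+ (ES2021), and a collected value simply reappears as a cache miss, so this pattern only suits data that can be recomputed. A minimal usage sketch, where `CompiledPrompt` and `compilePrompt` are hypothetical stand-ins for any expensive, recomputable derivation:

```typescript
// Hypothetical types and helper for illustration only
interface CompiledPrompt {
  content: string;
  tokens: string[];
}

declare function compilePrompt(content: string): CompiledPrompt;

const compiledCache = new MemoryEfficientCache<string, CompiledPrompt>();

function getCompiledPrompt(content: string): CompiledPrompt {
  const cached = compiledCache.get(content);
  if (cached) return cached;

  // Miss (or the GC reclaimed the entry): recompute and re-cache
  const compiled = compilePrompt(content);
  compiledCache.set(content, compiled);
  return compiled;
}
```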
## Computational Optimization

### 1. Batch Processing

Amortize per-request overhead by grouping items into batches, flushing either when the batch fills or when a maximum wait time elapses:

```typescript
interface BatchItem<T, R> {
  item: T;
  resolve: (result: R) => void;
  reject: (error: unknown) => void;
}

class BatchProcessor<T, R> {
  private queue: BatchItem<T, R>[] = [];
  private batchSize: number;
  private processingTimeout: NodeJS.Timeout | null = null;

  constructor(
    private processor: (items: T[]) => Promise<R[]>,
    batchSize = 10,
    private maxWaitTime = 100
  ) {
    this.batchSize = batchSize;
  }

  async process(item: T): Promise<R> {
    return new Promise<R>((resolve, reject) => {
      this.queue.push({ item, resolve, reject });
      this.scheduleBatch();
    });
  }

  private scheduleBatch(): void {
    if (this.queue.length >= this.batchSize) {
      void this.processBatch();
    } else if (!this.processingTimeout) {
      this.processingTimeout = setTimeout(() => {
        void this.processBatch();
      }, this.maxWaitTime);
    }
  }

  private async processBatch(): Promise<void> {
    if (this.processingTimeout) {
      clearTimeout(this.processingTimeout);
      this.processingTimeout = null;
    }

    const batch = this.queue.splice(0, this.batchSize);
    if (batch.length === 0) return;

    try {
      const items = batch.map(b => b.item);
      const results = await this.processor(items);
      batch.forEach((b, index) => b.resolve(results[index]));
    } catch (error) {
      batch.forEach(b => b.reject(error));
    }

    // Items queued while this batch was in flight still need a flush
    if (this.queue.length > 0) {
      this.scheduleBatch();
    }
  }
}
```

### 2. Parallel Processing with Resource Management

Bound concurrency with a semaphore and adapt chunk sizes to memory pressure (the `Semaphore` itself is sketched after this section):

```typescript
class ResourceManagedParallelProcessor {
  private semaphore: Semaphore;
  private activeJobs = new Map<string, AbortController>();

  constructor(
    private maxConcurrent: number,
    private memoryThreshold: number
  ) {
    this.semaphore = new Semaphore(maxConcurrent);
  }

  async processInParallel<T, R>(
    items: T[],
    processor: (item: T, signal: AbortSignal) => Promise<R>
  ): Promise<R[]> {
    const chunks = this.createAdaptiveChunks(items);
    const results: R[] = [];

    for (const chunk of chunks) {
      const chunkResults = await Promise.all(
        chunk.map(item => this.processWithResourceControl(item, processor))
      );
      results.push(...chunkResults);

      // Check memory pressure and adapt
      if (this.getMemoryUsage() > this.memoryThreshold) {
        await this.waitForMemoryRelief();
      }
    }

    return results;
  }

  private async processWithResourceControl<T, R>(
    item: T,
    processor: (item: T, signal: AbortSignal) => Promise<R>
  ): Promise<R> {
    await this.semaphore.acquire();

    const jobId = this.generateJobId();
    const controller = new AbortController();
    this.activeJobs.set(jobId, controller);

    try {
      return await processor(item, controller.signal);
    } finally {
      this.activeJobs.delete(jobId);
      this.semaphore.release();
    }
  }

  private createAdaptiveChunks<T>(items: T[]): T[][] {
    // Halve the chunk size when approaching the memory threshold
    const memoryUsage = this.getMemoryUsage();
    const chunkSize = memoryUsage > this.memoryThreshold * 0.8
      ? Math.max(1, Math.floor(this.maxConcurrent / 2))
      : this.maxConcurrent;

    return this.chunkArray(items, chunkSize);
  }

  private getMemoryUsage(): number {
    return process.memoryUsage().heapUsed;
  }

  private async waitForMemoryRelief(): Promise<void> {
    // Back off briefly so in-flight work completes and the GC can reclaim memory
    await new Promise(resolve => setTimeout(resolve, 250));
  }

  private generateJobId(): string {
    return `job-${Date.now()}-${Math.random().toString(36).slice(2)}`;
  }

  private chunkArray<T>(items: T[], size: number): T[][] {
    const chunks: T[][] = [];
    for (let i = 0; i < items.length; i += size) {
      chunks.push(items.slice(i, i + size));
    }
    return chunks;
  }

  abortAll(): void {
    for (const controller of this.activeJobs.values()) {
      controller.abort();
    }
    this.activeJobs.clear();
  }
}
```
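The `Semaphore` used above is not defined in this guide; a minimal promise-based sketch that provides the `acquire`/`release` contract the processor relies on:

```typescript
// A minimal counting semaphore: acquire() resolves immediately while
// permits remain, otherwise the caller waits until a permit is released
class Semaphore {
  private waiters: Array<() => void> = [];

  constructor(private permits: number) {}

  async acquire(): Promise<void> {
    if (this.permits > 0) {
      this.permits--;
      return;
    }
    await new Promise<void>(resolve => this.waiters.push(resolve));
  }

  release(): void {
    const next = this.waiters.shift();
    if (next) {
      // Hand the permit directly to the next waiter
      next();
    } else {
      this.permits++;
    }
  }
}
```

Handing the permit directly to the next waiter keeps acquisition FIFO and avoids any busy-wait loop.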
### 3. Caching with TTL and LRU

Combine time-to-live expiry with least-recently-used eviction so cached entries stay both fresh and bounded in number:

```typescript
interface CacheEntry<V> {
  value: V;
  timestamp: number;
  accessCount: number;
}

class OptimizedCache<K, V> {
  private cache = new Map<K, CacheEntry<V>>();
  private accessOrder: K[] = [];
  private ttlTimeouts = new Map<K, NodeJS.Timeout>();

  constructor(
    private maxSize: number,
    private defaultTtl: number
  ) {}

  set(key: K, value: V, ttl = this.defaultTtl): void {
    // Remove if exists
    this.delete(key);

    // Check size limit
    if (this.cache.size >= this.maxSize) {
      this.evictLRU();
    }

    // Add new entry
    const entry: CacheEntry<V> = {
      value,
      timestamp: Date.now(),
      accessCount: 1
    };

    this.cache.set(key, entry);
    this.accessOrder.push(key);

    // Set TTL timeout
    if (ttl > 0) {
      const timeout = setTimeout(() => this.delete(key), ttl);
      this.ttlTimeouts.set(key, timeout);
    }
  }

  get(key: K): V | undefined {
    const entry = this.cache.get(key);
    if (!entry) return undefined;

    // Update access tracking
    entry.accessCount++;
    this.moveToEnd(key);

    return entry.value;
  }

  delete(key: K): boolean {
    const timeout = this.ttlTimeouts.get(key);
    if (timeout) {
      clearTimeout(timeout);
      this.ttlTimeouts.delete(key);
    }

    const index = this.accessOrder.indexOf(key);
    if (index > -1) {
      this.accessOrder.splice(index, 1);
    }

    return this.cache.delete(key);
  }

  private evictLRU(): void {
    if (this.accessOrder.length === 0) return;

    // The head of accessOrder is the least recently used key
    const lruKey = this.accessOrder[0];
    this.delete(lruKey);
  }

  private moveToEnd(key: K): void {
    // Note: indexOf/splice are O(n); a doubly linked list avoids this
    // for very large caches
    const index = this.accessOrder.indexOf(key);
    if (index > -1) {
      this.accessOrder.splice(index, 1);
      this.accessOrder.push(key);
    }
  }
}
```

## Algorithm Optimization

### 1. Efficient Dominance Checking

Avoid comparing every new point against the entire frontier by indexing points spatially:

```typescript
// KDTree stands in for any spatial index supporting nearest-neighbor and
// range queries; comparePoints, dominates, and the bounds helpers are elided
class SpatialDominanceIndex {
  private kdTree: KDTree<ParetoPoint>;

  constructor(objectives: ParetoObjective[]) {
    this.kdTree = new KDTree(objectives.length, this.comparePoints.bind(this));
  }

  addPoint(point: ParetoPoint): void {
    this.kdTree.insert(point);
  }

  isDominated(point: ParetoPoint): boolean {
    // Heuristic: check only the nearest neighbors. This is fast but can
    // miss a distant dominating point; use a range query where exactness matters
    const candidates = this.kdTree.nearestNeighbors(point, 50);
    return candidates.some(candidate => this.dominates(candidate, point));
  }

  findDominated(point: ParetoPoint): ParetoPoint[] {
    // Use a range query to find potentially dominated points
    const range = this.createDominanceRange(point);
    const candidates = this.kdTree.rangeQuery(range);

    return candidates.filter(candidate => this.dominates(point, candidate));
  }

  private createDominanceRange(point: ParetoPoint): Range {
    // Create a spatial range that covers all potentially dominated points
    return {
      min: this.createMinBounds(point),
      max: this.createMaxBounds(point)
    };
  }
}
```

### 2. Optimized Hypervolume Calculation

For large frontiers, update the hypervolume incrementally; for high-dimensional objective spaces, estimate it with Monte Carlo sampling:

```typescript
class HypervolumeCalculator {
  // Incremental updates are exact only when point contributions do not
  // overlap; recompute from scratch periodically to bound drift
  calculateIncremental(
    previousHypervolume: number,
    newPoint: ParetoPoint,
    removedPoints: ParetoPoint[],
    referencePoint: Map<string, number>
  ): number {
    let hypervolume = previousHypervolume;

    // Subtract contribution of removed points
    for (const removed of removedPoints) {
      hypervolume -= this.pointContribution(removed, referencePoint);
    }

    // Add contribution of new point
    hypervolume += this.pointContribution(newPoint, referencePoint);

    return hypervolume;
  }

  // Monte Carlo estimation: sample the bounding box and count the fraction
  // of samples dominated by the frontier
  calculateMonteCarlo(
    frontier: ParetoPoint[],
    referencePoint: Map<string, number>,
    sampleCount = 10000
  ): number {
    if (frontier.length === 0) return 0;

    const bounds = this.calculateBounds(frontier, referencePoint);
    const totalVolume = this.calculateTotalVolume(bounds);

    let dominatedSamples = 0;
    for (let i = 0; i < sampleCount; i++) {
      const randomPoint = this.generateRandomPoint(bounds);
      if (this.isPointDominated(randomPoint, frontier)) {
        dominatedSamples++;
      }
    }

    return (dominatedSamples / sampleCount) * totalVolume;
  }
}
```
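Both classes above ultimately rely on a `dominates` predicate that is never shown. A minimal sketch, assuming `ParetoPoint` keeps its objective values in a `Map<string, number>` and that every objective is maximized (flip the comparisons for minimized objectives):

```typescript
// Assumed shape of ParetoPoint for this sketch
interface ParetoPoint {
  candidateId: string;
  objectives: Map<string, number>;
}

// a dominates b if a is no worse on every objective and strictly better on one
function dominates(a: ParetoPoint, b: ParetoPoint): boolean {
  let strictlyBetter = false;
  for (const [objective, aValue] of a.objectives) {
    const bValue = b.objectives.get(objective) ?? -Infinity;
    if (aValue < bValue) return false;
    if (aValue > bValue) strictlyBetter = true;
  }
  return strictlyBetter;
}

// A point is dominated if any frontier member dominates it
function isPointDominated(point: ParetoPoint, frontier: ParetoPoint[]): boolean {
  return frontier.some(member => dominates(member, point));
}
```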
## Database and Storage Optimization

### 1. Efficient Trajectory Storage

Buffer writes, plan queries against available indexes, and update indexes off the critical path:

```typescript
class OptimizedTrajectoryStore implements TrajectoryStore {
  private connectionPool: ConnectionPool;
  private writeBuffer: TrajectoryBuffer;
  private indexManager: IndexManager;
  private cacheManager: CacheManager;

  constructor(config: StorageConfig) {
    this.connectionPool = new ConnectionPool(config);
    this.writeBuffer = new TrajectoryBuffer(config.bufferSize);
    this.indexManager = new IndexManager();
    this.cacheManager = new CacheManager();
  }

  async save(trajectory: ExecutionTrajectory): Promise<void> {
    // Use a write buffer for high-throughput scenarios
    this.writeBuffer.add(trajectory);

    if (this.writeBuffer.isFull()) {
      await this.flushBuffer();
    }
  }

  async query(filter: TrajectoryFilter): Promise<ExecutionTrajectory[]> {
    // Pick the query strategy that fits the filter
    const queryPlan = this.planQuery(filter);

    switch (queryPlan.strategy) {
      case 'index':
        return this.queryByIndex(filter, queryPlan.index);
      case 'cache':
        return this.queryFromCache(filter);
      case 'full-scan':
        return this.fullScanQuery(filter);
    }
  }

  private async flushBuffer(): Promise<void> {
    const trajectories = this.writeBuffer.flush();

    // Batch insert for efficiency
    await this.batchInsert(trajectories);

    // Update indexes asynchronously, off the write path
    setImmediate(() => this.updateIndexes(trajectories));
  }

  private planQuery(filter: TrajectoryFilter): QueryPlan {
    // Analyze the filter to determine the cheapest strategy
    if (filter.promptId && this.indexManager.hasIndex('promptId')) {
      return { strategy: 'index', index: 'promptId' };
    }

    if (this.isSimpleFilter(filter) && this.cacheManager.canSatisfy(filter)) {
      return { strategy: 'cache' };
    }

    return { strategy: 'full-scan' };
  }
}
```

### 2. Partitioned Storage

Partition trajectories (for example, by time) so queries touch only the partitions that can match, and query those in parallel:

```typescript
interface PartitionStrategy {
  getPartition(trajectory: ExecutionTrajectory): string;
  // Returns keys of partitions that may hold matching data; null means "all"
  getRelevantPartitionKeys(filter: TrajectoryFilter): string[] | null;
}

class PartitionedStorage {
  private partitions = new Map<string, StoragePartition>();

  constructor(private partitionStrategy: PartitionStrategy) {}

  async store(trajectory: ExecutionTrajectory): Promise<void> {
    const partitionKey = this.partitionStrategy.getPartition(trajectory);
    const partition = this.getOrCreatePartition(partitionKey);
    await partition.store(trajectory);
  }

  async query(filter: TrajectoryFilter): Promise<ExecutionTrajectory[]> {
    const keys = this.partitionStrategy.getRelevantPartitionKeys(filter);
    const relevantPartitions = keys === null
      ? Array.from(this.partitions.values())
      : keys
          .map(key => this.partitions.get(key))
          .filter((p): p is StoragePartition => p !== undefined);

    // Query partitions in parallel
    const partitionResults = await Promise.all(
      relevantPartitions.map(partition => partition.query(filter))
    );

    return partitionResults.flat();
  }

  private getOrCreatePartition(key: string): StoragePartition {
    let partition = this.partitions.get(key);
    if (!partition) {
      partition = new StoragePartition(key); // assumed constructor
      this.partitions.set(key, partition);
    }
    return partition;
  }

  // Time-based partitioning strategy. Returning keys (rather than partition
  // objects) keeps the strategy stateless and out of storage internals
  static createTimeBasedStrategy(intervalMs: number): PartitionStrategy {
    return {
      getPartition: (trajectory) => {
        const timeSlot = Math.floor(trajectory.startTime / intervalMs);
        return `time_${timeSlot}`;
      },
      getRelevantPartitionKeys: (filter) => {
        if (!filter.timeRange) return null;

        const startSlot = Math.floor(filter.timeRange.start / intervalMs);
        const endSlot = Math.floor(filter.timeRange.end / intervalMs);

        const keys: string[] = [];
        for (let slot = startSlot; slot <= endSlot; slot++) {
          keys.push(`time_${slot}`);
        }
        return keys;
      }
    };
  }
}
```
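The `TrajectoryBuffer` backing `OptimizedTrajectoryStore` is also not defined above; a minimal sketch of the `add`/`isFull`/`flush` contract it needs. A production version would additionally flush on a timer so writes are not stranded during quiet periods:

```typescript
// A minimal write buffer: accumulates trajectories until capacity is
// reached, then hands the whole batch to the caller on flush()
class TrajectoryBuffer {
  private items: ExecutionTrajectory[] = [];

  constructor(private capacity: number) {}

  add(trajectory: ExecutionTrajectory): void {
    this.items.push(trajectory);
  }

  isFull(): boolean {
    return this.items.length >= this.capacity;
  }

  flush(): ExecutionTrajectory[] {
    const batch = this.items;
    this.items = [];
    return batch;
  }
}
```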
## Network and I/O Optimization

### 1. Connection Pooling

Reuse connections and respect provider rate limits rather than opening a connection per request (a minimal `RateLimiter` sketch appears at the end of this section):

```typescript
class OptimizedLLMAdapter extends LLMAdapter {
  private connectionPool: ConnectionPool;
  private rateLimiter: RateLimiter;

  constructor(config: LLMConfig) {
    super(config);

    this.connectionPool = new ConnectionPool({
      maxConnections: config.maxConnections || 10,
      connectionTimeout: config.connectionTimeout || 30000,
      keepAlive: true
    });

    this.rateLimiter = new RateLimiter({
      requestsPerSecond: config.rateLimit || 50,
      burstSize: config.burstSize || 100
    });
  }

  async callLLM(prompt: string, options?: LLMOptions): Promise<LLMResponse> {
    // Wait for rate limit
    await this.rateLimiter.waitForToken();

    // Get connection from pool
    const connection = await this.connectionPool.acquire();

    try {
      return await this.executeRequest(connection, prompt, options);
    } finally {
      this.connectionPool.release(connection);
    }
  }

  // Batch multiple requests, sized to the pool so no chunk oversubscribes it
  async callLLMBatch(requests: LLMRequest[]): Promise<LLMResponse[]> {
    const chunks = this.chunkRequests(requests, this.connectionPool.size);
    const results: LLMResponse[] = [];

    for (const chunk of chunks) {
      const chunkResults = await Promise.all(
        chunk.map(request => this.callLLM(request.prompt, request.options))
      );
      results.push(...chunkResults);
    }

    return results;
  }
}
```

### 2. Request Deduplication

Collapse identical in-flight requests into one upstream call and serve repeats from a response cache:

```typescript
class DeduplicatingLLMAdapter {
  private pendingRequests = new Map<string, Promise<LLMResponse>>();
  private responseCache = new Map<string, LLMResponse>();

  async callLLM(prompt: string, options?: LLMOptions): Promise<LLMResponse> {
    const requestKey = this.createRequestKey(prompt, options);

    // Check cache first (isExpired assumes responses carry a timestamp)
    const cached = this.responseCache.get(requestKey);
    if (cached && !this.isExpired(cached)) {
      return cached;
    }

    // If an identical request is already in flight, share its promise
    const pending = this.pendingRequests.get(requestKey);
    if (pending) {
      return pending;
    }

    // Create new request
    const requestPromise = this.executeRequest(prompt, options);
    this.pendingRequests.set(requestKey, requestPromise);

    try {
      const response = await requestPromise;
      this.responseCache.set(requestKey, response);
      return response;
    } finally {
      this.pendingRequests.delete(requestKey);
    }
  }

  private createRequestKey(prompt: string, options?: LLMOptions): string {
    // hashString/hashObject can be any stable hash (e.g., SHA-256 over
    // the prompt plus canonicalized JSON options)
    const optionsHash = options ? this.hashObject(options) : '';
    return `${this.hashString(prompt)}-${optionsHash}`;
  }
}
```
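The `RateLimiter` constructed in `OptimizedLLMAdapter` is not defined in this guide; a minimal token-bucket sketch matching the `requestsPerSecond` and `burstSize` options used above:

```typescript
// A minimal token bucket: tokens refill continuously at requestsPerSecond,
// capped at burstSize; callers wait until a whole token is available
class RateLimiter {
  private tokens: number;
  private lastRefill = Date.now();

  constructor(private options: { requestsPerSecond: number; burstSize: number }) {
    this.tokens = options.burstSize;
  }

  async waitForToken(): Promise<void> {
    for (;;) {
      this.refill();
      if (this.tokens >= 1) {
        this.tokens -= 1;
        return;
      }
      // Sleep roughly until the next token is due
      await new Promise(resolve =>
        setTimeout(resolve, 1000 / this.options.requestsPerSecond)
      );
    }
  }

  private refill(): void {
    const now = Date.now();
    const elapsedSeconds = (now - this.lastRefill) / 1000;
    this.tokens = Math.min(
      this.options.burstSize,
      this.tokens + elapsedSeconds * this.options.requestsPerSecond
    );
    this.lastRefill = now;
  }
}
```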
## Monitoring and Profiling

### 1. Performance Monitoring

Record per-operation timings, keep a bounded window of recent metrics, and alert on regressions (the `PerformanceTimer` is sketched after the code block):

```typescript
class PerformanceMonitor {
  private metrics = new Map<string, PerformanceMetric[]>();
  private alerts = new Map<string, AlertConfig>();

  startTiming(operation: string): PerformanceTimer {
    return new PerformanceTimer(operation, this.recordMetric.bind(this));
  }

  recordMetric(metric: PerformanceMetric): void {
    const operationMetrics = this.metrics.get(metric.operation) || [];
    operationMetrics.push(metric);

    // Keep only the most recent 1000 metrics per operation
    if (operationMetrics.length > 1000) {
      operationMetrics.splice(0, operationMetrics.length - 1000);
    }

    this.metrics.set(metric.operation, operationMetrics);

    // Check alerts
    this.checkAlerts(metric);
  }

  getPerformanceStats(operation: string): PerformanceStats {
    const metrics = this.metrics.get(operation) || [];

    if (metrics.length === 0) {
      return { count: 0, mean: 0, p95: 0, p99: 0, errorRate: 0 };
    }

    const durations = metrics.map(m => m.duration).sort((a, b) => a - b);

    return {
      count: metrics.length,
      mean: durations.reduce((sum, d) => sum + d, 0) / durations.length,
      p95: this.percentile(durations, 0.95),
      p99: this.percentile(durations, 0.99),
      errorRate: metrics.filter(m => m.error).length / metrics.length
    };
  }

  private percentile(sortedDurations: number[], p: number): number {
    const index = Math.min(
      sortedDurations.length - 1,
      Math.floor(sortedDurations.length * p)
    );
    return sortedDurations[index];
  }

  private checkAlerts(metric: PerformanceMetric): void {
    const alert = this.alerts.get(metric.operation);
    if (!alert) return;

    if (metric.duration > alert.maxDuration) {
      this.triggerAlert('slow_operation', {
        operation: metric.operation,
        duration: metric.duration,
        threshold: alert.maxDuration
      });
    }

    if (metric.memoryUsage && metric.memoryUsage > alert.maxMemory) {
      this.triggerAlert('high_memory', {
        operation: metric.operation,
        memoryUsage: metric.memoryUsage,
        threshold: alert.maxMemory
      });
    }
  }
}
```
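`startTiming` hands back a `PerformanceTimer` that is not shown above; a minimal sketch, assuming `PerformanceMetric` carries the operation name, duration, and optional error flag and memory reading that `PerformanceMonitor` inspects:

```typescript
import { performance } from 'perf_hooks';

// Assumed metric shape, matching the fields PerformanceMonitor reads
interface PerformanceMetric {
  operation: string;
  duration: number;
  error?: boolean;
  memoryUsage?: number;
}

class PerformanceTimer {
  private readonly startedAt = performance.now();

  constructor(
    private operation: string,
    private record: (metric: PerformanceMetric) => void
  ) {}

  // Call stop() in a finally block so failed operations are still recorded
  stop(error = false): void {
    this.record({
      operation: this.operation,
      duration: performance.now() - this.startedAt,
      error,
      memoryUsage: process.memoryUsage().heapUsed
    });
  }
}
```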
### 2. Memory Profiling

Track heap snapshots over time and flag object types whose counts trend steadily upward:

```typescript
import { PerformanceObserver } from 'perf_hooks';

class MemoryProfiler {
  private snapshots: MemorySnapshot[] = [];
  private gcObserver: PerformanceObserver;

  constructor() {
    // Node's perf_hooks emits an entry for each garbage collection
    this.gcObserver = new PerformanceObserver((list) => {
      for (const entry of list.getEntries()) {
        this.recordGCEvent(entry);
      }
    });
    this.gcObserver.observe({ entryTypes: ['gc'] });
  }

  takeSnapshot(label: string): MemorySnapshot {
    const usage = process.memoryUsage();
    const snapshot: MemorySnapshot = {
      label,
      timestamp: Date.now(),
      heapUsed: usage.heapUsed,
      heapTotal: usage.heapTotal,
      external: usage.external,
      rss: usage.rss,
      objectCounts: this.getObjectCounts()
    };

    this.snapshots.push(snapshot);

    // Keep only recent snapshots
    if (this.snapshots.length > 100) {
      this.snapshots.shift();
    }

    return snapshot;
  }

  detectLeaks(): LeakDetectionResult[] {
    if (this.snapshots.length < 10) {
      return [];
    }

    const recentSnapshots = this.snapshots.slice(-10);
    const leaks: LeakDetectionResult[] = [];

    // Collect every object type seen in the recent window
    const objectTypes = new Set<string>();
    recentSnapshots.forEach(snapshot => {
      Object.keys(snapshot.objectCounts).forEach(type => objectTypes.add(type));
    });

    // A steadily rising count with high correlation suggests a leak
    for (const objectType of objectTypes) {
      const counts = recentSnapshots.map(s => s.objectCounts[objectType] || 0);
      const trend = this.calculateTrend(counts);

      if (trend.slope > 5 && trend.correlation > 0.8) {
        leaks.push({
          objectType,
          trend,
          severity: this.calculateSeverity(trend),
          recommendation: this.generateRecommendation(objectType, trend)
        });
      }
    }

    return leaks;
  }

  private getObjectCounts(): Record<string, number> {
    // Simplified object counting - in practice, use heap profiling to
    // obtain real per-type counts
    return {
      'PromptCandidate': this.estimateObjectCount('PromptCandidate'),
      'ExecutionTrajectory': this.estimateObjectCount('ExecutionTrajectory'),
      'ParetoPoint': this.estimateObjectCount('ParetoPoint'),
      'Map': this.estimateObjectCount('Map'),
      'Array': this.estimateObjectCount('Array')
    };
  }
}
```

Together, these techniques (object pooling, batched and bounded parallel work, indexed dominance checks, buffered and partitioned storage, pooled and deduplicated network calls, and continuous profiling) provide a foundation for building GEPA systems that scale while keeping resource usage under control.
