Skip to main content
Glama

documcp

by tosin2013
pruning.ts24.1 kB
/** * Memory Pruning & Optimization System for DocuMCP * Intelligent memory cleanup, storage optimization, and performance tuning */ import { EventEmitter } from 'events'; import { MemoryEntry, JSONLStorage } from './storage.js'; import { MemoryManager } from './manager.js'; import { IncrementalLearningSystem } from './learning.js'; import { KnowledgeGraph } from './knowledge-graph.js'; export interface PruningPolicy { maxAge: number; // Maximum age in days maxSize: number; // Maximum storage size in MB maxEntries: number; // Maximum number of entries preservePatterns: string[]; // Pattern types to preserve compressionThreshold: number; // Compress entries older than X days redundancyThreshold: number; // Remove similar entries with similarity > X } export interface OptimizationMetrics { totalEntries: number; storageSize: number; indexSize: number; compressionRatio: number; duplicatesRemoved: number; entriesPruned: number; performanceGain: number; lastOptimization: Date; } export interface PruningResult { entriesRemoved: number; spaceSaved: number; patternsPreserved: number; compressionApplied: number; optimizationApplied: boolean; metrics: OptimizationMetrics; } export interface CompressionStrategy { type: 'gzip' | 'lz4' | 'semantic'; threshold: number; ratio: number; } export interface RedundancyPattern { similarity: number; count: number; representative: string; duplicates: string[]; canMerge: boolean; } export class MemoryPruningSystem extends EventEmitter { private storage: JSONLStorage; private manager: MemoryManager; private learningSystem: IncrementalLearningSystem; private knowledgeGraph: KnowledgeGraph; private defaultPolicy: PruningPolicy; private compressionCache: Map<string, any>; private similarityCache: Map<string, Map<string, number>>; constructor( storage: JSONLStorage, manager: MemoryManager, learningSystem: IncrementalLearningSystem, knowledgeGraph: KnowledgeGraph, ) { super(); this.storage = storage; this.manager = manager; this.learningSystem = learningSystem; this.knowledgeGraph = knowledgeGraph; this.compressionCache = new Map(); this.similarityCache = new Map(); this.defaultPolicy = { maxAge: 180, // 6 months maxSize: 500, // 500MB maxEntries: 50000, preservePatterns: ['successful_deployment', 'user_preference', 'critical_error'], compressionThreshold: 30, // Compress after 30 days redundancyThreshold: 0.85, // 85% similarity threshold }; this.setupPeriodicCleanup(); } /** * Execute comprehensive memory pruning */ async prune(policy?: Partial<PruningPolicy>): Promise<PruningResult> { const activePolicy = { ...this.defaultPolicy, ...policy }; const startTime = Date.now(); this.emit('pruning_started', { policy: activePolicy }); try { // Get current metrics const initialMetrics = await this.getOptimizationMetrics(); // Phase 1: Remove aged entries const agedResult = await this.removeAgedEntries(activePolicy); // Phase 2: Apply size-based pruning const sizeResult = await this.applySizePruning(activePolicy); // Phase 3: Remove redundant entries const redundancyResult = await this.removeRedundantEntries(activePolicy); // Phase 4: Apply compression const compressionResult = await this.applyCompression(activePolicy); // Phase 5: Optimize storage structure const optimizationResult = await this.optimizeStorage(); // Get final metrics const finalMetrics = await this.getOptimizationMetrics(); const result: PruningResult = { entriesRemoved: agedResult.removed + sizeResult.removed + redundancyResult.removed, spaceSaved: initialMetrics.storageSize - finalMetrics.storageSize, patternsPreserved: agedResult.preserved + sizeResult.preserved, compressionApplied: compressionResult.compressed, optimizationApplied: optimizationResult.applied, metrics: finalMetrics, }; // Update learning system with pruning results await this.updateLearningFromPruning(result); this.emit('pruning_completed', { result, duration: Date.now() - startTime, }); return result; } catch (error) { this.emit('pruning_error', { error: error instanceof Error ? error.message : String(error) }); throw error; } } /** * Remove entries older than policy threshold */ private async removeAgedEntries( policy: PruningPolicy, ): Promise<{ removed: number; preserved: number }> { const cutoffDate = new Date(Date.now() - policy.maxAge * 24 * 60 * 60 * 1000); const allEntries = await this.storage.getAll(); let removed = 0; let preserved = 0; for (const entry of allEntries) { const entryDate = new Date(entry.timestamp); if (entryDate < cutoffDate) { // Check if entry should be preserved if (this.shouldPreserveEntry(entry, policy)) { preserved++; continue; } // Remove from storage await this.storage.delete(entry.id); // Remove from knowledge graph await this.knowledgeGraph.removeNode(entry.id); removed++; } } return { removed, preserved }; } /** * Apply size-based pruning to stay within limits */ private async applySizePruning( policy: PruningPolicy, ): Promise<{ removed: number; preserved: number }> { const metrics = await this.getOptimizationMetrics(); if (metrics.storageSize <= policy.maxSize && metrics.totalEntries <= policy.maxEntries) { return { removed: 0, preserved: 0 }; } // Get entries sorted by importance score const allEntries = await this.storage.getAll(); const scoredEntries = await Promise.all( allEntries.map(async (entry) => ({ entry, score: await this.calculateImportanceScore(entry), })), ); // Sort by score (ascending - remove least important first) scoredEntries.sort((a, b) => a.score - b.score); let removed = 0; let preserved = 0; let currentSize = metrics.storageSize; let currentEntries = metrics.totalEntries; for (const { entry, score } of scoredEntries) { if (currentSize <= policy.maxSize && currentEntries <= policy.maxEntries) { break; } if (this.shouldPreserveEntry(entry, policy) || score > 0.8) { preserved++; continue; } // Remove entry await this.storage.delete(entry.id); await this.knowledgeGraph.removeNode(entry.id); // Estimate size reduction (rough approximation) const entrySize = JSON.stringify(entry).length / (1024 * 1024); currentSize -= entrySize; currentEntries--; removed++; } return { removed, preserved }; } /** * Remove redundant and duplicate entries */ private async removeRedundantEntries( policy: PruningPolicy, ): Promise<{ removed: number; merged: number }> { const redundantPatterns = await this.findRedundantPatterns(policy.redundancyThreshold); let removed = 0; let merged = 0; for (const pattern of redundantPatterns) { if (pattern.canMerge && pattern.duplicates.length > 1) { // Keep the representative, remove duplicates for (let i = 1; i < pattern.duplicates.length; i++) { await this.storage.delete(pattern.duplicates[i]); removed++; } // Optionally merge information into representative if (pattern.count > 2) { await this.mergeRedundantEntries(pattern.representative, pattern.duplicates.slice(1)); merged++; } } } return { removed, merged }; } /** * Apply compression to old entries */ private async applyCompression( policy: PruningPolicy, ): Promise<{ compressed: number; spaceSaved: number }> { const cutoffDate = new Date(Date.now() - policy.compressionThreshold * 24 * 60 * 60 * 1000); const allEntries = await this.storage.getAll(); let compressed = 0; let spaceSaved = 0; for (const entry of allEntries) { const entryDate = new Date(entry.timestamp); if (entryDate < cutoffDate && !this.isCompressed(entry)) { const originalSize = JSON.stringify(entry).length; const compressedEntry = await this.compressEntry(entry); const compressedSize = JSON.stringify(compressedEntry).length; await this.storage.update(entry.id, compressedEntry); compressed++; spaceSaved += originalSize - compressedSize; } } return { compressed, spaceSaved }; } /** * Optimize storage structure and indices */ private async optimizeStorage(): Promise<{ applied: boolean; improvements: string[] }> { const improvements: string[] = []; try { // Rebuild indices await this.storage.rebuildIndex(); improvements.push('rebuilt_indices'); // Defragment storage files await this.defragmentStorage(); improvements.push('defragmented_storage'); // Optimize cache sizes this.optimizeCaches(); improvements.push('optimized_caches'); return { applied: true, improvements }; } catch (error) { return { applied: false, improvements }; } } /** * Calculate importance score for an entry */ private async calculateImportanceScore(entry: MemoryEntry): Promise<number> { let score = 0; // Recency score (0-0.3) const age = Date.now() - new Date(entry.timestamp).getTime(); const maxAge = 180 * 24 * 60 * 60 * 1000; // 180 days score += Math.max(0, 1 - age / maxAge) * 0.3; // Type importance (0-0.2) const typeScores: Record<string, number> = { successful_deployment: 0.2, user_preference: 0.18, configuration: 0.15, analysis: 0.12, recommendation: 0.12, interaction: 0.08, error: 0.05, }; score += typeScores[entry.type] || 0.05; // Learning value (0-0.2) const patterns = await this.learningSystem.getPatterns(); const relevantPatterns = patterns.filter( (p) => p.metadata.technologies?.includes(entry.data.language) || p.metadata.technologies?.includes(entry.data.framework), ); score += Math.min(0.2, relevantPatterns.length * 0.05); // Knowledge graph centrality (0-0.15) try { const connections = await this.knowledgeGraph.getConnections(entry.id); score += Math.min(0.15, connections.length * 0.02); } catch { // Node might not exist in graph } // Success indicator (0-0.15) if (entry.data.outcome === 'success' || entry.data.success === true) { score += 0.15; } return Math.min(1, score); } /** * Check if entry should be preserved based on policy */ private shouldPreserveEntry(entry: MemoryEntry, policy: PruningPolicy): boolean { // Check preserve patterns for (const pattern of policy.preservePatterns) { if (entry.type.includes(pattern) || JSON.stringify(entry.data).includes(pattern)) { return true; } } // Preserve high-value entries if ( entry.data.outcome === 'success' || entry.data.success === true || entry.data.critical === true ) { return true; } return false; } /** * Find patterns of redundant entries */ private async findRedundantPatterns(threshold: number): Promise<RedundancyPattern[]> { const allEntries = await this.storage.getAll(); const patterns: RedundancyPattern[] = []; const processed = new Set<string>(); for (const entry of allEntries) { if (processed.has(entry.id)) continue; const similar = await this.findSimilarEntries(entry, allEntries, threshold); if (similar.length > 1) { patterns.push({ similarity: threshold, count: similar.length, representative: similar[0].id, duplicates: similar.map((e) => e.id), canMerge: this.canMergeEntries(similar), }); similar.forEach((s) => processed.add(s.id)); } } return patterns; } /** * Find entries similar to given entry */ private async findSimilarEntries( target: MemoryEntry, entries: MemoryEntry[], threshold: number, ): Promise<MemoryEntry[]> { const similar: MemoryEntry[] = [target]; for (const entry of entries) { if (entry.id === target.id) continue; const similarity = await this.calculateSimilarity(target, entry); if (similarity >= threshold) { similar.push(entry); } } return similar; } /** * Calculate similarity between two entries */ private async calculateSimilarity(entry1: MemoryEntry, entry2: MemoryEntry): Promise<number> { // Check cache first if ( this.similarityCache.has(entry1.id) && this.similarityCache.get(entry1.id)?.has(entry2.id) ) { return this.similarityCache.get(entry1.id)!.get(entry2.id)!; } let similarity = 0; // Type similarity (0-0.3) if (entry1.type === entry2.type) { similarity += 0.3; } // Temporal similarity (0-0.2) const timeDiff = Math.abs( new Date(entry1.timestamp).getTime() - new Date(entry2.timestamp).getTime(), ); const maxTimeDiff = 7 * 24 * 60 * 60 * 1000; // 7 days similarity += Math.max(0, 1 - timeDiff / maxTimeDiff) * 0.2; // Data similarity (0-0.5) const dataSimilarity = this.calculateDataSimilarity(entry1.data, entry2.data); similarity += dataSimilarity * 0.5; // Cache result if (!this.similarityCache.has(entry1.id)) { this.similarityCache.set(entry1.id, new Map()); } this.similarityCache.get(entry1.id)!.set(entry2.id, similarity); return similarity; } /** * Calculate similarity between data objects */ private calculateDataSimilarity(data1: any, data2: any): number { const keys1 = new Set(Object.keys(data1)); const keys2 = new Set(Object.keys(data2)); const allKeys = new Set([...keys1, ...keys2]); let matches = 0; let total = 0; for (const key of allKeys) { total++; if (keys1.has(key) && keys2.has(key)) { if (data1[key] === data2[key]) { matches++; } else if (typeof data1[key] === 'string' && typeof data2[key] === 'string') { // String similarity for text fields const stringSim = this.calculateStringSimilarity(data1[key], data2[key]); matches += stringSim; } } } return total > 0 ? matches / total : 0; } /** * Calculate string similarity (simple Jaccard similarity) */ private calculateStringSimilarity(str1: string, str2: string): number { const words1 = new Set(str1.toLowerCase().split(/\s+/)); const words2 = new Set(str2.toLowerCase().split(/\s+/)); const intersection = new Set([...words1].filter((w) => words2.has(w))); const union = new Set([...words1, ...words2]); return union.size > 0 ? intersection.size / union.size : 0; } /** * Check if entries can be safely merged */ private canMergeEntries(entries: MemoryEntry[]): boolean { if (entries.length < 2) return false; // All entries must have the same type const firstType = entries[0].type; if (!entries.every((e) => e.type === firstType)) { return false; } // Check for conflicting data const firstData = entries[0].data; for (const entry of entries.slice(1)) { if (this.hasConflictingData(firstData, entry.data)) { return false; } } return true; } /** * Check for conflicting data between entries */ private hasConflictingData(data1: any, data2: any): boolean { for (const key of Object.keys(data1)) { if (key in data2 && data1[key] !== data2[key]) { // Special handling for arrays and objects if (Array.isArray(data1[key]) && Array.isArray(data2[key])) { continue; // Arrays can be merged } if (typeof data1[key] === 'object' && typeof data2[key] === 'object') { continue; // Objects can be merged } return true; // Conflicting primitive values } } return false; } /** * Merge redundant entries into representative */ private async mergeRedundantEntries( representativeId: string, duplicateIds: string[], ): Promise<void> { const representative = await this.storage.get(representativeId); if (!representative) return; const duplicates = await Promise.all(duplicateIds.map((id) => this.storage.get(id))); // Merge data from duplicates const mergedData = { ...representative.data }; for (const duplicate of duplicates) { if (!duplicate) continue; // Merge arrays for (const [key, value] of Object.entries(duplicate.data)) { if (Array.isArray(value) && Array.isArray(mergedData[key])) { mergedData[key] = [...new Set([...mergedData[key], ...value])]; } else if (typeof value === 'object' && typeof mergedData[key] === 'object') { mergedData[key] = { ...mergedData[key], ...value }; } else if (!(key in mergedData)) { mergedData[key] = value; } } } // Update representative with merged data await this.storage.update(representativeId, { ...representative, data: mergedData, metadata: { ...representative.metadata, merged: true, mergedCount: duplicateIds.length, mergedAt: new Date().toISOString(), }, }); } /** * Check if entry is already compressed */ private isCompressed(entry: MemoryEntry): boolean { return Boolean(entry.metadata?.compressed); } /** * Compress entry data */ private async compressEntry(entry: MemoryEntry): Promise<MemoryEntry> { // Simple compression - in production, use actual compression library const compressedData = this.simpleCompress(entry.data); return { ...entry, data: compressedData, metadata: { ...entry.metadata, compressed: true, compressionType: 'simple', compressedAt: new Date().toISOString(), originalSize: JSON.stringify(entry.data).length, }, }; } /** * Simple compression simulation */ private simpleCompress(data: any): any { // This is a placeholder - in production, use proper compression const stringified = JSON.stringify(data); const compressed = stringified.replace(/\s+/g, ' ').trim(); return { _compressed: true, _data: compressed, _type: 'simple', }; } /** * Defragment storage files */ private async defragmentStorage(): Promise<void> { // Rebuild storage with optimal layout const allEntries = await this.storage.getAll(); // Sort entries for optimal access patterns allEntries.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime()); // This would typically rewrite storage files // For now, just trigger a rebuild await this.storage.rebuildIndex(); } /** * Optimize cache sizes based on usage patterns */ private optimizeCaches(): void { // Clear old cache entries // Clear similarity cache entries older than 24 hours for (const [key1, innerMap] of this.similarityCache.entries()) { for (const [key2] of innerMap.entries()) { // Simple heuristic - remove if keys suggest old timestamps if (Math.random() < 0.1) { // 10% chance to clear each entry innerMap.delete(key2); } } if (innerMap.size === 0) { this.similarityCache.delete(key1); } } // Limit cache sizes if (this.compressionCache.size > 10000) { const entries = Array.from(this.compressionCache.entries()); this.compressionCache.clear(); // Keep only the most recent 5000 entries entries.slice(-5000).forEach(([key, value]) => { this.compressionCache.set(key, value); }); } } /** * Get comprehensive optimization metrics */ async getOptimizationMetrics(): Promise<OptimizationMetrics> { const allEntries = await this.storage.getAll(); const totalEntries = allEntries.length; // Calculate storage size (approximate) const storageSize = allEntries.reduce((total, entry) => { return total + JSON.stringify(entry).length; }, 0) / (1024 * 1024); // Convert to MB // Calculate index size (approximate) const indexSize = (totalEntries * 100) / (1024 * 1024); // Rough estimate // Calculate compression ratio const compressedEntries = allEntries.filter((e) => this.isCompressed(e)); const compressionRatio = compressedEntries.length / totalEntries; return { totalEntries, storageSize, indexSize, compressionRatio, duplicatesRemoved: 0, // Would be tracked during runtime entriesPruned: 0, // Would be tracked during runtime performanceGain: 0, // Would be calculated based on before/after metrics lastOptimization: new Date(), }; } /** * Update learning system based on pruning results */ private async updateLearningFromPruning(result: PruningResult): Promise<void> { // Create a learning entry about pruning effectiveness const pruningLearning = { action: 'memory_pruning', outcome: result.spaceSaved > 0 ? 'success' : 'neutral', metrics: { entriesRemoved: result.entriesRemoved, spaceSaved: result.spaceSaved, patternsPreserved: result.patternsPreserved, }, timestamp: new Date().toISOString(), }; // This would integrate with the learning system // For now, just emit an event this.emit('learning_update', pruningLearning); } /** * Setup periodic cleanup */ private setupPeriodicCleanup(): void { // Run optimization every 24 hours setInterval( async () => { try { await this.prune(); this.emit('periodic_cleanup_completed'); } catch (error) { this.emit('periodic_cleanup_error', { error: error instanceof Error ? error.message : String(error), }); } }, 24 * 60 * 60 * 1000, ); } /** * Get pruning recommendations */ async getPruningRecommendations(): Promise<{ shouldPrune: boolean; reasons: string[]; estimatedSavings: number; recommendedPolicy: Partial<PruningPolicy>; }> { const metrics = await this.getOptimizationMetrics(); const reasons: string[] = []; let shouldPrune = false; let estimatedSavings = 0; // Check storage size if (metrics.storageSize > this.defaultPolicy.maxSize * 0.8) { shouldPrune = true; reasons.push(`Storage size (${metrics.storageSize.toFixed(2)}MB) approaching limit`); estimatedSavings += metrics.storageSize * 0.2; } // Check entry count if (metrics.totalEntries > this.defaultPolicy.maxEntries * 0.8) { shouldPrune = true; reasons.push(`Entry count (${metrics.totalEntries}) approaching limit`); } // Check compression ratio if (metrics.compressionRatio < 0.3) { reasons.push('Low compression ratio indicates optimization opportunity'); estimatedSavings += metrics.storageSize * 0.15; } // Time-based recommendation const daysSinceLastOptimization = (Date.now() - metrics.lastOptimization.getTime()) / (24 * 60 * 60 * 1000); if (daysSinceLastOptimization > 7) { shouldPrune = true; reasons.push('Regular maintenance window (weekly optimization)'); } return { shouldPrune, reasons, estimatedSavings, recommendedPolicy: { maxAge: Math.max(30, this.defaultPolicy.maxAge - 30), // More aggressive if needed compressionThreshold: Math.max(7, this.defaultPolicy.compressionThreshold - 7), }, }; } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tosin2013/documcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server