Skip to main content
Glama
memory-optimizer.js10.1 kB
import { encoding_for_model } from "tiktoken"; export class MemoryOptimizer { tokenizer; config; // Technical abbreviations for memory storage - CONSERVATIVE to preserve searchability abbreviations = { // Only abbreviate very long technical terms that are unambiguous configuration: "config", implementation: "impl", application: "app", environment: "env", development: "dev", production: "prod", repository: "repo", documentation: "docs", requirements: "reqs", specification: "spec", // REMOVED: authentication, authorization, database, interface, component, function, parameter, variable // These are commonly searched terms and should be preserved performance: "perf", optimization: "opt", management: "mgmt", information: "info", technology: "tech", framework: "fw", library: "lib", service: "svc", server: "srv", client: "cli", request: "req", response: "resp", message: "msg", session: "sess", transaction: "txn", operation: "op", process: "proc", system: "sys", network: "net", security: "sec", encryption: "enc", validation: "val", }; // Filler words to remove for space efficiency fillerWords = new Set([ "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by", "very", "quite", "really", "just", "only", "also", "even", "still", "rather", "pretty", "fairly", "somewhat", "basically", "actually", "literally", "obviously", "clearly", "essentially", "generally", "typically", "usually", "normally", "please", "thank", "thanks", "hello", "hi", "hey", "well", "um", "uh", "oh", ]); constructor(config = {}) { this.config = { compressionLevel: "aggressive", // Default to aggressive for better compression extractKeywords: true, extractEntities: true, ...config, }; try { this.tokenizer = encoding_for_model("gpt-4"); } catch (error) { console.warn("Failed to initialize tokenizer:", error); this.tokenizer = null; } } /** * Optimize text for memory storage */ optimize(text) { const originalTokenCount = this.countTokens(text); const keywords = this.config.extractKeywords ? this.extractKeywords(text) : []; const entities = this.config.extractEntities ? this.extractEntities(text) : []; let optimized = text; // Apply compression based on level switch (this.config.compressionLevel) { case "minimal": optimized = this.minimalCompression(text); break; case "balanced": optimized = this.balancedCompression(text); break; case "aggressive": optimized = this.aggressiveCompression(text); break; } const optimizedTokenCount = this.countTokens(optimized); const compressionRatio = originalTokenCount > 0 ? optimizedTokenCount / originalTokenCount : 1; return { original: text, optimized, tokenCount: optimizedTokenCount, originalTokenCount, compressionRatio, keywords, entities, }; } countTokens(text) { if (!this.tokenizer) { // Fallback: rough estimation return Math.ceil(text.length / 4); } try { return this.tokenizer.encode(text).length; } catch { return Math.ceil(text.length / 4); } } minimalCompression(text) { return text .replace(/\s+/g, " ") // Multiple spaces to single .replace(/\n\s*\n/g, "\n") // Multiple newlines to single .trim(); } balancedCompression(text) { let result = this.minimalCompression(text); // Apply technical abbreviations for (const [full, abbrev] of Object.entries(this.abbreviations)) { const regex = new RegExp(`\\b${full}\\b`, "gi"); result = result.replace(regex, abbrev); } // Remove some common filler words but preserve meaning const words = result.split(/\s+/); const filtered = words.filter((word, index) => { const cleanWord = word.toLowerCase().replace(/[^\w]/g, ""); // Keep filler words that are important for meaning if (this.fillerWords.has(cleanWord)) { // Keep if it's at the beginning, end, or followed by important words const next = words[index + 1]; const prev = words[index - 1]; if (index === 0 || index === words.length - 1) return true; if (next && this.isImportantWord(next)) return true; if (prev && this.isImportantWord(prev)) return true; return false; } return true; }); return filtered.join(" "); } aggressiveCompression(text) { let result = this.balancedCompression(text); // Remove more filler words aggressively const words = result.split(/\s+/); const filtered = words.filter((word) => { const cleanWord = word.toLowerCase().replace(/[^\w]/g, ""); return !this.fillerWords.has(cleanWord); }); // Convert to more compact format result = filtered.join(" "); // Additional aggressive shortcuts result = result .replace(/\bis\s+/gi, "=") .replace(/\bhas\s+/gi, ">") .replace(/\bwith\s+/gi, "+") .replace(/\band\s+/gi, "&") .replace(/\bthat\s+/gi, ":") .replace(/\bwhich\s+/gi, ":"); return result; } isImportantWord(word) { const clean = word.toLowerCase().replace(/[^\w]/g, ""); // Check if word appears to be technical, a name, or otherwise important return clean.length > 3 || /[A-Z]/.test(word) || /\d/.test(word); } /** * Extract keywords from text for indexing - Enhanced for technical content */ extractKeywords(text) { const words = text.toLowerCase().match(/\b[a-zA-Z]{2,}\b/g) || []; const technicalTerms = text.match(/\b[A-Z][a-zA-Z]*\b|\b[a-z]+[A-Z][a-zA-Z]*\b|\b[a-zA-Z]*\d+[a-zA-Z]*\b/g) || []; const wordCounts = {}; // Count word frequency with higher weight for technical terms words.forEach((word) => { if (!this.fillerWords.has(word) && word.length >= 2) { wordCounts[word] = (wordCounts[word] || 0) + 1; } }); // Add technical terms with higher weight technicalTerms.forEach((term) => { const lowerTerm = term.toLowerCase(); if (lowerTerm.length >= 2) { wordCounts[lowerTerm] = (wordCounts[lowerTerm] || 0) + 2; // Double weight for technical terms } }); // Also extract file paths, URLs, and technical patterns const patterns = [ /\/[a-zA-Z0-9._/-]+\.[a-zA-Z0-9]+/g, // File paths /https?:\/\/[a-zA-Z0-9.-]+[a-zA-Z0-9._/-]*/g, // URLs /@[a-zA-Z0-9/-]+/g, // NPM packages /[A-Z_][A-Z0-9_]+=\w+/g, // Environment variables /\b[a-zA-Z]+\([^)]*\)/g, // Function calls ]; patterns.forEach((pattern) => { const matches = text.match(pattern) || []; matches.forEach((match) => { const cleanMatch = match.toLowerCase().replace(/[^\w]/g, ""); if (cleanMatch.length >= 3) { wordCounts[cleanMatch] = (wordCounts[cleanMatch] || 0) + 3; // Triple weight for patterns } }); }); // Return top keywords by frequency, prioritizing technical content return Object.entries(wordCounts) .sort(([, a], [, b]) => b - a) .slice(0, 15) // Increased from 10 to 15 for better coverage .map(([word]) => word); } /** * Extract entities (capitalized words, technical terms) for indexing */ extractEntities(text) { const entities = new Set(); // Find capitalized words (potential proper nouns) const capitalizedWords = text.match(/\b[A-Z][a-zA-Z]{2,}\b/g) || []; capitalizedWords.forEach((word) => entities.add(word)); // Find technical terms (camelCase, PascalCase, snake_case) const technicalTerms = text.match(/\b[a-z]+[A-Z][a-zA-Z]*\b|\b[A-Z][a-z]*[A-Z][a-zA-Z]*\b|\b[a-z]+_[a-z]+\b/g) || []; technicalTerms.forEach((term) => entities.add(term)); // Find terms with numbers (versions, IDs, etc.) const numberedTerms = text.match(/\b[a-zA-Z]+\d+[a-zA-Z]*\b|\b[a-zA-Z]*\d+[a-zA-Z]+\b/g) || []; numberedTerms.forEach((term) => entities.add(term)); return Array.from(entities).slice(0, 15); // Limit to top 15 entities } /** * Calculate similarity score between two texts (for deduplication) */ calculateSimilarity(text1, text2) { const words1 = new Set(text1.toLowerCase().split(/\s+/)); const words2 = new Set(text2.toLowerCase().split(/\s+/)); const intersection = new Set([...words1].filter((x) => words2.has(x))); const union = new Set([...words1, ...words2]); return union.size > 0 ? intersection.size / union.size : 0; } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PrismAero/agentic-memory-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server