Skip to main content
Glama
similar-memory-finder.ts15.5 kB
/** * Similar Memory Finder * * Multi-factor similarity calculation for finding similar memories. * Implements weighted composite scoring using: * - Keyword overlap (30% weight) - Jaccard similarity * - Tag similarity (25% weight) - Jaccard similarity * - Content similarity (20% weight) - Cosine similarity on embeddings * - Category matching (15% weight) - Binary match * - Temporal proximity (10% weight) - Exponential decay * * Requirements: 4.1, 4.2, 4.3, 4.4, 4.5 */ import type { DatabaseConnectionManager } from "../database/connection-manager"; import { DEFAULT_SIMILARITY_CONFIG, SimilarityCalculationError, type SimilarityConfig, type SimilarMemory, } from "./types"; /** * Options for finding similar memories */ export interface FindSimilarOptions { /** Maximum number of results to return */ limit?: number; /** Minimum similarity threshold (0-1) */ minSimilarity?: number; /** Include detailed explanation for each result */ includeExplanation?: boolean; } /** * Breakdown of similarity calculation for debugging */ export interface SimilarityBreakdown { keywordSimilarity: number; tagSimilarity: number; contentSimilarity: number; categorySimilarity: number; temporalProximity: number; keywordWeight: number; tagWeight: number; contentWeight: number; categoryWeight: number; temporalWeight: number; compositeSimilarity: number; } /** * Similar Memory Finder * * Calculates multi-factor similarity between memories and finds similar memories. */ export class SimilarMemoryFinder { private readonly db: DatabaseConnectionManager; private readonly config: SimilarityConfig; private readonly cache: Map<string, number>; constructor(db: DatabaseConnectionManager, config?: Partial<SimilarityConfig>) { this.db = db; this.config = { ...DEFAULT_SIMILARITY_CONFIG, ...config }; this.cache = new Map(); } /** * Calculate keyword overlap using Jaccard similarity * Jaccard = |A ∩ B| / |A ∪ B| */ async calculateKeywordSimilarity(memoryId1: string, memoryId2: string): Promise<number> { const client = await this.db.getConnection(); try { const result = await client.query( `SELECT m1.keywords as keywords1, m2.keywords as keywords2 FROM memory_metadata m1 CROSS JOIN memory_metadata m2 WHERE m1.memory_id = $1 AND m2.memory_id = $2`, [memoryId1, memoryId2] ); if (result.rows.length === 0) { return 0; } const keywords1 = result.rows[0].keywords1 ?? []; const keywords2 = result.rows[0].keywords2 ?? []; return this.calculateJaccardSimilarity(keywords1, keywords2); } finally { this.db.releaseConnection(client); } } /** * Calculate tag similarity using Jaccard similarity */ async calculateTagSimilarity(memoryId1: string, memoryId2: string): Promise<number> { const client = await this.db.getConnection(); try { const result = await client.query( `SELECT m1.tags as tags1, m2.tags as tags2 FROM memory_metadata m1 CROSS JOIN memory_metadata m2 WHERE m1.memory_id = $1 AND m2.memory_id = $2`, [memoryId1, memoryId2] ); if (result.rows.length === 0) { return 0; } const tags1 = result.rows[0].tags1 ?? []; const tags2 = result.rows[0].tags2 ?? []; return this.calculateJaccardSimilarity(tags1, tags2); } finally { this.db.releaseConnection(client); } } /** * Calculate content similarity using cosine similarity on embeddings */ async calculateContentSimilarity( memoryId1: string, memoryId2: string, sector: string = "semantic" ): Promise<number> { const client = await this.db.getConnection(); try { const result = await client.query( `SELECT e1.embedding as embedding1, e2.embedding as embedding2 FROM memory_embeddings e1 CROSS JOIN memory_embeddings e2 WHERE e1.memory_id = $1 AND e1.sector = $3 AND e2.memory_id = $2 AND e2.sector = $3`, [memoryId1, memoryId2, sector] ); if (result.rows.length === 0) { // No embeddings available return 0; } const embedding1 = result.rows[0].embedding1; const embedding2 = result.rows[0].embedding2; // Handle null embeddings if (!embedding1 || !embedding2) { return 0; } return this.calculateCosineSimilarity(embedding1, embedding2); } finally { this.db.releaseConnection(client); } } /** * Calculate category match (binary: 1.0 if same, 0.0 if different) */ async calculateCategoryMatch(memoryId1: string, memoryId2: string): Promise<number> { const client = await this.db.getConnection(); try { const result = await client.query( `SELECT m1.category as category1, m2.category as category2 FROM memory_metadata m1 CROSS JOIN memory_metadata m2 WHERE m1.memory_id = $1 AND m2.memory_id = $2`, [memoryId1, memoryId2] ); if (result.rows.length === 0) { return 0; } const category1 = result.rows[0].category1; const category2 = result.rows[0].category2; // Handle empty/null categories if (!category1 || !category2 || category1.trim() === "" || category2.trim() === "") { return 0; } // Case-insensitive comparison return category1.toLowerCase() === category2.toLowerCase() ? 1.0 : 0.0; } finally { this.db.releaseConnection(client); } } /** * Alias for calculateCategoryMatch (for backward compatibility) */ async calculateCategorySimilarity(memoryId1: string, memoryId2: string): Promise<number> { return this.calculateCategoryMatch(memoryId1, memoryId2); } /** * Calculate temporal proximity using exponential decay * proximity = exp(-λ × |t1 - t2|) */ async calculateTemporalProximity(memoryId1: string, memoryId2: string): Promise<number> { const client = await this.db.getConnection(); try { const result = await client.query( `SELECT m1.created_at as created_at1, m2.created_at as created_at2 FROM memories m1 CROSS JOIN memories m2 WHERE m1.id = $1 AND m2.id = $2`, [memoryId1, memoryId2] ); if (result.rows.length === 0) { return 0; } const timestamp1 = result.rows[0].created_at1; const timestamp2 = result.rows[0].created_at2; // Handle null timestamps if (!timestamp1 || !timestamp2) { return 0; } const time1 = new Date(timestamp1).getTime(); const time2 = new Date(timestamp2).getTime(); // Handle invalid dates if (isNaN(time1) || isNaN(time2)) { return 0; } const timeDiffMs = Math.abs(time1 - time2); const timeDiffSeconds = timeDiffMs / 1000; // Exponential decay: exp(-λ × time_diff) // λ = 0.001 means half-life of ~693 seconds (~11.5 minutes) const proximity = Math.exp(-this.config.temporalDecayRate * timeDiffSeconds); return proximity; } finally { this.db.releaseConnection(client); } } /** * Calculate composite similarity score with all factors */ async calculateCompositeSimilarity(memoryId1: string, memoryId2: string): Promise<number> { // Check cache const cacheKey = `${memoryId1}:${memoryId2}`; if (this.config.enableCache && this.cache.has(cacheKey)) { const cachedValue = this.cache.get(cacheKey); if (cachedValue !== undefined) { return cachedValue; } } try { // Validate memory IDs exist await this.validateMemoryIds(memoryId1, memoryId2); // Calculate all factors in parallel const [keyword, tag, content, category, temporal] = await Promise.all([ this.calculateKeywordSimilarity(memoryId1, memoryId2), this.calculateTagSimilarity(memoryId1, memoryId2), this.calculateContentSimilarity(memoryId1, memoryId2), this.calculateCategoryMatch(memoryId1, memoryId2), this.calculateTemporalProximity(memoryId1, memoryId2), ]); // Apply weights: 30%, 25%, 20%, 15%, 10% const composite = keyword * this.config.keywordWeight + tag * this.config.tagWeight + content * this.config.contentWeight + category * this.config.categoryWeight + temporal * this.config.temporalWeight; // Cache result if (this.config.enableCache) { this.cache.set(cacheKey, composite); } return composite; } catch (error) { throw new SimilarityCalculationError( `Failed to calculate composite similarity between ${memoryId1} and ${memoryId2}`, memoryId1, memoryId2, error as Error ); } } /** * Validate memory IDs exist */ private async validateMemoryIds(memoryId1: string, memoryId2: string): Promise<void> { const client = await this.db.getConnection(); try { const result = await client.query( `SELECT COUNT(*) as count FROM memories WHERE id IN ($1, $2)`, [memoryId1, memoryId2] ); const count = parseInt(result.rows[0].count); if (count !== 2) { throw new Error(`One or both memory IDs not found: ${memoryId1}, ${memoryId2}`); } } finally { this.db.releaseConnection(client); } } /** * Get detailed breakdown of similarity calculation */ async getSimilarityBreakdown(memoryId1: string, memoryId2: string): Promise<SimilarityBreakdown> { const [keyword, tag, content, category, temporal] = await Promise.all([ this.calculateKeywordSimilarity(memoryId1, memoryId2), this.calculateTagSimilarity(memoryId1, memoryId2), this.calculateContentSimilarity(memoryId1, memoryId2), this.calculateCategoryMatch(memoryId1, memoryId2), this.calculateTemporalProximity(memoryId1, memoryId2), ]); const composite = keyword * this.config.keywordWeight + tag * this.config.tagWeight + content * this.config.contentWeight + category * this.config.categoryWeight + temporal * this.config.temporalWeight; return { keywordSimilarity: keyword, tagSimilarity: tag, contentSimilarity: content, categorySimilarity: category, temporalProximity: temporal, keywordWeight: this.config.keywordWeight, tagWeight: this.config.tagWeight, contentWeight: this.config.contentWeight, categoryWeight: this.config.categoryWeight, temporalWeight: this.config.temporalWeight, compositeSimilarity: composite, }; } /** * Find similar memories ranked by composite similarity score */ async findSimilar(queryMemoryId: string, options?: FindSimilarOptions): Promise<SimilarMemory[]> { const limit = options?.limit ?? this.config.defaultMaxResults; const minSimilarity = options?.minSimilarity ?? this.config.defaultThreshold; const includeExplanation = options?.includeExplanation ?? false; const client = await this.db.getConnection(); try { // Get all candidate memories (excluding query memory) const candidatesResult = await client.query( `SELECT m.id, m.content, m.created_at, m.salience, m.strength FROM memories m WHERE m.id != $1 LIMIT 1000`, [queryMemoryId] ); const candidates = candidatesResult.rows; // Calculate similarity for each candidate const similarityPromises = candidates.map(async (candidate) => { const similarity = await this.calculateCompositeSimilarity(queryMemoryId, candidate.id); if (similarity < minSimilarity) { return null; } let explanation = ""; if (includeExplanation) { const breakdown = await this.getSimilarityBreakdown(queryMemoryId, candidate.id); explanation = this.generateExplanation(breakdown); } return { memoryId: candidate.id, content: candidate.content, similarity: { overall: similarity, factors: { keywordOverlap: 0, // Will be filled if needed tagSimilarity: 0, contentSimilarity: 0, categoryMatch: 0, temporalProximity: 0, }, }, explanation, createdAt: candidate.created_at, salience: candidate.salience, strength: candidate.strength, } as SimilarMemory; }); const results = (await Promise.all(similarityPromises)).filter( (result): result is SimilarMemory => result !== null ); // Sort by similarity descending results.sort((a, b) => b.similarity.overall - a.similarity.overall); // Apply limit return results.slice(0, limit); } finally { this.db.releaseConnection(client); } } /** * Generate human-readable explanation of similarity */ private generateExplanation(breakdown: SimilarityBreakdown): string { const parts: string[] = []; // Always include all factors for transparency parts.push(`keyword: ${(breakdown.keywordSimilarity * 100).toFixed(1)}%`); parts.push(`tag: ${(breakdown.tagSimilarity * 100).toFixed(1)}%`); parts.push(`content: ${(breakdown.contentSimilarity * 100).toFixed(1)}%`); parts.push(`category: ${(breakdown.categorySimilarity * 100).toFixed(1)}%`); parts.push(`temporal: ${(breakdown.temporalProximity * 100).toFixed(1)}%`); const overall = (breakdown.compositeSimilarity * 100).toFixed(1); return `Overall similarity: ${overall}% (${parts.join(", ")})`; } /** * Calculate Jaccard similarity between two arrays * Jaccard = |A ∩ B| / |A ∪ B| */ private calculateJaccardSimilarity(arr1: string[], arr2: string[]): number { if (arr1.length === 0 && arr2.length === 0) { return 0; } if (arr1.length === 0 || arr2.length === 0) { return 0; } // Convert to lowercase sets for case-insensitive comparison const set1 = new Set(arr1.map((s) => s.toLowerCase())); const set2 = new Set(arr2.map((s) => s.toLowerCase())); // Calculate intersection const intersection = new Set([...set1].filter((x) => set2.has(x))); // Calculate union const union = new Set([...set1, ...set2]); return intersection.size / union.size; } /** * Calculate cosine similarity between two embedding vectors * cosine = (A · B) / (||A|| × ||B||) */ private calculateCosineSimilarity(embedding1: number[], embedding2: number[]): number { if (embedding1.length !== embedding2.length) { return 0; } // Calculate dot product let dotProduct = 0; for (let i = 0; i < embedding1.length; i++) { dotProduct += embedding1[i] * embedding2[i]; } // Calculate magnitudes let magnitude1 = 0; let magnitude2 = 0; for (let i = 0; i < embedding1.length; i++) { magnitude1 += embedding1[i] * embedding1[i]; magnitude2 += embedding2[i] * embedding2[i]; } magnitude1 = Math.sqrt(magnitude1); magnitude2 = Math.sqrt(magnitude2); // Avoid division by zero if (magnitude1 === 0 || magnitude2 === 0) { return 0; } return dotProduct / (magnitude1 * magnitude2); } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keyurgolani/ThoughtMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server