Skip to main content
Glama
similar-memory-finder.test.ts74.9 kB
/** * Similar Memory Finder Tests * * Tests for multi-factor similarity calculation including: * - Keyword overlap (Jaccard similarity) * - Tag similarity * - Content similarity (cosine) * - Category matching * - Temporal proximity * - Composite scoring * * Requirements: 4.1, 4.2, 4.3, 4.4, 4.5 */ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { DatabaseConnectionManager } from "../../../database/connection-manager"; import { SimilarMemoryFinder } from "../../../search/similar-memory-finder"; import { createTestMemory, createTestMemoryMetadata } from "../../utils/test-fixtures"; describe("SimilarMemoryFinder - Keyword Overlap Calculation", () => { let finder: SimilarMemoryFinder; let mockDb: DatabaseConnectionManager; let mockClient: { query: ReturnType<typeof vi.fn>; release: ReturnType<typeof vi.fn>; }; beforeEach(() => { // Create mock database client mockClient = { query: vi.fn(), release: vi.fn(), }; // Create mock database connection manager mockDb = { getConnection: vi.fn().mockResolvedValue(mockClient), releaseConnection: vi.fn(), } as unknown as DatabaseConnectionManager; // Create finder instance with caching disabled for predictable test behavior finder = new SimilarMemoryFinder(mockDb, { enableCache: false }); }); afterEach(() => { vi.clearAllMocks(); }); describe("calculateKeywordSimilarity", () => { it("should return 1.0 for identical keywords", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ keywords: ["machine", "learning", "algorithms"], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ keywords: ["machine", "learning", "algorithms"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: memory1.metadata.keywords, keywords2: memory2.metadata.keywords, }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity(memory1.id, memory2.id); // Assert expect(similarity).toBe(1.0); expect(mockDb.getConnection).toHaveBeenCalledOnce(); expect(mockDb.releaseConnection).toHaveBeenCalledOnce(); }); it("should return 0.0 for no keyword overlap", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ keywords: ["machine", "learning", "algorithms"], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ keywords: ["cooking", "recipes", "food"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: memory1.metadata.keywords, keywords2: memory2.metadata.keywords, }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity(memory1.id, memory2.id); // Assert expect(similarity).toBe(0.0); }); it("should calculate correct Jaccard similarity for partial overlap", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ keywords: ["machine", "learning", "algorithms", "neural"], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ keywords: ["machine", "learning", "data", "science"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: memory1.metadata.keywords, keywords2: memory2.metadata.keywords, }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity(memory1.id, memory2.id); // Assert // Intersection: {machine, learning} = 2 // Union: {machine, learning, algorithms, neural, data, science} = 6 // Jaccard = 2/6 = 0.333... expect(similarity).toBeCloseTo(0.333, 2); }); it("should return 0.0 when first memory has empty keywords", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ keywords: [], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ keywords: ["machine", "learning"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: memory1.metadata.keywords, keywords2: memory2.metadata.keywords, }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity(memory1.id, memory2.id); // Assert expect(similarity).toBe(0.0); }); it("should return 0.0 when second memory has empty keywords", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ keywords: ["machine", "learning"], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ keywords: [], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: memory1.metadata.keywords, keywords2: memory2.metadata.keywords, }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity(memory1.id, memory2.id); // Assert expect(similarity).toBe(0.0); }); it("should return 0.0 when both memories have empty keywords", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ keywords: [], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ keywords: [], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: memory1.metadata.keywords, keywords2: memory2.metadata.keywords, }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity(memory1.id, memory2.id); // Assert expect(similarity).toBe(0.0); }); it("should handle single keyword match correctly", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ keywords: ["machine"], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ keywords: ["machine"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: memory1.metadata.keywords, keywords2: memory2.metadata.keywords, }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity(memory1.id, memory2.id); // Assert expect(similarity).toBe(1.0); }); it("should be case-insensitive", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ keywords: ["Machine", "Learning", "ALGORITHMS"], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ keywords: ["machine", "learning", "algorithms"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: memory1.metadata.keywords, keywords2: memory2.metadata.keywords, }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity(memory1.id, memory2.id); // Assert expect(similarity).toBe(1.0); }); it("should handle realistic keyword sets (3-10 keywords)", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ keywords: [ "artificial", "intelligence", "machine", "learning", "neural", "networks", "deep", ], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ keywords: ["machine", "learning", "algorithms", "data", "science", "statistics"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: memory1.metadata.keywords, keywords2: memory2.metadata.keywords, }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity(memory1.id, memory2.id); // Assert // Intersection: {machine, learning} = 2 // Union: {artificial, intelligence, machine, learning, neural, networks, deep, algorithms, data, science, statistics} = 11 // Jaccard = 2/11 = 0.181... expect(similarity).toBeCloseTo(0.182, 2); }); it("should return 0.0 when query returns no rows", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [], }); // Act const similarity = await finder.calculateKeywordSimilarity("mem1", "mem2"); // Assert expect(similarity).toBe(0.0); }); it("should handle null keywords gracefully", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { keywords1: null, keywords2: ["machine", "learning"], }, ], }); // Act const similarity = await finder.calculateKeywordSimilarity("mem1", "mem2"); // Assert expect(similarity).toBe(0.0); }); it("should release connection even if query fails", async () => { // Arrange mockClient.query.mockRejectedValueOnce(new Error("Database error")); // Act & Assert await expect(finder.calculateKeywordSimilarity("mem1", "mem2")).rejects.toThrow(); expect(mockDb.releaseConnection).toHaveBeenCalledOnce(); }); }); describe("calculateTagSimilarity", () => { it("should return 1.0 for identical tags", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ tags: ["ai", "ml", "research"], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ tags: ["ai", "ml", "research"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { tags1: memory1.metadata.tags, tags2: memory2.metadata.tags, }, ], }); // Act const similarity = await finder.calculateTagSimilarity(memory1.id, memory2.id); // Assert expect(similarity).toBe(1.0); }); it("should return 0.0 for no tag overlap", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ tags: ["ai", "ml", "research"], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ tags: ["cooking", "recipes", "food"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { tags1: memory1.metadata.tags, tags2: memory2.metadata.tags, }, ], }); // Act const similarity = await finder.calculateTagSimilarity(memory1.id, memory2.id); // Assert expect(similarity).toBe(0.0); }); it("should calculate correct Jaccard similarity for partial tag overlap", async () => { // Arrange const memory1 = createTestMemory({ id: "mem1", metadata: createTestMemoryMetadata({ tags: ["ai", "ml", "neural", "deep"], }), }); const memory2 = createTestMemory({ id: "mem2", metadata: createTestMemoryMetadata({ tags: ["ai", "ml", "data", "science"], }), }); mockClient.query.mockResolvedValueOnce({ rows: [ { tags1: memory1.metadata.tags, tags2: memory2.metadata.tags, }, ], }); // Act const similarity = await finder.calculateTagSimilarity(memory1.id, memory2.id); // Assert // Intersection: {ai, ml} = 2 // Union: {ai, ml, neural, deep, data, science} = 6 // Jaccard = 2/6 = 0.333... expect(similarity).toBeCloseTo(0.333, 2); }); it("should return 0.0 when first memory has empty tags", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { tags1: [], tags2: ["ai", "ml"], }, ], }); // Act const similarity = await finder.calculateTagSimilarity("mem1", "mem2"); // Assert expect(similarity).toBe(0.0); }); it("should return 0.0 when both memories have empty tags", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { tags1: [], tags2: [], }, ], }); // Act const similarity = await finder.calculateTagSimilarity("mem1", "mem2"); // Assert expect(similarity).toBe(0.0); }); it("should be case-insensitive for tags", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { tags1: ["AI", "ML", "Research"], tags2: ["ai", "ml", "research"], }, ], }); // Act const similarity = await finder.calculateTagSimilarity("mem1", "mem2"); // Assert expect(similarity).toBe(1.0); }); it("should handle null tags gracefully", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { tags1: null, tags2: ["ai", "ml"], }, ], }); // Act const similarity = await finder.calculateTagSimilarity("mem1", "mem2"); // Assert expect(similarity).toBe(0.0); }); }); describe("calculateContentSimilarity", () => { it("should return 1.0 for identical embeddings", async () => { // Arrange const embedding = [0.5, 0.5, 0.5, 0.5]; mockClient.query.mockResolvedValueOnce({ rows: [ { embedding1: embedding, embedding2: embedding, }, ], }); // Act const similarity = await finder.calculateContentSimilarity("mem1", "mem2", "semantic"); // Assert expect(similarity).toBe(1.0); }); it("should return 0.0 for orthogonal embeddings", async () => { // Arrange const embedding1 = [1.0, 0.0, 0.0, 0.0]; const embedding2 = [0.0, 1.0, 0.0, 0.0]; mockClient.query.mockResolvedValueOnce({ rows: [ { embedding1, embedding2, }, ], }); // Act const similarity = await finder.calculateContentSimilarity("mem1", "mem2", "semantic"); // Assert expect(similarity).toBe(0.0); }); it("should calculate correct cosine similarity for partial overlap", async () => { // Arrange const embedding1 = [1.0, 0.0, 0.0, 0.0]; const embedding2 = [0.5, 0.5, 0.0, 0.0]; mockClient.query.mockResolvedValueOnce({ rows: [ { embedding1, embedding2, }, ], }); // Act const similarity = await finder.calculateContentSimilarity("mem1", "mem2", "semantic"); // Assert // Cosine similarity = dot(v1, v2) / (||v1|| * ||v2||) // dot = 1.0 * 0.5 = 0.5 // ||v1|| = 1.0, ||v2|| = sqrt(0.25 + 0.25) = 0.707... // cosine = 0.5 / (1.0 * 0.707) = 0.707... expect(similarity).toBeCloseTo(0.707, 2); }); it("should return 0.0 when first embedding is missing", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { embedding1: null, embedding2: [0.5, 0.5, 0.5, 0.5], }, ], }); // Act const similarity = await finder.calculateContentSimilarity("mem1", "mem2", "semantic"); // Assert expect(similarity).toBe(0.0); }); it("should return 0.0 when second embedding is missing", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { embedding1: [0.5, 0.5, 0.5, 0.5], embedding2: null, }, ], }); // Act const similarity = await finder.calculateContentSimilarity("mem1", "mem2", "semantic"); // Assert expect(similarity).toBe(0.0); }); it("should return 0.0 when both embeddings are missing", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { embedding1: null, embedding2: null, }, ], }); // Act const similarity = await finder.calculateContentSimilarity("mem1", "mem2", "semantic"); // Assert expect(similarity).toBe(0.0); }); it("should handle high-dimensional embeddings (1536 dimensions)", async () => { // Arrange const embedding1 = Array(1536).fill(0.5); const embedding2 = Array(1536).fill(0.5); mockClient.query.mockResolvedValueOnce({ rows: [ { embedding1, embedding2, }, ], }); // Act const similarity = await finder.calculateContentSimilarity("mem1", "mem2", "semantic"); // Assert expect(similarity).toBeCloseTo(1.0, 5); }); it("should work with different memory sectors", async () => { // Arrange const embedding = [0.5, 0.5, 0.5, 0.5]; mockClient.query.mockResolvedValueOnce({ rows: [ { embedding1: embedding, embedding2: embedding, }, ], }); // Act const similarity = await finder.calculateContentSimilarity("mem1", "mem2", "episodic"); // Assert expect(similarity).toBe(1.0); }); it("should return 0.0 when query returns no rows", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [], }); // Act const similarity = await finder.calculateContentSimilarity("mem1", "mem2", "semantic"); // Assert expect(similarity).toBe(0.0); }); }); describe("calculateCategoryMatch", () => { it("should return 1.0 for matching categories", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { category1: "technical", category2: "technical", }, ], }); // Act const match = await finder.calculateCategoryMatch("mem1", "mem2"); // Assert expect(match).toBe(1.0); }); it("should return 0.0 for different categories", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { category1: "technical", category2: "personal", }, ], }); // Act const match = await finder.calculateCategoryMatch("mem1", "mem2"); // Assert expect(match).toBe(0.0); }); it("should be case-insensitive", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { category1: "Technical", category2: "technical", }, ], }); // Act const match = await finder.calculateCategoryMatch("mem1", "mem2"); // Assert expect(match).toBe(1.0); }); it("should return 0.0 when first category is null", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { category1: null, category2: "technical", }, ], }); // Act const match = await finder.calculateCategoryMatch("mem1", "mem2"); // Assert expect(match).toBe(0.0); }); it("should return 0.0 when second category is null", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { category1: "technical", category2: null, }, ], }); // Act const match = await finder.calculateCategoryMatch("mem1", "mem2"); // Assert expect(match).toBe(0.0); }); it("should return 0.0 when both categories are null", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { category1: null, category2: null, }, ], }); // Act const match = await finder.calculateCategoryMatch("mem1", "mem2"); // Assert expect(match).toBe(0.0); }); it("should handle empty string categories", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { category1: "", category2: "", }, ], }); // Act const match = await finder.calculateCategoryMatch("mem1", "mem2"); // Assert expect(match).toBe(0.0); }); it("should return 0.0 when query returns no rows", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [], }); // Act const match = await finder.calculateCategoryMatch("mem1", "mem2"); // Assert expect(match).toBe(0.0); }); }); describe("calculateTemporalProximity", () => { it("should return 1.0 for identical timestamps", async () => { // Arrange const timestamp = new Date("2024-01-01T00:00:00Z"); mockClient.query.mockResolvedValueOnce({ rows: [ { created_at1: timestamp, created_at2: timestamp, }, ], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert expect(proximity).toBe(1.0); }); it("should calculate exponential decay for time difference", async () => { // Arrange const time1 = new Date("2024-01-01T00:00:00Z"); const time2 = new Date("2024-01-02T00:00:00Z"); // 1 day = 86400 seconds mockClient.query.mockResolvedValueOnce({ rows: [ { created_at1: time1, created_at2: time2, }, ], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert // exp(-0.001 × 86400) = exp(-86.4) ≈ 0 expect(proximity).toBeCloseTo(0, 5); }); it("should use lambda = 0.001 for decay rate", async () => { // Arrange const time1 = new Date("2024-01-01T00:00:00Z"); const time2 = new Date("2024-01-01T00:01:00Z"); // 60 seconds mockClient.query.mockResolvedValueOnce({ rows: [ { created_at1: time1, created_at2: time2, }, ], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert // exp(-0.001 × 60) = exp(-0.06) ≈ 0.9418 expect(proximity).toBeCloseTo(0.9418, 3); }); it("should handle time difference in either direction", async () => { // Arrange const time1 = new Date("2024-01-02T00:00:00Z"); const time2 = new Date("2024-01-01T00:00:00Z"); // Earlier mockClient.query.mockResolvedValueOnce({ rows: [ { created_at1: time1, created_at2: time2, }, ], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert // Should use absolute difference expect(proximity).toBeCloseTo(0, 5); }); it("should return high proximity for recent memories (1 hour)", async () => { // Arrange const time1 = new Date("2024-01-01T00:00:00Z"); const time2 = new Date("2024-01-01T01:00:00Z"); // 3600 seconds mockClient.query.mockResolvedValueOnce({ rows: [ { created_at1: time1, created_at2: time2, }, ], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert // exp(-0.001 × 3600) = exp(-3.6) ≈ 0.0273 expect(proximity).toBeCloseTo(0.0273, 3); }); it("should return low proximity for distant memories (1 year)", async () => { // Arrange const time1 = new Date("2024-01-01T00:00:00Z"); const time2 = new Date("2025-01-01T00:00:00Z"); // 365 days mockClient.query.mockResolvedValueOnce({ rows: [ { created_at1: time1, created_at2: time2, }, ], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert // exp(-0.001 × 31536000) ≈ 0 (very small) expect(proximity).toBeCloseTo(0, 10); }); it("should return 0.0 when first timestamp is null", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { created_at1: null, created_at2: new Date("2024-01-01T00:00:00Z"), }, ], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert expect(proximity).toBe(0.0); }); it("should return 0.0 when second timestamp is null", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { created_at1: new Date("2024-01-01T00:00:00Z"), created_at2: null, }, ], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert expect(proximity).toBe(0.0); }); it("should return 0.0 when query returns no rows", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert expect(proximity).toBe(0.0); }); it("should handle millisecond precision", async () => { // Arrange const time1 = new Date("2024-01-01T00:00:00.000Z"); const time2 = new Date("2024-01-01T00:00:00.500Z"); // 500ms mockClient.query.mockResolvedValueOnce({ rows: [ { created_at1: time1, created_at2: time2, }, ], }); // Act const proximity = await finder.calculateTemporalProximity("mem1", "mem2"); // Assert // exp(-0.001 × 0.5) = exp(-0.0005) ≈ 0.9995 expect(proximity).toBeCloseTo(0.9995, 4); }); }); describe("calculateCompositeSimilarity", () => { it("should calculate composite similarity with all factors", async () => { // Arrange mockClient.query // validateMemoryIds .mockResolvedValueOnce({ rows: [{ count: "2" }], }) // calculateKeywordSimilarity .mockResolvedValueOnce({ rows: [{ keywords1: ["ai", "ml"], keywords2: ["ai", "ml"] }], }) // calculateTagSimilarity .mockResolvedValueOnce({ rows: [{ tags1: ["tech"], tags2: ["tech"] }], }) // calculateContentSimilarity .mockResolvedValueOnce({ rows: [{ embedding1: [1, 0, 0], embedding2: [1, 0, 0] }], }) // calculateCategoryMatch .mockResolvedValueOnce({ rows: [{ category1: "technical", category2: "technical" }], }) // calculateTemporalProximity .mockResolvedValueOnce({ rows: [ { created_at1: new Date("2024-01-01T00:00:00Z"), created_at2: new Date("2024-01-01T00:00:00Z"), }, ], }); // Act const similarity = await finder.calculateCompositeSimilarity("mem1", "mem2"); // Assert // All factors = 1.0 // Composite = 0.3×1.0 + 0.25×1.0 + 0.2×1.0 + 0.15×1.0 + 0.1×1.0 = 1.0 expect(similarity).toBe(1.0); }); it("should apply correct weights to factors", async () => { // Arrange mockClient.query // validateMemoryIds .mockResolvedValueOnce({ rows: [{ count: "2" }], }) // calculateKeywordSimilarity = 0.8 .mockResolvedValueOnce({ rows: [{ keywords1: ["ai", "ml", "data"], keywords2: ["ai", "ml", "science"] }], }) // calculateTagSimilarity = 0.5 .mockResolvedValueOnce({ rows: [{ tags1: ["tech", "ai"], tags2: ["tech", "ml"] }], }) // calculateContentSimilarity = 0.7 .mockResolvedValueOnce({ rows: [{ embedding1: [0.7, 0.3], embedding2: [0.8, 0.2] }], }) // calculateCategoryMatch = 1.0 .mockResolvedValueOnce({ rows: [{ category1: "technical", category2: "technical" }], }) // calculateTemporalProximity = 0.9 .mockResolvedValueOnce({ rows: [ { created_at1: new Date("2024-01-01T00:00:00Z"), created_at2: new Date("2024-01-01T00:01:00Z"), }, ], }); // Act const similarity = await finder.calculateCompositeSimilarity("mem1", "mem2"); // Assert // Weights: keyword=0.3, tag=0.25, content=0.2, category=0.15, temporal=0.1 // Composite = 0.3×keyword + 0.25×tag + 0.2×content + 0.15×category + 0.1×temporal expect(similarity).toBeGreaterThan(0); expect(similarity).toBeLessThanOrEqual(1.0); }); it("should cache results when caching is enabled", async () => { // Arrange - Create a separate finder with caching enabled const cachedFinder = new SimilarMemoryFinder(mockDb, { enableCache: true }); mockClient.query // validateMemoryIds .mockResolvedValueOnce({ rows: [{ count: "2" }], }) // calculateKeywordSimilarity .mockResolvedValueOnce({ rows: [{ keywords1: ["ai"], keywords2: ["ai"] }], }) // calculateTagSimilarity .mockResolvedValueOnce({ rows: [{ tags1: ["tech"], tags2: ["tech"] }], }) // calculateContentSimilarity .mockResolvedValueOnce({ rows: [{ embedding1: [1, 0], embedding2: [1, 0] }], }) // calculateCategoryMatch .mockResolvedValueOnce({ rows: [{ category1: "tech", category2: "tech" }], }) // calculateTemporalProximity .mockResolvedValueOnce({ rows: [ { created_at1: new Date("2024-01-01T00:00:00Z"), created_at2: new Date("2024-01-01T00:00:00Z"), }, ], }); // Act - First call const similarity1 = await cachedFinder.calculateCompositeSimilarity("mem1", "mem2"); // Act - Second call (should use cache) const similarity2 = await cachedFinder.calculateCompositeSimilarity("mem1", "mem2"); // Assert expect(similarity1).toBe(similarity2); // Should only call validateMemoryIds once (first time) expect(mockClient.query).toHaveBeenCalledTimes(6); // Not 12 }); it("should throw SimilarityCalculationError when memory validation fails", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [{ count: "1" }], // Only one memory found }); // Act & Assert await expect(finder.calculateCompositeSimilarity("mem1", "invalid")).rejects.toThrow( "Failed to calculate composite similarity" ); }); it("should throw SimilarityCalculationError when calculation fails", async () => { // Arrange mockClient.query // validateMemoryIds succeeds .mockResolvedValueOnce({ rows: [{ count: "2" }], }) // calculateKeywordSimilarity fails .mockRejectedValueOnce(new Error("Database error")); // Act & Assert await expect(finder.calculateCompositeSimilarity("mem1", "mem2")).rejects.toThrow( "Failed to calculate composite similarity" ); }); }); describe("getSimilarityBreakdown", () => { it("should return detailed breakdown of all factors", async () => { // Arrange mockClient.query // calculateKeywordSimilarity .mockResolvedValueOnce({ rows: [{ keywords1: ["ai", "ml"], keywords2: ["ai", "ml"] }], }) // calculateTagSimilarity .mockResolvedValueOnce({ rows: [{ tags1: ["tech"], tags2: ["tech"] }], }) // calculateContentSimilarity .mockResolvedValueOnce({ rows: [{ embedding1: [1, 0, 0], embedding2: [1, 0, 0] }], }) // calculateCategoryMatch .mockResolvedValueOnce({ rows: [{ category1: "technical", category2: "technical" }], }) // calculateTemporalProximity .mockResolvedValueOnce({ rows: [ { created_at1: new Date("2024-01-01T00:00:00Z"), created_at2: new Date("2024-01-01T00:00:00Z"), }, ], }); // Act const breakdown = await finder.getSimilarityBreakdown("mem1", "mem2"); // Assert expect(breakdown).toHaveProperty("keywordSimilarity"); expect(breakdown).toHaveProperty("tagSimilarity"); expect(breakdown).toHaveProperty("contentSimilarity"); expect(breakdown).toHaveProperty("categorySimilarity"); expect(breakdown).toHaveProperty("temporalProximity"); expect(breakdown).toHaveProperty("keywordWeight"); expect(breakdown).toHaveProperty("tagWeight"); expect(breakdown).toHaveProperty("contentWeight"); expect(breakdown).toHaveProperty("categoryWeight"); expect(breakdown).toHaveProperty("temporalWeight"); expect(breakdown).toHaveProperty("compositeSimilarity"); }); it("should include correct weight values", async () => { // Arrange mockClient.query .mockResolvedValueOnce({ rows: [{ keywords1: ["ai"], keywords2: ["ai"] }], }) .mockResolvedValueOnce({ rows: [{ tags1: ["tech"], tags2: ["tech"] }], }) .mockResolvedValueOnce({ rows: [{ embedding1: [1, 0], embedding2: [1, 0] }], }) .mockResolvedValueOnce({ rows: [{ category1: "tech", category2: "tech" }], }) .mockResolvedValueOnce({ rows: [ { created_at1: new Date("2024-01-01T00:00:00Z"), created_at2: new Date("2024-01-01T00:00:00Z"), }, ], }); // Act const breakdown = await finder.getSimilarityBreakdown("mem1", "mem2"); // Assert expect(breakdown.keywordWeight).toBe(0.3); expect(breakdown.tagWeight).toBe(0.25); expect(breakdown.contentWeight).toBe(0.2); expect(breakdown.categoryWeight).toBe(0.15); expect(breakdown.temporalWeight).toBe(0.1); }); it("should calculate composite similarity correctly", async () => { // Arrange mockClient.query .mockResolvedValueOnce({ rows: [{ keywords1: ["ai"], keywords2: ["ai"] }], }) .mockResolvedValueOnce({ rows: [{ tags1: ["tech"], tags2: ["tech"] }], }) .mockResolvedValueOnce({ rows: [{ embedding1: [1, 0], embedding2: [1, 0] }], }) .mockResolvedValueOnce({ rows: [{ category1: "tech", category2: "tech" }], }) .mockResolvedValueOnce({ rows: [ { created_at1: new Date("2024-01-01T00:00:00Z"), created_at2: new Date("2024-01-01T00:00:00Z"), }, ], }); // Act const breakdown = await finder.getSimilarityBreakdown("mem1", "mem2"); // Assert const expectedComposite = breakdown.keywordSimilarity * 0.3 + breakdown.tagSimilarity * 0.25 + breakdown.contentSimilarity * 0.2 + breakdown.categorySimilarity * 0.15 + breakdown.temporalProximity * 0.1; expect(breakdown.compositeSimilarity).toBeCloseTo(expectedComposite, 5); }); }); describe("findSimilar", () => { it("should find similar memories and rank by similarity", async () => { // Arrange const timestamp = new Date("2024-01-01"); // Use a more flexible mocking approach for parallel execution mockClient.query.mockImplementation(async (sql: string) => { // Get candidates query if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Content 2", created_at: timestamp, salience: 0.9, strength: 0.9, }, ], }; } // Validation query if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } // Keywords query if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai"], keywords2: ["ai"] }] }; } // Tags query if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } // Embeddings query if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } // Category query if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } // Timestamp query if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1"); // Assert expect(results).toHaveLength(1); expect(results[0].memoryId).toBe("mem2"); expect(results[0].similarity.overall).toBeGreaterThan(0); }); it("should find similar memories with threshold 0.5", async () => { // Arrange const timestamp = new Date("2024-01-01"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Content 2", created_at: timestamp, salience: 0.8, strength: 0.9, }, ], }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai", "ml"], keywords2: ["ai", "ml"] }] }; } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { minSimilarity: 0.5 }); // Assert expect(results).toHaveLength(1); expect(results[0].similarity.overall).toBeGreaterThanOrEqual(0.5); }); it("should find similar memories with threshold 0.7", async () => { // Arrange const timestamp = new Date("2024-01-01"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Content 2", created_at: timestamp, salience: 0.8, strength: 0.9, }, ], }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai", "ml"], keywords2: ["ai", "ml"] }] }; } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { minSimilarity: 0.7 }); // Assert expect(results).toHaveLength(1); expect(results[0].similarity.overall).toBeGreaterThanOrEqual(0.7); }); it("should find similar memories with threshold 0.9", async () => { // Arrange const timestamp = new Date("2024-01-01"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Content 2", created_at: timestamp, salience: 0.8, strength: 0.9, }, ], }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai", "ml"], keywords2: ["ai", "ml"] }] }; } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { minSimilarity: 0.9 }); // Assert expect(results).toHaveLength(1); expect(results[0].similarity.overall).toBeGreaterThanOrEqual(0.9); }); it("should rank memories by composite similarity score in descending order", async () => { // Arrange const timestamp = new Date("2024-01-01"); let callCount = 0; mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "High similarity", created_at: timestamp, salience: 0.9, strength: 0.9, }, { id: "mem3", content: "Medium similarity", created_at: timestamp, salience: 0.7, strength: 0.8, }, { id: "mem4", content: "Low similarity", created_at: timestamp, salience: 0.5, strength: 0.6, }, ], }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } // Return different similarity scores for each memory if (sql.includes("keywords")) { callCount++; if (callCount === 1) { // mem2: high similarity return { rows: [{ keywords1: ["ai", "ml"], keywords2: ["ai", "ml"] }] }; } else if (callCount === 2) { // mem3: medium similarity return { rows: [{ keywords1: ["ai", "ml"], keywords2: ["ai", "data"] }] }; } else { // mem4: low similarity return { rows: [{ keywords1: ["ai", "ml"], keywords2: ["cooking"] }] }; } } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { minSimilarity: 0.0 }); // Assert expect(results.length).toBeGreaterThan(1); // Verify descending order for (let i = 0; i < results.length - 1; i++) { expect(results[i].similarity.overall).toBeGreaterThanOrEqual( results[i + 1].similarity.overall ); } }); it("should return top 5 results when limit is 5", async () => { // Arrange const timestamp = new Date("2024-01-01"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: Array.from({ length: 10 }, (_, i) => ({ id: `mem${i + 2}`, content: `Content ${i + 2}`, created_at: timestamp, salience: 0.8, strength: 0.9, })), }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai"], keywords2: ["ai"] }] }; } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { limit: 5 }); // Assert expect(results).toHaveLength(5); }); it("should return top 10 results when limit is 10", async () => { // Arrange const timestamp = new Date("2024-01-01"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: Array.from({ length: 20 }, (_, i) => ({ id: `mem${i + 2}`, content: `Content ${i + 2}`, created_at: timestamp, salience: 0.8, strength: 0.9, })), }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai"], keywords2: ["ai"] }] }; } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { limit: 10 }); // Assert expect(results).toHaveLength(10); }); it("should return empty array when no memories meet threshold", async () => { // Arrange const timestamp = new Date("2024-01-01"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Content 2", created_at: timestamp, salience: 0.8, strength: 0.9, }, ], }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } // Return very low similarity scores if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai", "ml"], keywords2: ["cooking", "recipes"] }] }; } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["food"] }] }; } if (sql.includes("embedding")) { return { rows: [] }; // No embeddings } if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "personal" }] }; } if (sql.includes("created_at")) { return { rows: [ { created_at1: new Date("2024-01-01"), created_at2: new Date("2025-01-01"), }, ], }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { minSimilarity: 0.8 }); // Assert expect(results).toHaveLength(0); }); it("should handle large memory set (100+ memories) efficiently", async () => { // Arrange const timestamp = new Date("2024-01-01"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { // Return 150 candidate memories return { rows: Array.from({ length: 150 }, (_, i) => ({ id: `mem${i + 2}`, content: `Content ${i + 2}`, created_at: timestamp, salience: 0.8, strength: 0.9, })), }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai"], keywords2: ["ai"] }] }; } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const startTime = Date.now(); const results = await finder.findSimilar("mem1", { limit: 10 }); const duration = Date.now() - startTime; // Assert expect(results).toHaveLength(10); // Performance target: should complete in reasonable time (<1000ms for 150 memories) expect(duration).toBeLessThan(1000); }); it("should exclude query memory from results", async () => { // Arrange mockClient.query // Get candidates .mockResolvedValueOnce({ rows: [ { id: "mem2", content: "Content 2", created_at: new Date("2024-01-01"), salience: 0.8, strength: 0.9, }, ], }) // validateMemoryIds .mockResolvedValueOnce({ rows: [{ count: "2" }] }) // Calculate similarity for mem2 (all factors) .mockResolvedValueOnce({ rows: [{ keywords1: ["ai"], keywords2: ["ai"] }] }) .mockResolvedValueOnce({ rows: [{ tags1: ["tech"], tags2: ["tech"] }] }) .mockResolvedValueOnce({ rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }) .mockResolvedValueOnce({ rows: [{ category1: "tech", category2: "tech" }] }) .mockResolvedValueOnce({ rows: [{ created_at1: new Date(), created_at2: new Date() }], }); // Act const results = await finder.findSimilar("mem1"); // Assert expect(mockClient.query).toHaveBeenCalledWith(expect.stringContaining("WHERE m.id != $1"), [ "mem1", ]); expect(results).toHaveLength(1); expect(results[0].memoryId).toBe("mem2"); }); it("should filter by minimum similarity threshold", async () => { // Arrange mockClient.query // Get candidates .mockResolvedValueOnce({ rows: [ { id: "mem2", content: "Content 2", created_at: new Date("2024-01-01"), salience: 0.8, strength: 0.9, }, ], }) // validateMemoryIds .mockResolvedValueOnce({ rows: [{ count: "2" }] }) // Calculate low similarity (below threshold) .mockResolvedValueOnce({ rows: [{ keywords1: [], keywords2: ["ai"] }] }) .mockResolvedValueOnce({ rows: [{ tags1: [], tags2: ["tech"] }] }) .mockResolvedValueOnce({ rows: [] }) // No embeddings .mockResolvedValueOnce({ rows: [{ category1: "tech", category2: "science" }] }) .mockResolvedValueOnce({ rows: [ { created_at1: new Date("2024-01-01"), created_at2: new Date("2025-01-01"), }, ], }); // Act const results = await finder.findSimilar("mem1", { minSimilarity: 0.5 }); // Assert expect(results).toHaveLength(0); // Filtered out due to low similarity }); it("should apply limit to results", async () => { // Arrange const timestamp = new Date(); // Use flexible mocking for parallel execution mockClient.query.mockImplementation(async (sql: string) => { // Get candidates query if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "C2", created_at: timestamp, salience: 0.8, strength: 0.9 }, { id: "mem3", content: "C3", created_at: timestamp, salience: 0.7, strength: 0.8 }, { id: "mem4", content: "C4", created_at: timestamp, salience: 0.6, strength: 0.7 }, ], }; } // Validation query if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } // Keywords query if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai"], keywords2: ["ai"] }] }; } // Tags query if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } // Embeddings query if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } // Category query if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } // Timestamp query if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { limit: 2 }); // Assert expect(results).toHaveLength(2); }); it("should include explanation when requested", async () => { // Arrange mockClient.query // Get candidates .mockResolvedValueOnce({ rows: [ { id: "mem2", content: "Content 2", created_at: new Date("2024-01-01"), salience: 0.8, strength: 0.9, }, ], }) // validateMemoryIds .mockResolvedValueOnce({ rows: [{ count: "2" }] }) // Calculate similarity .mockResolvedValueOnce({ rows: [{ keywords1: ["ai"], keywords2: ["ai"] }] }) .mockResolvedValueOnce({ rows: [{ tags1: ["tech"], tags2: ["tech"] }] }) .mockResolvedValueOnce({ rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }) .mockResolvedValueOnce({ rows: [{ category1: "tech", category2: "tech" }] }) .mockResolvedValueOnce({ rows: [{ created_at1: new Date(), created_at2: new Date() }], }) // getSimilarityBreakdown .mockResolvedValueOnce({ rows: [{ keywords1: ["ai"], keywords2: ["ai"] }] }) .mockResolvedValueOnce({ rows: [{ tags1: ["tech"], tags2: ["tech"] }] }) .mockResolvedValueOnce({ rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }) .mockResolvedValueOnce({ rows: [{ category1: "tech", category2: "tech" }] }) .mockResolvedValueOnce({ rows: [{ created_at1: new Date(), created_at2: new Date() }], }); // Act const results = await finder.findSimilar("mem1", { includeExplanation: true }); // Assert expect(results).toHaveLength(1); expect(results[0].explanation).toBeTruthy(); expect(results[0].explanation).toContain("Overall similarity:"); }); it("should return empty array when no candidates found", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [], }); // Act const results = await finder.findSimilar("mem1"); // Assert expect(results).toHaveLength(0); }); it("should handle database errors gracefully", async () => { // Arrange mockClient.query.mockRejectedValueOnce(new Error("Database error")); // Act & Assert await expect(finder.findSimilar("mem1")).rejects.toThrow(); expect(mockDb.releaseConnection).toHaveBeenCalled(); }); }); describe("calculateCategorySimilarity (alias)", () => { it("should be an alias for calculateCategoryMatch", async () => { // Arrange mockClient.query.mockResolvedValueOnce({ rows: [ { category1: "technical", category2: "technical", }, ], }); // Act const similarity = await finder.calculateCategorySimilarity("mem1", "mem2"); // Assert expect(similarity).toBe(1.0); }); }); describe("Similarity Explanation Generation", () => { it("should generate explanation for high similarity (>0.8)", async () => { // Arrange - High similarity across all factors const timestamp = new Date("2024-01-01T00:00:00Z"); mockClient.query.mockImplementation(async (sql: string) => { // Get candidates query if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Very similar content", created_at: timestamp, salience: 0.9, strength: 0.9, }, ], }; } // Validation query if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } // High keyword similarity if (sql.includes("keywords")) { return { rows: [ { keywords1: ["ai", "ml", "neural", "networks"], keywords2: ["ai", "ml", "neural", "networks"], }, ], }; } // High tag similarity if (sql.includes("tags")) { return { rows: [{ tags1: ["tech", "ai", "research"], tags2: ["tech", "ai", "research"] }], }; } // High content similarity if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0, 0, 0], embedding2: [1, 0, 0, 0] }] }; } // Category match if (sql.includes("category")) { return { rows: [{ category1: "technical", category2: "technical" }] }; } // Same timestamp (high temporal proximity) if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { includeExplanation: true }); // Assert expect(results).toHaveLength(1); expect(results[0].explanation).toBeTruthy(); expect(results[0].explanation).toContain("Overall similarity:"); expect(results[0].explanation).toContain("keyword:"); expect(results[0].explanation).toContain("tag:"); expect(results[0].explanation).toContain("content:"); expect(results[0].explanation).toContain("category:"); expect(results[0].explanation).toContain("temporal:"); // Verify high similarity is reflected in explanation const overallMatch = results[0].explanation.match(/Overall similarity: ([\d.]+)%/); expect(overallMatch).toBeTruthy(); if (overallMatch) { const overallSimilarity = parseFloat(overallMatch[1]); expect(overallSimilarity).toBeGreaterThan(80); } }); it("should generate explanation for low similarity (<0.3)", async () => { // Arrange - Low similarity across all factors const timestamp1 = new Date("2024-01-01T00:00:00Z"); const timestamp2 = new Date("2025-01-01T00:00:00Z"); // 1 year apart mockClient.query.mockImplementation(async (sql: string) => { // Get candidates query if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Very different content", created_at: timestamp2, salience: 0.5, strength: 0.6, }, ], }; } // Validation query if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } // Low keyword similarity if (sql.includes("keywords")) { return { rows: [ { keywords1: ["ai", "ml", "neural"], keywords2: ["cooking", "recipes", "food"], }, ], }; } // Low tag similarity if (sql.includes("tags")) { return { rows: [{ tags1: ["tech", "ai"], tags2: ["lifestyle", "food"] }] }; } // No content similarity (no embeddings) if (sql.includes("embedding")) { return { rows: [] }; } // Different categories if (sql.includes("category")) { return { rows: [{ category1: "technical", category2: "personal" }] }; } // Different timestamps (low temporal proximity) if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp1, created_at2: timestamp2 }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { includeExplanation: true, minSimilarity: 0.0, // Allow low similarity results }); // Assert expect(results).toHaveLength(1); expect(results[0].explanation).toBeTruthy(); expect(results[0].explanation).toContain("Overall similarity:"); // Verify low similarity is reflected in explanation const overallMatch = results[0].explanation.match(/Overall similarity: ([\d.]+)%/); expect(overallMatch).toBeTruthy(); if (overallMatch) { const overallSimilarity = parseFloat(overallMatch[1]); expect(overallSimilarity).toBeLessThan(30); } }); it("should include all 5 factors with scores in explanation", async () => { // Arrange const timestamp = new Date("2024-01-01T00:00:00Z"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Content", created_at: timestamp, salience: 0.8, strength: 0.9, }, ], }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai", "ml"], keywords2: ["ai", "data"] }] }; } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech", "science"] }] }; } if (sql.includes("embedding")) { return { rows: [{ embedding1: [0.7, 0.3], embedding2: [0.8, 0.2] }] }; } if (sql.includes("category")) { return { rows: [{ category1: "technical", category2: "technical" }] }; } if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { includeExplanation: true }); // Assert expect(results).toHaveLength(1); const explanation = results[0].explanation; // Verify all 5 factors are present expect(explanation).toContain("keyword:"); expect(explanation).toContain("tag:"); expect(explanation).toContain("content:"); expect(explanation).toContain("category:"); expect(explanation).toContain("temporal:"); // Verify each factor has a percentage value expect(explanation).toMatch(/keyword: \d+\.\d+%/); expect(explanation).toMatch(/tag: \d+\.\d+%/); expect(explanation).toMatch(/content: \d+\.\d+%/); expect(explanation).toMatch(/category: \d+\.\d+%/); expect(explanation).toMatch(/temporal: \d+\.\d+%/); }); it("should format explanation as human-readable text", async () => { // Arrange const timestamp = new Date("2024-01-01T00:00:00Z"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Content", created_at: timestamp, salience: 0.8, strength: 0.9, }, ], }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai"], keywords2: ["ai"] }] }; } if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["tech"] }] }; } if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [1, 0] }] }; } if (sql.includes("category")) { return { rows: [{ category1: "tech", category2: "tech" }] }; } if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { includeExplanation: true }); // Assert expect(results).toHaveLength(1); const explanation = results[0].explanation; // Verify format: "Overall similarity: X% (factor1: Y%, factor2: Z%, ...)" expect(explanation).toMatch(/^Overall similarity: \d+\.\d+% \(/); expect(explanation).toContain(", "); expect(explanation).toContain(")"); // Verify no technical jargon or confusing terms expect(explanation).not.toContain("Jaccard"); expect(explanation).not.toContain("cosine"); expect(explanation).not.toContain("exponential"); expect(explanation).not.toContain("lambda"); }); it("should show contributions from each factor clearly", async () => { // Arrange - Mixed similarity scores const timestamp = new Date("2024-01-01T00:00:00Z"); mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Content", created_at: timestamp, salience: 0.8, strength: 0.9, }, ], }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } // High keyword similarity if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai", "ml"], keywords2: ["ai", "ml"] }] }; } // Medium tag similarity if (sql.includes("tags")) { return { rows: [{ tags1: ["tech", "ai"], tags2: ["tech", "science"] }] }; } // Low content similarity if (sql.includes("embedding")) { return { rows: [{ embedding1: [1, 0], embedding2: [0.5, 0.5] }] }; } // No category match if (sql.includes("category")) { return { rows: [{ category1: "technical", category2: "personal" }] }; } // High temporal proximity if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp, created_at2: timestamp }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { includeExplanation: true }); // Assert expect(results).toHaveLength(1); const explanation = results[0].explanation; // Extract individual factor scores const keywordMatch = explanation.match(/keyword: ([\d.]+)%/); const tagMatch = explanation.match(/tag: ([\d.]+)%/); const contentMatch = explanation.match(/content: ([\d.]+)%/); const categoryMatch = explanation.match(/category: ([\d.]+)%/); const temporalMatch = explanation.match(/temporal: ([\d.]+)%/); expect(keywordMatch).toBeTruthy(); expect(tagMatch).toBeTruthy(); expect(contentMatch).toBeTruthy(); expect(categoryMatch).toBeTruthy(); expect(temporalMatch).toBeTruthy(); // Verify scores are in expected ranges if (keywordMatch) { const keywordScore = parseFloat(keywordMatch[1]); expect(keywordScore).toBeGreaterThan(80); // High } if (categoryMatch) { const categoryScore = parseFloat(categoryMatch[1]); expect(categoryScore).toBe(0); // No match } }); it("should handle zero similarity gracefully in explanation", async () => { // Arrange - All factors at zero const timestamp1 = new Date("2024-01-01T00:00:00Z"); const timestamp2 = new Date("2030-01-01T00:00:00Z"); // Far future mockClient.query.mockImplementation(async (sql: string) => { if (sql.includes("WHERE m.id != $1")) { return { rows: [ { id: "mem2", content: "Content", created_at: timestamp2, salience: 0.5, strength: 0.6, }, ], }; } if (sql.includes("COUNT(*) as count FROM memories WHERE id IN")) { return { rows: [{ count: "2" }] }; } // No keyword overlap if (sql.includes("keywords")) { return { rows: [{ keywords1: ["ai"], keywords2: ["cooking"] }] }; } // No tag overlap if (sql.includes("tags")) { return { rows: [{ tags1: ["tech"], tags2: ["food"] }] }; } // No embeddings if (sql.includes("embedding")) { return { rows: [] }; } // Different categories if (sql.includes("category")) { return { rows: [{ category1: "technical", category2: "personal" }] }; } // Very different timestamps if (sql.includes("created_at")) { return { rows: [{ created_at1: timestamp1, created_at2: timestamp2 }] }; } return { rows: [] }; }); // Act const results = await finder.findSimilar("mem1", { includeExplanation: true, minSimilarity: 0.0, }); // Assert expect(results).toHaveLength(1); const explanation = results[0].explanation; // Verify explanation still includes all factors even at zero expect(explanation).toContain("keyword: 0.0%"); expect(explanation).toContain("tag: 0.0%"); expect(explanation).toContain("content: 0.0%"); expect(explanation).toContain("category: 0.0%"); expect(explanation).toMatch(/temporal: [0-9.]+%/); // Near zero but not exactly // Verify overall similarity is very low const overallMatch = explanation.match(/Overall similarity: ([\d.]+)%/); expect(overallMatch).toBeTruthy(); if (overallMatch) { const overallSimilarity = parseFloat(overallMatch[1]); expect(overallSimilarity).toBeLessThan(5); } }); }); });

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keyurgolani/ThoughtMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server