Skip to main content
Glama
vector-search-engine.test.ts (17.5 kB)
/**
 * VectorSearchEngine unit tests.
 *
 * Exercises pgvector-backed similarity search over sector-specific embeddings
 * against a mocked connection pool, so no real database is involved.
 * Written TDD-style: the tests were authored ahead of the implementation.
 *
 * Requirements: 4.1, 4.2, 4.3, 4.4, 4.5
 */
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { DatabaseConnectionManager } from "../../../database/connection-manager";
import { MemorySector } from "../../../embeddings/types";
import { VectorSearchEngine } from "../../../search/vector-search-engine";

/** Shape of a row the mocked pool hands back for a similarity query. */
interface SimilarityRow {
  memory_id: string;
  sector: string;
  similarity: number;
}

/** Minimal stand-in for the pool: only `query` is mocked. */
interface MockPool {
  query: ReturnType<typeof vi.fn>;
}

/** Builds a 1536-element embedding where every component equals `value`. */
function makeEmbedding(value: number): number[] {
  return new Array(1536).fill(value);
}

/** Builds one similarity-search result row. */
function hit(memoryId: string, sector: string, similarity: number): SimilarityRow {
  return { memory_id: memoryId, sector, similarity };
}

/** Wraps rows in the `{ rows }` envelope the tests feed to the mocked `query`. */
function rowsOf(...rows: SimilarityRow[]): { rows: SimilarityRow[] } {
  return { rows };
}

/** Query result for the "fetch this memory's embedding" lookup. */
function storedEmbedding(embedding: unknown): { rows: Array<{ embedding: unknown }> } {
  return { rows: [{ embedding }] };
}

describe("VectorSearchEngine", () => {
  let engine: VectorSearchEngine;
  let mockPool: MockPool;

  /** Asserts that some `query` call had SQL containing `sqlFragment` and params containing `params`. */
  const expectQueryWith = (sqlFragment: string, params: unknown[]): void => {
    expect(mockPool.query).toHaveBeenCalledWith(
      expect.stringContaining(sqlFragment),
      expect.arrayContaining(params)
    );
  };

  beforeEach(() => {
    // Fresh mocks for every test so call counts and queued results never leak.
    mockPool = { query: vi.fn() };

    const mockDb = {
      pool: mockPool,
      getConnection: vi.fn(),
      releaseConnection: vi.fn(),
    } as unknown as DatabaseConnectionManager;

    engine = new VectorSearchEngine(mockDb);
  });

  describe("searchByEmbedding", () => {
    it("should search by embedding in specific sector", async () => {
      const sector = MemorySector.Semantic;
      const queryEmbedding = makeEmbedding(0.1);
      mockPool.query.mockResolvedValue(
        rowsOf(hit("mem1", "semantic", 0.95), hit("mem2", "semantic", 0.85))
      );

      const results = await engine.searchByEmbedding(queryEmbedding, sector, 10);

      expect(results).toHaveLength(2);
      const [first, second] = results;
      expect(first.memoryId).toBe("mem1");
      expect(first.similarity).toBe(0.95);
      expect(second.memoryId).toBe("mem2");
      expect(second.similarity).toBe(0.85);

      // The SQL must use pgvector's cosine distance operator.
      expectQueryWith("<=>", [JSON.stringify(queryEmbedding), sector, 10]);
    });

    it("should apply minimum similarity threshold", async () => {
      const sector = MemorySector.Episodic;
      const threshold = 0.7;
      const queryEmbedding = makeEmbedding(0.1);
      mockPool.query.mockResolvedValue(rowsOf(hit("mem1", "episodic", 0.85)));

      await engine.searchByEmbedding(queryEmbedding, sector, 10, threshold);

      // The threshold has to show up both in the SQL text and in the bound parameters.
      expectQueryWith("1 - (embedding <=> $1::vector) >= $3", [
        JSON.stringify(queryEmbedding),
        sector,
        threshold,
        10,
      ]);
    });

    it("should respect result limit", async () => {
      const sector = MemorySector.Procedural;
      const limit = 5;
      const queryEmbedding = makeEmbedding(0.1);
      mockPool.query.mockResolvedValue(rowsOf());

      await engine.searchByEmbedding(queryEmbedding, sector, limit);

      // No threshold argument is passed; the test expects 0 among the bound parameters.
      expectQueryWith("LIMIT $4", [JSON.stringify(queryEmbedding), sector, 0, limit]);
    });

    it("should return empty array when no results", async () => {
      mockPool.query.mockResolvedValue(rowsOf());

      const results = await engine.searchByEmbedding(
        makeEmbedding(0.1),
        MemorySector.Emotional,
        10
      );

      expect(results).toEqual([]);
    });

    it("should throw error for invalid embedding", async () => {
      const invalidEmbedding: number[] = [];

      const search = engine.searchByEmbedding(invalidEmbedding, MemorySector.Semantic, 10);

      await expect(search).rejects.toThrow("Embedding must be a non-empty array");
    });

    it("should throw error for invalid limit", async () => {
      const queryEmbedding = makeEmbedding(0.1);
      const sector = MemorySector.Semantic;

      // Zero and negative limits are both rejected.
      for (const badLimit of [0, -5]) {
        await expect(
          engine.searchByEmbedding(queryEmbedding, sector, badLimit)
        ).rejects.toThrow("Limit must be at least 1");
      }
    });

    it("should throw error for invalid threshold", async () => {
      const queryEmbedding = makeEmbedding(0.1);
      const sector = MemorySector.Semantic;

      // One value below the accepted range, one above it.
      for (const badThreshold of [-0.1, 1.5]) {
        await expect(
          engine.searchByEmbedding(queryEmbedding, sector, 10, badThreshold)
        ).rejects.toThrow("Threshold must be between 0 and 1");
      }
    });

    it("should handle database errors gracefully", async () => {
      mockPool.query.mockRejectedValue(new Error("Database connection failed"));

      const search = engine.searchByEmbedding(makeEmbedding(0.1), MemorySector.Semantic, 10);

      await expect(search).rejects.toThrow("Vector search failed");
    });
  });

  describe("searchByMemoryId", () => {
    it("should search using a memory's embedding", async () => {
      // First query returns the source memory's embedding, second the neighbours.
      mockPool.query
        .mockResolvedValueOnce(storedEmbedding(JSON.stringify(makeEmbedding(0.2))))
        .mockResolvedValueOnce(rowsOf(hit("mem456", "semantic", 0.92)));

      const results = await engine.searchByMemoryId("mem123", MemorySector.Semantic, 10);

      expect(results).toHaveLength(1);
      const [only] = results;
      expect(only.memoryId).toBe("mem456");
      expect(only.similarity).toBe(0.92);

      // One lookup plus one similarity search.
      expect(mockPool.query).toHaveBeenCalledTimes(2);
    });

    it("should throw error when memory has no embedding", async () => {
      mockPool.query.mockResolvedValue({ rows: [] });

      const search = engine.searchByMemoryId("mem123", MemorySector.Semantic, 10);

      await expect(search).rejects.toThrow("No embedding found for memory");
    });

    it("should throw error for invalid memory ID", async () => {
      const search = engine.searchByMemoryId("", MemorySector.Semantic, 10);

      await expect(search).rejects.toThrow("Memory ID must be a non-empty string");
    });

    it("should exclude the source memory from results", async () => {
      const memoryId = "mem123";
      mockPool.query
        .mockResolvedValueOnce(storedEmbedding(JSON.stringify(makeEmbedding(0.2))))
        .mockResolvedValueOnce(rowsOf(hit("mem456", "semantic", 0.92)));

      await engine.searchByMemoryId(memoryId, MemorySector.Semantic, 10);

      // The similarity query must filter out the memory we searched from.
      expectQueryWith("memory_id != $5", [memoryId]);
    });
  });

  describe("sector-specific search", () => {
    const sectorCases = [
      {
        title: "should search in episodic sector",
        sector: MemorySector.Episodic,
        stored: "episodic",
        similarity: 0.88,
      },
      {
        title: "should search in procedural sector",
        sector: MemorySector.Procedural,
        stored: "procedural",
        similarity: 0.82,
      },
      {
        title: "should search in emotional sector",
        sector: MemorySector.Emotional,
        stored: "emotional",
        similarity: 0.79,
      },
      {
        title: "should search in reflective sector",
        sector: MemorySector.Reflective,
        stored: "reflective",
        similarity: 0.91,
      },
    ];

    for (const { title, sector, stored, similarity } of sectorCases) {
      it(title, async () => {
        mockPool.query.mockResolvedValue(rowsOf(hit("mem1", stored, similarity)));

        const results = await engine.searchByEmbedding(makeEmbedding(0.1), sector, 10);

        expect(results[0].sector).toBe(sector);
      });
    }
  });

  describe("performance and optimization", () => {
    it("should use IVFFlat index for vector search", async () => {
      mockPool.query.mockResolvedValue(rowsOf());

      await engine.searchByEmbedding(makeEmbedding(0.1), MemorySector.Semantic, 10);

      // Inspect the SQL text of the first (only) query issued.
      const sql = mockPool.query.mock.calls[0][0] as string;

      // Cosine distance operator applied to the bound query vector...
      expect(sql).toContain("<=> $1::vector");
      // ...and ordering by that same distance expression.
      expect(sql).toContain("ORDER BY embedding <=> $1::vector");
    });

    it("should handle large result sets efficiently", async () => {
      const limit = 1000;
      // Similarity strictly decreases with the index, so row 0 is the best match.
      const largeResultSet = Array.from({ length: limit }, (_, index) =>
        hit(`mem${index}`, "semantic", 0.9 - index * 0.0001)
      );
      mockPool.query.mockResolvedValue({ rows: largeResultSet });

      const results = await engine.searchByEmbedding(
        makeEmbedding(0.1),
        MemorySector.Semantic,
        limit
      );

      expect(results).toHaveLength(1000);
      // First result must outrank the last one (descending similarity).
      expect(results[0].similarity).toBeGreaterThan(results[limit - 1].similarity);
    });
  });

  describe("batch search operations", () => {
    it("should batch search by multiple embeddings", async () => {
      const queryEmbeddings = [0.1, 0.2, 0.3].map((value) => makeEmbedding(value));

      // One queued response per query embedding, in order.
      mockPool.query
        .mockResolvedValueOnce(
          rowsOf(hit("mem1", "semantic", 0.95), hit("mem2", "semantic", 0.85))
        )
        .mockResolvedValueOnce(rowsOf(hit("mem3", "semantic", 0.9)))
        .mockResolvedValueOnce(rowsOf(hit("mem4", "semantic", 0.88)));

      const results = await engine.batchSearchByEmbedding(
        queryEmbeddings,
        MemorySector.Semantic,
        10
      );

      expect(results.size).toBe(3);
      // Results are keyed by the index of the originating embedding.
      [2, 1, 1].forEach((expectedCount, index) => {
        expect(results.get(index)).toHaveLength(expectedCount);
      });
      expect(mockPool.query).toHaveBeenCalledTimes(3);
    });

    it("should throw error for empty embeddings array", async () => {
      const search = engine.batchSearchByEmbedding([], MemorySector.Semantic, 10);

      await expect(search).rejects.toThrow("Query embeddings must be a non-empty array");
    });

    it("should validate all embeddings in batch", async () => {
      // The second entry is empty and therefore invalid.
      const queryEmbeddings = [makeEmbedding(0.1), []];

      const search = engine.batchSearchByEmbedding(queryEmbeddings, MemorySector.Semantic, 10);

      await expect(search).rejects.toThrow("Embedding must be a non-empty array");
    });

    it("should handle batch search errors gracefully", async () => {
      const queryEmbeddings = [makeEmbedding(0.1), makeEmbedding(0.2)];
      mockPool.query.mockRejectedValue(new Error("Database error"));

      const search = engine.batchSearchByEmbedding(queryEmbeddings, MemorySector.Semantic, 10);

      await expect(search).rejects.toThrow("Batch vector search failed");
    });
  });

  describe("error handling", () => {
    it("should throw error when database is not connected", async () => {
      const disconnectedDb = { pool: null } as unknown as DatabaseConnectionManager;
      const disconnectedEngine = new VectorSearchEngine(disconnectedDb);

      const search = disconnectedEngine.searchByEmbedding(
        makeEmbedding(0.1),
        MemorySector.Semantic,
        10
      );

      await expect(search).rejects.toThrow("Database not connected");
    });

    it("should provide helpful error messages", async () => {
      mockPool.query.mockRejectedValue(new Error("Connection timeout"));

      const search = engine.searchByEmbedding(makeEmbedding(0.1), MemorySector.Semantic, 10);

      // The underlying cause is expected to be appended to the wrapper message.
      await expect(search).rejects.toThrow("Vector search failed: Connection timeout");
    });

    it("should handle non-Error objects in searchByMemoryId", async () => {
      // Embedding lookup succeeds, then the similarity query rejects with a bare string.
      mockPool.query.mockResolvedValueOnce(
        storedEmbedding(JSON.stringify(makeEmbedding(0.2)))
      );
      mockPool.query.mockRejectedValueOnce("string error");

      const search = engine.searchByMemoryId("mem123", MemorySector.Semantic, 10);

      await expect(search).rejects.toThrow("Vector search by memory ID failed: string error");
    });
  });

  describe("parseEmbedding", () => {
    it("should parse embedding from array format", async () => {
      // The stored embedding comes back as a real array rather than a JSON string.
      const embeddingArray = makeEmbedding(0.3);
      mockPool.query
        .mockResolvedValueOnce(storedEmbedding(embeddingArray))
        .mockResolvedValueOnce(rowsOf(hit("mem456", "semantic", 0.88)));

      const results = await engine.searchByMemoryId("mem123", MemorySector.Semantic, 10);

      expect(results).toHaveLength(1);
      expect(results[0].memoryId).toBe("mem456");
    });

    // Stored values that are neither a string nor an array must be rejected.
    const malformedCases: Array<{ title: string; embedding: unknown }> = [
      {
        title: "should throw error for invalid embedding format",
        embedding: { invalid: "format" },
      },
      { title: "should throw error for null embedding", embedding: null },
      { title: "should throw error for number embedding", embedding: 12345 },
    ];

    for (const { title, embedding } of malformedCases) {
      it(title, async () => {
        mockPool.query.mockResolvedValueOnce(storedEmbedding(embedding));

        const search = engine.searchByMemoryId("mem123", MemorySector.Semantic, 10);

        await expect(search).rejects.toThrow("Invalid embedding format");
      });
    }
  });
});

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keyurgolani/ThoughtMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.