embedding-engine-optimization.test.ts (8.26 kB)
/**
 * Embedding Engine Optimization Tests
 *
 * Tests for performance optimizations:
 * - Request deduplication (prevent duplicate in-flight requests)
 * - Batch processing optimization
 * - Cache efficiency
 *
 * Requirements: 2.1, 2.2, 2.3, 2.4, 2.5
 */
import { beforeEach, describe, expect, it, vi } from "vitest";
import { EmbeddingCache } from "../../../embeddings/cache";
import { EmbeddingEngine } from "../../../embeddings/embedding-engine";
import type { MemoryContent, MemorySector } from "../../../embeddings/types";
import { MockOllamaEmbeddingModel } from "../../utils/mock-embeddings";

describe("EmbeddingEngine - Request Deduplication", () => {
  let engine: EmbeddingEngine;
  let model: MockOllamaEmbeddingModel;

  beforeEach(() => {
    model = new MockOllamaEmbeddingModel({
      host: "http://localhost:11434",
      modelName: "nomic-embed-text",
      dimension: 768,
    });
    const cache = new EmbeddingCache(10000, 3600000);
    engine = new EmbeddingEngine(model, cache);
  });

  it("should deduplicate concurrent requests for same content", async () => {
    // Spy on model.generate to count actual calls
    const generateSpy = vi.spyOn(model, "generate");

    const content = "Duplicate request test content";

    // Make 5 concurrent requests for the same content
    const promises = Array.from({ length: 5 }, () =>
      engine.generateSemanticEmbedding(content)
    );
    const results = await Promise.all(promises);

    // All results should be identical
    expect(results).toHaveLength(5);
    results.forEach((result) => {
      expect(result).toEqual(results[0]);
    });

    // Model should only be called once (deduplication working)
    expect(generateSpy).toHaveBeenCalledTimes(1);
  });

  it("should deduplicate concurrent requests across different sectors", async () => {
    const generateSpy = vi.spyOn(model, "generate");

    const memory: MemoryContent = {
      text: "Multi-sector deduplication test",
      sector: "semantic" as MemorySector,
      context: {
        timestamp: new Date(),
        sessionId: "test-session",
      },
      emotion: {
        valence: 0.5,
        arousal: 0.5,
        dominance: 0.0,
      },
      insights: ["Test insight"],
    };

    // Make 3 concurrent requests for all sectors
    const promises = Array.from({ length: 3 }, () =>
      engine.generateAllSectorEmbeddings(memory)
    );
    const results = await Promise.all(promises);

    // All results should be identical
    expect(results).toHaveLength(3);
    results.forEach((result) => {
      expect(result).toEqual(results[0]);
    });

    // Model should be called exactly 5 times (once per sector, not 15 times)
    // Note: Each sector has different augmented content, so 5 unique calls expected
    expect(generateSpy.mock.calls.length).toBeLessThanOrEqual(5);
  });

  it("should not deduplicate requests for different content", async () => {
    const generateSpy = vi.spyOn(model, "generate");

    // Make concurrent requests for different content
    const promises = [
      engine.generateSemanticEmbedding("Content A"),
      engine.generateSemanticEmbedding("Content B"),
      engine.generateSemanticEmbedding("Content C"),
    ];
    await Promise.all(promises);

    // Model should be called 3 times (no deduplication for different content)
    expect(generateSpy).toHaveBeenCalledTimes(3);
  });

  it("should handle deduplication with cache misses and hits", async () => {
    const content = "Cache and deduplication test";

    // First request (cache miss, should generate)
    await engine.generateSemanticEmbedding(content);

    const generateSpy = vi.spyOn(model, "generate");

    // Make 5 concurrent requests (should all hit cache)
    const promises = Array.from({ length: 5 }, () =>
      engine.generateSemanticEmbedding(content)
    );
    await Promise.all(promises);

    // Model should not be called (all cache hits)
    expect(generateSpy).not.toHaveBeenCalled();
  });
});

describe("EmbeddingEngine - Batch Processing Optimization", () => {
  let engine: EmbeddingEngine;

  beforeEach(() => {
    const model = new MockOllamaEmbeddingModel({
      host: "http://localhost:11434",
      modelName: "nomic-embed-text",
      dimension: 768,
    });
    const cache = new EmbeddingCache(10000, 3600000);
    engine = new EmbeddingEngine(model, cache);
  });

  it("should process large batches efficiently", async () => {
    // Create 100 unique memories
    const memories: MemoryContent[] = Array.from({ length: 100 }, (_, i) => ({
      text: `Batch memory ${i} with unique content for testing`,
      sector: "semantic" as MemorySector,
    }));

    const startTime = performance.now();
    const results = await engine.batchGenerateEmbeddings(memories);
    const duration = performance.now() - startTime;

    // Verify all embeddings generated
    expect(results).toHaveLength(100);
    results.forEach((result) => {
      expect(result.semantic).toBeDefined();
      expect(result.semantic.length).toBe(768);
    });

    // Should complete in reasonable time (50s = 500ms * 100)
    expect(duration).toBeLessThan(50000);
  });

  it("should handle empty batch gracefully", async () => {
    const results = await engine.batchGenerateEmbeddings([]);
    expect(results).toEqual([]);
  });

  it("should handle single item batch", async () => {
    const memories: MemoryContent[] = [
      {
        text: "Single item",
        sector: "semantic" as MemorySector,
      },
    ];

    const results = await engine.batchGenerateEmbeddings(memories);
    expect(results).toHaveLength(1);
    expect(results[0].semantic).toBeDefined();
  });

  it("should benefit from caching in batch operations", async () => {
    // Create batch with some duplicate content
    const memories: MemoryContent[] = [
      { text: "Unique content 1", sector: "semantic" as MemorySector },
      { text: "Duplicate content", sector: "semantic" as MemorySector },
      { text: "Unique content 2", sector: "semantic" as MemorySector },
      { text: "Duplicate content", sector: "semantic" as MemorySector },
      { text: "Duplicate content", sector: "semantic" as MemorySector },
    ];

    const results = await engine.batchGenerateEmbeddings(memories);

    // All results should be valid
    expect(results).toHaveLength(5);

    // Duplicate content should have identical embeddings
    expect(results[1].semantic).toEqual(results[3].semantic);
    expect(results[1].semantic).toEqual(results[4].semantic);
  });
});

describe("EmbeddingEngine - Cache Efficiency", () => {
  let engine: EmbeddingEngine;
  let cache: EmbeddingCache;

  beforeEach(() => {
    const model = new MockOllamaEmbeddingModel({
      host: "http://localhost:11434",
      modelName: "nomic-embed-text",
      dimension: 768,
    });
    cache = new EmbeddingCache(10000, 3600000); // 10k entries, 1 hour TTL
    engine = new EmbeddingEngine(model, cache);
  });

  it("should use cache with 1 hour TTL", async () => {
    const content = "Cache TTL test";

    // Generate embedding
    const embedding1 = await engine.generateSemanticEmbedding(content);

    // Immediately retrieve (should hit cache)
    const embedding2 = await engine.generateSemanticEmbedding(content);

    // Should be identical (from cache)
    expect(embedding1).toEqual(embedding2);
  });

  it("should support 10,000 cache entries", async () => {
    // Generate 10,000 unique embeddings
    const promises = Array.from({ length: 10000 }, (_, i) =>
      engine.generateSemanticEmbedding(`Cache capacity test ${i}`)
    );
    await Promise.all(promises);

    // Cache should have 10,000 entries (or close to it due to LRU)
    expect(cache.size()).toBeGreaterThan(9000);
    expect(cache.size()).toBeLessThanOrEqual(10000);
  });

  it("should evict oldest entries when cache is full", async () => {
    // Fill cache to capacity
    for (let i = 0; i < 10000; i++) {
      await engine.generateSemanticEmbedding(`Fill cache ${i}`);
    }
    const cacheSize = cache.size();

    // Add one more entry
    await engine.generateSemanticEmbedding("Overflow entry");

    // Cache size should remain at max
    expect(cache.size()).toBeLessThanOrEqual(10000);
    expect(cache.size()).toBeGreaterThanOrEqual(cacheSize);
  });
});
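The deduplication behavior these tests exercise (one model call for five concurrent requests with the same content) is commonly implemented with an in-flight promise map keyed by the request text. The sketch below is a minimal illustration of that pattern under assumed names; it is not the actual EmbeddingEngine implementation, whose real API (generateSemanticEmbedding, the model, and EmbeddingCache) is only shown here through the tests above.

// Hypothetical sketch: concurrent callers for identical content share one
// in-flight promise, so the underlying generator runs once per unique text.
class DedupingEmbedder {
  private inFlight = new Map<string, Promise<number[]>>();

  constructor(private generate: (text: string) => Promise<number[]>) {}

  async embed(text: string): Promise<number[]> {
    const existing = this.inFlight.get(text);
    if (existing) {
      // A request for this exact content is already running; reuse it.
      return existing;
    }

    const pending = this.generate(text).finally(() => {
      // Clear the entry once settled so later requests start fresh
      // (or hit a cache layer, if one sits in front of the generator).
      this.inFlight.delete(text);
    });

    this.inFlight.set(text, pending);
    return pending;
  }
}

With this shape, five concurrent embed("same text") calls resolve from a single generator invocation, while calls with different texts each trigger their own call, matching the expectations asserted in the deduplication tests.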
