Skip to main content
Glama
embedding-generation.perf.test.ts5.11 kB
/** * Embedding Generation Performance Tests * * Validates that embedding generation meets performance targets: * - <500ms for generating all 5 sector embeddings * - Batch operations scale efficiently * - Cache hit rates are high for repeated content * * Requirements: 2.1, 2.2, 2.3, 2.4, 2.5 */ import { beforeEach, describe, expect, it } from "vitest"; import { EmbeddingCache } from "../../embeddings/cache"; import { EmbeddingEngine } from "../../embeddings/embedding-engine"; import type { MemoryContent, MemorySector } from "../../embeddings/types"; import { PerformanceMonitor } from "../../utils/performance-monitor"; import { MockOllamaEmbeddingModel } from "../utils/mock-embeddings"; describe("Embedding Generation Performance", () => { let engine: EmbeddingEngine; let monitor: PerformanceMonitor; beforeEach(() => { const model = new MockOllamaEmbeddingModel({ host: "http://localhost:11434", modelName: "nomic-embed-text", dimension: 768, }); const cache = new EmbeddingCache(10000, 3600000); // 10k entries, 1 hour TTL engine = new EmbeddingEngine(model, cache); monitor = new PerformanceMonitor(); }); it("should generate all 5 sector embeddings in <500ms", async () => { // Requirement: Embedding generation <500ms for 5 sectors const memory: MemoryContent = { text: "Performance test memory with sufficient content to generate realistic embeddings", sector: "episodic" as MemorySector, context: { timestamp: new Date(), sessionId: "perf-test-1", }, emotion: { valence: 0.5, arousal: 0.5, dominance: 0.0, }, insights: ["Performance matters"], }; const timer = monitor.startTimer("generate-all-sectors", "embedding"); await engine.generateAllSectorEmbeddings(memory); const duration = monitor.endTimer(timer); expect(duration).toBeLessThan(500); }); it("should benefit from caching on repeated content", async () => { // First generation (cache miss) const memory: MemoryContent = { text: "Repeated content for cache testing", sector: "semantic" as MemorySector, }; const timer1 = monitor.startTimer("first-generation", "embedding"); await engine.generateAllSectorEmbeddings(memory); const duration1 = monitor.endTimer(timer1); // Second generation (cache hit) const timer2 = monitor.startTimer("cached-generation", "embedding"); await engine.generateAllSectorEmbeddings(memory); const duration2 = monitor.endTimer(timer2); // Cached generation should be significantly faster expect(duration2).toBeLessThan(duration1 * 0.5); }); it("should handle batch generation efficiently", async () => { // Generate 10 memories in batch const memories: MemoryContent[] = Array.from({ length: 10 }, (_, i) => ({ text: `Batch memory ${i} with unique content`, sector: "semantic" as MemorySector, })); const timer = monitor.startTimer("batch-generation", "embedding"); await engine.batchGenerateEmbeddings(memories); const duration = monitor.endTimer(timer); // Batch of 10 should complete in reasonable time (5s = 500ms * 10) expect(duration).toBeLessThan(5000); }); it("should scale linearly with batch size", async () => { // Small batch const smallBatch: MemoryContent[] = Array.from({ length: 5 }, (_, i) => ({ text: `Small batch memory ${i}`, sector: "semantic" as MemorySector, })); const timer1 = monitor.startTimer("small-batch", "embedding"); await engine.batchGenerateEmbeddings(smallBatch); const duration1 = monitor.endTimer(timer1); // Large batch (2x size) const largeBatch: MemoryContent[] = Array.from({ length: 10 }, (_, i) => ({ text: `Large batch memory ${i}`, sector: "semantic" as MemorySector, })); const timer2 = monitor.startTimer("large-batch", "embedding"); await engine.batchGenerateEmbeddings(largeBatch); const duration2 = monitor.endTimer(timer2); // Should scale roughly linearly (allow 2.5x for overhead) expect(duration2).toBeLessThan(duration1 * 2.5); }); it("should maintain performance under load", async () => { // Generate 50 embeddings sequentially const durations: number[] = []; for (let i = 0; i < 50; i++) { const memory: MemoryContent = { text: `Load test memory ${i}`, sector: "semantic" as MemorySector, }; const timer = monitor.startTimer(`load-test-${i}`, "embedding"); await engine.generateAllSectorEmbeddings(memory); const duration = monitor.endTimer(timer); durations.push(duration); } // Calculate percentiles const sorted = durations.sort((a, b) => a - b); const p50 = sorted[Math.floor(sorted.length * 0.5)]; const p95 = sorted[Math.floor(sorted.length * 0.95)]; const p99 = sorted[Math.floor(sorted.length * 0.99)]; // All percentiles should meet target expect(p50).toBeLessThan(500); expect(p95).toBeLessThan(500); expect(p99).toBeLessThan(500); }); });

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keyurgolani/ThoughtMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server