Skip to main content
Glama
mock-embeddings.ts (12.6 kB)
/**
 * Mock Embedding Utilities
 *
 * Provides utilities for generating mock embeddings for testing:
 * - Deterministic embedding generation using cached Ollama embeddings when available
 * - Fallback to hash-based generation when cache miss
 * - Similarity control
 * - Fast generation for tests
 * - No external API calls
 *
 * The mock embeddings system supports two modes:
 * 1. Cached mode: Uses pre-computed embeddings from real Ollama server
 * 2. Generated mode: Falls back to deterministic hash-based generation
 *
 * To capture real Ollama embeddings for the cache:
 *   npm run test:capture-embeddings
 */

import { getCachedEmbedding, hasCachedEmbedding } from "./ollama-embeddings-cache.js";

/**
 * Hash a string to a non-negative number.
 *
 * Shared by both mock classes so that the same text always maps to the same
 * seed regardless of which mock produced the embedding.
 *
 * @param str - Text to hash
 * @returns Absolute value of a 32-bit rolling hash of the UTF-16 code units
 */
function hashText(str: string): number {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    // (hash << 5) - hash is hash * 31; `| 0` truncates to a signed 32-bit integer.
    hash = ((hash << 5) - hash + str.charCodeAt(i)) | 0;
  }
  return Math.abs(hash);
}

/**
 * Generate a deterministic mock embedding
 *
 * Values come from a small linear congruential generator, so the same
 * `dimension` and `seed` always yield the same vector. For non-negative integer
 * seeds every raw value lies in [0, 1), so all components of the result are
 * non-negative.
 *
 * @param dimension - Embedding dimension
 * @param seed - Seed for deterministic generation
 * @returns Mock embedding vector, normalized to unit length
 */
export function generateMockEmbedding(dimension: number = 1536, seed: number = 0): number[] {
  const embedding: number[] = [];
  let currentSeed = seed;

  for (let i = 0; i < dimension; i++) {
    // Simple deterministic pseudo-random number generator
    currentSeed = (currentSeed * 9301 + 49297) % 233280;
    embedding.push(currentSeed / 233280);
  }

  // Normalize to unit vector
  return normalizeVector(embedding);
}

/**
 * Generate a mock embedding similar to a reference embedding
 *
 * The result is built as `s * r + sqrt(1 - s^2) * o`, where `r` is the unit
 * reference and `o` is a random unit vector orthogonal to it. Its cosine
 * similarity to the reference therefore equals `similarity` (up to
 * floating-point error) whenever the reference is non-zero and has at least
 * two dimensions.
 *
 * The noise comes from `Math.random()`, so the returned vector differs between
 * calls; only its similarity to the reference is controlled.
 *
 * Degenerate inputs:
 * - A zero (or empty) reference has no direction, so the result is just the
 *   normalized noise scaled by `1 - similarity` (all zeros when similarity is 1).
 * - If no orthogonal direction exists (for example a one-dimensional
 *   reference), the orthogonal part is zero and the result collapses onto the
 *   reference direction.
 *
 * @param reference - Reference embedding
 * @param similarity - Target cosine similarity (0-1)
 * @returns Unit-length embedding with the requested similarity to `reference`
 * @throws Error if `similarity` is NaN or outside [0, 1]
 */
export function generateSimilarEmbedding(reference: number[], similarity: number = 0.9): number[] {
  // NaN fails both range comparisons, so it has to be rejected explicitly.
  if (Number.isNaN(similarity) || similarity < 0 || similarity > 1) {
    throw new Error("Similarity must be between 0 and 1");
  }

  const dimension = reference.length;
  const unitReference = normalizeVector(reference);
  const noise = reference.map(() => (Math.random() - 0.5) * 2);

  // normalizeVector returns all zeros for a zero-magnitude input.
  const hasDirection = unitReference.some((component) => component !== 0);
  if (!hasDirection) {
    return normalizeVector(noise.map((component) => component * (1 - similarity)));
  }

  // Remove the part of the noise that points along the reference so the two
  // components below are orthogonal and their weights fix the cosine exactly.
  let projection = 0;
  for (let i = 0; i < dimension; i++) {
    projection += noise[i] * unitReference[i];
  }
  const orthogonal = normalizeVector(
    noise.map((component, i) => component - projection * unitReference[i])
  );

  const orthogonalWeight = Math.sqrt(1 - similarity * similarity);
  const similar = unitReference.map(
    (component, i) => similarity * component + orthogonalWeight * orthogonal[i]
  );

  // Already unit length in exact arithmetic; renormalize to absorb rounding error.
  return normalizeVector(similar);
}

/**
 * Generate mock embeddings for all sectors
 *
 * Each sector uses a consecutive seed (`seed`, `seed + 1`, ...), so the five
 * vectors are distinct but reproducible.
 *
 * @param dimension - Embedding dimension
 * @param seed - Seed for deterministic generation
 * @returns Mock sector embeddings
 */
export function generateMockSectorEmbeddings(
  dimension: number = 1536,
  seed: number = 0
): {
  episodic: number[];
  semantic: number[];
  procedural: number[];
  emotional: number[];
  reflective: number[];
} {
  return {
    episodic: generateMockEmbedding(dimension, seed),
    semantic: generateMockEmbedding(dimension, seed + 1),
    procedural: generateMockEmbedding(dimension, seed + 2),
    emotional: generateMockEmbedding(dimension, seed + 3),
    reflective: generateMockEmbedding(dimension, seed + 4),
  };
}

/**
 * Calculate cosine similarity between two embeddings
 *
 * @param a - First embedding
 * @param b - Second embedding
 * @returns Cosine similarity (-1 to 1), or 0 if either vector has zero magnitude
 * @throws Error if the embeddings have different lengths
 */
export function calculateCosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) {
    throw new Error("Embeddings must have same dimension");
  }

  let dotProduct = 0;
  let magnitudeA = 0;
  let magnitudeB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    magnitudeA += a[i] * a[i];
    magnitudeB += b[i] * b[i];
  }

  magnitudeA = Math.sqrt(magnitudeA);
  magnitudeB = Math.sqrt(magnitudeB);

  // Cosine is undefined for a zero vector; report "no similarity" instead of NaN.
  if (magnitudeA === 0 || magnitudeB === 0) {
    return 0;
  }

  return dotProduct / (magnitudeA * magnitudeB);
}

/**
 * Normalize a vector to unit length
 *
 * @param vector - Vector to normalize (not modified)
 * @returns New normalized vector; all zeros if the input has zero magnitude
 */
export function normalizeVector(vector: number[]): number[] {
  let magnitude = 0;
  for (const value of vector) {
    magnitude += value * value;
  }
  magnitude = Math.sqrt(magnitude);

  // Avoid dividing by zero.
  if (magnitude === 0) {
    return vector.map(() => 0);
  }

  return vector.map((value) => value / magnitude);
}

/**
 * Generate a batch of mock embeddings
 *
 * @param count - Number of embeddings to generate
 * @param dimension - Embedding dimension
 * @param baseSeed - Base seed for generation; embedding `i` uses `baseSeed + i`
 * @returns Array of mock embeddings
 */
export function generateMockEmbeddingBatch(
  count: number,
  dimension: number = 1536,
  baseSeed: number = 0
): number[][] {
  return Array.from({ length: count }, (_, i) => generateMockEmbedding(dimension, baseSeed + i));
}

/**
 * Generate mock embeddings with controlled similarity distribution
 *
 * Target similarities are spaced evenly from `minSimilarity` (first embedding)
 * to `maxSimilarity` (last embedding). When `count` is 1 the single embedding
 * uses `minSimilarity`.
 *
 * @param reference - Reference embedding
 * @param count - Number of embeddings to generate
 * @param minSimilarity - Minimum similarity
 * @param maxSimilarity - Maximum similarity
 * @returns Array of embeddings with controlled similarity
 * @throws Error if a computed similarity falls outside [0, 1]
 */
export function generateSimilarityDistribution(
  reference: number[],
  count: number,
  minSimilarity: number = 0.5,
  maxSimilarity: number = 0.95
): number[][] {
  const embeddings: number[][] = [];

  for (let i = 0; i < count; i++) {
    // With a single sample, i / (count - 1) would be 0 / 0 = NaN.
    const fraction = count > 1 ? i / (count - 1) : 0;
    const similarity = minSimilarity + (maxSimilarity - minSimilarity) * fraction;
    embeddings.push(generateSimilarEmbedding(reference, similarity));
  }

  return embeddings;
}

/**
 * Mock embedding engine for testing
 *
 * Produces deterministic embeddings seeded by a hash of the input text and
 * memoizes them per engine instance.
 */
export class MockEmbeddingEngine {
  private readonly dimension: number;
  private readonly cache: Map<string, number[]>;

  /**
   * @param dimension - Length of every embedding this engine produces
   */
  constructor(dimension: number = 1536) {
    this.dimension = dimension;
    this.cache = new Map();
  }

  /**
   * Generate embedding for text
   *
   * Repeated calls with the same text return the same cached array instance.
   *
   * @param text - Input text
   * @returns Mock embedding
   */
  async generateEmbedding(text: string): Promise<number[]> {
    // Check cache
    const cached = this.cache.get(text);
    if (cached !== undefined) {
      return cached;
    }

    // Generate deterministic embedding based on text hash
    const embedding = generateMockEmbedding(this.dimension, this.hashString(text));

    // Cache result
    this.cache.set(text, embedding);
    return embedding;
  }

  /**
   * Generate embeddings for all sectors
   *
   * Results are not stored in the engine's cache.
   *
   * @param text - Input text
   * @returns Mock sector embeddings
   */
  async generateSectorEmbeddings(text: string): Promise<{
    episodic: number[];
    semantic: number[];
    procedural: number[];
    emotional: number[];
    reflective: number[];
  }> {
    return generateMockSectorEmbeddings(this.dimension, this.hashString(text));
  }

  /**
   * Generate batch of embeddings
   *
   * @param texts - Input texts
   * @returns Array of mock embeddings, in the same order as `texts`
   */
  async generateBatch(texts: string[]): Promise<number[][]> {
    return Promise.all(texts.map((text) => this.generateEmbedding(text)));
  }

  /**
   * Clear cache
   */
  clearCache(): void {
    this.cache.clear();
  }

  /**
   * Get cache size
   *
   * @returns Number of distinct texts currently cached
   */
  getCacheSize(): number {
    return this.cache.size;
  }

  /**
   * Simple string hash function
   */
  private hashString(str: string): number {
    return hashText(str);
  }
}

/**
 * Create a mock embedding engine for testing
 *
 * @param dimension - Embedding dimension
 * @returns Mock embedding engine
 */
export function createMockEmbeddingEngine(dimension: number = 1536): MockEmbeddingEngine {
  return new MockEmbeddingEngine(dimension);
}

/**
 * Mock Ollama Embedding Model for testing
 *
 * Provides a drop-in replacement for OllamaEmbeddingModel that:
 * - Uses cached real Ollama embeddings when available for consistency
 * - Falls back to deterministic hash-based generation for cache misses
 * - Does not require external Ollama service
 * - Supports all OllamaEmbeddingModel configuration options
 * - Returns normalized unit vectors
 */
export class MockOllamaEmbeddingModel {
  private readonly modelName: string;
  private readonly dimension: number;
  private readonly useCachedEmbeddings: boolean;
  private readonly cache: Map<string, number[]>;
  // Hits and misses count lookups against the pre-computed Ollama cache only;
  // a hit in the per-instance cache returns early and changes neither counter.
  private cacheHits: number = 0;
  private cacheMisses: number = 0;

  /**
   * @param config - Model configuration. `host`, `timeout` and `maxRetries` are
   *   accepted for compatibility but not stored. `useCachedEmbeddings` defaults
   *   to true.
   */
  constructor(config: {
    host: string;
    modelName: string;
    dimension: number;
    timeout?: number;
    maxRetries?: number;
    useCachedEmbeddings?: boolean;
  }) {
    // Accept all config options for compatibility, but only store what we need
    this.modelName = config.modelName;
    this.dimension = config.dimension;
    this.useCachedEmbeddings = config.useCachedEmbeddings ?? true;
    this.cache = new Map();
  }

  /**
   * Generate embedding vector for text
   * Uses cached Ollama embeddings when available, falls back to hash-based generation
   *
   * @param text - Input text; must be a non-empty, non-whitespace string of at
   *   most 100,000 characters
   * @returns Embedding of length `dimension`
   * @throws Error if `text` fails validation
   */
  async generate(text: string): Promise<number[]> {
    // Validate input (same as real implementation)
    if (!text || typeof text !== "string") {
      throw new Error("Text must be a non-empty string");
    }
    if (text.trim().length === 0) {
      throw new Error("Text cannot be empty or whitespace only");
    }
    if (text.length > 100000) {
      throw new Error("Text exceeds maximum length of 100,000 characters");
    }

    // Check local cache first
    const localCached = this.cache.get(text);
    if (localCached !== undefined) {
      return localCached;
    }

    let embedding: number[];

    // Try to get from pre-computed Ollama cache
    if (this.useCachedEmbeddings && hasCachedEmbedding(text)) {
      const cachedEmbedding = getCachedEmbedding(text);
      if (cachedEmbedding && cachedEmbedding.length === this.dimension) {
        embedding = cachedEmbedding;
        this.cacheHits++;
      } else {
        // Dimension mismatch or invalid cache, fall back to generated
        embedding = this.generateDeterministicEmbedding(text);
        this.cacheMisses++;
      }
    } else {
      // No cached embedding, generate deterministically
      embedding = this.generateDeterministicEmbedding(text);
      this.cacheMisses++;
    }

    // Store in local cache
    this.cache.set(text, embedding);
    return embedding;
  }

  /**
   * Generate deterministic embedding based on text hash
   * Used as fallback when no cached Ollama embedding is available
   */
  private generateDeterministicEmbedding(text: string): number[] {
    return generateMockEmbedding(this.dimension, this.hashString(text));
  }

  /**
   * Get embedding dimension for this model
   */
  getDimension(): number {
    return this.dimension;
  }

  /**
   * Get model name/identifier
   */
  getModelName(): string {
    return this.modelName;
  }

  /**
   * Clear cache (useful for testing)
   *
   * Also resets the hit/miss counters.
   */
  clearCache(): void {
    this.cache.clear();
    this.cacheHits = 0;
    this.cacheMisses = 0;
  }

  /**
   * Get cache size (useful for testing)
   *
   * @returns Number of distinct texts held in this instance's local cache
   */
  getCacheSize(): number {
    return this.cache.size;
  }

  /**
   * Get cache statistics
   *
   * @returns Hit and miss counts plus `hitRate = hits / (hits + misses)`, or 0
   *   when nothing has been counted yet
   */
  getCacheStats(): { hits: number; misses: number; hitRate: number } {
    const total = this.cacheHits + this.cacheMisses;
    return {
      hits: this.cacheHits,
      misses: this.cacheMisses,
      hitRate: total > 0 ? this.cacheHits / total : 0,
    };
  }

  /**
   * Simple string hash function
   * Same implementation as MockEmbeddingEngine for consistency
   */
  private hashString(str: string): number {
    return hashText(str);
  }
}

/**
 * Create a mock Ollama embedding model for testing
 *
 * @param config - Configuration matching OllamaEmbeddingModel
 * @returns Mock Ollama embedding model
 */
export function createMockOllamaModel(config: {
  host: string;
  modelName: string;
  dimension: number;
  timeout?: number;
  maxRetries?: number;
  useCachedEmbeddings?: boolean;
}): MockOllamaEmbeddingModel {
  return new MockOllamaEmbeddingModel(config);
}

// Re-export utilities from related modules for convenience
export {
  COMMON_TEST_STRINGS,
  getCachedEmbedding,
  getCachedTexts,
  getCacheMetadata,
  hasCachedEmbedding,
} from "./ollama-embeddings-cache";

export {
  checkOllamaAvailability,
  clearOllamaStatusCache,
  DEFAULT_OLLAMA_HOST,
  getCachedOllamaStatus,
  getEmbeddingModelConfig,
  isModelAvailable,
  shouldUseRealOllama,
  type OllamaStatus,
} from "./ollama-detector";

export {
  createHybridEmbeddingModel,
  createTestEmbeddingModel,
  getIntegrationTestModel,
  getUnitTestModel,
  type EmbeddingModel,
  type HybridEmbeddingConfig,
  type HybridModelResult,
} from "./hybrid-embedding-model";

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keyurgolani/ThoughtMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.