M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

Overview Schema Related Servers Score Discussions

Mimir
src
indexing

EmbeddingsService.ts•33.3 kB

/**
 * @file src/indexing/EmbeddingsService.ts
 * @description Vector embeddings service supporting multiple providers
 *
 * Supports:
 * - Ollama (local models like nomic-embed-text)
 * - OpenAI/Copilot (text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large)
 * - llama.cpp (routed through the OpenAI-compatible request format)
 *
 * Feature flag controlled via LLM configuration.
 */

import { LLMConfigLoader } from '../config/LLMConfigLoader.js';
import { createSecureFetchOptions } from '../utils/fetch-helper.js';

/**
 * Matches any UTF-16 code unit that may need sanitizing: the control characters
 * 0x00-0x08, 0x0B, 0x0E-0x1F, or any surrogate (paired or lone).
 * Deliberately not global/sticky so `.test()` stays stateless.
 */
const SANITIZE_CANDIDATE = /[\u0000-\u0008\u000B\u000E-\u001F\uD800-\uDFFF]/;

/**
 * Sanitize text for embedding API by removing/replacing invalid Unicode
 *
 * Handles:
 * - Lone surrogate code units (U+D800-U+DFFF) that cause JSON parse errors
 *   (replaced with U+FFFD)
 * - Null bytes and other problematic control characters (replaced with a space;
 *   tab, newline, carriage return and form feed are kept)
 * - Preserves valid emojis, CJK characters, and all proper Unicode
 *
 * The embedding API error "surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"
 * occurs when lone surrogates exist in text (often from corrupted files or
 * improper string handling).
 *
 * @param text - Text to sanitize
 * @returns Sanitized text safe for JSON serialization and embedding APIs
 */
export function sanitizeTextForEmbedding(text: string): string {
  // Fast path: nothing in the whole string can need rewriting, so the
  // character-by-character pass below would reproduce the input unchanged.
  if (!SANITIZE_CANDIDATE.test(text)) {
    return text;
  }

  const result: string[] = [];

  for (let i = 0; i < text.length; i++) {
    const code = text.charCodeAt(i);

    // Problematic control characters (keep tab, newline, carriage return, form feed)
    if ((code >= 0x00 && code <= 0x08) || code === 0x0b || (code >= 0x0e && code <= 0x1f)) {
      result.push(' ');
      continue;
    }

    // High surrogate: only valid when immediately followed by a low surrogate
    if (code >= 0xd800 && code <= 0xdbff) {
      const nextCode = i + 1 < text.length ? text.charCodeAt(i + 1) : 0;
      if (nextCode >= 0xdc00 && nextCode <= 0xdfff) {
        // Valid surrogate pair - keep both code units
        result.push(text.slice(i, i + 2));
        i++; // Skip the low surrogate
      } else {
        // Lone high surrogate - replace with Unicode replacement character
        result.push('\uFFFD');
      }
      continue;
    }

    // A low surrogate reached here was not consumed by a preceding high surrogate
    if (code >= 0xdc00 && code <= 0xdfff) {
      result.push('\uFFFD');
      continue;
    }

    // All other characters are safe
    result.push(text.charAt(i));
  }

  return result.join('');
}

/** Result of embedding one piece of text. */
export interface EmbeddingResult {
  embedding: number[];
  dimensions: number;
  model: string;
}

/** Embedding of a single chunk together with its position in the source text. */
export interface ChunkEmbeddingResult {
  text: string;
  embedding: number[];
  dimensions: number;
  model: string;
  startOffset: number;
  endOffset: number;
  chunkIndex: number;
}

/** A slice of the source text; offsets are character indices into the original string. */
export interface TextChunk {
  text: string;
  startOffset: number;
  endOffset: number;
}

// Metadata interface for file context enrichment
export interface FileMetadata {
  name: string;
  relativePath: string;
  language: string;
  extension: string;
  directory?: string;
  sizeBytes?: number;
}

/**
 * Format file metadata as natural language for embedding
 * This enriches content with contextual information about the file itself
 * enabling semantic search to match on filenames, paths, and file types
 *
 * @param metadata - File metadata; `extension` and `sizeBytes` are not used in the output
 * @returns A one-sentence description followed by a blank-line separator
 *
 * @example
 * formatMetadataForEmbedding({
 *   name: 'auth-api.ts',
 *   relativePath: 'src/api/auth-api.ts',
 *   language: 'typescript',
 *   extension: '.ts',
 *   directory: 'src/api',
 *   sizeBytes: 15360
 * })
 * // Returns: "This is a typescript file named auth-api.ts located at src/api/auth-api.ts in the src/api directory."
 * // (followed by "\n\n")
 */
export function formatMetadataForEmbedding(metadata: FileMetadata): string {
  const parts: string[] = [];

  // Core identification - always include
  if (metadata.language) {
    parts.push(`This is a ${metadata.language} file`);
  } else {
    parts.push(`This is a file`);
  }

  if (metadata.name) {
    parts.push(`named ${metadata.name}`);
  }

  // Location context
  if (metadata.relativePath) {
    parts.push(`located at ${metadata.relativePath}`);
  }

  // Directory context (skipped for the root directory '.')
  if (metadata.directory && metadata.directory !== '.') {
    parts.push(`in the ${metadata.directory} directory`);
  }

  // Join with natural language flow
  const metadataText = parts.join(' ') + '.';

  // Add separator for clarity
  return metadataText + '\n\n';
}

/**
 * Read an integer setting from the environment.
 *
 * Falls back to `fallback` when the variable is unset, empty, not parseable as
 * a base-10 integer, or below `min`. Without this guard a malformed value
 * (NaN, 0 or negative) would reach the chunking/retry loops and produce empty
 * chunks or a loop that never advances.
 */
function readIntEnv(name: string, fallback: number, min: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === '') {
    return fallback;
  }
  const parsed = Number.parseInt(raw, 10);
  return Number.isFinite(parsed) && parsed >= min ? parsed : fallback;
}

// Chunking configuration based on model context limits
// Model limits: all-minilm (512 tokens), nomic-embed-text (2048 tokens)
// Sizes below are measured in characters, not tokens.
// Configurable via environment variables for flexibility
const getChunkSize = (): number => readIntEnv('MIMIR_EMBEDDINGS_CHUNK_SIZE', 768, 1);
const getChunkOverlap = (): number => readIntEnv('MIMIR_EMBEDDINGS_CHUNK_OVERLAP', 10, 0);
// Default 5 retries to handle model loading which can take 30+ seconds
const getMaxRetries = (): number => readIntEnv('MIMIR_EMBEDDINGS_MAX_RETRIES', 5, 0);
// Initial delay for model loading retries (ms) - model loading can take 30+ seconds
const getModelLoadingBaseDelay = (): number =>
  readIntEnv('MIMIR_EMBEDDINGS_MODEL_LOADING_DELAY', 5000, 0);
// Maximum delay cap (ms)
const getMaxDelay = (): number => readIntEnv('MIMIR_EMBEDDINGS_MAX_DELAY', 30000, 0);

/** Narrow an unknown value to a plain property bag so fields can be probed safely. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Best-effort message extraction for values caught from `catch (error: unknown)`. */
function getErrorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (isRecord(error) && typeof error.message === 'string') {
    return error.message;
  }
  return String(error);
}

/** Returns the `code` property of a caught value, if it has one. */
function getErrorCode(error: unknown): unknown {
  return isRecord(error) ? error.code : undefined;
}

/** True when the message looks like the provider is still loading the model. */
function isModelLoadingMessage(message: string): boolean {
  return (
    message.includes('503') ||
    message.includes('Loading model') ||
    message.includes('unavailable_error')
  );
}

/** Promise-based delay. */
function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

export class EmbeddingsService {
  private configLoader: LLMConfigLoader;
  public enabled = false;
  private provider = 'ollama';
  private baseUrl = 'http://localhost:11434';
  private model = 'nomic-embed-text';
  private apiKey = 'dummy-key-not-used';

  constructor() {
    this.configLoader = LLMConfigLoader.getInstance();
  }

  /**
   * Initialize the embeddings service with provider configuration
   *
   * Loads the embeddings configuration from the LLM config and sets provider,
   * model, base URL and API key. When the configuration is missing or has
   * `enabled` false, the service is left disabled.
   *
   * Errors thrown by the config loader propagate to the caller; in that case
   * the service state is left untouched (it is only switched to enabled once
   * every setting has been resolved).
   *
   * Environment overrides read here: MIMIR_EMBEDDINGS_API (base URL) and
   * MIMIR_EMBEDDINGS_API_KEY.
   *
   * @returns Promise that resolves when initialization is complete
   *
   * @example
   * // Initialize with Ollama (local embeddings)
   * const embeddingsService = new EmbeddingsService();
   * await embeddingsService.initialize();
   * if (embeddingsService.isEnabled()) {
   *   console.log('Using local Ollama embeddings');
   * }
   *
   * @example
   * // Initialize with OpenAI embeddings
   * // Set MIMIR_EMBEDDINGS_API_KEY=sk-... in environment
   * const embeddingsService = new EmbeddingsService();
   * await embeddingsService.initialize();
   * console.log('Embeddings enabled:', embeddingsService.isEnabled());
   *
   * @example
   * // Handle disabled embeddings gracefully
   * const embeddingsService = new EmbeddingsService();
   * await embeddingsService.initialize();
   * if (!embeddingsService.isEnabled()) {
   *   console.log('Embeddings disabled, using full-text search only');
   * }
   */
  async initialize(): Promise<void> {
    const config = await this.configLoader.getEmbeddingsConfig();

    if (!config || !config.enabled) {
      this.enabled = false;
      console.log('ℹ️ Vector embeddings disabled');
      return;
    }

    // Resolve the base URL first: this is the only step after the config read
    // that can throw, and nothing is committed to `this` until it succeeds.
    let baseUrl = this.baseUrl;
    if (process.env.MIMIR_EMBEDDINGS_API) {
      // Embeddings-specific API URL takes precedence
      baseUrl = process.env.MIMIR_EMBEDDINGS_API;
    } else {
      // Fallback: Get provider base URL from llmConfig
      const llmConfig = await this.configLoader.load();
      const providerConfig = llmConfig.providers[config.provider];
      if (providerConfig?.baseUrl) {
        baseUrl = providerConfig.baseUrl;
      }
    }

    this.provider = config.provider;
    this.model = config.model;
    this.baseUrl = baseUrl;

    // Use embeddings-specific API key if provided
    if (process.env.MIMIR_EMBEDDINGS_API_KEY) {
      this.apiKey = process.env.MIMIR_EMBEDDINGS_API_KEY;
    } else if (this.provider === 'copilot' || this.provider === 'openai') {
      // Fallback: For OpenAI/Copilot, use dummy key
      this.apiKey = 'dummy-key-not-used';
    }

    this.enabled = true;

    console.log(`✅ Vector embeddings enabled: ${config.provider}/${config.model}`);
    console.log(` Base URL: ${this.baseUrl}`);
    console.log(` Dimensions: ${config.dimensions || 'auto'}`);
  }

  /**
   * Check if vector embeddings are enabled and ready to use
   *
   * Returns the `enabled` flag set by `initialize()`. Use this before calling
   * embedding generation methods, which throw when the service is disabled.
   *
   * @returns True if embeddings enabled, false otherwise
   *
   * @example
   * // Check before generating embeddings
   * if (embeddingsService.isEnabled()) {
   *   const result = await embeddingsService.generateEmbedding('search query');
   * } else {
   *   console.log('Falling back to keyword search');
   * }
   *
   * @example
   * // Conditional feature availability
   * const features = {
   *   semanticSearch: embeddingsService.isEnabled(),
   *   keywordSearch: true,
   *   hybridSearch: embeddingsService.isEnabled()
   * };
   * console.log('Available features:', features);
   */
  isEnabled(): boolean {
    return this.enabled;
  }

  /** True for providers that are called through the OpenAI-style embeddings request. */
  private usesOpenAICompatibleApi(): boolean {
    return (
      this.provider === 'copilot' || this.provider === 'openai' || this.provider === 'llama.cpp'
    );
  }

  /** Embed one already-chunked piece of text with the configured provider. */
  private embedText(text: string): Promise<EmbeddingResult> {
    return this.usesOpenAICompatibleApi()
      ? this.generateOpenAIEmbedding(text)
      : this.generateOllamaEmbedding(text);
  }

  /**
   * Pick the end index (exclusive) of the chunk that starts at `start`.
   *
   * Prefers, in order, a paragraph break, a sentence break, then a word break,
   * but only when the break lies in the second half of the window so chunks do
   * not become too small. The result is always > `start` and <= `text.length`.
   */
  private findChunkEnd(text: string, start: number, chunkSize: number): number {
    const hardEnd = start + chunkSize;
    if (hardEnd >= text.length) {
      // Last chunk: clamp so reported offsets never point past the text
      return text.length;
    }

    const minBreak = start + chunkSize / 2;

    const paragraphBreak = text.lastIndexOf('\n\n', hardEnd);
    if (paragraphBreak > minBreak) {
      return paragraphBreak + 2;
    }

    const sentenceBreak = text.lastIndexOf('. ', hardEnd);
    if (sentenceBreak > minBreak) {
      return sentenceBreak + 2;
    }

    const wordBreak = text.lastIndexOf(' ', hardEnd);
    if (wordBreak > minBreak) {
      return wordBreak + 1;
    }

    return hardEnd;
  }

  /**
   * Split text into overlapping chunks (sliding window with overlap).
   *
   * Shared by `chunkText()` and `generateChunkEmbeddings()` so both use the
   * same boundaries. Chunk text is trimmed; offsets refer to the untrimmed
   * window in the original string. Whitespace-only windows are skipped.
   */
  private splitIntoChunks(text: string): TextChunk[] {
    const chunkSize = getChunkSize();
    const chunkOverlap = getChunkOverlap();
    const chunks: TextChunk[] = [];

    let start = 0;
    while (start < text.length) {
      const end = this.findChunkEnd(text, start, chunkSize);
      const chunkText = text.substring(start, end).trim();

      if (chunkText.length > 0) {
        chunks.push({ text: chunkText, startOffset: start, endOffset: end });
      }

      // The window reached the end of the text. Stepping back by the overlap
      // here would only re-emit a tail already contained in this chunk.
      if (end >= text.length) {
        break;
      }

      // Move start position with overlap for context continuity, but always
      // make forward progress: if the overlap is >= the chunk just produced,
      // continue without overlap instead of looping forever.
      const next = end - chunkOverlap;
      start = next > start ? next : end;
    }

    return chunks;
  }

  /**
   * Chunk text into smaller pieces for embedding
   * Uses sliding window with overlap to maintain context
   *
   * @returns `[text]` unchanged when it fits in one chunk, otherwise the
   *          trimmed, non-empty chunk strings
   */
  private chunkText(text: string): string[] {
    if (text.length <= getChunkSize()) {
      return [text];
    }
    return this.splitIntoChunks(text).map(chunk => chunk.text);
  }

  /**
   * Generate a single averaged embedding vector for text
   *
   * For large texts, automatically chunks and averages embeddings.
   * Returns a single vector representing the entire text. Chunks whose
   * embedding fails are logged and left out of the average.
   *
   * @deprecated Use generateChunkEmbeddings() for better search accuracy.
   * Industry standard is to store separate embeddings per chunk.
   *
   * @param text - Text to generate embedding for
   * @returns Embedding result with vector, dimensions, and model info
   * @throws {Error} If embeddings disabled, text is empty, or every chunk fails
   *
   * @example
   * // Generate embedding for search query
   * const result = await embeddingsService.generateEmbedding(
   *   'How do I implement authentication?'
   * );
   * console.log(`Embedding dimensions: ${result.dimensions}`);
   * console.log(`Model: ${result.model}`);
   * // Use result.embedding for similarity search
   *
   * @example
   * // Generate embedding for short content
   * const memory = await embeddingsService.generateEmbedding(
   *   'Use JWT tokens with 15-minute expiry and refresh tokens'
   * );
   * // Store memory.embedding in database for semantic search
   *
   * @example
   * // Handle large text (auto-chunked and averaged)
   * const longDoc = fs.readFileSync('documentation.md', 'utf-8');
   * const result = await embeddingsService.generateEmbedding(longDoc);
   * // Returns single averaged embedding for entire document
   */
  async generateEmbedding(text: string): Promise<EmbeddingResult> {
    if (!this.enabled) {
      throw new Error('Embeddings not enabled. Check embeddings.enabled in config');
    }

    if (!text || text.trim().length === 0) {
      throw new Error('Cannot generate embedding for empty text');
    }

    // Chunk text if it's too large
    const chunks = this.chunkText(text);

    // If only one chunk, process normally
    const [onlyChunk] = chunks;
    if (chunks.length === 1 && onlyChunk !== undefined) {
      return this.embedText(onlyChunk);
    }

    // For multiple chunks, generate embeddings for each and average them
    console.log(`📄 Chunking large text into ${chunks.length} pieces`);

    const embeddings: number[][] = [];
    for (const [i, chunk] of chunks.entries()) {
      try {
        const result = await this.embedText(chunk);
        embeddings.push(result.embedding);

        // Small delay between chunks to avoid overwhelming the API
        if (i < chunks.length - 1) {
          await sleep(50);
        }
      } catch (error: unknown) {
        console.warn(
          `⚠️ Failed to generate embedding for chunk ${i + 1}/${chunks.length}: ${getErrorMessage(error)}`
        );
        // Continue with other chunks
      }
    }

    const firstEmbedding = embeddings[0];
    if (firstEmbedding === undefined) {
      throw new Error('Failed to generate embeddings for all chunks');
    }

    // Average the embeddings component-wise (dimension count taken from the first vector)
    const dimensions = firstEmbedding.length;
    const avgEmbedding = Array.from(
      { length: dimensions },
      (_, i) => embeddings.reduce((sum, emb) => sum + (emb[i] ?? 0), 0) / embeddings.length
    );

    return {
      embedding: avgEmbedding,
      dimensions: dimensions,
      model: this.model,
    };
  }

  /**
   * Embed one chunk, retrying at chunk level on model-loading / fetch failures.
   *
   * The provider methods already retry internally; this outer loop exists
   * because model loading can outlast those retries.
   *
   * @param chunkText - Trimmed chunk text
   * @param chunkIndex - Index used in log messages only
   * @returns The embedding, or `null` when the chunk is given up on (a warning
   *          is logged and the caller skips the chunk)
   */
  private async embedChunkWithRetries(
    chunkText: string,
    chunkIndex: number
  ): Promise<EmbeddingResult | null> {
    const maxChunkRetries = getMaxRetries();

    for (let attempt = 0; attempt <= maxChunkRetries; attempt++) {
      try {
        return await this.embedText(chunkText);
      } catch (error: unknown) {
        const message = getErrorMessage(error);
        const isModelLoading = isModelLoadingMessage(message);
        const isFetchFailed = message.includes('fetch failed');
        const isRetryable = isModelLoading || isFetchFailed;

        if (!isRetryable || attempt >= maxChunkRetries) {
          console.warn(`⚠️ Failed to generate embedding for chunk ${chunkIndex}: ${message}`);
          // Skip this chunk and continue with others
          return null;
        }

        // Wait longer for model loading at chunk level (models may need time to fully load)
        const chunkDelay = (isModelLoading ? 5000 : 2000) * (attempt + 1);
        console.warn(
          `⚠️ Chunk ${chunkIndex} failed (${isModelLoading ? 'model loading' : 'fetch failed'}), ` +
            `retry ${attempt + 1}/${maxChunkRetries} in ${chunkDelay}ms...`
        );
        await sleep(chunkDelay);
      }
    }

    return null;
  }

  /**
   * Generate separate embeddings for each text chunk (Industry Standard)
   *
   * Splits large text into overlapping chunks and generates individual embeddings.
   * Each chunk becomes a separate searchable unit, enabling precise retrieval.
   * This is the recommended approach for file indexing and RAG systems.
   *
   * Chunking strategy:
   * - Default chunk size: 768 characters (configurable via MIMIR_EMBEDDINGS_CHUNK_SIZE)
   * - Overlap: 10 characters (configurable via MIMIR_EMBEDDINGS_CHUNK_OVERLAP)
   * - Smart boundaries: Breaks at paragraphs, sentences, or words
   *
   * Chunks that still fail after retries are skipped; `chunkIndex` counts only
   * the chunks that were embedded successfully. `startOffset`/`endOffset`
   * describe the untrimmed window in `text`, with `endOffset <= text.length`.
   *
   * @param text - Text to chunk and embed
   * @returns Array of chunk embeddings with text, offsets, and metadata
   * @throws {Error} If embeddings disabled, text is empty, or no chunk could be embedded
   *
   * @example
   * // Index a source code file
   * const fileContent = fs.readFileSync('src/auth.ts', 'utf-8');
   * const chunks = await embeddingsService.generateChunkEmbeddings(fileContent);
   *
   * for (const chunk of chunks) {
   *   await db.createNode('file_chunk', {
   *     text: chunk.text,
   *     embedding: chunk.embedding,
   *     chunkIndex: chunk.chunkIndex,
   *     startOffset: chunk.startOffset,
   *     endOffset: chunk.endOffset
   *   });
   * }
   * console.log(`Indexed ${chunks.length} chunks`);
   *
   * @example
   * // Index documentation with metadata
   * const docContent = fs.readFileSync('README.md', 'utf-8');
   * const metadata = formatMetadataForEmbedding({
   *   name: 'README.md',
   *   relativePath: 'README.md',
   *   language: 'markdown',
   *   extension: '.md'
   * });
   * const enrichedContent = metadata + docContent;
   * const chunks = await embeddingsService.generateChunkEmbeddings(enrichedContent);
   * // Each chunk now includes file context for better search
   *
   * @example
   * // Search within specific chunks
   * const query = 'authentication implementation';
   * const queryEmbedding = await embeddingsService.generateEmbedding(query);
   *
   * // Find most similar chunks
   * const results = await vectorSearch(queryEmbedding.embedding, {
   *   type: 'file_chunk',
   *   limit: 5
   * });
   *
   * for (const result of results) {
   *   console.log(`Chunk ${result.chunkIndex}: ${result.text.substring(0, 100)}...`);
   *   console.log(`Similarity: ${result.similarity}`);
   * }
   */
  async generateChunkEmbeddings(text: string): Promise<ChunkEmbeddingResult[]> {
    if (!this.enabled) {
      throw new Error('Embeddings not enabled. Check embeddings.enabled in config');
    }

    if (!text || text.trim().length === 0) {
      throw new Error('Cannot generate embeddings for empty text');
    }

    const pieces = this.splitIntoChunks(text);
    const chunks: ChunkEmbeddingResult[] = [];

    for (const [position, piece] of pieces.entries()) {
      const result = await this.embedChunkWithRetries(piece.text, chunks.length);
      if (result === null) {
        continue;
      }

      chunks.push({
        text: piece.text,
        embedding: result.embedding,
        dimensions: result.dimensions,
        model: result.model,
        startOffset: piece.startOffset,
        endOffset: piece.endOffset,
        chunkIndex: chunks.length,
      });

      // Small delay between chunks to avoid overwhelming the API
      if (position < pieces.length - 1) {
        await sleep(50);
      }
    }

    if (chunks.length === 0) {
      throw new Error('Failed to generate embeddings for any chunks');
    }

    console.log(`📄 Generated ${chunks.length} chunk embeddings for text (${text.length} chars)`);
    return chunks;
  }

  /**
   * Retry wrapper for embedding generation with exponential backoff
   * Handles EOF errors, 503 (model loading), and other transient failures from Ollama/llama.cpp
   *
   * @param fn - Operation to attempt
   * @param operation - Label used in log messages
   * @param maxRetries - Number of retries after the first attempt
   * @returns The value `fn` resolves to
   * @throws The last error from `fn` when it is not retryable or retries are exhausted
   */
  private async retryWithBackoff<T>(
    fn: () => Promise<T>,
    operation: string,
    maxRetries: number = getMaxRetries()
  ): Promise<T> {
    let lastError: unknown = null;
    const modelLoadingBaseDelay = getModelLoadingBaseDelay();
    const maxDelay = getMaxDelay();

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await fn();
      } catch (error: unknown) {
        lastError = error;
        const message = getErrorMessage(error);

        // Check for retryable errors
        const isEOFError =
          message.includes('EOF') ||
          message.includes('unexpected end') ||
          getErrorCode(error) === 'ECONNRESET';
        const isModelLoading = isModelLoadingMessage(message);
        const isFetchFailed = message.includes('fetch failed');
        const isRetryable = isEOFError || isModelLoading || isFetchFailed;

        // Only retry on transient errors, not on other errors
        if (!isRetryable || attempt === maxRetries) {
          throw error;
        }

        // Exponential backoff with much longer delays for model loading
        // Model loading can take 30+ seconds, so we use longer waits:
        // Model loading: 5s, 10s, 20s, 30s, 30s (with default 5s base)
        // Fetch failed: 3s, 6s, 12s, 24s, 30s (connection issues may need time)
        // EOF errors: 1s, 2s, 4s, 8s, 16s
        let baseDelay: number;
        if (isModelLoading) {
          baseDelay = modelLoadingBaseDelay;
        } else if (isFetchFailed) {
          baseDelay = 3000;
        } else {
          baseDelay = 1000;
        }
        const delayMs = Math.min(baseDelay * Math.pow(2, attempt), maxDelay);

        const errorType = isModelLoading ? 'model loading' : isFetchFailed ? 'fetch failed' : 'EOF';
        console.warn(
          `⚠️ ${operation} failed with ${errorType} error (attempt ${attempt + 1}/${maxRetries + 1}). ` +
            `Retrying in ${Math.round(delayMs / 1000)}s...`
        );

        await sleep(delayMs);
      }
    }

    throw lastError || new Error(`${operation} failed after ${maxRetries + 1} attempts`);
  }

  /**
   * Generate embedding using Ollama
   *
   * POSTs `{ model, prompt }` to `<base URL><path>`, where the path defaults to
   * `/api/embeddings` and can be overridden with MIMIR_EMBEDDINGS_API_PATH.
   */
  private async generateOllamaEmbedding(text: string): Promise<EmbeddingResult> {
    // Sanitize text to remove invalid Unicode before sending to API
    const sanitizedText = sanitizeTextForEmbedding(text);

    return this.retryWithBackoff(async () => {
      try {
        // Simple concatenation: base URL + path
        const baseUrl = process.env.MIMIR_EMBEDDINGS_API || this.baseUrl || 'http://localhost:11434';
        const embeddingsPath = process.env.MIMIR_EMBEDDINGS_API_PATH || '/api/embeddings';
        const embeddingsUrl = `${baseUrl}${embeddingsPath}`;

        const apiKey = process.env.MIMIR_EMBEDDINGS_API_KEY;
        const headers: Record<string, string> = {
          'Content-Type': 'application/json',
        };
        if (apiKey && apiKey !== 'dummy-key') {
          headers['Authorization'] = `Bearer ${apiKey}`;
        }

        const fetchOptions = createSecureFetchOptions(embeddingsUrl, {
          method: 'POST',
          headers,
          body: JSON.stringify({
            model: this.model,
            prompt: sanitizedText,
          }),
        });

        const response = await fetch(embeddingsUrl, fetchOptions);

        if (!response.ok) {
          const errorText = await response.text();
          throw new Error(`Ollama API error (${response.status}): ${errorText}`);
        }

        const data: unknown = await response.json();
        const embedding = isRecord(data) ? data.embedding : undefined;

        if (!Array.isArray(embedding)) {
          throw new Error('Invalid response from Ollama: missing embedding array');
        }

        return {
          embedding,
          dimensions: embedding.length,
          model: this.model,
        };
      } catch (error: unknown) {
        // NOTE(review): with Node's built-in fetch the code may be carried on
        // error.cause rather than on the error itself - confirm. Not widened
        // here because rewriting the error would stop retryWithBackoff from
        // recognising it as a retryable 'fetch failed'.
        if (getErrorCode(error) === 'ECONNREFUSED') {
          throw new Error(`Cannot connect to Ollama at ${this.baseUrl}. Make sure Ollama is running.`);
        }
        throw error;
      }
    }, `Ollama embedding (${text.substring(0, 50)}...)`);
  }

  /**
   * Generate embedding using OpenAI/Copilot/llama.cpp API (all use OpenAI-compatible format)
   *
   * POSTs `{ model, input }` to `<base URL><path>`, where the path defaults to
   * `/v1/embeddings` and can be overridden with MIMIR_EMBEDDINGS_API_PATH.
   * Input that starts with `data:image/` is sent as an `image_url` item.
   */
  private async generateOpenAIEmbedding(text: string): Promise<EmbeddingResult> {
    // Sanitize text to remove invalid Unicode before sending to API
    const sanitizedText = sanitizeTextForEmbedding(text);

    return this.retryWithBackoff(async () => {
      try {
        // Simple concatenation: base URL + path
        const baseUrl = process.env.MIMIR_EMBEDDINGS_API || this.baseUrl || 'http://localhost:11434';
        const embeddingsPath = process.env.MIMIR_EMBEDDINGS_API_PATH || '/v1/embeddings';
        const embeddingsUrl = `${baseUrl}${embeddingsPath}`;

        const apiKey = process.env.MIMIR_EMBEDDINGS_API_KEY || this.apiKey;

        // Build headers - llama.cpp doesn't need Authorization header
        const headers: Record<string, string> = {
          'Content-Type': 'application/json',
        };

        // Only add Authorization if we have a key and not llama.cpp
        // NOTE(review): the sentinel compared here is 'dummy-key' while the
        // default placeholder is 'dummy-key-not-used', so the placeholder IS
        // sent as a bearer token. Left unchanged in case the proxy expects a
        // header - confirm which is intended.
        if (this.provider !== 'llama.cpp' && apiKey && apiKey !== 'dummy-key') {
          headers['Authorization'] = `Bearer ${apiKey}`;
        }

        // Detect if input is a data URL (image) for multimodal embeddings
        const isDataURL = sanitizedText.startsWith('data:image/');

        // For multimodal embeddings, send as array with image object
        // For text embeddings, send as string
        const input = isDataURL
          ? [{ type: 'image_url', image_url: { url: sanitizedText } }]
          : sanitizedText;

        const requestBody = JSON.stringify({
          model: this.model,
          input: input,
        });

        const fetchOptions = createSecureFetchOptions(embeddingsUrl, {
          method: 'POST',
          headers,
          body: requestBody,
        });

        const response = await fetch(embeddingsUrl, fetchOptions);

        if (!response.ok) {
          const errorText = await response.text();
          throw new Error(`OpenAI API error (${response.status}): ${errorText}`);
        }

        const data: unknown = await response.json();
        const items = isRecord(data) ? data.data : undefined;

        if (!Array.isArray(items) || items.length === 0) {
          throw new Error('Invalid response from OpenAI: missing data array');
        }

        const first: unknown = items[0];
        const embedding = isRecord(first) ? first.embedding : undefined;

        if (!Array.isArray(embedding)) {
          throw new Error('Invalid response from OpenAI: embedding is not an array');
        }

        return {
          embedding: embedding,
          dimensions: embedding.length,
          model: this.model,
        };
      } catch (error: unknown) {
        // NOTE(review): see generateOllamaEmbedding - same caveat about error.cause.
        if (getErrorCode(error) === 'ECONNREFUSED') {
          throw new Error(
            `Cannot connect to OpenAI API at ${this.baseUrl}. Make sure copilot-api is running.`
          );
        }
        throw error;
      }
    }, `OpenAI embedding (${text.substring(0, 50)}...)`);
  }

  /**
   * Generate embeddings for multiple texts (batch processing)
   *
   * Texts are processed one after another. Empty or whitespace-only entries
   * produce a placeholder with an empty vector and `dimensions: 0`, so the
   * result array lines up index-for-index with `texts`.
   *
   * @throws {Error} If embeddings are disabled, or if any non-empty text fails
   */
  async generateEmbeddings(texts: string[]): Promise<EmbeddingResult[]> {
    if (!this.enabled) {
      throw new Error('Embeddings not enabled');
    }

    const results: EmbeddingResult[] = [];

    for (const text of texts) {
      if (text && text.trim().length > 0) {
        const result = await this.generateEmbedding(text);
        results.push(result);
      } else {
        // Push empty placeholder for empty texts
        results.push({
          embedding: [],
          dimensions: 0,
          model: this.model,
        });
      }
    }

    return results;
  }

  /**
   * Calculate cosine similarity between two embeddings
   *
   * @returns Similarity in [-1, 1]; 0 when either vector has zero magnitude
   *          (including the empty placeholders from `generateEmbeddings`), so
   *          callers never receive NaN from a division by zero
   * @throws {Error} If the vectors have different lengths
   */
  cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) {
      throw new Error('Embeddings must have same dimensions');
    }

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < a.length; i++) {
      const x = a[i] ?? 0;
      const y = b[i] ?? 0;
      dotProduct += x * y;
      normA += x * x;
      normB += y * y;
    }

    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    return denominator === 0 ? 0 : dotProduct / denominator;
  }

  /**
   * Find most similar embeddings using cosine similarity
   *
   * @param query - Query vector
   * @param candidates - Vectors to rank, each with caller-defined metadata
   * @param topK - Maximum number of results to return
   * @returns Up to `topK` entries sorted by similarity, highest first
   * @throws {Error} If any candidate has a different dimension count than `query`
   */
  findMostSimilar<T>(
    query: number[],
    candidates: Array<{ embedding: number[]; metadata: T }>,
    topK: number = 5
  ): Array<{ similarity: number; metadata: T }> {
    const similarities = candidates.map(candidate => ({
      similarity: this.cosineSimilarity(query, candidate.embedding),
      metadata: candidate.metadata,
    }));

    // Sort by similarity descending (sorts the freshly mapped array, not the input)
    similarities.sort((a, b) => b.similarity - a.similarity);

    return similarities.slice(0, topK);
  }

  /**
   * Verify embedding model is available
   *
   * @returns False when the service is disabled or the model cannot be confirmed
   */
  async verifyModel(): Promise<boolean> {
    if (!this.enabled) {
      return false;
    }

    // NOTE(review): 'llama.cpp' is treated as OpenAI-compatible when generating
    // embeddings but is routed to the Ollama check here - confirm whether that
    // is intended before changing it.
    if (this.provider === 'copilot' || this.provider === 'openai') {
      return this.verifyOpenAIModel();
    } else {
      return this.verifyOllamaModel();
    }
  }

  /**
   * Verify Ollama model is available
   *
   * Queries `<base URL>/api/tags` and looks for a model whose name contains
   * the configured model name. Any failure is logged and reported as false.
   */
  private async verifyOllamaModel(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/api/tags`);
      if (!response.ok) {
        return false;
      }

      const data: unknown = await response.json();
      const models: unknown[] = isRecord(data) && Array.isArray(data.models) ? data.models : [];

      const modelExists = models.some(
        m => isRecord(m) && typeof m.name === 'string' && m.name.includes(this.model)
      );

      if (!modelExists) {
        console.warn(`⚠️ Embeddings model '${this.model}' not found in Ollama`);
        console.warn(` Run: ollama pull ${this.model}`);
      }

      return modelExists;
    } catch (error: unknown) {
      console.error('Error verifying Ollama model:', error);
      return false;
    }
  }

  /**
   * Verify OpenAI/Copilot model is available
   *
   * Queries `<base URL>/models` with the configured API key and looks for a
   * model whose id contains the configured model name. Any failure is logged
   * and reported as false.
   */
  private async verifyOpenAIModel(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/models`, {
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
        },
      });

      if (!response.ok) {
        console.warn(`⚠️ Cannot verify OpenAI model availability (${response.status})`);
        console.warn(` Make sure copilot-api is running: copilot-api start`);
        return false;
      }

      const data: unknown = await response.json();
      const models: unknown[] = isRecord(data) && Array.isArray(data.data) ? data.data : [];

      // Check if model exists in available models (an exact id match is a substring match too)
      const modelExists = models.some(
        m => isRecord(m) && typeof m.id === 'string' && m.id.includes(this.model)
      );

      if (!modelExists) {
        console.warn(`⚠️ Embeddings model '${this.model}' not found in available models`);
        console.warn(` Available embedding models: text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large`);
      }

      return modelExists;
    } catch (error: unknown) {
      console.error('Error verifying OpenAI model:', error);
      return false;
    }
  }
}

Latest Blog Posts

What Is Context Bloat in MCP?
By Om-Shree-0709 on December 16, 2025.
mcp
Context Bloat
MCP Moves to the Linux Foundation: Neutral Stewardship for Agentic Infrastructure
By Om-Shree-0709 on December 15, 2025.
mcp
anthropic
Linux Foundation
Code Execution with MCP: Architecting Agentic Efficiency
By Om-Shree-0709 on December 14, 2025.
mcp
Token bloat

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/orneryd/Mimir'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.