/**
* @file src/indexing/EmbeddingsService.ts
* @description Vector embeddings service supporting multiple providers
*
* Supports:
* - Ollama (local models like nomic-embed-text)
* - OpenAI/Copilot (text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large)
*
* Feature flag controlled via LLM configuration.
*/
import { LLMConfigLoader } from '../config/LLMConfigLoader.js';
import { createSecureFetchOptions } from '../utils/fetch-helper.js';
/**
* Sanitize text for embedding API by removing/replacing invalid Unicode
*
* Handles:
* - Lone surrogate code units (U+D800-U+DFFF) that cause JSON parse errors
* - Null bytes and other problematic control characters
* - Preserves valid emojis, CJK characters, and all proper Unicode
*
* The embedding API error "surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"
* occurs when lone surrogate pairs exist in text (often from corrupted files or
* improper string handling).
*
* @param text - Text to sanitize
* @returns Sanitized text safe for JSON serialization and embedding APIs
*/
export function sanitizeTextForEmbedding(text: string): string {
// Fast path: if no potential issues, return as-is
// Check for control chars (0x00-0x08, 0x0B, 0x0E-0x1F) or surrogate range (0xD800-0xDFFF)
let needsSanitization = false;
for (let i = 0; i < Math.min(text.length, 1000); i++) {
const code = text.charCodeAt(i);
if ((code >= 0x00 && code <= 0x08) || code === 0x0B || (code >= 0x0E && code <= 0x1F) ||
(code >= 0xD800 && code <= 0xDFFF)) {
needsSanitization = true;
break;
}
}
// If sample looks clean, do a full check only if the text is short
if (!needsSanitization && text.length <= 1000) {
return text;
}
// For longer texts or those with issues, do full sanitization
const result: string[] = [];
for (let i = 0; i < text.length; i++) {
const code = text.charCodeAt(i);
// Skip problematic control characters (keep tab, newline, carriage return, form feed)
if ((code >= 0x00 && code <= 0x08) || code === 0x0B || (code >= 0x0E && code <= 0x1F)) {
result.push(' '); // Replace with space
continue;
}
// Handle surrogate pairs
if (code >= 0xD800 && code <= 0xDBFF) {
// High surrogate - check if followed by low surrogate
const nextCode = i + 1 < text.length ? text.charCodeAt(i + 1) : 0;
if (nextCode >= 0xDC00 && nextCode <= 0xDFFF) {
// Valid surrogate pair - keep both
result.push(text[i], text[i + 1]);
i++; // Skip the low surrogate
} else {
// Lone high surrogate - replace with Unicode replacement character
result.push('\uFFFD');
}
continue;
}
if (code >= 0xDC00 && code <= 0xDFFF) {
// Lone low surrogate (without preceding high surrogate) - replace
result.push('\uFFFD');
continue;
}
// All other characters are safe
result.push(text[i]);
}
return result.join('');
}
export interface EmbeddingResult {
embedding: number[];
dimensions: number;
model: string;
}
export interface ChunkEmbeddingResult {
text: string;
embedding: number[];
dimensions: number;
model: string;
startOffset: number;
endOffset: number;
chunkIndex: number;
}
export interface TextChunk {
text: string;
startOffset: number;
endOffset: number;
}
// Metadata interface for file context enrichment
export interface FileMetadata {
name: string;
relativePath: string;
language: string;
extension: string;
directory?: string;
sizeBytes?: number;
}
/**
* Format file metadata as natural language for embedding
* This enriches content with contextual information about the file itself
* enabling semantic search to match on filenames, paths, and file types
*
* @example
* formatMetadataForEmbedding({
* name: 'auth-api.ts',
* relativePath: 'src/api/auth-api.ts',
* language: 'typescript',
* extension: '.ts',
* directory: 'src/api',
* sizeBytes: 15360
* })
* // Returns: "This is a typescript file named auth-api.ts located at src/api/auth-api.ts in the src/api directory."
*/
export function formatMetadataForEmbedding(metadata: FileMetadata): string {
const parts: string[] = [];
// Core identification - always include
if (metadata.language) {
parts.push(`This is a ${metadata.language} file`);
} else {
parts.push(`This is a file`);
}
if (metadata.name) {
parts.push(`named ${metadata.name}`);
}
// Location context
if (metadata.relativePath) {
parts.push(`located at ${metadata.relativePath}`);
}
// Directory context (if different from what's in path)
if (metadata.directory && metadata.directory !== '.') {
parts.push(`in the ${metadata.directory} directory`);
}
// Join with natural language flow
const metadataText = parts.join(' ') + '.';
// Add separator for clarity
return metadataText + '\n\n';
}
// Chunking configuration based on model context limits
// Model limits: all-minilm (512 tokens), nomic-embed-text (2048 tokens), nomic-embed-text (512 tokens)
// Configurable via environment variables for flexibility
const getChunkSize = () => parseInt(process.env.MIMIR_EMBEDDINGS_CHUNK_SIZE || '768', 10);
const getChunkOverlap = () => parseInt(process.env.MIMIR_EMBEDDINGS_CHUNK_OVERLAP || '10', 10);
// Default 5 retries to handle model loading which can take 30+ seconds
const getMaxRetries = () => parseInt(process.env.MIMIR_EMBEDDINGS_MAX_RETRIES || '5', 10);
// Initial delay for model loading retries (ms) - model loading can take 30+ seconds
const getModelLoadingBaseDelay = () => parseInt(process.env.MIMIR_EMBEDDINGS_MODEL_LOADING_DELAY || '5000', 10);
// Maximum delay cap (ms)
const getMaxDelay = () => parseInt(process.env.MIMIR_EMBEDDINGS_MAX_DELAY || '30000', 10);
export class EmbeddingsService {
private configLoader: LLMConfigLoader;
public enabled: boolean = false;
private provider: string = 'ollama';
private baseUrl: string = 'http://localhost:11434';
private model: string = 'nomic-embed-text';
private apiKey: string = 'dummy-key-not-used';
constructor() {
this.configLoader = LLMConfigLoader.getInstance();
}
/**
* Initialize the embeddings service with provider configuration
*
* Loads embeddings configuration from LLM config and sets up the provider
* (Ollama, OpenAI, or Copilot). If embeddings are disabled in config or
* initialization fails, the service gracefully falls back to disabled state.
*
* @returns Promise that resolves when initialization is complete
*
* @example
* // Initialize with Ollama (local embeddings)
* const embeddingsService = new EmbeddingsService();
* await embeddingsService.initialize();
* if (embeddingsService.isEnabled()) {
* console.log('Using local Ollama embeddings');
* }
*
* @example
* // Initialize with OpenAI embeddings
* // Set MIMIR_EMBEDDINGS_API_KEY=sk-... in environment
* const embeddingsService = new EmbeddingsService();
* await embeddingsService.initialize();
* console.log('Embeddings provider:', embeddingsService.isEnabled());
*
* @example
* // Handle disabled embeddings gracefully
* const embeddingsService = new EmbeddingsService();
* await embeddingsService.initialize();
* if (!embeddingsService.isEnabled()) {
* console.log('Embeddings disabled, using full-text search only');
* }
*/
async initialize(): Promise<void> {
const config = await this.configLoader.getEmbeddingsConfig();
if (!config || !config.enabled) {
this.enabled = false;
console.log('ℹ️ Vector embeddings disabled');
return;
}
this.enabled = true;
this.provider = config.provider;
this.model = config.model;
// Use embeddings-specific API URL if provided, otherwise fall back to LLM provider config
if (process.env.MIMIR_EMBEDDINGS_API) {
this.baseUrl = process.env.MIMIR_EMBEDDINGS_API;
} else {
// Fallback: Get provider base URL from llmConfig
const llmConfig = await this.configLoader.load();
const providerConfig = llmConfig.providers[config.provider];
if (providerConfig?.baseUrl) {
this.baseUrl = providerConfig.baseUrl;
}
}
// Use embeddings-specific API key if provided
if (process.env.MIMIR_EMBEDDINGS_API_KEY) {
this.apiKey = process.env.MIMIR_EMBEDDINGS_API_KEY;
} else if (this.provider === 'copilot' || this.provider === 'openai') {
// Fallback: For OpenAI/Copilot, use dummy key
this.apiKey = 'dummy-key-not-used';
}
console.log(`✅ Vector embeddings enabled: ${config.provider}/${config.model}`);
console.log(` Base URL: ${this.baseUrl}`);
console.log(` Dimensions: ${config.dimensions || 'auto'}`);
}
/**
* Check if vector embeddings are enabled and ready to use
*
* Returns true if the service initialized successfully and embeddings
* are configured. Use this before calling embedding generation methods.
*
* @returns True if embeddings enabled, false otherwise
*
* @example
* // Check before generating embeddings
* if (embeddingsService.isEnabled()) {
* const result = await embeddingsService.generateEmbedding('search query');
* } else {
* console.log('Falling back to keyword search');
* }
*
* @example
* // Conditional feature availability
* const features = {
* semanticSearch: embeddingsService.isEnabled(),
* keywordSearch: true,
* hybridSearch: embeddingsService.isEnabled()
* };
* console.log('Available features:', features);
*/
isEnabled(): boolean {
return this.enabled;
}
/**
* Chunk text into smaller pieces for embedding
* Uses sliding window with overlap to maintain context
*/
private chunkText(text: string): string[] {
if (text.length <= getChunkSize()) {
return [text];
}
const chunks: string[] = [];
let start = 0;
while (start < text.length) {
let end = start + getChunkSize();
// If this isn't the last chunk, try to break at a natural boundary
if (end < text.length) {
// Try to break at paragraph boundary
const paragraphBreak = text.lastIndexOf('\n\n', end);
if (paragraphBreak > start + getChunkSize() / 2) {
end = paragraphBreak + 2;
} else {
// Try to break at sentence boundary
const sentenceBreak = text.lastIndexOf('. ', end);
if (sentenceBreak > start + getChunkSize() / 2) {
end = sentenceBreak + 2;
} else {
// Try to break at word boundary
const wordBreak = text.lastIndexOf(' ', end);
if (wordBreak > start + getChunkSize() / 2) {
end = wordBreak + 1;
}
}
}
}
chunks.push(text.substring(start, end).trim());
// Move start position with overlap for context continuity
start = end - getChunkOverlap();
if (start < 0) start = 0;
}
return chunks;
}
/**
* Generate a single averaged embedding vector for text
*
* For large texts, automatically chunks and averages embeddings.
* Returns a single vector representing the entire text.
*
* @deprecated Use generateChunkEmbeddings() for better search accuracy.
* Industry standard is to store separate embeddings per chunk.
*
* @param text - Text to generate embedding for
* @returns Embedding result with vector, dimensions, and model info
* @throws {Error} If embeddings disabled or text is empty
*
* @example
* // Generate embedding for search query
* const result = await embeddingsService.generateEmbedding(
* 'How do I implement authentication?'
* );
* console.log(`Embedding dimensions: ${result.dimensions}`);
* console.log(`Model: ${result.model}`);
* // Use result.embedding for similarity search
*
* @example
* // Generate embedding for short content
* const memory = await embeddingsService.generateEmbedding(
* 'Use JWT tokens with 15-minute expiry and refresh tokens'
* );
* // Store memory.embedding in database for semantic search
*
* @example
* // Handle large text (auto-chunked and averaged)
* const longDoc = fs.readFileSync('documentation.md', 'utf-8');
* const result = await embeddingsService.generateEmbedding(longDoc);
* // Returns single averaged embedding for entire document
*/
async generateEmbedding(text: string): Promise<EmbeddingResult> {
if (!this.enabled) {
throw new Error('Embeddings not enabled. Check embeddings.enabled in config');
}
if (!text || text.trim().length === 0) {
throw new Error('Cannot generate embedding for empty text');
}
// Chunk text if it's too large
const chunks = this.chunkText(text);
// If only one chunk, process normally
if (chunks.length === 1) {
if (this.provider === 'copilot' || this.provider === 'openai' || this.provider === 'llama.cpp') {
return this.generateOpenAIEmbedding(text);
} else {
return this.generateOllamaEmbedding(text);
}
}
// For multiple chunks, generate embeddings for each and average them
console.log(`📄 Chunking large text into ${chunks.length} pieces`);
const embeddings: number[][] = [];
for (let i = 0; i < chunks.length; i++) {
try {
let result: EmbeddingResult;
if (this.provider === 'copilot' || this.provider === 'openai' || this.provider === 'llama.cpp') {
result = await this.generateOpenAIEmbedding(chunks[i]);
} else {
result = await this.generateOllamaEmbedding(chunks[i]);
}
embeddings.push(result.embedding);
// Small delay between chunks to avoid overwhelming the API
if (i < chunks.length - 1) {
await new Promise(resolve => setTimeout(resolve, 50));
}
} catch (error: any) {
console.warn(`⚠️ Failed to generate embedding for chunk ${i + 1}/${chunks.length}: ${error.message}`);
// Continue with other chunks
}
}
if (embeddings.length === 0) {
throw new Error('Failed to generate embeddings for all chunks');
}
// Average the embeddings
const dimensions = embeddings[0].length;
const avgEmbedding = new Array(dimensions).fill(0);
for (const emb of embeddings) {
for (let i = 0; i < dimensions; i++) {
avgEmbedding[i] += emb[i];
}
}
for (let i = 0; i < dimensions; i++) {
avgEmbedding[i] /= embeddings.length;
}
return {
embedding: avgEmbedding,
dimensions: dimensions,
model: this.model,
};
}
/**
* Generate separate embeddings for each text chunk (Industry Standard)
*
* Splits large text into overlapping chunks and generates individual embeddings.
* Each chunk becomes a separate searchable unit, enabling precise retrieval.
* This is the recommended approach for file indexing and RAG systems.
*
* Chunking strategy:
* - Default chunk size: 768 characters (configurable via MIMIR_EMBEDDINGS_CHUNK_SIZE)
* - Overlap: 10 characters (configurable via MIMIR_EMBEDDINGS_CHUNK_OVERLAP)
* - Smart boundaries: Breaks at paragraphs, sentences, or words
*
* @param text - Text to chunk and embed
* @returns Array of chunk embeddings with text, offsets, and metadata
* @throws {Error} If embeddings disabled or text is empty
*
* @example
* // Index a source code file
* const fileContent = fs.readFileSync('src/auth.ts', 'utf-8');
* const chunks = await embeddingsService.generateChunkEmbeddings(fileContent);
*
* for (const chunk of chunks) {
* await db.createNode('file_chunk', {
* text: chunk.text,
* embedding: chunk.embedding,
* chunkIndex: chunk.chunkIndex,
* startOffset: chunk.startOffset,
* endOffset: chunk.endOffset
* });
* }
* console.log(`Indexed ${chunks.length} chunks`);
*
* @example
* // Index documentation with metadata
* const docContent = fs.readFileSync('README.md', 'utf-8');
* const metadata = formatMetadataForEmbedding({
* name: 'README.md',
* relativePath: 'README.md',
* language: 'markdown',
* extension: '.md'
* });
* const enrichedContent = metadata + docContent;
* const chunks = await embeddingsService.generateChunkEmbeddings(enrichedContent);
* // Each chunk now includes file context for better search
*
* @example
* // Search within specific chunks
* const query = 'authentication implementation';
* const queryEmbedding = await embeddingsService.generateEmbedding(query);
*
* // Find most similar chunks
* const results = await vectorSearch(queryEmbedding.embedding, {
* type: 'file_chunk',
* limit: 5
* });
*
* for (const result of results) {
* console.log(`Chunk ${result.chunkIndex}: ${result.text.substring(0, 100)}...`);
* console.log(`Similarity: ${result.similarity}`);
* }
*/
async generateChunkEmbeddings(text: string): Promise<ChunkEmbeddingResult[]> {
if (!this.enabled) {
throw new Error('Embeddings not enabled. Check embeddings.enabled in config');
}
if (!text || text.trim().length === 0) {
throw new Error('Cannot generate embeddings for empty text');
}
const chunkSize = getChunkSize();
const chunkOverlap = getChunkOverlap();
const chunks: ChunkEmbeddingResult[] = [];
let start = 0;
let chunkIndex = 0;
while (start < text.length) {
let end = start + chunkSize;
// If this isn't the last chunk, try to break at a natural boundary
if (end < text.length) {
// Try to break at paragraph boundary
const paragraphBreak = text.lastIndexOf('\n\n', end);
if (paragraphBreak > start + chunkSize / 2) {
end = paragraphBreak + 2;
} else {
// Try to break at sentence boundary
const sentenceBreak = text.lastIndexOf('. ', end);
if (sentenceBreak > start + chunkSize / 2) {
end = sentenceBreak + 2;
} else {
// Try to break at word boundary
const wordBreak = text.lastIndexOf(' ', end);
if (wordBreak > start + chunkSize / 2) {
end = wordBreak + 1;
}
}
}
}
const chunkText = text.substring(start, end).trim();
if (chunkText.length > 0) {
// Retry logic for individual chunks with transient errors
let chunkRetries = 0;
const maxChunkRetries = getMaxRetries();
let lastChunkError: Error | null = null;
while (chunkRetries <= maxChunkRetries) {
try {
// Generate embedding for this chunk
let result: EmbeddingResult;
if (this.provider === 'copilot' || this.provider === 'openai' || this.provider === 'llama.cpp') {
result = await this.generateOpenAIEmbedding(chunkText);
} else {
result = await this.generateOllamaEmbedding(chunkText);
}
chunks.push({
text: chunkText,
embedding: result.embedding,
dimensions: result.dimensions,
model: result.model,
startOffset: start,
endOffset: end,
chunkIndex: chunkIndex
});
chunkIndex++;
// Small delay between chunks to avoid overwhelming the API
if (start + chunkSize < text.length) {
await new Promise(resolve => setTimeout(resolve, 50));
}
// Success - break out of retry loop
break;
} catch (error: any) {
lastChunkError = error;
// Check if this is a retryable error at the chunk level
// (The inner methods already retry, but model loading can persist)
const isModelLoading = error.message?.includes('503') ||
error.message?.includes('Loading model') ||
error.message?.includes('unavailable_error');
const isFetchFailed = error.message?.includes('fetch failed');
const isRetryable = isModelLoading || isFetchFailed;
if (!isRetryable || chunkRetries >= maxChunkRetries) {
console.warn(`⚠️ Failed to generate embedding for chunk ${chunkIndex}: ${error.message}`);
// Skip this chunk and continue with others
break;
}
// Wait longer for model loading at chunk level (models may need time to fully load)
const chunkDelay = isModelLoading ? 5000 * (chunkRetries + 1) : 2000 * (chunkRetries + 1);
console.warn(
`⚠️ Chunk ${chunkIndex} failed (${isModelLoading ? 'model loading' : 'fetch failed'}), ` +
`retry ${chunkRetries + 1}/${maxChunkRetries} in ${chunkDelay}ms...`
);
await new Promise(resolve => setTimeout(resolve, chunkDelay));
chunkRetries++;
}
}
}
// Move start position with overlap for context continuity
start = end - chunkOverlap;
if (start < 0) start = 0;
// Prevent infinite loop
if (start >= end) {
start = end;
}
}
if (chunks.length === 0) {
throw new Error('Failed to generate embeddings for any chunks');
}
console.log(`📄 Generated ${chunks.length} chunk embeddings for text (${text.length} chars)`);
return chunks;
}
/**
* Retry wrapper for embedding generation with exponential backoff
* Handles EOF errors, 503 (model loading), and other transient failures from Ollama/llama.cpp
*/
private async retryWithBackoff<T>(
fn: () => Promise<T>,
operation: string,
maxRetries: number = getMaxRetries()
): Promise<T> {
let lastError: Error | null = null;
const modelLoadingBaseDelay = getModelLoadingBaseDelay();
const maxDelay = getMaxDelay();
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
return await fn();
} catch (error: any) {
lastError = error;
// Check for retryable errors
const isEOFError = error.message?.includes('EOF') ||
error.message?.includes('unexpected end') ||
error.code === 'ECONNRESET';
const isModelLoading = error.message?.includes('503') ||
error.message?.includes('Loading model') ||
error.message?.includes('unavailable_error');
const isFetchFailed = error.message?.includes('fetch failed');
const isRetryable = isEOFError || isModelLoading || isFetchFailed;
// Only retry on transient errors, not on other errors
if (!isRetryable || attempt === maxRetries) {
throw error;
}
// Exponential backoff with much longer delays for model loading
// Model loading can take 30+ seconds, so we use longer waits:
// Model loading: 5s, 10s, 20s, 30s, 30s (with default 5s base)
// Fetch failed: 3s, 6s, 12s, 24s, 30s (connection issues may need time)
// EOF errors: 1s, 2s, 4s, 8s, 16s
let baseDelay: number;
if (isModelLoading) {
baseDelay = modelLoadingBaseDelay;
} else if (isFetchFailed) {
baseDelay = 3000;
} else {
baseDelay = 1000;
}
const delayMs = Math.min(baseDelay * Math.pow(2, attempt), maxDelay);
const errorType = isModelLoading ? 'model loading' :
isFetchFailed ? 'fetch failed' : 'EOF';
console.warn(
`⚠️ ${operation} failed with ${errorType} error (attempt ${attempt + 1}/${maxRetries + 1}). ` +
`Retrying in ${Math.round(delayMs / 1000)}s...`
);
await new Promise(resolve => setTimeout(resolve, delayMs));
}
}
throw lastError || new Error(`${operation} failed after ${maxRetries + 1} attempts`);
}
/**
* Generate embedding using Ollama
*/
private async generateOllamaEmbedding(text: string): Promise<EmbeddingResult> {
// Sanitize text to remove invalid Unicode before sending to API
const sanitizedText = sanitizeTextForEmbedding(text);
return this.retryWithBackoff(async () => {
try {
// Simple concatenation: base URL + path
const baseUrl = process.env.MIMIR_EMBEDDINGS_API || this.baseUrl || 'http://localhost:11434';
const embeddingsPath = process.env.MIMIR_EMBEDDINGS_API_PATH || '/api/embeddings';
const embeddingsUrl = `${baseUrl}${embeddingsPath}`;
const apiKey = process.env.MIMIR_EMBEDDINGS_API_KEY;
const headers: Record<string, string> = {
'Content-Type': 'application/json',
};
if (apiKey && apiKey !== 'dummy-key') {
headers['Authorization'] = `Bearer ${apiKey}`;
}
const fetchOptions = createSecureFetchOptions(embeddingsUrl, {
method: 'POST',
headers,
body: JSON.stringify({
model: this.model,
prompt: sanitizedText,
}),
});
const response = await fetch(embeddingsUrl, fetchOptions);
if (!response.ok) {
const errorText = await response.text();
throw new Error(`Ollama API error (${response.status}): ${errorText}`);
}
const data = await response.json();
if (!data.embedding || !Array.isArray(data.embedding)) {
throw new Error('Invalid response from Ollama: missing embedding array');
}
return {
embedding: data.embedding,
dimensions: data.embedding.length,
model: this.model,
};
} catch (error: any) {
if (error.code === 'ECONNREFUSED') {
throw new Error(`Cannot connect to Ollama at ${this.baseUrl}. Make sure Ollama is running.`);
}
throw error;
}
}, `Ollama embedding (${text.substring(0, 50)}...)`);
}
/**
* Generate embedding using OpenAI/Copilot/llama.cpp API (all use OpenAI-compatible format)
*/
private async generateOpenAIEmbedding(text: string): Promise<EmbeddingResult> {
// Sanitize text to remove invalid Unicode before sending to API
const sanitizedText = sanitizeTextForEmbedding(text);
return this.retryWithBackoff(async () => {
try {
// Simple concatenation: base URL + path
const baseUrl = process.env.MIMIR_EMBEDDINGS_API || this.baseUrl || 'http://localhost:11434';
const embeddingsPath = process.env.MIMIR_EMBEDDINGS_API_PATH || '/v1/embeddings';
const embeddingsUrl = `${baseUrl}${embeddingsPath}`;
const apiKey = process.env.MIMIR_EMBEDDINGS_API_KEY || this.apiKey;
// Build headers - llama.cpp doesn't need Authorization header
const headers: Record<string, string> = {
'Content-Type': 'application/json',
};
// Only add Authorization if we have a key and not llama.cpp
if (this.provider !== 'llama.cpp' && apiKey && apiKey !== 'dummy-key') {
headers['Authorization'] = `Bearer ${apiKey}`;
}
// Detect if input is a data URL (image) for multimodal embeddings
const isDataURL = sanitizedText.startsWith('data:image/');
// For multimodal embeddings, send as array with image object
// For text embeddings, send as string
const input = isDataURL
? [{ type: 'image_url', image_url: { url: sanitizedText } }]
: sanitizedText;
const requestBody = JSON.stringify({
model: this.model,
input: input,
});
const fetchOptions = createSecureFetchOptions(embeddingsUrl, {
method: 'POST',
headers,
body: requestBody,
});
const response = await fetch(embeddingsUrl, fetchOptions);
if (!response.ok) {
const errorText = await response.text();
throw new Error(`OpenAI API error (${response.status}): ${errorText}`);
}
const data = await response.json();
if (!data.data || !Array.isArray(data.data) || data.data.length === 0) {
throw new Error('Invalid response from OpenAI: missing data array');
}
const embedding = data.data[0].embedding;
if (!Array.isArray(embedding)) {
throw new Error('Invalid response from OpenAI: embedding is not an array');
}
return {
embedding: embedding,
dimensions: embedding.length,
model: this.model,
};
} catch (error: any) {
if (error.code === 'ECONNREFUSED') {
throw new Error(`Cannot connect to OpenAI API at ${this.baseUrl}. Make sure copilot-api is running.`);
}
throw error;
}
}, `OpenAI embedding (${text.substring(0, 50)}...)`);
}
/**
* Generate embeddings for multiple texts (batch processing)
*/
async generateEmbeddings(texts: string[]): Promise<EmbeddingResult[]> {
if (!this.enabled) {
throw new Error('Embeddings not enabled');
}
const results: EmbeddingResult[] = [];
for (const text of texts) {
if (text && text.trim().length > 0) {
const result = await this.generateEmbedding(text);
results.push(result);
} else {
// Push empty placeholder for empty texts
results.push({
embedding: [],
dimensions: 0,
model: this.model,
});
}
}
return results;
}
/**
* Calculate cosine similarity between two embeddings
*/
cosineSimilarity(a: number[], b: number[]): number {
if (a.length !== b.length) {
throw new Error('Embeddings must have same dimensions');
}
let dotProduct = 0;
let normA = 0;
let normB = 0;
for (let i = 0; i < a.length; i++) {
dotProduct += a[i] * b[i];
normA += a[i] * a[i];
normB += b[i] * b[i];
}
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}
/**
* Find most similar embeddings using cosine similarity
*/
findMostSimilar(
query: number[],
candidates: Array<{ embedding: number[]; metadata: any }>,
topK: number = 5
): Array<{ similarity: number; metadata: any }> {
const similarities = candidates.map(candidate => ({
similarity: this.cosineSimilarity(query, candidate.embedding),
metadata: candidate.metadata,
}));
// Sort by similarity descending
similarities.sort((a, b) => b.similarity - a.similarity);
return similarities.slice(0, topK);
}
/**
* Verify embedding model is available
*/
async verifyModel(): Promise<boolean> {
if (!this.enabled) {
return false;
}
if (this.provider === 'copilot' || this.provider === 'openai') {
return this.verifyOpenAIModel();
} else {
return this.verifyOllamaModel();
}
}
/**
* Verify Ollama model is available
*/
private async verifyOllamaModel(): Promise<boolean> {
try {
const response = await fetch(`${this.baseUrl}/api/tags`);
if (!response.ok) {
return false;
}
const data = await response.json();
const models = data.models || [];
const modelExists = models.some((m: any) => m.name.includes(this.model));
if (!modelExists) {
console.warn(`⚠️ Embeddings model '${this.model}' not found in Ollama`);
console.warn(` Run: ollama pull ${this.model}`);
}
return modelExists;
} catch (error) {
console.error('Error verifying Ollama model:', error);
return false;
}
}
/**
* Verify OpenAI/Copilot model is available
*/
private async verifyOpenAIModel(): Promise<boolean> {
try {
const response = await fetch(`${this.baseUrl}/models`, {
headers: {
'Authorization': `Bearer ${this.apiKey}`,
},
});
if (!response.ok) {
console.warn(`⚠️ Cannot verify OpenAI model availability (${response.status})`);
console.warn(` Make sure copilot-api is running: copilot-api start`);
return false;
}
const data = await response.json();
const models = data.data || [];
// Check if model exists in available models
const modelExists = models.some((m: any) =>
m.id === this.model || m.id.includes(this.model)
);
if (!modelExists) {
console.warn(`⚠️ Embeddings model '${this.model}' not found in available models`);
console.warn(` Available embedding models: text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large`);
}
return modelExists;
} catch (error) {
console.error('Error verifying OpenAI model:', error);
return false;
}
}
}