embedding-manager.jsā¢9.31 kB
import { pipeline, env } from '@xenova/transformers';
import crypto from 'crypto';
import { join } from 'path';
import { EmbeddingCacheManager } from './cache-manager.js';
import { TIME_CONSTANTS, LIMITS } from './constants.js';
import { VectorOperations, getRustInfo } from '../rust-bindings/index.js';
// Store process reference at module level to avoid conflicts
const nodeProcess = process;
// Configure Transformers.js for local model storage
env.localURL = join(nodeProcess.cwd(), '.models');
env.allowRemoteModels = true;
/**
* Manages local embedding generation using Transformers.js
* Provides privacy-first semantic vector generation
*
* Uses Xenova/all-mpnet-base-v2 model for optimal accuracy:
* - 87-88% accuracy on semantic similarity tasks
* - 512 token context window
* - WebGPU acceleration for 3.5x speedup
* - Q8 quantization maintains 98% accuracy
*/
export class LocalEmbeddingManager {
constructor(options = {}) {
this.modelName = options.modelName || 'Xenova/all-mpnet-base-v2';
this.pipeline = null;
this.initPromise = null;
this.cacheManager = new EmbeddingCacheManager({
cacheDir: options.cacheDir || join(nodeProcess.cwd(), '.embedding-cache'),
maxMemoryEntries: options.maxCacheSize || 1000,
ttl: options.ttl || 7 * TIME_CONSTANTS.HOURS_24, // 7 days default
});
// Initialize high-performance vector operations
this.vectorOps = new VectorOperations({
useSimd: options.useSimd !== false,
useParallel: options.useParallel !== false,
similarityThreshold: options.similarityThreshold || 0.7,
});
// Get Rust performance info
this.rustInfo = getRustInfo();
// WebGPU configuration for better performance
this.pipelineConfig = {
quantized: true,
quantization: options.quantization || 'q8', // Q8 quantization for 3.5x speedup with 98% accuracy retention
device: options.device || 'webgpu', // Enable WebGPU acceleration when available
};
}
/**
* Initialize the embedding pipeline
* @returns {Promise<void>}
*/
async initialize() {
if (this.initPromise) {
return this.initPromise;
}
this.initPromise = this._initialize();
return this.initPromise;
}
async _initialize() {
// Initializing embedding model silently for production
// Initialize cache manager
await this.cacheManager.initialize();
// Initialize the feature extraction pipeline with enhanced configuration
this.pipeline = await pipeline('feature-extraction', this.modelName, this.pipelineConfig);
// Embedding model loaded successfully
}
/**
* Generate embedding for text content
* @param {string} content - Text to embed
* @param {string} filePath - File path for caching
* @returns {Promise<{vector: number[], cached: boolean}>}
*/
async generateEmbedding(content, filePath) {
if (!this.pipeline) {
await this.initialize();
}
// Generate content hash for caching
const contentHash = this.hashContent(content);
// Check cache
const cachedData = await this.cacheManager.getEmbedding(contentHash, filePath);
if (cachedData && cachedData.vector) {
return {
vector: cachedData.vector,
cached: true,
};
}
try {
// Generate embedding
const output = await this.pipeline(content, {
pooling: 'mean',
normalize: true,
});
// Convert to array
const vector = Array.from(output.data);
// Cache the result
await this.cacheManager.setEmbedding(contentHash, filePath, vector);
return {
vector,
cached: false,
};
} catch (error) {
// Error generating embedding - using fallback
// Return a fallback empty embedding
return {
vector: new Array(LIMITS.EMBEDDING_DIMENSION).fill(0),
cached: false,
error: error.message,
};
}
}
/**
* Generate embeddings for multiple texts
* @param {Array<{content: string, path: string}>} items - Items to embed
* @returns {Promise<Array<{path: string, vector: number[], cached: boolean}>>}
*/
async generateBatchEmbeddings(items) {
if (!this.pipeline) {
await this.initialize();
}
const results = [];
const uncachedItems = [];
// Check cache for each item
const cacheCheckItems = items.map((item) => ({
contentHash: this.hashContent(item.content),
filePath: item.path,
}));
const cachedResults = await this.cacheManager.getBatch(cacheCheckItems);
for (const item of items) {
const contentHash = this.hashContent(item.content);
const cacheKey = `${item.path}:${contentHash}`;
const cachedData = cachedResults.get(cacheKey);
if (cachedData && cachedData.vector) {
results.push({
path: item.path,
vector: cachedData.vector,
cached: true,
});
} else {
uncachedItems.push({ ...item, contentHash });
}
}
// Process uncached items
if (uncachedItems.length > 0) {
try {
// Batch process for efficiency
const contents = uncachedItems.map((item) => item.content);
const outputs = await this.pipeline(contents, {
pooling: 'mean',
normalize: true,
});
// Process results
for (let i = 0; i < uncachedItems.length; i++) {
const item = uncachedItems[i];
const start = i * LIMITS.EMBEDDING_DIMENSION;
const end = (i + 1) * LIMITS.EMBEDDING_DIMENSION;
const vector = Array.from(outputs.data.slice(start, end));
await this.cacheManager.setEmbedding(item.contentHash, item.path, vector);
results.push({
path: item.path,
vector,
cached: false,
});
}
} catch (error) {
// Error generating batch embeddings - using fallback
// Add fallback embeddings for failed items
for (const item of uncachedItems) {
results.push({
path: item.path,
vector: new Array(LIMITS.EMBEDDING_DIMENSION).fill(0),
cached: false,
error: error.message,
});
}
}
}
return results;
}
/**
* Calculate cosine similarity between two vectors
* @param {number[]} vecA - First vector
* @param {number[]} vecB - Second vector
* @returns {Promise<number>} Similarity score (0-1)
*/
async cosineSimilarity(vecA, vecB) {
try {
return await this.vectorOps.cosineSimilarity(vecA, vecB);
} catch {
// Fallback to original JavaScript implementation
let dotProduct = 0;
let normA = 0;
let normB = 0;
for (let i = 0; i < vecA.length; i++) {
dotProduct += vecA[i] * vecB[i];
normA += vecA[i] * vecA[i];
normB += vecB[i] * vecB[i];
}
normA = Math.sqrt(normA);
normB = Math.sqrt(normB);
if (normA === 0 || normB === 0) {
return 0;
}
return dotProduct / (normA * normB);
}
}
/**
* Find similar vectors from a collection
* @param {number[]} queryVector - Query vector
* @param {Array<{path: string, vector: number[]}>} vectors - Collection to search
* @param {number} topK - Number of results to return
* @param {number} threshold - Minimum similarity threshold
* @returns {Promise<Array<{path: string, similarity: number}>>}
*/
async findSimilar(
queryVector,
vectors,
topK = LIMITS.SIMILAR_FILES_DEFAULT,
threshold = LIMITS.SIMILARITY_THRESHOLD,
) {
try {
// Use high-performance Rust implementation
const vectorData = vectors.map((item) => item.vector);
const paths = vectors.map((item) => item.path);
const results = await this.vectorOps.findSimilarVectors(queryVector, vectorData, paths, topK);
// Filter by threshold and format results
return results
.filter((result) => result.similarity >= threshold)
.map((result) => ({
path: result.path,
similarity: result.similarity,
}));
} catch {
// Fallback to original JavaScript implementation
const similarities = [];
for (const item of vectors) {
try {
const similarity = await this.cosineSimilarity(queryVector, item.vector);
similarities.push({
path: item.path,
similarity,
});
} catch {
// Skip items that cause errors
continue;
}
}
return similarities
.filter((item) => item.similarity >= threshold)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, topK);
}
}
/**
* Hash content for caching
* @param {string} content - Content to hash
* @returns {string} SHA-256 hash
*/
hashContent(content) {
return crypto.createHash('sha256').update(content).digest('hex');
}
/**
* Clear the cache
*/
async clearCache() {
await this.cacheManager.clear(true);
}
/**
* Get cache statistics
* @returns {Object} Cache stats
*/
getCacheStats() {
return this.cacheManager.getStats();
}
/**
* Cleanup resources
*/
async cleanup() {
// Cache manager handles its own cleanup
this.pipeline = null;
}
}