Chrome MCP Server

MIT License

Overview InspectNew Endpoints Schema Related Servers Reviews Score

semantic-similarity-engine.ts•84.5 kB

import { AutoTokenizer, env as TransformersEnv } from '@xenova/transformers';
import type { Tensor as TransformersTensor, PreTrainedTokenizer } from '@xenova/transformers';
import LRUCache from './lru-cache';
import { SIMDMathEngine } from './simd-math-engine';
import { OffscreenManager } from './offscreen-manager';
import { STORAGE_KEYS } from '@/common/constants';
import { OFFSCREEN_MESSAGE_TYPES } from '@/common/message-types';
import { ModelCacheManager } from './model-cache-manager';

/**
 * Return the model bytes for `modelUrl`, reading from the model cache first and
 * falling back to a network fetch whose result is then stored in the cache.
 *
 * @param modelUrl URL of the model; also used as the cache key
 * @returns Model data as an ArrayBuffer
 * @throws The fetch/store error when the network path fails (HTTP errors are
 *         turned into `Error('Failed to fetch model: ...')`)
 */
async function getCachedModelData(modelUrl: string): Promise<ArrayBuffer> {
  const cacheManager = ModelCacheManager.getInstance();

  // 1. Try to serve the data from the cache
  const cachedData = await cacheManager.getCachedModelData(modelUrl);
  if (cachedData) {
    return cachedData;
  }

  console.log('Model not found in cache or expired. Fetching from network...');

  try {
    // 2. Fetch the data from the network
    const response = await fetch(modelUrl);
    if (!response.ok) {
      throw new Error(`Failed to fetch model: ${response.status} ${response.statusText}`);
    }

    // 3. Read the body and store it in the cache
    const arrayBuffer = await response.arrayBuffer();
    await cacheManager.storeModelData(modelUrl, arrayBuffer);
    console.log(
      `Model fetched from network and successfully cached (${(arrayBuffer.byteLength / 1024 / 1024).toFixed(2)}MB).`,
    );
    return arrayBuffer;
  } catch (error) {
    console.error(`Error fetching or caching model:`, error);
    // Remove a possibly incomplete cache entry. Cleanup is best-effort: a failure
    // here must not replace the original error that the caller needs to see.
    try {
      await cacheManager.deleteCacheEntry(modelUrl);
    } catch (cleanupError) {
      console.warn('Failed to remove incomplete model cache entry:', cleanupError);
    }
    throw error;
  }
}

/**
 * Clear all model cache entries.
 * @throws Re-throws whatever `clearAllCache` throws, after logging it
 */
export async function clearModelCache(): Promise<void> {
  try {
    const cacheManager = ModelCacheManager.getInstance();
    await cacheManager.clearAllCache();
  } catch (error) {
    console.error('Failed to clear model cache:', error);
    throw error;
  }
}

/**
 * Get cache statistics as reported by the cache manager.
 * @throws Re-throws whatever `getCacheStats` throws, after logging it
 */
export async function getCacheStats(): Promise<{
  totalSize: number;
  totalSizeMB: number;
  entryCount: number;
  entries: Array<{
    url: string;
    size: number;
    sizeMB: number;
    timestamp: number;
    age: string;
    expired: boolean;
  }>;
}> {
  try {
    const cacheManager = ModelCacheManager.getInstance();
    return await cacheManager.getCacheStats();
  } catch (error) {
    console.error('Failed to get cache stats:', error);
    throw error;
  }
}

/**
 * Manually trigger cache cleanup.
 * @throws Re-throws whatever `manualCleanup` throws, after logging it
 */
export async function cleanupModelCache(): Promise<void> {
  try {
    const cacheManager = ModelCacheManager.getInstance();
    await cacheManager.manualCleanup();
  } catch (error) {
    console.error('Failed to cleanup cache:', error);
    throw error;
  }
}

/**
 * Check if the default model is cached and available.
 *
 * The default preset is read from `chrome.storage.local` (falling back to
 * 'multilingual-e5-small') and turned into a Hugging Face download URL, which
 * is then looked up in the model cache.
 *
 * @returns True if the default model is cached; false otherwise, including on
 *          any error or when the stored preset is not a known preset
 */
export async function isDefaultModelCached(): Promise<boolean> {
  try {
    // Get the default model configuration
    const result = await chrome.storage.local.get([STORAGE_KEYS.SEMANTIC_MODEL]);
    const defaultModel =
      (result[STORAGE_KEYS.SEMANTIC_MODEL] as ModelPreset) || 'multilingual-e5-small';

    // The stored value is not validated, so it may name a preset that is not in the table
    const modelInfo = PREDEFINED_MODELS[defaultModel];
    if (!modelInfo) {
      return false;
    }

    // Build the model URL. Presets are resolved to the quantized ONNX file by the
    // engine constructor (via getOnnxFileNameForVersion), so probe the same file
    // name here rather than the generic 'model.onnx'.
    // NOTE(review): confirm the download path builds its URL from config.onnxModelFile.
    const modelIdentifier = modelInfo.modelIdentifier;
    const onnxModelFile = getOnnxFileNameForVersion();
    const modelNameForUrl = modelIdentifier.includes('/')
      ? modelIdentifier
      : `Xenova/${modelIdentifier}`;
    const onnxModelUrl = `https://huggingface.co/${modelNameForUrl}/resolve/main/onnx/${onnxModelFile}`;

    // Check if this model is cached
    const cacheManager = ModelCacheManager.getInstance();
    return await cacheManager.isModelCached(onnxModelUrl);
  } catch (error) {
    console.error('Error checking if default model is cached:', error);
    return false;
  }
}

/**
 * Check if any model cache exists (for conditional initialization).
 * @returns True if any valid model cache exists; false on error
 */
export async function hasAnyModelCache(): Promise<boolean> {
  try {
    const cacheManager = ModelCacheManager.getInstance();
    return await cacheManager.hasAnyValidCache();
  } catch (error) {
    console.error('Error checking for any model cache:', error);
    return false;
  }
}

// Predefined model configurations - 2025 curated recommended models, using quantized versions to reduce file size
export const PREDEFINED_MODELS = {
  // Multilingual model - default recommendation
  'multilingual-e5-small': {
    modelIdentifier: 'Xenova/multilingual-e5-small',
    dimension: 384,
    description: 'Multilingual E5 Small - Lightweight multilingual model supporting 100+ languages',
    language: 'multilingual',
    performance: 'excellent',
    size: '116MB', // Quantized version
    latency: '20ms',
    multilingualFeatures: {
      languageSupport: '100+',
      crossLanguageRetrieval: 'good',
      chineseEnglishMixed: 'good',
    },
    modelSpecificConfig: {
      requiresTokenTypeIds: false, // E5 model doesn't require token_type_ids
    },
  },
  'multilingual-e5-base': {
    modelIdentifier: 'Xenova/multilingual-e5-base',
    dimension: 768,
    description: 'Multilingual E5 base - Medium-scale multilingual model supporting 100+ languages',
    language: 'multilingual',
    performance: 'excellent',
    size: '279MB', // Quantized version
    latency: '30ms',
    multilingualFeatures: {
      languageSupport: '100+',
      crossLanguageRetrieval: 'excellent',
      chineseEnglishMixed: 'excellent',
    },
    modelSpecificConfig: {
      requiresTokenTypeIds: false, // E5 model doesn't require token_type_ids
    },
  },
} as const;

export type ModelPreset = keyof typeof PREDEFINED_MODELS;

/**
 * Get model information for a preset.
 */
export function getModelInfo(preset: ModelPreset) {
  return PREDEFINED_MODELS[preset];
}

/**
 * List all available models, each entry carrying its preset key plus its configuration.
 */
export function listAvailableModels() {
  return Object.entries(PREDEFINED_MODELS).map(([key, value]) => ({
    preset: key as ModelPreset,
    ...value,
  }));
}

/**
 * Recommend model based on language - only uses multilingual-e5 series models.
 *
 * @param _language Ignored; every language maps to a multilingual model
 * @param scenario 'quality' selects the base model, anything else the small model
 */
export function recommendModelForLanguage(
  _language: 'en' | 'zh' | 'multilingual' = 'multilingual',
  scenario: 'speed' | 'balanced' | 'quality' = 'balanced',
): ModelPreset {
  // All languages use multilingual models
  if (scenario === 'quality') {
    return 'multilingual-e5-base'; // High quality choice
  }
  return 'multilingual-e5-small'; // Default lightweight choice
}

/**
 * Recommend model based on device performance and usage scenario - only uses
 * multilingual-e5 series models.
 *
 * @param _language Ignored; every language maps to a multilingual model
 * @param deviceMemory Device memory in GB
 * @param networkSpeed 'slow' always selects the small model
 * @param prioritizeSpeed true always selects the small model
 */
export function recommendModelForDevice(
  _language: 'en' | 'zh' | 'multilingual' = 'multilingual',
  deviceMemory: number = 4, // GB
  networkSpeed: 'slow' | 'fast' = 'fast',
  prioritizeSpeed: boolean = false,
): ModelPreset {
  // Low memory devices or slow network, prioritize small models
  if (deviceMemory < 4 || networkSpeed === 'slow' || prioritizeSpeed) {
    return 'multilingual-e5-small'; // Lightweight choice
  }

  // High performance devices can use better models
  // (prioritizeSpeed is necessarily false here, so it needs no second check)
  if (deviceMemory >= 8) {
    return 'multilingual-e5-base'; // High performance choice
  }

  // Default balanced choice
  return 'multilingual-e5-small';
}

/**
 * Get model size information (only supports quantized version).
 *
 * @param preset Model preset to describe
 * @param _version Ignored; the result always describes the quantized version
 */
export function getModelSizeInfo(
  preset: ModelPreset,
  _version: 'full' | 'quantized' | 'compressed' = 'quantized',
) {
  const model = PREDEFINED_MODELS[preset];
  return {
    size: model.size,
    recommended: 'quantized',
    description: `${model.description} (Size: ${model.size})`,
  };
}

/**
 * Compare performance and size of multiple models.
 *
 * @param presets Presets to compare; the result keeps the same order
 */
export function compareModels(presets: ModelPreset[]) {
  return presets.map((preset) => {
    const model = PREDEFINED_MODELS[preset];
    return {
      preset,
      name: model.description.split(' - ')[0],
      language: model.language,
      performance: model.performance,
      dimension: model.dimension,
      latency: model.latency,
      size: model.size,
      features: (model as any).multilingualFeatures || {},
      // No preset currently declares maxLength, so this falls back to 512
      maxLength: (model as any).maxLength || 512,
      recommendedFor: getRecommendationContext(preset),
    };
  });
}

/**
 * Get recommended use cases for a model.
 *
 * Several rules can suggest the same use case (for example size and preset both
 * imply 'Lightweight deployment'), so the labels are collected in a Set to keep
 * each one once, in first-seen order.
 */
function getRecommendationContext(preset: ModelPreset): string[] {
  const contexts = new Set<string>();
  const model = PREDEFINED_MODELS[preset];

  // All models are multilingual
  contexts.add('Multilingual document processing');

  if (model.performance === 'excellent') contexts.add('High accuracy requirements');
  if (model.latency.includes('20ms')) contexts.add('Fast response');

  // Add scenarios based on model size
  const sizeInMB = parseInt(model.size.replace('MB', ''), 10);
  if (sizeInMB < 300) {
    contexts.add('Mobile devices');
    contexts.add('Lightweight deployment');
  }

  if (preset === 'multilingual-e5-small') {
    contexts.add('Lightweight deployment');
  } else if (preset === 'multilingual-e5-base') {
    contexts.add('High accuracy requirements');
  }

  return Array.from(contexts);
}

/**
 * Get ONNX model filename (only supports quantized version).
 *
 * @param _version Ignored; the quantized filename is always returned
 */
export function getOnnxFileNameForVersion(
  _version: 'full' | 'quantized' | 'compressed' = 'quantized',
): string {
  // Only return quantized version filename
  return 'model_quantized.onnx';
}

/**
 * Get model identifier (only supports quantized version).
 *
 * @param preset Model preset to resolve
 * @param _version Ignored; the identifier carries no version suffix
 */
export function getModelIdentifierWithVersion(
  preset: ModelPreset,
  _version: 'full' | 'quantized' | 'compressed' = 'quantized',
): string {
  const model = PREDEFINED_MODELS[preset];
  return model.modelIdentifier;
}

/**
 * Get size comparison of all available models, sorted by ascending size in MB.
 */
export function getAllModelSizes() {
  const models = Object.entries(PREDEFINED_MODELS).map(([preset, config]) => {
    return {
      preset: preset as ModelPreset,
      name: config.description.split(' - ')[0],
      language: config.language,
      size: config.size,
      performance: config.performance,
      latency: config.latency,
    };
  });

  // Sort by size
  return models.sort((a, b) => {
    const sizeA = parseInt(a.size.replace('MB', ''), 10);
    const sizeB = parseInt(b.size.replace('MB', ''), 10);
    return sizeA - sizeB;
  });
}

// Define necessary types
interface ModelConfig {
  modelIdentifier: string;
  localModelPathPrefix?: string; // Base path for local models (relative to public)
  onnxModelFile?: string; // ONNX model filename
  maxLength?: number;
  cacheSize?: number;
  numThreads?: number;
  executionProviders?: string[];
  useLocalFiles?: boolean;
  workerPath?: string; // Worker script path (relative to extension root)
  concurrentLimit?: number; // Worker task concurrency limit
  forceOffscreen?: boolean; // Force offscreen mode (for testing)
  modelPreset?: ModelPreset; // Predefined model selection
  dimension?: number; // Vector dimension (auto-obtained from preset model)
  modelVersion?: 'full' | 'quantized' | 'compressed'; // Model version selection
  requiresTokenTypeIds?: boolean; // Whether model requires token_type_ids input
}

interface WorkerMessagePayload {
  modelPath?: string;
  modelData?: ArrayBuffer;
  numThreads?: number;
  executionProviders?: string[];
  input_ids?: number[];
  attention_mask?: number[];
  token_type_ids?: number[];
  dims?: {
    input_ids: number[];
    attention_mask: number[];
    token_type_ids?: number[];
  };
}

interface WorkerResponsePayload {
  data?: Float32Array | number[]; // Tensor data as Float32Array or number array
  dims?: number[]; // Tensor dimensions
  message?: string; // For error or status messages
}

interface WorkerStats {
  inferenceTime?: number;
  totalInferences?: number;
  averageInferenceTime?: number;
  memoryAllocations?: number;
  batchSize?: number;
}

// Memory pool
// manager
/**
 * Pools Float32Array buffers by length so embeddings of the same size can be
 * reused instead of reallocated. At most `maxPoolSize` buffers are retained per
 * length; released buffers are zero-filled before they are pooled.
 */
class EmbeddingMemoryPool {
  private pools: Map<number, Float32Array[]> = new Map();
  private maxPoolSize: number = 10;
  private stats = { allocated: 0, reused: 0, released: 0 };

  /** Return a pooled buffer of `size` elements if one is available, else a new one. */
  getEmbedding(size: number): Float32Array {
    const pool = this.pools.get(size);
    if (pool && pool.length > 0) {
      this.stats.reused++;
      return pool.pop()!;
    }
    this.stats.allocated++;
    return new Float32Array(size);
  }

  /** Hand a buffer back to the pool; it is dropped when the pool for its length is full. */
  releaseEmbedding(embedding: Float32Array): void {
    const size = embedding.length;
    if (!this.pools.has(size)) {
      this.pools.set(size, []);
    }
    const pool = this.pools.get(size)!;
    if (pool.length < this.maxPoolSize) {
      // Clear array for reuse
      embedding.fill(0);
      pool.push(embedding);
      this.stats.released++;
    }
  }

  /** Snapshot (copy) of the allocation counters. */
  getStats() {
    return { ...this.stats };
  }

  /** Drop every pooled buffer and reset the counters. */
  clear(): void {
    this.pools.clear();
    this.stats = { allocated: 0, reused: 0, released: 0 };
  }
}

interface PendingMessage {
  resolve: (value: WorkerResponsePayload | PromiseLike<WorkerResponsePayload>) => void;
  reject: (reason?: any) => void;
  type: string;
}

interface TokenizedOutput {
  // Simulates part of transformers.js tokenizer output
  input_ids: TransformersTensor;
  attention_mask: TransformersTensor;
  token_type_ids?: TransformersTensor;
}

/**
 * SemanticSimilarityEngine proxy class
 * Used by ContentIndexer and other components to reuse engine instance in offscreen, avoiding duplicate model downloads
 */
export class SemanticSimilarityEngineProxy {
  private _isInitialized = false;
  private config: Partial<ModelConfig>;
  private offscreenManager: OffscreenManager;
  // In-flight ensureOffscreenEngineInitialized() run, shared by concurrent callers
  private _ensurePromise: Promise<void> | null = null;

  constructor(config: Partial<ModelConfig> = {}) {
    this.config = config;
    this.offscreenManager = OffscreenManager.getInstance();
    console.log('SemanticSimilarityEngineProxy: Proxy created with config:', {
      modelPreset: config.modelPreset,
      modelVersion: config.modelVersion,
      dimension: config.dimension,
    });
  }

  /**
   * Make sure the offscreen document exists and that the engine inside it is
   * initialized, then mark the proxy as initialized.
   * @throws Error('Failed to initialize proxy: ...') wrapping the underlying failure message
   */
  async initialize(): Promise<void> {
    try {
      console.log('SemanticSimilarityEngineProxy: Starting proxy initialization...');

      // Ensure offscreen document exists
      console.log('SemanticSimilarityEngineProxy: Ensuring offscreen document exists...');
      await this.offscreenManager.ensureOffscreenDocument();
      console.log('SemanticSimilarityEngineProxy: Offscreen document ready');

      // Ensure engine in offscreen is initialized
      console.log('SemanticSimilarityEngineProxy: Ensuring offscreen engine is initialized...');
      await this.ensureOffscreenEngineInitialized();

      this._isInitialized = true;
      console.log(
        'SemanticSimilarityEngineProxy: Proxy initialized, delegating to offscreen engine',
      );
    } catch (error) {
      console.error('SemanticSimilarityEngineProxy: Initialization failed:', error);
      throw new Error(
        `Failed to initialize proxy: ${error instanceof Error ? error.message : 'Unknown error'}`,
      );
    }
  }

  /**
   * Check engine status in offscreen.
   * Never throws: a messaging failure or an unsuccessful response is reported as
   * "not initialized".
   */
  private async checkOffscreenEngineStatus(): Promise<{
    isInitialized: boolean;
    currentConfig: any;
  }> {
    try {
      const response = await chrome.runtime.sendMessage({
        target: 'offscreen',
        type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_STATUS,
      });

      if (response && response.success) {
        return {
          isInitialized: response.isInitialized || false,
          currentConfig: response.currentConfig || null,
        };
      }
    } catch (error) {
      console.warn('SemanticSimilarityEngineProxy: Failed to check engine status:', error);
    }

    return { isInitialized: false, currentConfig: null };
  }

  /**
   * Ensure engine in offscreen is initialized (with concurrency protection).
   *
   * Concurrent callers share one in-flight run: they resolve only once that run
   * has finished and reject if it fails, instead of returning early while the
   * engine may still be uninitialized.
   *
   * @throws Error when the offscreen document reports that initialization failed
   */
  private ensureOffscreenEngineInitialized(): Promise<void> {
    if (this._ensurePromise) {
      console.log('SemanticSimilarityEngineProxy: Already ensuring initialization, waiting...');
      return this._ensurePromise;
    }

    const pending = this.runEnsureOffscreenEngineInitialized().finally(() => {
      // Allow a fresh check on the next call, whether this run succeeded or failed
      this._ensurePromise = null;
    });
    this._ensurePromise = pending;
    return pending;
  }

  /** Query the engine status and send an init message when it is not initialized. */
  private async runEnsureOffscreenEngineInitialized(): Promise<void> {
    const status = await this.checkOffscreenEngineStatus();
    if (status.isInitialized) {
      return;
    }

    console.log(
      'SemanticSimilarityEngineProxy: Engine not initialized in offscreen, initializing...',
    );

    // Reinitialize engine
    const response = await chrome.runtime.sendMessage({
      target: 'offscreen',
      type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_INIT,
      config: this.config,
    });

    if (!response || !response.success) {
      throw new Error(response?.error || 'Failed to initialize engine in offscreen document');
    }

    console.log('SemanticSimilarityEngineProxy: Engine reinitialized successfully');
  }

  /**
   * Send message to offscreen document with retry mechanism and auto-reinitialization.
   *
   * @param message Message forwarded to `chrome.runtime.sendMessage`
   * @param maxRetries Number of attempts before giving up
   * @returns The response; it may still have `success === false`, so callers must check it
   * @throws Error when every attempt throws or yields no response
   */
  private async sendMessageToOffscreen(message: any, maxRetries: number = 3): Promise<any> {
    // Make sure the offscreen document exists
    await this.offscreenManager.ensureOffscreenDocument();

    let lastError: Error | null = null;

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        console.log(
          `SemanticSimilarityEngineProxy: Sending message (attempt ${attempt}/${maxRetries}):`,
          message.type,
        );

        const response = await chrome.runtime.sendMessage(message);

        if (!response) {
          throw new Error('No response received from offscreen document');
        }

        // If engine not initialized error received, try to reinitialize
        if (!response.success && response.error && response.error.includes('not initialized')) {
          console.log(
            'SemanticSimilarityEngineProxy: Engine not initialized, attempting to reinitialize...',
          );
          await this.ensureOffscreenEngineInitialized();

          // Resend original message
          const retryResponse = await chrome.runtime.sendMessage(message);
          if (retryResponse && retryResponse.success) {
            return retryResponse;
          }
        }

        return response;
      } catch (error) {
        lastError = error as Error;
        console.warn(
          `SemanticSimilarityEngineProxy: Message failed (attempt ${attempt}/${maxRetries}):`,
          error,
        );

        // If engine not initialized error, try to reinitialize
        if (error instanceof Error && error.message.includes('not initialized')) {
          try {
            console.log(
              'SemanticSimilarityEngineProxy: Attempting to reinitialize engine due to error...',
            );
            await this.ensureOffscreenEngineInitialized();

            // Resend original message
            const retryResponse = await chrome.runtime.sendMessage(message);
            if (retryResponse && retryResponse.success) {
              return retryResponse;
            }
          } catch (reinitError) {
            console.warn(
              'SemanticSimilarityEngineProxy: Failed to reinitialize engine:',
              reinitError,
            );
          }
        }

        if (attempt < maxRetries) {
          // Wait before retry (the delay grows linearly with the attempt number)
          await new Promise((resolve) => setTimeout(resolve, 100 * attempt));

          // Re-ensure offscreen document exists
          try {
            await this.offscreenManager.ensureOffscreenDocument();
          } catch (offscreenError) {
            console.warn(
              'SemanticSimilarityEngineProxy: Failed to ensure offscreen document:',
              offscreenError,
            );
          }
        }
      }
    }

    throw new Error(
      `Failed to communicate with offscreen document after ${maxRetries} attempts. Last error: ${lastError?.message}`,
    );
  }

  /**
   * Compute the embedding of one text via the offscreen engine.
   * @throws Error when the offscreen call fails or returns non-array embedding data
   */
  async getEmbedding(text: string, options: Record<string, any> = {}): Promise<Float32Array> {
    if (!this._isInitialized) {
      await this.initialize();
    }

    // Check and ensure engine is initialized before each call
    await this.ensureOffscreenEngineInitialized();

    const response = await this.sendMessageToOffscreen({
      target: 'offscreen',
      type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_COMPUTE,
      text: text,
      options: options,
    });

    if (!response || !response.success) {
      throw new Error(response?.error || 'Failed to get embedding from offscreen document');
    }

    if (!response.embedding || !Array.isArray(response.embedding)) {
      throw new Error('Invalid embedding data received from offscreen document');
    }

    return new Float32Array(response.embedding);
  }

  /**
   * Compute embeddings for several texts via the offscreen engine.
   * @returns One Float32Array per embedding returned by the engine; [] for empty input
   * @throws Error when the offscreen call fails or returns non-array embeddings data
   */
  async getEmbeddingsBatch(
    texts: string[],
    options: Record<string, any> = {},
  ): Promise<Float32Array[]> {
    if (!this._isInitialized) {
      await this.initialize();
    }

    if (!texts || texts.length === 0) return [];

    // Check and ensure engine is initialized before each call
    await this.ensureOffscreenEngineInitialized();

    const response = await this.sendMessageToOffscreen({
      target: 'offscreen',
      type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_BATCH_COMPUTE,
      texts: texts,
      options: options,
    });

    if (!response || !response.success) {
      throw new Error(response?.error || 'Failed to get embeddings batch from offscreen document');
    }

    // Same validation as getEmbedding: fail with a clear message instead of a
    // TypeError from calling .map on a missing or malformed field
    if (!Array.isArray(response.embeddings)) {
      throw new Error('Invalid embeddings data received from offscreen document');
    }

    return response.embeddings.map((emb: number[]) => new Float32Array(emb));
  }

  /** Cosine similarity between the embeddings of two texts. */
  async computeSimilarity(
    text1: string,
    text2: string,
    options: Record<string, any> = {},
  ): Promise<number> {
    const [embedding1, embedding2] = await this.getEmbeddingsBatch([text1, text2], options);
    return this.cosineSimilarity(embedding1, embedding2);
  }

  /**
   * Compute similarities for several text pairs via the offscreen engine.
   *
   * NOTE(review): this reuses SIMILARITY_ENGINE_BATCH_COMPUTE with `pairs` and reads
   * `response.similarities`, whereas getEmbeddingsBatch sends `texts` and reads
   * `response.embeddings` for the same message type - confirm the offscreen handler
   * supports both shapes.
   *
   * @throws Error when the offscreen call fails
   */
  async computeSimilarityBatch(
    pairs: { text1: string; text2: string }[],
    options: Record<string, any> = {},
  ): Promise<number[]> {
    if (!this._isInitialized) {
      await this.initialize();
    }

    // Check and ensure engine is initialized before each call
    await this.ensureOffscreenEngineInitialized();

    const response = await this.sendMessageToOffscreen({
      target: 'offscreen',
      type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_BATCH_COMPUTE,
      pairs: pairs,
      options: options,
    });

    if (!response || !response.success) {
      throw new Error(
        response?.error || 'Failed to compute similarity batch from offscreen document',
      );
    }

    return response.similarities;
  }

  /**
   * Cosine similarity of two equal-length vectors; 0 when either has zero magnitude.
   * @throws Error when the vector lengths differ
   */
  private cosineSimilarity(a: Float32Array, b: Float32Array): number {
    if (a.length !== b.length) {
      throw new Error(`Vector dimensions don't match: ${a.length} vs ${b.length}`);
    }

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }

    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    return magnitude === 0 ? 0 : dotProduct / magnitude;
  }

  get isInitialized(): boolean {
    return this._isInitialized;
  }

  async dispose(): Promise<void> {
    // Proxy class doesn't need to clean up resources, actual resources are managed by offscreen
    this._isInitialized = false;
    console.log('SemanticSimilarityEngineProxy: Proxy disposed');
  }
}

export class SemanticSimilarityEngine {
  private worker: Worker | null = null;
  private tokenizer: PreTrainedTokenizer | null = null;
  public isInitialized = false;
  private isInitializing = false;
  private initPromise: Promise<void> | null = null;
  private nextTokenId = 0;
  private pendingMessages = new Map<number, PendingMessage>();
  private useOffscreen = false; // Whether to use offscreen mode

  public readonly config: Required<ModelConfig>;

  private embeddingCache: LRUCache<string, Float32Array>;
  // Added: tokenization cache
  private tokenizationCache: LRUCache<string, TokenizedOutput>;
  // Added: memory pool manager
  private memoryPool: EmbeddingMemoryPool;
  // Added: SIMD math engine
  private simdMath: SIMDMathEngine | null = null;
  private useSIMD = false;

  public cacheStats = {
    embedding: { hits: 0, misses: 0, size: 0 },
    tokenization: { hits: 0, misses: 0, size: 0 },
  };

  public performanceStats = {
    totalEmbeddingComputations: 0,
    totalEmbeddingTime: 0,
    averageEmbeddingTime: 0,
    totalTokenizationTime: 0,
    averageTokenizationTime: 0,
    totalSimilarityComputations: 0,
    totalSimilarityTime: 0,
    averageSimilarityTime: 0,
    workerStats: null as WorkerStats | null,
  };

  private runningWorkerTasks = 0;
  private workerTaskQueue: (() => void)[] = [];

  /**
   * Detect if current runtime environment supports Worker
   */
  private isWorkerSupported(): boolean {
    try {
      // Check if in Service Worker environment (background script)
      if (typeof importScripts === 'function') {
        return false;
      }

      // Check if Worker constructor is available
      return typeof Worker !== 'undefined';
    } catch {
      return false;
    }
  }

  /**
   * Detect if in offscreen document environment
   */
  private isInOffscreenDocument(): boolean {
    try {
      // In
offscreen document, window.location.pathname is usually '/offscreen.html' return ( typeof window !== 'undefined' && window.location && window.location.pathname.includes('offscreen') ); } catch { return false; } } /** * Ensure offscreen document exists */ private async ensureOffscreenDocument(): Promise<void> { return OffscreenManager.getInstance().ensureOffscreenDocument(); } // Helper function to safely convert tensor data to number array private convertTensorDataToNumbers(data: any): number[] { if (data instanceof BigInt64Array) { return Array.from(data, (val: bigint) => Number(val)); } else if (data instanceof Int32Array) { return Array.from(data); } else { return Array.from(data); } } constructor(options: Partial<ModelConfig> = {}) { console.log('SemanticSimilarityEngine: Constructor called with options:', { useLocalFiles: options.useLocalFiles, modelIdentifier: options.modelIdentifier, forceOffscreen: options.forceOffscreen, modelPreset: options.modelPreset, modelVersion: options.modelVersion, }); // Handle model presets let modelConfig = { ...options }; if (options.modelPreset && PREDEFINED_MODELS[options.modelPreset]) { const preset = PREDEFINED_MODELS[options.modelPreset]; const modelVersion = options.modelVersion || 'quantized'; // Default to quantized version const baseModelIdentifier = preset.modelIdentifier; // Use base identifier without version suffix const onnxFileName = getOnnxFileNameForVersion(modelVersion); // Get ONNX filename based on version // Get model-specific configuration const modelSpecificConfig = (preset as any).modelSpecificConfig || {}; modelConfig = { ...options, modelIdentifier: baseModelIdentifier, // Use base identifier onnxModelFile: onnxFileName, // Set corresponding version ONNX filename dimension: preset.dimension, modelVersion: modelVersion, requiresTokenTypeIds: modelSpecificConfig.requiresTokenTypeIds !== false, // Default to true unless explicitly set to false }; console.log( `SemanticSimilarityEngine: Using model preset 
"${options.modelPreset}" with version "${modelVersion}":`, preset, ); console.log(`SemanticSimilarityEngine: Base model identifier: ${baseModelIdentifier}`); console.log(`SemanticSimilarityEngine: ONNX file for version: ${onnxFileName}`); console.log( `SemanticSimilarityEngine: Requires token_type_ids: ${modelConfig.requiresTokenTypeIds}`, ); } // Set default configuration - using 2025 recommended default model this.config = { ...modelConfig, modelIdentifier: modelConfig.modelIdentifier || 'Xenova/bge-small-en-v1.5', localModelPathPrefix: modelConfig.localModelPathPrefix || 'models/', onnxModelFile: modelConfig.onnxModelFile || 'model.onnx', maxLength: modelConfig.maxLength || 256, cacheSize: modelConfig.cacheSize || 500, numThreads: modelConfig.numThreads || (typeof navigator !== 'undefined' && navigator.hardwareConcurrency ? Math.max(1, Math.floor(navigator.hardwareConcurrency / 2)) : 2), executionProviders: modelConfig.executionProviders || (typeof WebAssembly === 'object' && WebAssembly.validate(new Uint8Array([0, 97, 115, 109, 1, 0, 0, 0])) ? ['wasm'] : ['webgl']), useLocalFiles: (() => { console.log( 'SemanticSimilarityEngine: DEBUG - modelConfig.useLocalFiles:', modelConfig.useLocalFiles, ); console.log( 'SemanticSimilarityEngine: DEBUG - modelConfig.useLocalFiles !== undefined:', modelConfig.useLocalFiles !== undefined, ); const result = modelConfig.useLocalFiles !== undefined ? modelConfig.useLocalFiles : true; console.log('SemanticSimilarityEngine: DEBUG - final useLocalFiles value:', result); return result; })(), workerPath: modelConfig.workerPath || 'js/similarity.worker.js', // Will be overridden by WXT's `new URL` concurrentLimit: modelConfig.concurrentLimit || Math.max( 1, modelConfig.numThreads || (typeof navigator !== 'undefined' && navigator.hardwareConcurrency ? 
Math.max(1, Math.floor(navigator.hardwareConcurrency / 2)) : 2), ), forceOffscreen: modelConfig.forceOffscreen || false, modelPreset: modelConfig.modelPreset || 'bge-small-en-v1.5', dimension: modelConfig.dimension || 384, modelVersion: modelConfig.modelVersion || 'quantized', requiresTokenTypeIds: modelConfig.requiresTokenTypeIds !== false, // Default to true } as Required<ModelConfig>; console.log('SemanticSimilarityEngine: Final config:', { useLocalFiles: this.config.useLocalFiles, modelIdentifier: this.config.modelIdentifier, forceOffscreen: this.config.forceOffscreen, }); this.embeddingCache = new LRUCache<string, Float32Array>(this.config.cacheSize); this.tokenizationCache = new LRUCache<string, TokenizedOutput>( Math.min(this.config.cacheSize, 200), ); this.memoryPool = new EmbeddingMemoryPool(); this.simdMath = new SIMDMathEngine(); } private _sendMessageToWorker( type: string, payload?: WorkerMessagePayload, transferList?: Transferable[], ): Promise<WorkerResponsePayload> { return new Promise((resolve, reject) => { if (!this.worker) { reject(new Error('Worker is not initialized.')); return; } const id = this.nextTokenId++; this.pendingMessages.set(id, { resolve, reject, type }); // Use transferable objects if provided for zero-copy transfer if (transferList && transferList.length > 0) { this.worker.postMessage({ id, type, payload }, transferList); } else { this.worker.postMessage({ id, type, payload }); } }); } private _setupWorker(): void { console.log('SemanticSimilarityEngine: Setting up worker...'); // 方式1: Chrome extension URL (推荐，生产环境最可靠) try { const workerUrl = chrome.runtime.getURL('workers/similarity.worker.js'); console.log(`SemanticSimilarityEngine: Trying chrome.runtime.getURL ${workerUrl}`); this.worker = new Worker(workerUrl); console.log(`SemanticSimilarityEngine: Method 1 successful with path`); } catch (error) { console.warn('Method (chrome.runtime.getURL) failed:', error); } if (!this.worker) { throw new Error('Worker creation failed'); } 
this.worker.onmessage = (
      event: MessageEvent<{
        id: number;
        type: string;
        status: string;
        payload: WorkerResponsePayload;
        stats?: WorkerStats;
      }>,
    ) => {
      const { id, status, payload, stats } = event.data;
      const promiseCallbacks = this.pendingMessages.get(id);
      if (!promiseCallbacks) return;

      this.pendingMessages.delete(id);

      // Update the worker statistics snapshot
      if (stats) {
        this.performanceStats.workerStats = stats;
      }

      if (status === 'success') {
        promiseCallbacks.resolve(payload);
      } else {
        const error = new Error(
          payload?.message || `Worker error for task ${promiseCallbacks.type}`,
        );
        (error as any).name = (payload as any)?.name || 'WorkerError';
        (error as any).stack = (payload as any)?.stack || undefined;
        console.error(
          `Error from worker (task ${id}, type ${promiseCallbacks.type}):`,
          error,
          event.data,
        );
        promiseCallbacks.reject(error);
      }
    };

    this.worker.onerror = (error: ErrorEvent) => {
      console.error('==== Unhandled error in SemanticSimilarityEngine Worker ====');
      console.error('Event Message:', error.message);
      console.error('Event Filename:', error.filename);
      console.error('Event Lineno:', error.lineno);
      console.error('Event Colno:', error.colno);
      if (error.error) {
        // Check whether event.error is present
        console.error('Actual Error Name:', error.error.name);
        console.error('Actual Error Message:', error.error.message);
        console.error('Actual Error Stack:', error.error.stack);
      } else {
        console.error('Actual Error object (event.error) is not available. Error details:', {
          message: error.message,
          filename: error.filename,
          lineno: error.lineno,
          colno: error.colno,
        });
      }
      console.error('==========================================================');
      this.pendingMessages.forEach((callbacks) => {
        callbacks.reject(new Error(`Worker terminated or unhandled error: ${error.message}`));
      });
      this.pendingMessages.clear();
      this.isInitialized = false;
      this.isInitializing = false;
    };
  }

  /**
   * Initialize the engine once.
   *
   * Returns immediately when already initialized; when an initialization is in
   * flight, returns that same promise instead of starting a second one.
   */
  public async initialize(): Promise<void> {
    if (this.isInitialized) return;
    if (this.isInitializing && this.initPromise) return this.initPromise;

    this.isInitializing = true;
    this.initPromise = this._doInitialize().finally(() => {
      this.isInitializing = false;
      // this.warmupModel();
    });
    return this.initPromise;
  }

  /**
   * Initialize the engine, reporting progress through an optional callback.
   *
   * Same re-entrancy rules as `initialize()`: if an initialization is already
   * in flight its promise is returned, and `onProgress` is not attached to it.
   *
   * @param onProgress Receives `{ status, progress, message }` updates.
   */
  public async initializeWithProgress(
    onProgress?: (progress: { status: string; progress: number; message?: string }) => void,
  ): Promise<void> {
    if (this.isInitialized) return;
    if (this.isInitializing && this.initPromise) return this.initPromise;

    this.isInitializing = true;
    this.initPromise = this._doInitializeWithProgress(onProgress).finally(() => {
      this.isInitializing = false;
      // this.warmupModel();
    });
    return this.initPromise;
  }

  /**
   * Internal initialization routine with progress reporting.
   *
   * Chooses between offscreen-document mode and direct-Worker mode, performs
   * the matching setup, and marks the engine initialized on success.
   *
   * @param onProgress Optional progress sink; calls are skipped when omitted.
   * @throws Error named `ModelInitializationError` carrying the original message.
   */
  private async _doInitializeWithProgress(
    onProgress?: (progress: { status: string; progress: number; message?: string }) => void,
  ): Promise<void> {
    console.log('SemanticSimilarityEngine: Initializing with progress tracking...');
    const startTime = performance.now();

    // Progress reporting helper
    const reportProgress = (status: string, progress: number, message?: string) => {
      if (onProgress) {
        onProgress({ status, progress, message });
      }
    };

    try {
      reportProgress('initializing', 5, 'Starting initialization...');

      // Detect the environment and decide which mode to use
      const workerSupported = this.isWorkerSupported();
      const inOffscreenDocument = this.isInOffscreenDocument();

      // Prevent infinite recursion: inside the offscreen document, force direct Worker mode
      if (inOffscreenDocument) {
        this.useOffscreen = false;
        console.log(
          'SemanticSimilarityEngine: Running in offscreen document, using direct Worker mode to prevent recursion',
        );
      } else {
        this.useOffscreen = this.config.forceOffscreen || !workerSupported;
      }

      console.log(
        `SemanticSimilarityEngine: Worker supported: ${workerSupported}, In offscreen: ${inOffscreenDocument}, Using offscreen: ${this.useOffscreen}`,
      );

      reportProgress('initializing', 10, 'Environment detection complete');

      if (this.useOffscreen) {
        // Offscreen mode: delegate to the offscreen document, which tracks its own progress
        reportProgress('initializing', 15, 'Setting up offscreen document...');
        await this.ensureOffscreenDocument();

        // Send the initialization message to the offscreen document
        console.log('SemanticSimilarityEngine: Sending config to offscreen:', {
          useLocalFiles: this.config.useLocalFiles,
          modelIdentifier: this.config.modelIdentifier,
          localModelPathPrefix: this.config.localModelPathPrefix,
        });

        // Make sure the config serializes correctly by listing every property explicitly
        const configToSend = {
          modelIdentifier: this.config.modelIdentifier,
          localModelPathPrefix: this.config.localModelPathPrefix,
          onnxModelFile: this.config.onnxModelFile,
          maxLength: this.config.maxLength,
          cacheSize: this.config.cacheSize,
          numThreads: this.config.numThreads,
          executionProviders: this.config.executionProviders,
          useLocalFiles: Boolean(this.config.useLocalFiles), // force to boolean
          workerPath: this.config.workerPath,
          concurrentLimit: this.config.concurrentLimit,
          forceOffscreen: this.config.forceOffscreen,
          modelPreset: this.config.modelPreset,
          modelVersion: this.config.modelVersion,
          dimension: this.config.dimension,
        };

        // Round-trip through JSON so only plain serializable data is sent
        const serializedConfig = JSON.parse(JSON.stringify(configToSend));

        reportProgress('initializing', 20, 'Delegating to offscreen document...');

        const response = await chrome.runtime.sendMessage({
          target: 'offscreen',
          type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_INIT,
          config: serializedConfig,
        });

        if (!response || !response.success) {
          throw new Error(response?.error || 'Failed to initialize engine in offscreen document');
        }

        reportProgress('ready', 100, 'Initialized via offscreen document');
        console.log('SemanticSimilarityEngine: Initialized via offscreen document');
      } else {
        // Direct Worker mode: real progress tracking is available here
        await this._initializeDirectWorkerWithProgress(reportProgress);
      }

      this.isInitialized = true;
      console.log(
        `SemanticSimilarityEngine: Initialization complete in ${(performance.now() - startTime).toFixed(2)}ms`,
      );
    } catch (error) {
      console.error('SemanticSimilarityEngine: Initialization failed.', error);
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      reportProgress('error', 0, `Initialization failed: ${errorMessage}`);

      if (this.worker) this.worker.terminate();
      this.worker = null;
      this.isInitialized = false;
      this.isInitializing = false;
      this.initPromise = null;

      // Build a more descriptive error object
      const enhancedError = new Error(errorMessage);
      enhancedError.name = 'ModelInitializationError';
      throw enhancedError;
    }
  }

  /**
   * Internal initialization routine without progress reporting.
   *
   * Offscreen mode forwards the configuration to the offscreen document.
   * Direct mode sets up the worker, loads the tokenizer, hands the ONNX model
   * to the worker (local URL or cached remote bytes) and then probes SIMD.
   *
   * @throws Error named `ModelInitializationError` carrying the original message.
   */
  private async _doInitialize(): Promise<void> {
    console.log('SemanticSimilarityEngine: Initializing...');
    const startTime = performance.now();

    try {
      // Detect the environment and decide which mode to use
      const workerSupported = this.isWorkerSupported();
      const inOffscreenDocument = this.isInOffscreenDocument();

      // Prevent infinite recursion: inside the offscreen document, force direct Worker mode
      if (inOffscreenDocument) {
        this.useOffscreen = false;
        console.log(
          'SemanticSimilarityEngine: Running in offscreen document, using direct Worker mode to prevent recursion',
        );
      } else {
        this.useOffscreen = this.config.forceOffscreen || !workerSupported;
      }

      console.log(
        `SemanticSimilarityEngine: Worker supported: ${workerSupported}, In offscreen: ${inOffscreenDocument}, Using offscreen: ${this.useOffscreen}`,
      );

      if (this.useOffscreen) {
        // Offscreen mode
        await this.ensureOffscreenDocument();

        // Send the initialization message to the offscreen document
        console.log('SemanticSimilarityEngine: Sending config to offscreen:', {
          useLocalFiles: this.config.useLocalFiles,
          modelIdentifier: this.config.modelIdentifier,
          localModelPathPrefix: this.config.localModelPathPrefix,
        });

        // Make sure the config serializes correctly by listing every property explicitly
        const configToSend = {
          modelIdentifier: this.config.modelIdentifier,
          localModelPathPrefix: this.config.localModelPathPrefix,
          onnxModelFile: this.config.onnxModelFile,
          maxLength: this.config.maxLength,
          cacheSize: this.config.cacheSize,
          numThreads: this.config.numThreads,
          executionProviders: this.config.executionProviders,
          useLocalFiles: Boolean(this.config.useLocalFiles), // force to boolean
          workerPath: this.config.workerPath,
          concurrentLimit: this.config.concurrentLimit,
          forceOffscreen: this.config.forceOffscreen,
          modelPreset: this.config.modelPreset,
          modelVersion: this.config.modelVersion,
          dimension: this.config.dimension,
        };

        console.log(
          'SemanticSimilarityEngine: DEBUG - configToSend.useLocalFiles:',
          configToSend.useLocalFiles,
        );
        console.log(
          'SemanticSimilarityEngine: DEBUG - typeof configToSend.useLocalFiles:',
          typeof configToSend.useLocalFiles,
        );
        console.log('SemanticSimilarityEngine: Explicit config to send:', configToSend);
        console.log(
          'SemanticSimilarityEngine: DEBUG - this.config.useLocalFiles value:',
          this.config.useLocalFiles,
        );
        console.log(
          'SemanticSimilarityEngine: DEBUG - typeof this.config.useLocalFiles:',
          typeof this.config.useLocalFiles,
        );

        // Round-trip through JSON so only plain serializable data is sent
        const serializedConfig = JSON.parse(JSON.stringify(configToSend));
        console.log(
          'SemanticSimilarityEngine: DEBUG - serializedConfig.useLocalFiles:',
          serializedConfig.useLocalFiles,
        );

        const response = await chrome.runtime.sendMessage({
          target: 'offscreen',
          type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_INIT,
          config: serializedConfig, // original config; useLocalFiles is not overridden
        });

        if (!response || !response.success) {
          throw new Error(response?.error || 'Failed to initialize engine in offscreen document');
        }

        console.log('SemanticSimilarityEngine: Initialized via offscreen document');
      } else {
        // Direct Worker mode
        this._setupWorker();

        TransformersEnv.allowRemoteModels = !this.config.useLocalFiles;
        TransformersEnv.allowLocalModels = this.config.useLocalFiles;

        console.log(`SemanticSimilarityEngine: TransformersEnv config:`, {
          allowRemoteModels: TransformersEnv.allowRemoteModels,
          allowLocalModels: TransformersEnv.allowLocalModels,
          useLocalFiles: this.config.useLocalFiles,
        });

        // Only touch the WASM backend settings when that path exists
        if (TransformersEnv.backends?.onnx?.wasm) {
          TransformersEnv.backends.onnx.wasm.numThreads = this.config.numThreads;
        }

        // The same identifier is used for local and remote loading (the original
        // re-assigned it to the identical value in local mode, which was a no-op).
        const tokenizerIdentifier = this.config.modelIdentifier;

        console.log(
          `SemanticSimilarityEngine: Loading tokenizer from ${tokenizerIdentifier} (local_files_only: ${this.config.useLocalFiles})`,
        );

        const tokenizerConfig: any = {
          quantized: false,
          local_files_only: this.config.useLocalFiles,
        };

        // For models that do not need token_type_ids, say so explicitly in the tokenizer config
        if (!this.config.requiresTokenTypeIds) {
          tokenizerConfig.return_token_type_ids = false;
        }

        console.log(`SemanticSimilarityEngine: Full tokenizer config:`, {
          tokenizerIdentifier,
          localModelPathPrefix: this.config.localModelPathPrefix,
          modelIdentifier: this.config.modelIdentifier,
          useLocalFiles: this.config.useLocalFiles,
          local_files_only: this.config.useLocalFiles,
          requiresTokenTypeIds: this.config.requiresTokenTypeIds,
          tokenizerConfig,
        });

        this.tokenizer = await AutoTokenizer.from_pretrained(tokenizerIdentifier, tokenizerConfig);
        console.log('SemanticSimilarityEngine: Tokenizer loaded.');

        if (this.config.useLocalFiles) {
          // Local files mode - use URL path as before
          const onnxModelPathForWorker = chrome.runtime.getURL(
            `models/${this.config.modelIdentifier}/${this.config.onnxModelFile}`,
          );
          console.log(
            `SemanticSimilarityEngine: Instructing worker to load local ONNX model from ${onnxModelPathForWorker}`,
          );
          await this._sendMessageToWorker('init', {
            modelPath: onnxModelPathForWorker,
            numThreads: this.config.numThreads,
            executionProviders: this.config.executionProviders,
          });
        } else {
          // Remote files mode - use cached model data
          const modelIdParts = this.config.modelIdentifier.split('/');
          const modelNameForUrl =
            modelIdParts.length > 1
              ? this.config.modelIdentifier
              : `Xenova/${this.config.modelIdentifier}`;
          const onnxModelUrl = `https://huggingface.co/${modelNameForUrl}/resolve/main/onnx/${this.config.onnxModelFile}`;

          if (!this.config.modelIdentifier.includes('/')) {
            console.warn(
              `Warning: modelIdentifier "${this.config.modelIdentifier}" might not be a full HuggingFace path. Assuming Xenova prefix for remote URL.`,
            );
          }

          console.log(`SemanticSimilarityEngine: Getting cached model data from ${onnxModelUrl}`);

          // Get model data from cache (may download if not cached)
          const modelData = await getCachedModelData(onnxModelUrl);

          console.log(
            `SemanticSimilarityEngine: Sending cached model data to worker (${modelData.byteLength} bytes)`,
          );

          // Send ArrayBuffer to worker with transferable objects for zero-copy
          await this._sendMessageToWorker(
            'init',
            {
              modelData: modelData,
              numThreads: this.config.numThreads,
              executionProviders: this.config.executionProviders,
            },
            [modelData],
          );
        }
        console.log('SemanticSimilarityEngine: Worker reported model initialized.');

        // Try to enable SIMD acceleration; failure here is non-fatal
        try {
          console.log('SemanticSimilarityEngine: Checking SIMD support...');
          const simdSupported = await SIMDMathEngine.checkSIMDSupport();

          if (simdSupported) {
            console.log('SemanticSimilarityEngine: SIMD supported, initializing...');
            await this.simdMath!.initialize();
            this.useSIMD = true;
            console.log('SemanticSimilarityEngine: ✅ SIMD acceleration enabled');
          } else {
            console.log(
              'SemanticSimilarityEngine: ❌ SIMD not supported, using JavaScript fallback',
            );
            console.log('SemanticSimilarityEngine: To enable SIMD, please use:');
            console.log(' - Chrome 91+ (May 2021)');
            console.log(' - Firefox 89+ (June 2021)');
            console.log(' - Safari 16.4+ (March 2023)');
            console.log(' - Edge 91+ (May 2021)');
            this.useSIMD = false;
          }
        } catch (simdError) {
          console.warn(
            'SemanticSimilarityEngine: SIMD initialization failed, using JavaScript fallback:',
            simdError,
          );
          this.useSIMD = false;
        }
      }

      this.isInitialized = true;
      console.log(
        `SemanticSimilarityEngine: Initialization complete in ${(performance.now() - startTime).toFixed(2)}ms`,
      );
    } catch (error) {
      console.error('SemanticSimilarityEngine: Initialization failed.', error);
      if (this.worker) this.worker.terminate();
      this.worker = null;
      this.isInitialized = false;
      this.isInitializing = false;
      this.initPromise = null;

      // Build a more descriptive error object
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      const enhancedError = new Error(errorMessage);
      enhancedError.name = 'ModelInitializationError';
      throw enhancedError;
    }
  }

  /**
   * Direct-Worker initialization with progress reporting.
   *
   * Sets up the worker, loads the tokenizer (with a download progress callback
   * in remote mode), hands the ONNX model to the worker and probes SIMD.
   *
   * @param reportProgress Sink for `(status, progress, message)` updates.
   */
  private async _initializeDirectWorkerWithProgress(
    reportProgress: (status: string, progress: number, message?: string) => void,
  ): Promise<void> {
    // Direct Worker mode
    reportProgress('initializing', 25, 'Setting up worker...');
    this._setupWorker();

    TransformersEnv.allowRemoteModels = !this.config.useLocalFiles;
    TransformersEnv.allowLocalModels = this.config.useLocalFiles;

    console.log(`SemanticSimilarityEngine: TransformersEnv config:`, {
      allowRemoteModels: TransformersEnv.allowRemoteModels,
      allowLocalModels: TransformersEnv.allowLocalModels,
      useLocalFiles: this.config.useLocalFiles,
    });

    if (TransformersEnv.backends?.onnx?.wasm) {
      TransformersEnv.backends.onnx.wasm.numThreads = this.config.numThreads;
    }

    // The same identifier is used for local and remote loading
    const tokenizerIdentifier = this.config.modelIdentifier;

    reportProgress('downloading', 40, 'Loading tokenizer...');
    console.log(
      `SemanticSimilarityEngine: Loading tokenizer from ${tokenizerIdentifier} (local_files_only: ${this.config.useLocalFiles})`,
    );

    // Map tokenizer download progress into the 40-70 range of the overall progress
    const tokenizerProgressCallback = (progress: any) => {
      if (progress.status === 'downloading') {
        const progressPercent = Math.min(40 + (progress.progress || 0) * 0.3, 70);
        reportProgress(
          'downloading',
          progressPercent,
          `Downloading tokenizer: ${progress.file || ''}`,
        );
      }
    };

    const tokenizerConfig: any = {
      quantized: false,
      local_files_only: this.config.useLocalFiles,
    };

    // For models that do not need token_type_ids, say so explicitly in the tokenizer config
    if (!this.config.requiresTokenTypeIds) {
      tokenizerConfig.return_token_type_ids = false;
    }

    // A progress callback is only attached when files are fetched remotely
    const useProgressCallback = !this.config.useLocalFiles;
    if (useProgressCallback) {
      tokenizerConfig.progress_callback = tokenizerProgressCallback;
    }

    try {
      this.tokenizer = await AutoTokenizer.from_pretrained(tokenizerIdentifier, tokenizerConfig);
    } catch (error) {
      // Without a callback the retry below would be the exact same call, so
      // surface the real failure instead of silently repeating it.
      if (!useProgressCallback) throw error;

      // Fall back to standard loading in case the progress callback is the problem
      console.log(
        'SemanticSimilarityEngine: Progress callback not supported, using standard loading',
        error,
      );
      delete tokenizerConfig.progress_callback;
      this.tokenizer = await AutoTokenizer.from_pretrained(tokenizerIdentifier, tokenizerConfig);
    }

    reportProgress('downloading', 70, 'Tokenizer loaded, setting up ONNX model...');
    console.log('SemanticSimilarityEngine: Tokenizer loaded.');

    if (this.config.useLocalFiles) {
      // Local files mode - use URL path as before
      const onnxModelPathForWorker = chrome.runtime.getURL(
        `models/${this.config.modelIdentifier}/${this.config.onnxModelFile}`,
      );
      reportProgress('downloading', 80, 'Loading local ONNX model...');
      console.log(
        `SemanticSimilarityEngine: Instructing worker to load local ONNX model from ${onnxModelPathForWorker}`,
      );
      await this._sendMessageToWorker('init', {
        modelPath: onnxModelPathForWorker,
        numThreads: this.config.numThreads,
        executionProviders: this.config.executionProviders,
      });
    } else {
      // Remote files mode - use cached model data
      const modelIdParts = this.config.modelIdentifier.split('/');
      const modelNameForUrl =
        modelIdParts.length > 1
          ? this.config.modelIdentifier
          : `Xenova/${this.config.modelIdentifier}`;
      const onnxModelUrl = `https://huggingface.co/${modelNameForUrl}/resolve/main/onnx/${this.config.onnxModelFile}`;

      if (!this.config.modelIdentifier.includes('/')) {
        console.warn(
          `Warning: modelIdentifier "${this.config.modelIdentifier}" might not be a full HuggingFace path. Assuming Xenova prefix for remote URL.`,
        );
      }

      reportProgress('downloading', 80, 'Loading cached ONNX model...');
      console.log(`SemanticSimilarityEngine: Getting cached model data from ${onnxModelUrl}`);

      // Get model data from cache (may download if not cached)
      const modelData = await getCachedModelData(onnxModelUrl);

      console.log(
        `SemanticSimilarityEngine: Sending cached model data to worker (${modelData.byteLength} bytes)`,
      );

      // Send ArrayBuffer to worker with transferable objects for zero-copy
      await this._sendMessageToWorker(
        'init',
        {
          modelData: modelData,
          numThreads: this.config.numThreads,
          executionProviders: this.config.executionProviders,
        },
        [modelData],
      );
    }
    console.log('SemanticSimilarityEngine: Worker reported model initialized.');

    reportProgress('initializing', 90, 'Setting up SIMD acceleration...');

    // Try to enable SIMD acceleration; failure here is non-fatal
    try {
      console.log('SemanticSimilarityEngine: Checking SIMD support...');
      const simdSupported = await SIMDMathEngine.checkSIMDSupport();

      if (simdSupported) {
        console.log('SemanticSimilarityEngine: SIMD supported, initializing...');
        await this.simdMath!.initialize();
        this.useSIMD = true;
        console.log('SemanticSimilarityEngine: ✅ SIMD acceleration enabled');
      } else {
        console.log('SemanticSimilarityEngine: ❌ SIMD not supported, using JavaScript fallback');
        this.useSIMD = false;
      }
    } catch (simdError) {
      console.warn(
        'SemanticSimilarityEngine: SIMD initialization failed, using JavaScript fallback:',
        simdError,
      );
      this.useSIMD = false;
    }

    reportProgress('ready', 100, 'Initialization complete');
  }

  /**
   * Run a handful of representative texts through the engine so later calls
   * hit warm caches. Initializes (or awaits an in-flight initialization) first.
   * Failures during the warmup passes themselves are logged, not thrown.
   *
   * @throws Error when the engine is still not initialized after the attempt.
   */
  public async warmupModel(): Promise<void> {
    if (!this.isInitialized && !this.isInitializing) {
      await this.initialize();
    } else if (this.isInitializing && this.initPromise) {
      await this.initPromise;
    }
    if (!this.isInitialized) throw new Error('Engine not initialized after warmup attempt.');

    console.log('SemanticSimilarityEngine: Warming up model...');

    // Representative warmup texts covering different lengths and languages
    const warmupTexts = [
      // Short texts
      'Hello',
      '你好',
      'Test',
      // Medium-length texts
      'Hello world, this is a test.',
      '你好世界，这是一个测试。',
      'The quick brown fox jumps over the lazy dog.',
      // Long texts
      'This is a longer text that contains multiple sentences. It helps warm up the model for various text lengths.',
      '这是一个包含多个句子的较长文本。它有助于为各种文本长度预热模型。',
    ];

    try {
      // Progressive warmup: individual calls first, then one batch
      console.log('SemanticSimilarityEngine: Phase 1 - Individual warmup...');
      for (const text of warmupTexts.slice(0, 4)) {
        await this.getEmbedding(text);
      }

      console.log('SemanticSimilarityEngine: Phase 2 - Batch warmup...');
      await this.getEmbeddingsBatch(warmupTexts.slice(4));

      // Keep the warmup results; the caches are intentionally not cleared
      console.log('SemanticSimilarityEngine: Model warmup complete. Cache preserved.');
      console.log(`Embedding cache: ${this.cacheStats.embedding.size} items`);
      console.log(`Tokenization cache: ${this.cacheStats.tokenization.size} items`);
    } catch (error) {
      console.warn('SemanticSimilarityEngine: Warmup failed. This might not be critical.', error);
    }
  }

  /**
   * Tokenize one text (cached) or a batch of texts (not cached).
   *
   * @param text A single string or an array of strings.
   * @returns The tokenizer output cast to `TokenizedOutput`.
   * @throws Error when the tokenizer has not been loaded.
   */
  private async _tokenizeText(text: string | string[]): Promise<TokenizedOutput> {
    if (!this.tokenizer) throw new Error('Tokenizer not initialized.');

    // Shared by the single-text and batch paths
    const tokenizerOptions: any = {
      padding: true,
      truncation: true,
      max_length: this.config.maxLength,
      return_tensors: 'np',
    };
    // For models that do not need token_type_ids, explicitly disable them
    if (!this.config.requiresTokenTypeIds) {
      tokenizerOptions.return_token_type_ids = false;
    }

    // Single text: try the tokenization cache first
    if (typeof text === 'string') {
      const cacheKey = `tokenize:${text}`;
      const cached = this.tokenizationCache.get(cacheKey);
      if (cached) {
        this.cacheStats.tokenization.hits++;
        this.cacheStats.tokenization.size = this.tokenizationCache.size;
        return cached;
      }
      this.cacheStats.tokenization.misses++;

      const startTime = performance.now();
      const result = (await this.tokenizer(text, tokenizerOptions)) as TokenizedOutput;

      // Update performance statistics (the average is per lookup, hits included)
      this.performanceStats.totalTokenizationTime += performance.now() - startTime;
      this.performanceStats.averageTokenizationTime =
        this.performanceStats.totalTokenizationTime /
        (this.cacheStats.tokenization.hits + this.cacheStats.tokenization.misses);

      // Cache the result
      this.tokenizationCache.set(cacheKey, result);
      this.cacheStats.tokenization.size = this.tokenizationCache.size;

      return result;
    }

    // Batch: tokenize directly (batches are usually not repeated)
    const startTime = performance.now();
    const result = (await this.tokenizer(text, tokenizerOptions)) as TokenizedOutput;
    this.performanceStats.totalTokenizationTime += performance.now() - startTime;
    return result;
  }

  /**
   * Mean-pool the worker's hidden states over the attended tokens and
   * L2-normalize the result.
   *
   * @param workerOutput Worker payload; `dims[1]` is read as sequence length and `dims[2]` as hidden size.
   * @param attentionMaskArray Mask for the sequence; positions equal to `1` are pooled.
   * @throws Error when the payload has no `data` or `dims`.
   */
  private _extractEmbeddingFromWorkerOutput(
    workerOutput: WorkerResponsePayload,
    attentionMaskArray: number[],
  ): Float32Array {
    if (!workerOutput.data || !workerOutput.dims)
      throw new Error('Invalid worker output for embedding extraction.');

    // Use the Float32Array directly when possible to avoid a conversion
    const lastHiddenStateData =
      workerOutput.data instanceof Float32Array
        ? workerOutput.data
        : new Float32Array(workerOutput.data);

    const dims = workerOutput.dims;
    const seqLength = dims[1];
    const hiddenSize = dims[2];

    // Accumulator comes from the memory pool.
    // NOTE(review): the += below assumes the pool hands back a zero-filled array - confirm.
    const embedding = this.memoryPool.getEmbedding(hiddenSize);
    let validTokens = 0;

    for (let i = 0; i < seqLength; i++) {
      if (attentionMaskArray[i] === 1) {
        const offset = i * hiddenSize;
        for (let j = 0; j < hiddenSize; j++) {
          embedding[j] += lastHiddenStateData[offset + j];
        }
        validTokens++;
      }
    }

    if (validTokens > 0) {
      for (let i = 0; i < hiddenSize; i++) {
        embedding[i] /= validTokens;
      }
    }

    return this.normalizeVector(embedding);
  }

  /**
   * Batch variant of `_extractEmbeddingFromWorkerOutput`: one mean-pooled,
   * normalized embedding per batch row.
   *
   * @param workerOutput Worker payload; `dims` is read as `[batch, seqLength, hiddenSize]`.
   * @param attentionMasksBatch One attention mask per batch row.
   * @throws Error when the payload has no `data` or `dims`.
   */
  private _extractBatchEmbeddingsFromWorkerOutput(
    workerOutput: WorkerResponsePayload,
    attentionMasksBatch: number[][],
  ): Float32Array[] {
    if (!workerOutput.data || !workerOutput.dims)
      throw new Error('Invalid worker output for batch embedding extraction.');

    // Use the Float32Array directly when possible to avoid a conversion
    const lastHiddenStateData =
      workerOutput.data instanceof Float32Array
        ? workerOutput.data
        : new Float32Array(workerOutput.data);

    const dims = workerOutput.dims;
    const batchSize = dims[0];
    const seqLength = dims[1];
    const hiddenSize = dims[2];
    const embeddings: Float32Array[] = [];

    for (let b = 0; b < batchSize; b++) {
      // Accumulator comes from the memory pool
      const embedding = this.memoryPool.getEmbedding(hiddenSize);
      let validTokens = 0;
      const currentAttentionMask = attentionMasksBatch[b];

      for (let i = 0; i < seqLength; i++) {
        if (currentAttentionMask[i] === 1) {
          const offset = (b * seqLength + i) * hiddenSize;
          for (let j = 0; j < hiddenSize; j++) {
            embedding[j] += lastHiddenStateData[offset + j];
          }
          validTokens++;
        }
      }

      if (validTokens > 0) {
        for (let i = 0; i < hiddenSize; i++) {
          embedding[i] /= validTokens;
        }
      }

      embeddings.push(this.normalizeVector(embedding));
    }

    return embeddings;
  }

  /**
   * Get the embedding for one text, using the embedding cache when possible.
   *
   * In offscreen mode the computation is delegated to the offscreen document;
   * otherwise the text is tokenized here and inferred by the worker, subject
   * to `config.concurrentLimit`.
   *
   * @param text Text to embed.
   * @param options Forwarded to the offscreen document; not part of the cache key.
   * @throws Error when the offscreen document reports failure or returns invalid data.
   */
  public async getEmbedding(
    text: string,
    options: Record<string, any> = {},
  ): Promise<Float32Array> {
    if (!this.isInitialized) await this.initialize();

    const cacheKey = this.getCacheKey(text, options);
    const cached = this.embeddingCache.get(cacheKey);
    if (cached) {
      this.cacheStats.embedding.hits++;
      this.cacheStats.embedding.size = this.embeddingCache.size;
      return cached;
    }
    this.cacheStats.embedding.misses++;

    // Offscreen mode: delegate to the offscreen document
    if (this.useOffscreen) {
      const response = await chrome.runtime.sendMessage({
        target: 'offscreen',
        type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_COMPUTE,
        text: text,
        options: options,
      });

      if (!response || !response.success) {
        throw new Error(response?.error || 'Failed to get embedding from offscreen document');
      }

      // Validate the response data
      if (!response.embedding || !Array.isArray(response.embedding)) {
        throw new Error('Invalid embedding data received from offscreen document');
      }

      console.log('SemanticSimilarityEngine: Received embedding from offscreen:', {
        length: response.embedding.length,
        type: typeof response.embedding,
        isArray: Array.isArray(response.embedding),
        firstFewValues: response.embedding.slice(0, 5),
      });

      const embedding = new Float32Array(response.embedding);

      // Log the converted data for verification
      console.log('SemanticSimilarityEngine: Converted embedding:', {
        length: embedding.length,
        type: typeof embedding,
        constructor: embedding.constructor.name,
        isFloat32Array: embedding instanceof Float32Array,
        firstFewValues: Array.from(embedding.slice(0, 5)),
      });

      this.embeddingCache.set(cacheKey, embedding);
      this.cacheStats.embedding.size = this.embeddingCache.size;

      // Update performance statistics
      this.performanceStats.totalEmbeddingComputations++;

      return embedding;
    }

    if (this.runningWorkerTasks >= this.config.concurrentLimit) {
      await this.waitForWorkerSlot();
    }
    this.runningWorkerTasks++;

    const startTime = performance.now();
    try {
      const tokenized = await this._tokenizeText(text);

      const inputIdsData = this.convertTensorDataToNumbers(tokenized.input_ids.data);
      const attentionMaskData = this.convertTensorDataToNumbers(tokenized.attention_mask.data);
      const tokenTypeIdsData = tokenized.token_type_ids
        ? this.convertTensorDataToNumbers(tokenized.token_type_ids.data)
        : undefined;

      const workerPayload: WorkerMessagePayload = {
        input_ids: inputIdsData,
        attention_mask: attentionMaskData,
        token_type_ids: tokenTypeIdsData,
        dims: {
          input_ids: tokenized.input_ids.dims,
          attention_mask: tokenized.attention_mask.dims,
          token_type_ids: tokenized.token_type_ids?.dims,
        },
      };

      const workerOutput = await this._sendMessageToWorker('infer', workerPayload);
      const embedding = this._extractEmbeddingFromWorkerOutput(workerOutput, attentionMaskData);
      this.embeddingCache.set(cacheKey, embedding);
      this.cacheStats.embedding.size = this.embeddingCache.size;

      this.performanceStats.totalEmbeddingComputations++;
      this.performanceStats.totalEmbeddingTime += performance.now() - startTime;
      this.performanceStats.averageEmbeddingTime =
        this.performanceStats.totalEmbeddingTime / this.performanceStats.totalEmbeddingComputations;
      return embedding;
    } finally {
      // Always release the slot and wake the next queued task
      this.runningWorkerTasks--;
      this.processWorkerQueue();
    }
  }

  /**
   * Get embeddings for many texts, preserving input order.
   *
   * Cached texts are served from the embedding cache; only the rest are
   * computed (offscreen document or worker batch inference).
   *
   * @param texts Texts to embed; an empty or missing array yields `[]`.
   * @param options Forwarded to the offscreen document; not part of the cache key.
   * @throws Error when the offscreen document reports failure or returns
   *   embeddings that do not match the request.
   */
  public async getEmbeddingsBatch(
    texts: string[],
    options: Record<string, any> = {},
  ): Promise<Float32Array[]> {
    if (!this.isInitialized) await this.initialize();
    if (!texts || texts.length === 0) return [];

    // Offscreen mode: delegate to the offscreen document
    if (this.useOffscreen) {
      // Check the cache first
      const results: (Float32Array | undefined)[] = new Array(texts.length).fill(undefined);
      const uncachedTexts: string[] = [];
      const uncachedIndices: number[] = [];

      texts.forEach((text, index) => {
        const cacheKey = this.getCacheKey(text, options);
        const cached = this.embeddingCache.get(cacheKey);
        if (cached) {
          results[index] = cached;
          this.cacheStats.embedding.hits++;
        } else {
          uncachedTexts.push(text);
          uncachedIndices.push(index);
          this.cacheStats.embedding.misses++;
        }
      });

      // Everything was cached: return directly
      if (uncachedTexts.length === 0) {
        return results as Float32Array[];
      }

      // Request only the uncached texts
      const response = await chrome.runtime.sendMessage({
        target: 'offscreen',
        type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_BATCH_COMPUTE,
        texts: uncachedTexts,
        options: options,
      });

      if (!response || !response.success) {
        throw new Error(
          response?.error || 'Failed to get embeddings batch from offscreen document',
        );
      }

      // Reject a payload that cannot fill every requested slot; otherwise the
      // result would contain `undefined` entries typed as Float32Array.
      if (
        !Array.isArray(response.embeddings) ||
        response.embeddings.length !== uncachedTexts.length
      ) {
        throw new Error('Invalid embeddings data received from offscreen document');
      }

      // Put each result back at its original position and cache it
      response.embeddings.forEach((embeddingArray: number[], batchIndex: number) => {
        const embedding = new Float32Array(embeddingArray);
        const originalIndex = uncachedIndices[batchIndex];
        const originalText = uncachedTexts[batchIndex];

        results[originalIndex] = embedding;

        // Cache the result
        const cacheKey = this.getCacheKey(originalText, options);
        this.embeddingCache.set(cacheKey, embedding);
      });

      this.cacheStats.embedding.size = this.embeddingCache.size;
      this.performanceStats.totalEmbeddingComputations += uncachedTexts.length;

      return results as Float32Array[];
    }

    const results: (Float32Array | undefined)[] = new Array(texts.length).fill(undefined);
    // Maps each distinct uncached text to every index it occupies in `texts`
    const uncachedTextsMap = new Map<string, number[]>();
    const textsToTokenize: string[] = [];

    texts.forEach((text, index) => {
      const cacheKey = this.getCacheKey(text, options);
      const cached = this.embeddingCache.get(cacheKey);
      if (cached) {
        results[index] = cached;
        this.cacheStats.embedding.hits++;
      } else {
        if (!uncachedTextsMap.has(text)) {
          uncachedTextsMap.set(text, []);
          textsToTokenize.push(text);
        }
        uncachedTextsMap.get(text)!.push(index);
        this.cacheStats.embedding.misses++;
      }
    });
    this.cacheStats.embedding.size = this.embeddingCache.size;

    if (textsToTokenize.length === 0) return results as Float32Array[];

    if (this.runningWorkerTasks >= this.config.concurrentLimit) {
      await this.waitForWorkerSlot();
    }
    this.runningWorkerTasks++;

    const startTime = performance.now();
    try {
      const tokenizedBatch = await this._tokenizeText(textsToTokenize);
      const workerPayload: WorkerMessagePayload = {
        input_ids: this.convertTensorDataToNumbers(tokenizedBatch.input_ids.data),
        attention_mask: this.convertTensorDataToNumbers(tokenizedBatch.attention_mask.data),
        token_type_ids: tokenizedBatch.token_type_ids
          ? this.convertTensorDataToNumbers(tokenizedBatch.token_type_ids.data)
          : undefined,
        dims: {
          input_ids: tokenizedBatch.input_ids.dims,
          attention_mask: tokenizedBatch.attention_mask.dims,
          token_type_ids: tokenizedBatch.token_type_ids?.dims,
        },
      };

      // Real batched inference
      const workerOutput = await this._sendMessageToWorker('batchInfer', workerPayload);

      // Slice the flat attention mask into one mask per batch row
      const attentionMasksForBatch: number[][] = [];
      const batchSize = tokenizedBatch.input_ids.dims[0];
      const seqLength = tokenizedBatch.input_ids.dims[1];
      const rawAttentionMaskData = this.convertTensorDataToNumbers(
        tokenizedBatch.attention_mask.data,
      );

      for (let i = 0; i < batchSize; ++i) {
        attentionMasksForBatch.push(rawAttentionMaskData.slice(i * seqLength, (i + 1) * seqLength));
      }

      const batchEmbeddings = this._extractBatchEmbeddingsFromWorkerOutput(
        workerOutput,
        attentionMasksForBatch,
      );
      batchEmbeddings.forEach((embedding, batchIdx) => {
        const originalText = textsToTokenize[batchIdx];
        const cacheKey = this.getCacheKey(originalText, options);
        this.embeddingCache.set(cacheKey, embedding);
        const originalResultIndices = uncachedTextsMap.get(originalText)!;
        originalResultIndices.forEach((idx) => {
          results[idx] = embedding;
        });
      });
      this.cacheStats.embedding.size = this.embeddingCache.size;

      this.performanceStats.totalEmbeddingComputations += textsToTokenize.length;
      this.performanceStats.totalEmbeddingTime += performance.now() - startTime;
      this.performanceStats.averageEmbeddingTime =
        this.performanceStats.totalEmbeddingTime / this.performanceStats.totalEmbeddingComputations;
      return results as Float32Array[];
    } finally {
      // Always release the slot and wake the next queued task
      this.runningWorkerTasks--;
      this.processWorkerQueue();
    }
  }

  /**
   * Compute the cosine similarity between two texts.
   *
   * @throws Error from `validateInput` when either text is not a non-empty string.
   */
  public async computeSimilarity(
    text1: string,
    text2: string,
    options: Record<string, any> = {},
  ): Promise<number> {
    if (!this.isInitialized) await this.initialize();
    this.validateInput(text1, text2);

    const simStartTime = performance.now();
    const [embedding1, embedding2] = await Promise.all([
      this.getEmbedding(text1, options),
      this.getEmbedding(text2, options),
    ]);
    const similarity = this.cosineSimilarity(embedding1, embedding2);
    console.log('computeSimilarity:', similarity);

    this.performanceStats.totalSimilarityComputations++;
    this.performanceStats.totalSimilarityTime += performance.now() - simStartTime;
    this.performanceStats.averageSimilarityTime =
      this.performanceStats.totalSimilarityTime / this.performanceStats.totalSimilarityComputations;
    return similarity;
  }

  /**
   * Compute cosine similarities for a list of text pairs.
   *
   * @param pairs Pairs to compare; an empty or missing array yields `[]`.
   * @returns One similarity per pair, in input order (`0` when an embedding is missing).
   */
  public async computeSimilarityBatch(
    pairs: { text1: string; text2: string }[],
    options: Record<string, any> = {},
  ): Promise<number[]> {
    if (!this.isInitialized) await this.initialize();
    if (!pairs || pairs.length === 0) return [];

    // Offscreen mode: delegate to the offscreen document
    if (this.useOffscreen) {
      // NOTE(review): this reuses the message type getEmbeddingsBatch sends with
      // `texts`; presumably the handler branches on `pairs` - confirm.
      const response = await chrome.runtime.sendMessage({
        target: 'offscreen',
        type: OFFSCREEN_MESSAGE_TYPES.SIMILARITY_ENGINE_BATCH_COMPUTE,
        pairs: pairs,
        options: options,
      });

      if (!response || !response.success) {
        throw new Error(response?.error || 'Failed to compute similarities in offscreen document');
      }

      return response.similarities;
    }

    // Direct mode: embed each distinct text once, then compare
    const simStartTime = performance.now();
    const uniqueTextsSet = new Set<string>();
    pairs.forEach((pair) => {
      this.validateInput(pair.text1, pair.text2);
      uniqueTextsSet.add(pair.text1);
      uniqueTextsSet.add(pair.text2);
    });

    const uniqueTextsArray = Array.from(uniqueTextsSet);
    const embeddingsArray = await this.getEmbeddingsBatch(uniqueTextsArray, options);
    const embeddingMap = new Map<string, Float32Array>();
    uniqueTextsArray.forEach((text, index) => {
      embeddingMap.set(text, embeddingsArray[index]);
    });

    const similarities = pairs.map((pair) => {
      const emb1 = embeddingMap.get(pair.text1);
      const emb2 = embeddingMap.get(pair.text2);
      if (!emb1 || !emb2) {
        console.warn('Embeddings not found for pair:', pair);
        return 0;
      }
      return this.cosineSimilarity(emb1, emb2);
    });

    this.performanceStats.totalSimilarityComputations += pairs.length;
    this.performanceStats.totalSimilarityTime += performance.now() - simStartTime;
    this.performanceStats.averageSimilarityTime =
      this.performanceStats.totalSimilarityTime / this.performanceStats.totalSimilarityComputations;
    return similarities;
  }

  /**
   * Compute the full similarity matrix between two lists of texts.
   *
   * @returns `matrix[i][j]` is the similarity of `texts1[i]` and `texts2[j]`;
   *   `[]` when either list is empty or missing.
   */
  public async computeSimilarityMatrix(
    texts1: string[],
    texts2: string[],
    options: Record<string, any> = {},
  ): Promise<number[][]> {
    if (!this.isInitialized) await this.initialize();
    if (!texts1 || !texts2 || texts1.length === 0 || texts2.length === 0) return [];

    const simStartTime = performance.now();
    const allTextsSet = new Set<string>([...texts1, ...texts2]);
    // 'valid_dummy' is the sentinel validateInput accepts in place of a second text
    texts1.forEach((t) => this.validateInput(t, 'valid_dummy'));
    texts2.forEach((t) => this.validateInput(t, 'valid_dummy'));

    const allTextsArray = Array.from(allTextsSet);
    const embeddingsArray = await this.getEmbeddingsBatch(allTextsArray, options);
    const embeddingMap = new Map<string, Float32Array>();
    allTextsArray.forEach((text, index) => {
      embeddingMap.set(text, embeddingsArray[index]);
    });

    // Use the SIMD matrix computation when available
    if (this.useSIMD && this.simdMath) {
      try {
        const embeddings1 = texts1.map((text) => embeddingMap.get(text)!).filter(Boolean);
        const embeddings2 = texts2.map((text) => embeddingMap.get(text)!).filter(Boolean);

        // Only take the SIMD path when no embedding was dropped by the filter
        if (embeddings1.length === texts1.length && embeddings2.length === texts2.length) {
          const matrix = await this.simdMath.similarityMatrix(embeddings1, embeddings2);

          this.performanceStats.totalSimilarityComputations += texts1.length * texts2.length;
          this.performanceStats.totalSimilarityTime += performance.now() - simStartTime;
          this.performanceStats.averageSimilarityTime =
            this.performanceStats.totalSimilarityTime /
            this.performanceStats.totalSimilarityComputations;

          return matrix;
        }
      } catch (error) {
        console.warn('SIMD matrix computation failed, falling back to JavaScript:', error);
      }
    }

    // JavaScript fallback
    const matrix: number[][] = [];
    for (const textA of texts1) {
      const row: number[] = [];
      const embA = embeddingMap.get(textA);
      if (!embA) {
        console.warn(`Embedding not found for text1: "${textA}"`);
        texts2.forEach(() => row.push(0));
        matrix.push(row);
        continue;
      }
      for (const textB of texts2) {
        const embB = embeddingMap.get(textB);
        if (!embB) {
          console.warn(`Embedding not found for text2: "${textB}"`);
          row.push(0);
          continue;
        }
        row.push(this.cosineSimilarity(embA, embB));
      }
      matrix.push(row);
    }

    this.performanceStats.totalSimilarityComputations += texts1.length * texts2.length;
    this.performanceStats.totalSimilarityTime += performance.now() - simStartTime;
    this.performanceStats.averageSimilarityTime =
      this.performanceStats.totalSimilarityTime / this.performanceStats.totalSimilarityComputations;
    return matrix;
  }

  /**
   * Synchronous cosine similarity of two equal-length vectors.
   *
   * The SIMD implementation is asynchronous, so this synchronous entry point
   * always uses the JavaScript implementation; use `cosineSimilaritySIMD` for
   * the accelerated path.
   *
   * @returns The similarity, or `0` when a vector is missing or lengths differ.
   */
  public cosineSimilarity(vecA: Float32Array, vecB: Float32Array): number {
    if (!vecA || !vecB || vecA.length !== vecB.length) {
      console.warn('Cosine similarity: Invalid vectors provided.', vecA, vecB);
      return 0;
    }

    return this.cosineSimilarityJS(vecA, vecB);
  }

  /**
   * Plain JavaScript cosine similarity; returns `0` when either vector has zero magnitude.
   */
  private cosineSimilarityJS(vecA: Float32Array, vecB: Float32Array): number {
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < vecA.length; i++) {
      dotProduct += vecA[i] * vecB[i];
      normA += vecA[i] * vecA[i];
      normB += vecB[i] * vecB[i];
    }
    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    return magnitude === 0 ? 0 : dotProduct / magnitude;
  }

  /**
   * Asynchronous cosine similarity that uses the SIMD engine when enabled and
   * falls back to the JavaScript implementation on failure.
   *
   * @returns The similarity, or `0` when a vector is missing or lengths differ.
   */
  public async cosineSimilaritySIMD(vecA: Float32Array, vecB: Float32Array): Promise<number> {
    if (!vecA || !vecB || vecA.length !== vecB.length) {
      console.warn('Cosine similarity: Invalid vectors provided.', vecA, vecB);
      return 0;
    }

    if (this.useSIMD && this.simdMath) {
      try {
        return await this.simdMath.cosineSimilarity(vecA, vecB);
      } catch (error) {
        console.warn('SIMD cosine similarity failed, falling back to JavaScript:', error);
      }
    }

    return this.cosineSimilarityJS(vecA, vecB);
  }

  /**
   * L2-normalize a vector.
   *
   * @returns A new array, or the input itself when its norm is zero.
   */
  public normalizeVector(vector: Float32Array): Float32Array {
    let norm = 0;
    for (let i = 0; i < vector.length; i++) norm += vector[i] * vector[i];
    norm = Math.sqrt(norm);
    if (norm === 0) return vector;

    const normalized = new Float32Array(vector.length);
    for (let i = 0; i < vector.length; i++) normalized[i] = vector[i] / norm;
    return normalized;
  }

  /**
   * Validate one or two input texts.
   *
   * Pass the sentinel `'valid_dummy'` as `text2` to validate `text1` only.
   * Overlong inputs only produce a warning.
   *
   * @throws Error when an input is not a string or is blank.
   */
  public validateInput(text1: string, text2: string | 'valid_dummy'): void {
    if (typeof text1 !== 'string' || (text2 !== 'valid_dummy' && typeof text2 !== 'string')) {
      throw new Error('输入必须是字符串');
    }
    if (text1.trim().length === 0 || (text2 !== 'valid_dummy' && text2.trim().length === 0)) {
      throw new Error('输入文本不能为空');
    }

    // Rough character budget: five characters per token of maxLength
    const roughCharLimit = this.config.maxLength * 5;
    if (
      text1.length > roughCharLimit ||
      (text2 !== 'valid_dummy' && text2.length > roughCharLimit)
    ) {
      console.warn('输入文本可能过长，将由分词器截断。');
    }
  }

  /**
   * Cache key for an embedding; currently just the text itself.
   */
  private getCacheKey(text: string, _options: Record<string, any> = {}): string {
    return text; // Options currently not used to vary embedding, simplify key
  }

  /**
   * Snapshot of performance counters, cache statistics (with hit rates),
   * memory information, initialization flags, config and worker queue depth.
   */
  public getPerformanceStats(): Record<string, any> {
    return {
      ...this.performanceStats,
      cacheStats: {
        ...this.cacheStats,
        embedding: {
          ...this.cacheStats.embedding,
          hitRate:
            this.cacheStats.embedding.hits + this.cacheStats.embedding.misses > 0
              ? this.cacheStats.embedding.hits /
                (this.cacheStats.embedding.hits + this.cacheStats.embedding.misses)
              : 0,
        },
        tokenization: {
          ...this.cacheStats.tokenization,
          hitRate:
            this.cacheStats.tokenization.hits + this.cacheStats.tokenization.misses > 0
              ? this.cacheStats.tokenization.hits /
                (this.cacheStats.tokenization.hits + this.cacheStats.tokenization.misses)
              : 0,
        },
      },
      memoryPool: this.memoryPool.getStats(),
      memoryUsage: this.getMemoryUsage(),
      isInitialized: this.isInitialized,
      isInitializing: this.isInitializing,
      config: this.config,
      pendingWorkerTasks: this.workerTaskQueue.length,
      runningWorkerTasks: this.runningWorkerTasks,
    };
  }

  /**
   * Queue the caller until `processWorkerQueue` releases a worker slot.
   */
  private async waitForWorkerSlot(): Promise<void> {
    return new Promise((resolve) => {
      this.workerTaskQueue.push(resolve);
    });
  }

  /**
   * Release the oldest queued waiter when a worker slot is free.
   */
  private processWorkerQueue(): void {
    if (this.workerTaskQueue.length > 0 && this.runningWorkerTasks < this.config.concurrentLimit) {
      const resolve = this.workerTaskQueue.shift();
      if (resolve) resolve();
    }
  }

  /**
   * Fetch statistics from the worker.
   *
   * @returns The worker statistics, or `null` when there is no initialized worker or the request fails.
   */
  public async getWorkerStats(): Promise<WorkerStats | null> {
    if (!this.worker || !this.isInitialized) return null;

    try {
      const response = await this._sendMessageToWorker('getStats');
      return response as WorkerStats;
    } catch (error) {
      console.warn('Failed to get worker stats:', error);
      return null;
    }
  }

  /**
   * Ask the worker to clear its buffers; a no-op without an initialized worker.
   * Failures are logged, not thrown.
   */
  public async clearWorkerBuffers(): Promise<void> {
    if (!this.worker || !this.isInitialized) return;

    try {
      await this._sendMessageToWorker('clearBuffers');
      console.log('SemanticSimilarityEngine: Worker buffers cleared.');
    } catch (error) {
      console.warn('Failed to clear worker buffers:', error);
    }
  }

  // Clear all caches
  public clearAllCaches(): void {
    this.embeddingCache.clear();
    this.tokenizationCache.clear();
    this.cacheStats = {
      embedding: { hits: 0, misses: 0, size: 0 },
      tokenization: { hits: 0,
misses: 0, size: 0 }, }; console.log('SemanticSimilarityEngine: All caches cleared.'); } // 新增：获取内存使用情况 public getMemoryUsage(): { embeddingCacheUsage: number; tokenizationCacheUsage: number; totalCacheUsage: number; } { const embeddingStats = this.embeddingCache.getStats(); const tokenizationStats = this.tokenizationCache.getStats(); return { embeddingCacheUsage: embeddingStats.usage, tokenizationCacheUsage: tokenizationStats.usage, totalCacheUsage: (embeddingStats.usage + tokenizationStats.usage) / 2, }; } public async dispose(): Promise<void> { console.log('SemanticSimilarityEngine: Disposing...'); // 清理 Worker 缓冲区 await this.clearWorkerBuffers(); if (this.worker) { this.worker.terminate(); this.worker = null; } // 清理 SIMD 引擎 if (this.simdMath) { this.simdMath.dispose(); this.simdMath = null; } this.tokenizer = null; this.embeddingCache.clear(); this.tokenizationCache.clear(); this.memoryPool.clear(); this.pendingMessages.clear(); this.workerTaskQueue = []; this.isInitialized = false; this.isInitializing = false; this.initPromise = null; this.useSIMD = false; console.log('SemanticSimilarityEngine: Disposed.'); } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/marie6789040106650/mcp-chrome-bk'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.