import crypto from 'crypto';
import { CONFIG } from '../config.js';
import { sendLogMessage } from '../utils/logger.js';
// ============================================
// Embedding Cache (LRU)
// ============================================
// In-memory store of embeddings, keyed by the hash of the input text
// (see getTextHash). A Map iterates in insertion order, which
// generateEmbedding relies on when it evicts the first key.
const embeddingCache = new Map();
// Entry count at which generateEmbedding evicts an entry before adding a new one.
const CACHE_MAX_SIZE = 1000;
// Running lookup counters, incremented by generateEmbedding and reported by
// getCacheStats().
let cacheHits = 0;
let cacheMisses = 0;
/**
 * Compute the hex-encoded MD5 digest of `text`.
 * generateEmbedding uses the result as the cache key for that text.
 *
 * @param {string} text - Input to hash.
 * @returns {string} Lower-case hexadecimal digest.
 */
function getTextHash(text) {
  const hasher = crypto.createHash('md5');
  hasher.update(text);
  return hasher.digest('hex');
}
/**
 * Snapshot of the embedding cache counters.
 *
 * @returns {{size: number, maxSize: number, hits: number, misses: number, hitRate: string}}
 *   `hitRate` is a percentage string with two decimals (e.g. "33.33%"), or the
 *   literal "0%" when no lookups have been recorded yet.
 */
function getCacheStats() {
  const lookups = cacheHits + cacheMisses;

  let hitRate = '0%';
  if (lookups > 0) {
    const percentage = (cacheHits / lookups) * 100;
    hitRate = percentage.toFixed(2) + '%';
  }

  return {
    size: embeddingCache.size,
    maxSize: CACHE_MAX_SIZE,
    hits: cacheHits,
    misses: cacheMisses,
    hitRate,
  };
}

// Exposed so callers can monitor cache behaviour.
export { getCacheStats };
// ============================================
// Helper Functions
// ============================================
/**
 * Turn arbitrary content into the text that will be embedded.
 *
 * Strings are passed through unchanged; any other value is serialised with
 * JSON.stringify. Note that JSON.stringify yields `undefined` (not a string)
 * for values it cannot serialise, such as `undefined` or a function.
 *
 * @param {*} content - Value to convert.
 * @returns {string|undefined} The original string or its JSON serialisation.
 */
export function prepareContentForEmbedding(content) {
  const isAlreadyText = typeof content === 'string';
  return isAlreadyText ? content : JSON.stringify(content);
}
/**
 * Internal: request the embedding for a single text from the configured
 * Aliyun endpoint. This function never touches the cache.
 *
 * @param {string} text - Text sent as the only element of `input.texts`.
 * @returns {Promise<*>} The `embedding` field of the first entry in
 *   `output.embeddings` (presumably a numeric vector).
 * @throws {Error} When `response.ok` is false, or when the response body has
 *   no first entry under `output.embeddings`.
 */
async function callEmbeddingAPI(text) {
  const { apiUrl, apiKey, model, dimensions } = CONFIG.embeddings;

  const payload = {
    model,
    input: { texts: [text] },
    parameters: { dimension: dimensions, text_type: "document" },
  };

  const response = await fetch(apiUrl, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Aliyun API error: ${response.status} - ${errorText}`);
  }

  const result = await response.json();
  const firstEntry = result.output?.embeddings?.[0];

  // Guard clause: anything without a usable first entry is a malformed reply.
  if (!firstEntry) {
    throw new Error('Invalid response from Aliyun API: ' + JSON.stringify(result));
  }
  return firstEntry.embedding;
}
// ============================================
// Public API with Caching
// ============================================
/**
 * Return the embedding for `text`, served from the in-memory cache when
 * possible and fetched through callEmbeddingAPI otherwise.
 *
 * The cache is a Map keyed by getTextHash(text). Because a Map iterates in
 * insertion order, re-inserting a key on every hit keeps the first key the
 * least recently used one, which is the entry evicted at capacity.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<*>} The cached or freshly fetched embedding (presumably a
 *   numeric vector; it is whatever callEmbeddingAPI resolves with).
 * @throws {Error} Re-throws, after logging, any error raised while fetching.
 */
export async function generateEmbedding(text) {
  const hash = getTextHash(text);

  // Check cache first
  if (embeddingCache.has(hash)) {
    cacheHits++;
    const cached = embeddingCache.get(hash);
    // Move the entry to the end of the Map so it counts as most recently
    // used; without this the cache evicts in plain insertion (FIFO) order.
    embeddingCache.delete(hash);
    embeddingCache.set(hash, cached);
    sendLogMessage('debug', 'Embedding cache hit', {
      textLength: text.length,
      cacheStats: getCacheStats()
    });
    return cached;
  }

  // Cache miss - call API
  cacheMisses++;
  try {
    const embedding = await callEmbeddingAPI(text);

    if (embeddingCache.has(hash)) {
      // Another call for the same text filled this slot while we awaited.
      // Overwriting does not grow the cache, so nothing else needs evicting;
      // delete first so the entry is re-inserted as most recently used.
      embeddingCache.delete(hash);
    } else if (embeddingCache.size >= CACHE_MAX_SIZE) {
      // LRU eviction: the first key in iteration order is the oldest entry.
      const firstKey = embeddingCache.keys().next().value;
      embeddingCache.delete(firstKey);
      sendLogMessage('debug', 'Embedding cache eviction', { evictedHash: firstKey });
    }

    embeddingCache.set(hash, embedding);
    sendLogMessage('debug', 'Embedding generated and cached', {
      textLength: text.length,
      dimensions: CONFIG.embeddings.dimensions,
      cacheStats: getCacheStats()
    });
    return embedding;
  } catch (error) {
    sendLogMessage('error', 'Failed to generate embedding', { error: error.message });
    throw error;
  }
}
/**
 * Request embeddings for several texts in a single API call.
 *
 * This batch path neither reads from nor writes to the embedding cache.
 *
 * @param {string[]} texts - Texts sent as `input.texts`.
 * @returns {Promise<Array>} One `embedding` value per entry of
 *   `output.embeddings`, in the order the API returned them (presumably the
 *   same order as `texts` — confirm against the API contract). An empty
 *   `texts` array resolves to `[]` without contacting the API.
 * @throws {Error} When `response.ok` is false or when `output.embeddings` is
 *   not an array; the error is logged before being re-thrown.
 */
export async function generateEmbeddings(texts) {
  // An empty batch has an empty result, so skip the network round-trip.
  if (Array.isArray(texts) && texts.length === 0) {
    return [];
  }

  try {
    const response = await fetch(CONFIG.embeddings.apiUrl, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${CONFIG.embeddings.apiKey}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        model: CONFIG.embeddings.model,
        input: {
          texts: texts
        },
        parameters: {
          dimension: CONFIG.embeddings.dimensions,
          text_type: "document"
        }
      })
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Aliyun API error: ${response.status} - ${errorText}`);
    }

    const result = await response.json();
    const embeddings = result.output && result.output.embeddings;

    // Require a real array so a malformed payload is reported as an invalid
    // response instead of failing later inside `.map`.
    if (!Array.isArray(embeddings)) {
      throw new Error('Invalid response from Aliyun API: ' + JSON.stringify(result));
    }

    sendLogMessage('debug', 'Generated batch embeddings via Aliyun API', {
      count: texts.length,
      dimensions: CONFIG.embeddings.dimensions
    });
    return embeddings.map((entry) => entry.embedding);
  } catch (error) {
    sendLogMessage('error', 'Failed to generate batch embeddings', { error: error.message });
    throw error;
  }
}