import type { FeatureExtractionPipeline } from "@xenova/transformers";
/** Cached pipeline instance; stays `null` until the first successful load. */
let embedder: FeatureExtractionPipeline | null = null;

/**
 * In-flight load shared by concurrent callers so the pipeline is only
 * constructed once. Reset to `null` when the load settles.
 */
let embedderLoading: Promise<FeatureExtractionPipeline> | null = null;

/**
 * Returns the shared feature-extraction pipeline, creating it on first use.
 *
 * The `@xenova/transformers` module is imported dynamically, so it is only
 * loaded when an embedder is actually requested. Calls made while the first
 * load is still pending all await the same promise instead of each starting
 * their own `pipeline(...)` construction.
 *
 * @returns The cached pipeline for the "Xenova/all-MiniLM-L6-v2" model.
 * @throws Whatever the dynamic import or `pipeline(...)` rejects with. A
 *   failed load is not cached, so a later call retries from scratch.
 */
export async function getEmbedder(): Promise<FeatureExtractionPipeline> {
  if (embedder) {
    return embedder;
  }
  if (!embedderLoading) {
    embedderLoading = (async () => {
      try {
        const { pipeline } = await import("@xenova/transformers");
        const loaded = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
        embedder = loaded;
        return loaded;
      } finally {
        // Clear the in-flight marker on success and on failure: after success
        // `embedder` serves later calls; after failure the next call retries.
        embedderLoading = null;
      }
    })();
  }
  return embedderLoading;
}
/**
 * Embeds a single string with the shared feature-extraction pipeline.
 *
 * The pipeline is invoked with `pooling: "mean"` and `normalize: true`, and
 * the resulting `data` buffer is copied into a plain array of numbers.
 *
 * @param text - The input text to embed.
 * @returns The embedding values as a plain `number[]`.
 */
export async function embed(text: string): Promise<number[]> {
  const extractor = await getEmbedder();
  const { data } = await extractor(text, { pooling: "mean", normalize: true });
  return Array.from(data as Iterable<number>);
}
/**
 * Embeds every string in `texts`, one pipeline call at a time.
 *
 * Each input is run through the shared pipeline with `pooling: "mean"` and
 * `normalize: true`; the calls are awaited sequentially, so the output order
 * matches the input order.
 *
 * @param texts - The strings to embed.
 * @returns One `number[]` per input, in the same order as `texts`.
 */
export async function embedBatch(texts: string[]): Promise<number[][]> {
  const extractor = await getEmbedder();
  const vectors: number[][] = [];
  for (const entry of texts) {
    const { data } = await extractor(entry, { pooling: "mean", normalize: true });
    vectors.push(Array.from(data as Iterable<number>));
  }
  return vectors;
}
/**
 * Computes the cosine similarity between two equal-length numeric vectors.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns `dot(a, b) / (|a| * |b|)`. Returns `0` when either vector has zero
 *   magnitude (including two empty vectors), where the ratio is undefined and
 *   would otherwise evaluate to `NaN`.
 * @throws RangeError If `a` and `b` have different lengths.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) {
    throw new RangeError(
      `cosineSimilarity: vector lengths differ (${a.length} vs ${b.length})`,
    );
  }

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  // A zero denominator means at least one vector has no direction; report
  // "no similarity" rather than letting 0/0 produce NaN.
  if (denominator === 0) {
    return 0;
  }
  return dot / denominator;
}