/**
* Embedding Service
*
* Generates vector embeddings using OpenAI's text-embedding-ada-002 model.
* Used for semantic similarity search of RFPs and historical quotes.
*/
import OpenAI from 'openai';
export class EmbeddingService {
private client!: OpenAI; // definite assignment: only set in the constructor when an API key is configured
private static instance: EmbeddingService;
private enabled: boolean;
private constructor() {
const apiKey = process.env.OPENAI_API_KEY;
this.enabled = !!(apiKey && apiKey !== 'sk-your-openai-api-key-here');
if (this.enabled) {
this.client = new OpenAI({ apiKey });
console.log('✓ OpenAI Embedding Service initialized');
} else {
console.log('⚠ OpenAI Embedding Service disabled (no API key)');
}
}
public static getInstance(): EmbeddingService {
if (!EmbeddingService.instance) {
EmbeddingService.instance = new EmbeddingService();
}
return EmbeddingService.instance;
}
public isEnabled(): boolean {
return this.enabled;
}
/**
* Generate embedding for a text string
* Returns a 1536-dimensional vector
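*
* @example
* // Illustrative sketch, not part of the service: assumes OPENAI_API_KEY is configured
* // and the call is awaited inside an async function; the input string is hypothetical.
* const vector = await embeddingService.generateEmbedding('CNC milled 6061 aluminum bracket, qty 500');
* console.log(vector.length); // 1536 for text-embedding-ada-002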
*/
async generateEmbedding(text: string): Promise<number[]> {
if (!this.enabled) {
throw new Error('Embedding service is not enabled. Please set OPENAI_API_KEY.');
}
try {
const response = await this.client.embeddings.create({
model: 'text-embedding-ada-002',
input: text.substring(0, 8000) // model input limit is 8191 tokens; 8,000 characters (roughly 2,000 tokens) stays well under it
});
return response.data[0].embedding;
} catch (error: any) {
console.error('Error generating embedding:', error.message);
throw new Error(`Failed to generate embedding: ${error.message}`);
}
}
/**
* Generate embeddings for multiple texts in batch
* More efficient than calling generateEmbedding multiple times
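*
* @example
* // Illustrative sketch; the input strings are hypothetical.
* const vectors = await embeddingService.generateEmbeddingsBatch([
*   'Anodized aluminum enclosure, qty 100',
*   'Stainless steel shaft, tolerance ±0.01 mm'
* ]);
* console.log(vectors.length); // one 1536-dimensional vector per input text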
*/
async generateEmbeddingsBatch(texts: string[]): Promise<number[][]> {
if (!this.enabled) {
throw new Error('Embedding service is not enabled. Please set OPENAI_API_KEY.');
}
if (texts.length === 0) {
return [];
}
try {
// Truncate long texts to stay well under the model's 8191-token input limit
const truncatedTexts = texts.map(t => t.substring(0, 8000));
const response = await this.client.embeddings.create({
model: 'text-embedding-ada-002',
input: truncatedTexts
});
// Sort by index so output order matches input order regardless of how the API returns items
return response.data.sort((a, b) => a.index - b.index).map(d => d.embedding);
} catch (error: any) {
console.error('Error generating embeddings batch:', error.message);
throw new Error(`Failed to generate embeddings: ${error.message}`);
}
}
/**
* Create a text representation of an RFP for embedding
* Combines key fields into a searchable string
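*
* @example
* // Illustrative sketch with hypothetical field values.
* const text = embeddingService.createRfpText({
*   material: '6061-T6 aluminum',
*   processes: ['CNC milling', 'anodizing'],
*   qty: 250
* });
* // => 'Material: 6061-T6 aluminum | Processes: CNC milling, anodizing | Quantity: 250'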
*/
createRfpText(rfp: {
rawText?: string;
material?: string;
processes?: string[];
qty?: number;
tolerances?: string;
finish?: string;
notes?: string;
}): string {
const parts: string[] = [];
if (rfp.rawText) {
parts.push(rfp.rawText);
}
if (rfp.material) {
parts.push(`Material: ${rfp.material}`);
}
if (rfp.processes && rfp.processes.length > 0) {
parts.push(`Processes: ${rfp.processes.join(', ')}`);
}
if (rfp.qty) {
parts.push(`Quantity: ${rfp.qty}`);
}
if (rfp.tolerances) {
parts.push(`Tolerances: ${rfp.tolerances}`);
}
if (rfp.finish) {
parts.push(`Finish: ${rfp.finish}`);
}
if (rfp.notes) {
parts.push(`Notes: ${rfp.notes}`);
}
return parts.join(' | ');
}
/**
* Calculate cosine similarity between two embeddings
* Returns a value between -1 and 1 (typically 0 to 1 for our use case)
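*
* cosine(a, b) = (a · b) / (‖a‖ ‖b‖)
*
* @example
* // Illustrative sketch with toy 3-dimensional vectors (real embeddings have 1536 dimensions).
* embeddingService.cosineSimilarity([1, 0, 0], [1, 0, 0]); // 1 (same direction)
* embeddingService.cosineSimilarity([1, 0, 0], [0, 1, 0]); // 0 (orthogonal)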
*/
cosineSimilarity(embedding1: number[], embedding2: number[]): number {
if (embedding1.length !== embedding2.length) {
throw new Error('Embeddings must have the same length');
}
let dotProduct = 0;
let norm1 = 0;
let norm2 = 0;
for (let i = 0; i < embedding1.length; i++) {
dotProduct += embedding1[i] * embedding2[i];
norm1 += embedding1[i] * embedding1[i];
norm2 += embedding2[i] * embedding2[i];
}
const denominator = Math.sqrt(norm1) * Math.sqrt(norm2);
if (denominator === 0) {
return 0;
}
return dotProduct / denominator;
}
}
export const embeddingService = EmbeddingService.getInstance();
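/*
* Example: semantic similarity search sketch (illustrative only; not executed by this module).
* `loadRfpEmbeddings` and the shape of `storedRfps` are hypothetical stand-ins for however
* previously embedded RFPs are persisted; they are not defined in this codebase.
*
*   const storedRfps: { id: string; embedding: number[] }[] = await loadRfpEmbeddings();
*   const queryEmbedding = await embeddingService.generateEmbedding(
*     embeddingService.createRfpText({ material: 'ABS', processes: ['injection molding'], qty: 10000 })
*   );
*   const ranked = storedRfps
*     .map(rfp => ({ id: rfp.id, score: embeddingService.cosineSimilarity(queryEmbedding, rfp.embedding) }))
*     .sort((a, b) => b.score - a.score);
*   // ranked[0] is the most similar historical RFP
*/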