import OpenAI from 'openai';
import { getConfig, hasOpenAI } from '../config.js';
import type { UnifiedPost } from '../types.js';
// OpenAI model used for every embedding produced by this module.
const EMBEDDING_MODEL = 'text-embedding-3-small';
// Vector length this module reports/produces; used both for real embeddings
// and for the zero-vector fallback when no API key is configured.
const EMBEDDING_DIMS = 1536;
// Number of input strings sent per embeddings request (OpenAI's documented
// per-request input cap is 2048).
const BATCH_SIZE = 2048;
// Lazily-created singleton client; constructed on first use in getOpenAI().
let openaiClient: OpenAI | null = null;
/**
 * Return the shared OpenAI client, constructing it on first call.
 *
 * NOTE(review): the API key is read from config only once, at first use;
 * later config changes are not picked up — confirm that is intended.
 */
function getOpenAI(): OpenAI {
  if (openaiClient === null) {
    const { openaiApiKey } = getConfig();
    openaiClient = new OpenAI({ apiKey: openaiApiKey });
  }
  return openaiClient;
}
/**
 * Dimensionality of the vectors this module produces (real or zero-filled
 * fallback alike).
 *
 * @returns the fixed embedding vector length.
 */
export function getEmbeddingDims(): number {
  return EMBEDDING_DIMS;
}
/**
 * Build the text that represents a post for embedding purposes:
 * content, then hashtags (space-joined), then `@username` — space-joined
 * and truncated to the first 8000 characters.
 *
 * @param post - the unified post to flatten into embedding input.
 * @returns a single string of at most 8000 characters.
 */
function postToEmbeddingText(post: UnifiedPost): string {
  const { content, hashtags, author } = post;
  const pieces: string[] = [content];
  if (hashtags.length > 0) {
    pieces.push(hashtags.join(' '));
  }
  if (author.username) {
    pieces.push(`@${author.username}`);
  }
  return pieces.join(' ').slice(0, 8000);
}
/**
 * Embed a list of texts with the OpenAI embeddings API.
 *
 * When no OpenAI API key is configured, falls back to zero vectors so the
 * rest of the pipeline stays functional (e.g. in dev/test environments).
 *
 * @param texts - strings to embed; may be empty.
 * @returns one vector per input text, in input order.
 */
export async function embedTexts(texts: string[]): Promise<number[][]> {
  if (!hasOpenAI()) {
    return texts.map(() => new Array<number>(EMBEDDING_DIMS).fill(0));
  }
  const client = getOpenAI();
  const results: number[][] = [];
  // Chunk the inputs: the API limits how many strings one request may carry.
  for (let i = 0; i < texts.length; i += BATCH_SIZE) {
    const batch = texts.slice(i, i + BATCH_SIZE);
    const response = await client.embeddings.create({
      model: EMBEDDING_MODEL,
      input: batch,
    });
    // Don't assume response order matches input order: the API tags each
    // embedding with the index of its input, so order by it explicitly.
    // Misordered results would silently misalign texts and vectors.
    const ordered = [...response.data].sort((a, b) => a.index - b.index);
    for (const item of ordered) {
      results.push(item.embedding);
    }
  }
  return results;
}
/**
 * Embed a batch of posts: flatten each one to its embedding text, then
 * delegate to {@link embedTexts}.
 *
 * @param posts - posts to embed; may be empty.
 * @returns one vector per post, in input order.
 */
export async function embedPosts(posts: UnifiedPost[]): Promise<number[][]> {
  return embedTexts(posts.map((p) => postToEmbeddingText(p)));
}
/**
 * Embed a single search query string.
 *
 * @param query - the query text to embed.
 * @returns the query's embedding vector (embedTexts always yields exactly
 *   one vector per input, so the first element is present).
 */
export async function embedQuery(query: string): Promise<number[]> {
  const [vector] = await embedTexts([query]);
  return vector;
}