"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.VectorStore = void 0;
const pg_1 = require("pg");
const logger_1 = __importDefault(require("../utils/logger"));
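/**
 * PostgreSQL-backed store for metadata chunks with embedding, full-text and
 * symbol search. Expects a `metadata_chunks` table whose `embedding` column
 * supports the `<=>` distance operator (as provided by the pgvector extension).
 * Constructed with a pg Pool config and an embedding model exposing
 * embed(text) and embedBatch(texts).
 */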
class VectorStore {
constructor(config, embeddingModel) {
this.pool = new pg_1.Pool(config);
this.embeddingModel = embeddingModel;
}
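    /**
     * Embeds a single chunk's content and upserts it into metadata_chunks.
     * On conflict (same id) the content, symbols, refs and embedding are refreshed.
     */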
async storeChunk(chunk) {
try {
const embedding = await this.embeddingModel.embed(chunk.content);
await this.pool.query(`INSERT INTO metadata_chunks
(id, org_id, type, name, content, symbols, refs, path, raw, embedding)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (id) DO UPDATE SET
content = $5, symbols = $6, refs = $7, embedding = $10, updated_at = CURRENT_TIMESTAMP`, [
chunk.id,
chunk.orgId,
chunk.type,
chunk.name,
chunk.content,
chunk.symbols,
JSON.stringify(chunk.references),
chunk.path,
JSON.stringify(chunk.raw),
`[${embedding.join(',')}]`
]);
logger_1.default.debug(`Stored chunk ${chunk.id} with ${embedding.length}-dim embedding`);
}
catch (error) {
logger_1.default.error(`Failed to store chunk ${chunk.id}`, { error });
throw error;
}
}
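    /**
     * Embeds and upserts many chunks in one transaction.
     * Embeddings are generated with a single embedBatch() call, then each chunk
     * is upserted inside a BEGIN/COMMIT block; any failure rolls back the whole batch.
     */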
async storeBatch(chunks) {
const embeddings = await this.embeddingModel.embedBatch(chunks.map(c => c.content));
const client = await this.pool.connect();
try {
await client.query('BEGIN');
for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i];
const embedding = embeddings[i];
await client.query(`INSERT INTO metadata_chunks
(id, org_id, type, name, content, symbols, refs, path, raw, embedding)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (id) DO UPDATE SET
content = $5, symbols = $6, refs = $7, embedding = $10, updated_at = CURRENT_TIMESTAMP`, [
chunk.id,
chunk.orgId,
chunk.type,
chunk.name,
chunk.content,
chunk.symbols,
JSON.stringify(chunk.references),
chunk.path,
JSON.stringify(chunk.raw),
`[${embedding.join(',')}]`
]);
}
await client.query('COMMIT');
logger_1.default.info(`Stored ${chunks.length} chunks in batch`);
}
catch (error) {
await client.query('ROLLBACK');
logger_1.default.error('Batch store failed', { error });
throw error;
}
finally {
client.release();
}
}
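    /**
     * Semantic search: embeds the query and returns the `limit` nearest chunks
     * by embedding distance (`<=>`), exposing `1 - distance` as `similarity`.
     */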
async vectorSearch(query, limit = 10) {
try {
const embedding = await this.embeddingModel.embed(query);
const result = await this.pool.query(`SELECT id, org_id, type, name, content, symbols, refs, path, raw,
1 - (embedding <=> $1) as similarity
FROM metadata_chunks
ORDER BY embedding <=> $1
LIMIT $2`, [`[${embedding.join(',')}]`, limit]);
return result.rows.map(row => this.mapRowToChunk(row));
}
catch (error) {
logger_1.default.error('Vector search failed', { error, query });
throw error;
}
}
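    /**
     * Full-text search over chunk content using Postgres tsvector/tsquery,
     * ranked by ts_rank and limited to `limit` rows.
     */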
async keywordSearch(query, limit = 10) {
try {
const result = await this.pool.query(`SELECT id, org_id, type, name, content, symbols, refs, path, raw,
ts_rank(to_tsvector('english', content), plainto_tsquery('english', $1)) as rank
FROM metadata_chunks
WHERE to_tsvector('english', content) @@ plainto_tsquery('english', $1)
ORDER BY rank DESC
LIMIT $2`, [query, limit]);
return result.rows.map(row => this.mapRowToChunk(row));
}
catch (error) {
logger_1.default.error('Keyword search failed', { error, query });
throw error;
}
}
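    /**
     * Exact symbol lookup: returns chunks whose `symbols` array contains
     * the given symbol, ordered by name.
     */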
async symbolSearch(symbol, limit = 10) {
try {
const result = await this.pool.query(`SELECT id, org_id, type, name, content, symbols, refs, path, raw
FROM metadata_chunks
WHERE $1 = ANY(symbols)
ORDER BY name
LIMIT $2`, [symbol, limit]);
return result.rows.map(row => this.mapRowToChunk(row));
}
catch (error) {
logger_1.default.error('Symbol search failed', { error, symbol });
throw error;
}
}
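    /**
     * Combines vector and keyword search: both run in parallel with an
     * oversampled limit, scores are merged (70% vector similarity,
     * 30% keyword rank), and the top `limit` results are returned.
     */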
async hybridSearch(query, limit = 10) {
try {
// Get results from both vector and keyword search
const [vectorResults, keywordResults] = await Promise.all([
this.vectorSearch(query, limit * 2),
this.keywordSearch(query, limit * 2)
]);
// Combine and rerank results
const combinedResults = new Map();
// Add vector results with similarity scores
            vectorResults.forEach((result) => {
combinedResults.set(result.id, {
...result,
similarity: result.similarity || 0,
rank: (result.similarity || 0) * 0.7 // Weight vector search 70%
});
});
// Merge keyword results
            keywordResults.forEach((result) => {
const existing = combinedResults.get(result.id);
if (existing) {
// Combine scores
existing.rank = (existing.rank || 0) + (result.rank || 0) * 0.3;
}
else {
combinedResults.set(result.id, {
...result,
rank: (result.rank || 0) * 0.3 // Weight keyword search 30%
});
}
});
// Sort by combined rank and return top results
return Array.from(combinedResults.values())
.sort((a, b) => (b.rank || 0) - (a.rank || 0))
.slice(0, limit);
}
catch (error) {
logger_1.default.error('Hybrid search failed', { error, query });
throw error;
}
}
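    /**
     * Converts a database row into a chunk object, parsing the JSON-encoded
     * refs/raw columns and attaching size/lineCount metadata plus any
     * similarity or rank score produced by the query.
     */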
mapRowToChunk(row) {
let references = [];
let raw = {};
        try {
            // refs/raw may come back as JSON text, or already parsed if the column is jsonb
            references = typeof row.refs === 'string' ? JSON.parse(row.refs) : (row.refs || []);
        }
        catch (e) {
            logger_1.default.warn(`Failed to parse references for chunk ${row.id}`, { refs: row.refs });
        }
        try {
            raw = typeof row.raw === 'string' ? JSON.parse(row.raw) : (row.raw || {});
        }
        catch (e) {
            logger_1.default.warn(`Failed to parse raw data for chunk ${row.id}`, { raw: row.raw });
        }
return {
id: row.id,
orgId: row.org_id,
type: row.type,
name: row.name,
content: row.content,
symbols: row.symbols || [],
references,
path: row.path || '',
raw,
metadata: {
size: row.content?.length || 0,
lineCount: row.content?.split('\n').length || 0
},
similarity: row.similarity,
rank: row.rank
};
}
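    /**
     * Removes a chunk by id.
     */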
async deleteChunk(chunkId) {
try {
await this.pool.query('DELETE FROM metadata_chunks WHERE id = $1', [chunkId]);
logger_1.default.debug(`Deleted chunk ${chunkId}`);
}
catch (error) {
logger_1.default.error(`Failed to delete chunk ${chunkId}`, { error });
throw error;
}
}
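    /**
     * Closes the underlying connection pool.
     */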
async close() {
await this.pool.end();
}
}
exports.VectorStore = VectorStore;