import { randomUUID } from 'node:crypto';
import type Database from 'better-sqlite3';
import type { VectorStore, SearchResult, DocumentMeta } from '../types/vector-store.js';
import { cosineSimilarity, embeddingToBuffer, bufferToEmbedding } from '../db/cosine.js';
/**
 * Vector store backed by a better-sqlite3 database.
 *
 * Documents are kept in a `documents` table and their chunks (text plus
 * serialized embedding) in a `chunks` table. Similarity search is brute
 * force: every candidate chunk is loaded and scored in-process with
 * `cosineSimilarity`.
 *
 * All methods are declared `async` to satisfy the `VectorStore` contract, but
 * the database calls they make are synchronous.
 */
export class SqliteVectorStore implements VectorStore {
  constructor(private readonly db: Database.Database) {}

  /**
   * Inserts or updates a document and replaces all of its chunks.
   *
   * The document row, the removal of previously stored chunks and the
   * insertion of the new chunks are wrapped in one `db.transaction` call.
   *
   * @param documentId - Primary key of the document row.
   * @param chunks - Chunks to store; each chunk's array position is persisted
   *   as its `chunk_index`.
   * @param meta - Stored as JSON in the `meta` column. `meta.scope` is also
   *   written to the `scope` column, falling back to `'default'` when it is
   *   null or undefined.
   */
  async upsertDocument(
    documentId: string,
    chunks: { content: string; embedding: Float32Array }[],
    meta: DocumentMeta
  ): Promise<void> {
    const replaceDocument = this.db.transaction(() => {
      this.db
        .prepare(
          `INSERT INTO documents (id, scope, meta, updated_at)
           VALUES (?, ?, ?, unixepoch())
           ON CONFLICT(id) DO UPDATE SET
             scope = excluded.scope,
             meta = excluded.meta,
             updated_at = unixepoch()`
        )
        .run(documentId, meta.scope ?? 'default', JSON.stringify(meta));

      // Chunks are replaced wholesale rather than diffed against the old set.
      this.db.prepare('DELETE FROM chunks WHERE document_id = ?').run(documentId);

      const insertChunk = this.db.prepare(
        `INSERT INTO chunks (id, document_id, content, embedding, chunk_index)
         VALUES (?, ?, ?, ?, ?)`
      );
      for (const [index, chunk] of chunks.entries()) {
        insertChunk.run(
          randomUUID(),
          documentId,
          chunk.content,
          embeddingToBuffer(chunk.embedding),
          index
        );
      }
    });
    replaceDocument();
  }

  /**
   * Returns the stored chunks most similar to `queryEmbedding`.
   *
   * @param queryEmbedding - Embedding to compare every candidate chunk against.
   * @param options - `topK` (default 10) caps the number of results; values
   *   below zero are treated as zero. `minScore` (default 0.3) is the minimum
   *   similarity a chunk needs to be returned. `scope` restricts the search to
   *   documents with that scope; an omitted or empty scope searches everything.
   * @returns Matching chunks ordered by descending score.
   * @throws SyntaxError if the `meta` column of a returned row is not valid JSON.
   */
  async search(
    queryEmbedding: Float32Array,
    options: { topK?: number; minScore?: number; scope?: string } = {}
  ): Promise<SearchResult[]> {
    type ChunkRow = {
      id: string;
      document_id: string;
      content: string;
      embedding: Buffer;
      meta: string;
    };

    const { topK = 10, minScore = 0.3, scope } = options;

    const baseQuery = `
      SELECT c.id, c.document_id, c.content, c.embedding, d.meta
      FROM chunks c
      JOIN documents d ON c.document_id = d.id`;
    const rows = (
      scope
        ? this.db.prepare(`${baseQuery}\n      WHERE d.scope = ?`).all(scope)
        : this.db.prepare(baseQuery).all()
    ) as ChunkRow[];

    // Brute-force scoring; rows without an embedding cannot be ranked.
    const hits: { row: ChunkRow; score: number }[] = [];
    for (const row of rows) {
      if (!row.embedding) continue;
      const score = cosineSimilarity(queryEmbedding, bufferToEmbedding(row.embedding));
      if (score >= minScore) {
        hits.push({ row, score });
      }
    }
    hits.sort((a, b) => b.score - a.score);

    // A negative end index would make slice() count from the end of the array,
    // so clamp it. Metadata is parsed only for the rows actually returned.
    return hits.slice(0, Math.max(0, topK)).map(({ row, score }) => ({
      chunkId: row.id,
      documentId: row.document_id,
      content: row.content,
      score,
      meta: JSON.parse(row.meta),
    }));
  }

  /**
   * Deletes a document together with its chunks.
   *
   * Chunks are removed explicitly, before the document row, instead of relying
   * on a foreign-key cascade that may or may not be defined on the schema.
   * Both deletes are wrapped in one `db.transaction` call.
   *
   * @param documentId - Id of the document to remove; unknown ids are a no-op.
   */
  async deleteDocument(documentId: string): Promise<void> {
    const removeDocument = this.db.transaction(() => {
      this.db.prepare('DELETE FROM chunks WHERE document_id = ?').run(documentId);
      this.db.prepare('DELETE FROM documents WHERE id = ?').run(documentId);
    });
    removeDocument();
  }

  /**
   * Lists the ids of stored documents.
   *
   * @param scope - When given and non-empty, only documents with this scope
   *   are listed; otherwise all documents are.
   * @returns Document ids in the order the query yields them.
   */
  async getDocumentIds(scope?: string): Promise<string[]> {
    const rows = (
      scope
        ? this.db.prepare('SELECT id FROM documents WHERE scope = ?').all(scope)
        : this.db.prepare('SELECT id FROM documents').all()
    ) as { id: string }[];
    return rows.map((row) => row.id);
  }
}