// Type-only import: the runtime module is loaded dynamically in initialize(),
// so a missing native dependency can be reported as a SQLiteError
import type { Database, Statement } from 'better-sqlite3';
import { z } from 'zod';
import { Result, ok, err } from 'neverthrow';
import crypto from 'crypto';
import path from 'path';
import fs from 'fs/promises';
// Type-safe schemas
const DocumentRecordSchema = z.object({
id: z.number().int().positive(),
uri: z.string().url(),
title: z.string().min(1),
mtime: z.number().int().positive(),
hash: z.string().min(1),
metadata: z.string().nullable().optional(),
created_at: z.string(),
updated_at: z.string().nullable().optional()
});
const ChunkRecordSchema = z.object({
id: z.number().int().positive(),
doc_id: z.number().int().positive(),
text: z.string().min(1),
section: z.string().default('main'),
offset: z.number().int().min(0),
lang: z.string().default('auto'),
hash: z.string().min(1),
created_at: z.string(),
token_count: z.number().int().min(0).optional(),
embedding_dim: z.number().int().positive().optional()
});
export type DocumentRecord = z.infer<typeof DocumentRecordSchema>;
export type ChunkRecord = z.infer<typeof ChunkRecordSchema>;
export interface SearchResult {
readonly text: string;
readonly section: string;
readonly uri: string;
readonly title: string;
readonly score: number;
snippet?: string;
highlights?: string[];
}
export interface SearchOptions {
readonly limit?: number;
readonly offset?: number;
readonly filters?: Record<string, unknown>;
readonly includeSnippets?: boolean;
readonly highlightTerms?: boolean;
readonly minScore?: number;
}
export interface ChunkingOptions {
readonly maxChunkSize?: number;
readonly overlapSize?: number;
readonly preserveStructure?: boolean;
readonly splitOnSentences?: boolean;
readonly minChunkSize?: number;
}
// Custom errors
export class SQLiteError extends Error {
constructor(
message: string,
public readonly code: string,
public readonly query?: string,
public readonly cause?: Error
) {
super(message);
this.name = 'SQLiteError';
}
}
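// Callers can branch on the machine-readable error code. Sketch (the codes
// shown are the ones emitted in this file):
//
//   const res = await client.deleteDocument(uri);
//   if (res.isErr() && res.error.code === 'DOCUMENT_NOT_FOUND') {
//     // already gone: treat as success rather than a failure
//   }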
export class SQLiteClient {
private db: Database | null = null;
private readonly dbPath: string;
private readonly logger: Logger;
private readonly metrics: MetricsCollector;
private isInitialized = false;
// Prepared statements, cached for reuse (better-sqlite3 compiles them once)
private statements = {
getDocument: null as Statement | null,
addDocument: null as Statement | null,
updateDocument: null as Statement | null,
deleteDocument: null as Statement | null,
getChunks: null as Statement | null,
searchFTS: null as Statement | null,
searchSimple: null as Statement | null
};
constructor(
dbPath?: string,
logger?: Logger,
metrics?: MetricsCollector
) {
this.dbPath = dbPath || './data/rag.db';
this.logger = logger || new ConsoleLogger();
this.metrics = metrics || new NoOpMetrics();
}
async initialize(): Promise<Result<void, SQLiteError>> {
try {
if (this.isInitialized) return ok(undefined);
this.logger.info('Initializing SQLite database', { path: this.dbPath });
// Ensure directory exists
await fs.mkdir(path.dirname(this.dbPath), { recursive: true });
// Import better-sqlite3 dynamically to handle potential missing dependency
const { default: Database } = await import('better-sqlite3');
this.db = new Database(this.dbPath, {
verbose: (message?: unknown) => this.logger.info('SQL Query', { sql: String(message) }),
fileMustExist: false
});
// Enable WAL mode for better concurrent access
this.db.pragma('journal_mode = WAL');
this.db.pragma('synchronous = NORMAL');
this.db.pragma('cache_size = 10000');
this.db.pragma('temp_store = memory');
this.db.pragma('mmap_size = 268435456'); // 256MB
// SQLite ships with foreign key enforcement off; enable it so the
// ON DELETE CASCADE constraints on chunks and chunk_vecs take effect
this.db.pragma('foreign_keys = ON');
await this.createSchema();
await this.prepareStatements();
this.isInitialized = true;
this.logger.info('SQLite database initialized successfully');
return ok(undefined);
} catch (error) {
const errorObj = error instanceof Error ? error : new Error(String(error));
this.logger.error('Failed to initialize SQLite database', errorObj);
return err(new SQLiteError(
`Failed to initialize database: ${errorObj.message}`,
'INIT_ERROR',
undefined,
errorObj
));
}
}
async close(): Promise<void> {
if (this.db) {
try {
// better-sqlite3 statements need no explicit finalization;
// closing the database releases them
this.db.close();
this.db = null;
this.isInitialized = false;
// Drop cached statements so a later initialize() re-prepares them
// instead of reusing statements bound to the closed connection
for (const key of Object.keys(this.statements) as Array<keyof typeof this.statements>) {
this.statements[key] = null;
}
this.logger.info('Database connection closed');
} catch (error) {
const errorObj = error instanceof Error ? error : new Error(String(error));
this.logger.error('Error closing database', errorObj);
}
}
}
async query<T>(sql: string, params: unknown[] = []): Promise<Result<T[], SQLiteError>> {
const startTime = Date.now();
try {
if (!this.db) {
const initResult = await this.initialize();
if (initResult.isErr()) return err(initResult.error);
}
this.logger.info('Executing query', { sql: sql.substring(0, 100), paramCount: params.length });
const stmt = this.db!.prepare(sql);
const results = stmt.all(...params) as T[];
this.metrics.recordQuery(sql, Date.now() - startTime);
return ok(results);
} catch (error) {
const errorObj = error instanceof Error ? error : new Error(String(error));
this.metrics.recordError('query');
this.logger.error('Query execution failed', errorObj, { sql, params });
return err(new SQLiteError(
`Query failed: ${errorObj.message}`,
'QUERY_ERROR',
sql,
errorObj
));
}
}
async execute(sql: string, params: unknown[] = []): Promise<Result<void, SQLiteError>> {
const startTime = Date.now();
try {
if (!this.db) {
const initResult = await this.initialize();
if (initResult.isErr()) return err(initResult.error);
}
this.logger.info('Executing statement', { sql: sql.substring(0, 100), paramCount: params.length });
const stmt = this.db!.prepare(sql);
stmt.run(...params);
this.metrics.recordQuery(sql, Date.now() - startTime);
return ok(undefined);
} catch (error) {
const errorObj = error instanceof Error ? error : new Error(String(error));
this.metrics.recordError('execute');
this.logger.error('Statement execution failed', errorObj, { sql, params });
return err(new SQLiteError(
`Execution failed: ${errorObj.message}`,
'EXECUTE_ERROR',
sql,
errorObj
));
}
}
async getDocuments(filters?: Record<string, unknown>): Promise<Result<DocumentRecord[], SQLiteError>> {
try {
let sql = `
SELECT id, uri, title, mtime, hash, metadata, created_at, updated_at
FROM docs
`;
const params: unknown[] = [];
if (filters && Object.keys(filters).length > 0) {
const conditions: string[] = [];
Object.entries(filters).forEach(([key, value]) => {
// The key is interpolated into the JSON path, so restrict it to plain
// identifiers; otherwise a crafted filter name could inject SQL
if (!/^[A-Za-z0-9_]+$/.test(key)) {
throw new Error(`Invalid filter key: ${key}`);
}
conditions.push(`JSON_EXTRACT(metadata, '$.${key}') = ?`);
params.push(value);
});
sql += ` WHERE ${conditions.join(' AND ')}`;
}
sql += ' ORDER BY created_at DESC';
const result = await this.query<DocumentRecord>(sql, params);
if (result.isErr()) return result;
// Validate results
const validatedResults = result.value.map(doc => {
const validation = DocumentRecordSchema.safeParse(doc);
if (!validation.success) {
this.logger.warn('Invalid document record', { doc, errors: validation.error.issues });
return null;
}
return validation.data;
}).filter((doc): doc is DocumentRecord => doc !== null);
return ok(validatedResults);
} catch (error) {
return err(new SQLiteError(
`Failed to get documents: ${error instanceof Error ? error.message : String(error)}`,
'GET_DOCUMENTS_ERROR',
undefined,
error instanceof Error ? error : undefined
));
}
}
async getDocument(uri: string): Promise<Result<DocumentRecord | null, SQLiteError>> {
try {
if (!this.statements.getDocument) {
await this.prepareStatements();
}
if (!this.statements.getDocument) {
return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
}
const results = this.statements.getDocument.all(uri) as DocumentRecord[];
if (results.length === 0) {
return ok(null);
}
const validation = DocumentRecordSchema.safeParse(results[0]);
if (!validation.success) {
return err(new SQLiteError(
`Invalid document data: ${validation.error.message}`,
'VALIDATION_ERROR'
));
}
return ok(validation.data);
} catch (error) {
return err(new SQLiteError(
`Failed to get document: ${error instanceof Error ? error.message : String(error)}`,
'GET_DOCUMENT_ERROR',
undefined,
error instanceof Error ? error : undefined
));
}
}
async addDocument(
uri: string,
title: string,
content: string,
metadata?: string,
options?: ChunkingOptions
): Promise<Result<number, SQLiteError>> {
try {
if (!this.db) {
return err(new SQLiteError('Database not initialized', 'DB_NOT_INITIALIZED'));
}
if (!this.statements.addDocument) {
await this.prepareStatements();
}
if (!this.statements.addDocument) {
return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
}
const mtime = Math.floor(Date.now() / 1000);
const hash = this.generateSecureHash(content);
// Insert the document and its chunks atomically: a better-sqlite3
// transaction rolls everything back if any statement throws
const addDocStmt = this.statements.addDocument;
const chunks = this.chunkText(content, options);
const addChunkStmt = this.db.prepare(`
INSERT INTO chunks (doc_id, text, section, offset, lang, hash, token_count, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
`);
const insertAll = this.db.transaction((): number => {
const insertResult = addDocStmt.run(uri, title, mtime, hash, metadata || null);
const newDocId = insertResult.lastInsertRowid as number;
for (const chunk of chunks) {
addChunkStmt.run(
newDocId,
chunk.text,
chunk.section,
chunk.offset,
'auto',
this.generateSecureHash(chunk.text),
this.estimateTokenCount(chunk.text)
);
}
return newDocId;
});
const docId = insertAll();
this.metrics.recordOperation('addDocument');
this.logger.info('Document added successfully', { uri, docId });
return ok(docId);
} catch (error) {
this.metrics.recordError('addDocument');
return err(new SQLiteError(
`Failed to add document: ${error instanceof Error ? error.message : String(error)}`,
'ADD_DOCUMENT_ERROR',
undefined,
error instanceof Error ? error : undefined
));
}
}
async updateDocument(
uri: string,
title: string,
content: string,
metadata?: string,
options?: ChunkingOptions
): Promise<Result<void, SQLiteError>> {
try {
if (!this.db) {
return err(new SQLiteError('Database not initialized', 'DB_NOT_INITIALIZED'));
}
if (!this.statements.getDocument || !this.statements.updateDocument) {
await this.prepareStatements();
}
if (!this.statements.getDocument || !this.statements.updateDocument) {
return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
}
// Get existing document
const existingDoc = this.statements.getDocument.get(uri) as DocumentRecord | undefined;
if (!existingDoc) {
return err(new SQLiteError('Document not found', 'DOCUMENT_NOT_FOUND'));
}
const mtime = Math.floor(Date.now() / 1000);
const hash = this.generateSecureHash(content);
// Update the document row and replace its chunks (and their embeddings)
// in a single transaction so readers never see a half-updated document
const updateDocStmt = this.statements.updateDocument;
const chunks = this.chunkText(content, options);
const db = this.db;
const addChunkStmt = db.prepare(`
INSERT INTO chunks (doc_id, text, section, offset, lang, hash, token_count, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
`);
const applyUpdate = db.transaction(() => {
updateDocStmt.run(title, mtime, hash, metadata || null, uri);
// Delete old chunks and their embeddings before re-chunking
db.prepare('DELETE FROM chunk_vecs WHERE chunk_id IN (SELECT id FROM chunks WHERE doc_id = ?)').run(existingDoc.id);
db.prepare('DELETE FROM chunks WHERE doc_id = ?').run(existingDoc.id);
for (const chunk of chunks) {
addChunkStmt.run(
existingDoc.id,
chunk.text,
chunk.section,
chunk.offset,
'auto',
this.generateSecureHash(chunk.text),
this.estimateTokenCount(chunk.text)
);
}
});
applyUpdate();
this.metrics.recordOperation('updateDocument');
this.logger.info('Document updated successfully', { uri });
return ok(undefined);
} catch (error) {
this.metrics.recordError('updateDocument');
return err(new SQLiteError(
`Failed to update document: ${error instanceof Error ? error.message : String(error)}`,
'UPDATE_DOCUMENT_ERROR',
undefined,
error instanceof Error ? error : undefined
));
}
}
async deleteDocument(uri: string): Promise<Result<void, SQLiteError>> {
try {
if (!this.statements.deleteDocument) {
await this.prepareStatements();
}
if (!this.statements.deleteDocument) {
return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
}
const result = this.statements.deleteDocument.run(uri);
if (result.changes === 0) {
return err(new SQLiteError('Document not found', 'DOCUMENT_NOT_FOUND'));
}
this.metrics.recordOperation('deleteDocument');
this.logger.info('Document deleted successfully', { uri });
return ok(undefined);
} catch (error) {
this.metrics.recordError('deleteDocument');
return err(new SQLiteError(
`Failed to delete document: ${error instanceof Error ? error.message : String(error)}`,
'DELETE_DOCUMENT_ERROR',
undefined,
error instanceof Error ? error : undefined
));
}
}
async search(query: string, options: SearchOptions = {}): Promise<Result<SearchResult[], SQLiteError>> {
// Note: options.filters is only honored by getDocuments(); the FTS and
// LIKE search paths below do not apply metadata filters yet
const {
limit = 10,
offset = 0,
includeSnippets = true,
highlightTerms = true,
minScore = 0.1
} = options;
try {
// Try FTS search first
let results = await this.performFTSSearch(query, limit, offset);
if (results.length === 0) {
// Fallback to simple search
results = await this.performSimpleSearch(query, limit, offset);
}
// Filter by minimum score
const filteredResults = results.filter(r => r.score >= minScore);
// Add snippets and highlights if requested
if (includeSnippets || highlightTerms) {
filteredResults.forEach(result => {
if (includeSnippets) {
result.snippet = this.generateSnippet(result.text, query, 200);
}
if (highlightTerms) {
result.highlights = this.extractHighlights(result.text, query);
}
});
}
this.metrics.recordOperation('search');
this.logger.info('Search completed', { query, resultsCount: filteredResults.length });
return ok(filteredResults);
} catch (error) {
this.metrics.recordError('search');
return err(new SQLiteError(
`Search failed: ${error instanceof Error ? error.message : String(error)}`,
'SEARCH_ERROR',
undefined,
error instanceof Error ? error : undefined
));
}
}
async getDocumentChunks(docId: number): Promise<Result<ChunkRecord[], SQLiteError>> {
try {
if (!this.statements.getChunks) {
await this.prepareStatements();
}
if (!this.statements.getChunks) {
return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
}
const results = this.statements.getChunks.all(docId) as ChunkRecord[];
// Validate results
const validatedResults = results.map(chunk => {
const validation = ChunkRecordSchema.safeParse(chunk);
if (!validation.success) {
this.logger.warn('Invalid chunk record', { chunk, errors: validation.error.issues });
return null;
}
return validation.data;
}).filter((chunk): chunk is ChunkRecord => chunk !== null);
return ok(validatedResults);
} catch (error) {
return err(new SQLiteError(
`Failed to get document chunks: ${error instanceof Error ? error.message : String(error)}`,
'GET_CHUNKS_ERROR',
undefined,
error instanceof Error ? error : undefined
));
}
}
async isConnected(): Promise<boolean> {
try {
if (!this.db) return false;
this.db.prepare('SELECT 1').get();
return true;
} catch {
return false;
}
}
async getStats(): Promise<Result<{
documents: number;
chunks: number;
embeddings: number;
dbSize: number;
ftsEnabled: boolean;
}, SQLiteError>> {
try {
if (!this.db) {
return err(new SQLiteError('Database not initialized', 'DB_NOT_INITIALIZED'));
}
const stats = {
documents: (this.db.prepare('SELECT COUNT(*) as count FROM docs').get() as any).count,
chunks: (this.db.prepare('SELECT COUNT(*) as count FROM chunks').get() as any).count,
embeddings: (this.db.prepare('SELECT COUNT(*) as count FROM chunk_vecs').get() as any).count,
dbSize: (await fs.stat(this.dbPath)).size,
ftsEnabled: this.checkFTSEnabled()
};
return ok(stats);
} catch (error) {
return err(new SQLiteError(
`Failed to get stats: ${error instanceof Error ? error.message : String(error)}`,
'GET_STATS_ERROR',
undefined,
error instanceof Error ? error : undefined
));
}
}
// Private helper methods
private async createSchema(): Promise<void> {
if (!this.db) return;
this.logger.info('Initializing database schema with migrations...');
// Create the table that tracks applied migrations
this.db.exec(`
CREATE TABLE IF NOT EXISTS schema_migrations (
version INTEGER PRIMARY KEY,
applied_at DATETIME DEFAULT CURRENT_TIMESTAMP,
description TEXT
);
`);
// Apply pending migrations
await this.migrate();
}
private async migrate(): Promise<void> {
if (!this.db) return;
const currentVersion = await this.getSchemaVersion();
this.logger.info(`Current schema version: ${currentVersion}`);
// Migration 1: base schema
if (currentVersion < 1) {
this.logger.info('Applying migration 1: Base schema');
this.db.exec(`
-- Documents table
CREATE TABLE IF NOT EXISTS docs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
uri TEXT UNIQUE NOT NULL,
title TEXT NOT NULL,
mtime INTEGER NOT NULL,
hash TEXT NOT NULL,
metadata TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Chunks table
CREATE TABLE IF NOT EXISTS chunks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
doc_id INTEGER NOT NULL,
text TEXT NOT NULL,
section TEXT DEFAULT 'main',
offset INTEGER NOT NULL,
lang TEXT DEFAULT 'auto',
hash TEXT NOT NULL,
token_count INTEGER DEFAULT 0,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (doc_id) REFERENCES docs(id) ON DELETE CASCADE
);
-- Vector embeddings table
CREATE TABLE IF NOT EXISTS chunk_vecs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
chunk_id INTEGER NOT NULL,
dim INTEGER NOT NULL,
vec BLOB NOT NULL,
model TEXT NOT NULL,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE(chunk_id),
FOREIGN KEY (chunk_id) REFERENCES chunks(id) ON DELETE CASCADE
);
-- FTS5 virtual table
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
text,
content='chunks',
content_rowid='id',
tokenize='porter unicode61'
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_docs_uri ON docs(uri);
CREATE INDEX IF NOT EXISTS idx_docs_hash ON docs(hash);
CREATE INDEX IF NOT EXISTS idx_docs_created_at ON docs(created_at);
CREATE INDEX IF NOT EXISTS idx_chunks_doc_id ON chunks(doc_id);
CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(hash);
CREATE INDEX IF NOT EXISTS idx_chunks_section ON chunks(section);
CREATE INDEX IF NOT EXISTS idx_chunk_vecs_model ON chunk_vecs(model);
-- Triggers
CREATE TRIGGER IF NOT EXISTS chunks_fts_insert AFTER INSERT ON chunks
BEGIN
INSERT INTO chunks_fts(rowid, text) VALUES (new.id, new.text);
END;
-- FTS5 external-content tables must be maintained through the special
-- 'delete' command; a plain DELETE against chunks_fts would leave the
-- index out of sync with the chunks table
CREATE TRIGGER IF NOT EXISTS chunks_fts_delete AFTER DELETE ON chunks
BEGIN
INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES ('delete', old.id, old.text);
END;
CREATE TRIGGER IF NOT EXISTS chunks_fts_update AFTER UPDATE ON chunks
BEGIN
INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES ('delete', old.id, old.text);
INSERT INTO chunks_fts(rowid, text) VALUES (new.id, new.text);
END;
CREATE TRIGGER IF NOT EXISTS docs_updated_at AFTER UPDATE ON docs
BEGIN
UPDATE docs SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;
`);
await this.setSchemaVersion(1, 'Base schema with docs, chunks, embeddings, and FTS');
}
// Migration 2: add the updated_at column if it is missing
if (currentVersion < 2) {
this.logger.info('Applying migration 2: Add updated_at column to docs');
try {
// Check whether the updated_at column already exists
const columnCheck = this.db.prepare("PRAGMA table_info(docs)").all();
const hasUpdatedAt = columnCheck.some((col: any) => col.name === 'updated_at');
if (!hasUpdatedAt) {
this.db.exec('ALTER TABLE docs ADD COLUMN updated_at DATETIME');
// Backfill existing rows
this.db.exec('UPDATE docs SET updated_at = created_at WHERE updated_at IS NULL');
this.logger.info('Added updated_at column to docs table');
} else {
this.logger.info('updated_at column already exists in docs table');
}
} catch (error) {
const errorObj = error instanceof Error ? error : new Error(String(error));
this.logger.warn('Migration 2 warning:', { error: errorObj.message });
}
await this.setSchemaVersion(2, 'Added updated_at column to docs table');
}
// Migration 3: add missing columns to the chunks table
if (currentVersion < 3) {
this.logger.info('Applying migration 3: Add missing columns to chunks table');
try {
// Check whether the token_count column already exists
const columnCheck = this.db.prepare("PRAGMA table_info(chunks)").all();
const hasTokenCount = columnCheck.some((col: any) => col.name === 'token_count');
if (!hasTokenCount) {
this.db.exec('ALTER TABLE chunks ADD COLUMN token_count INTEGER DEFAULT 0');
this.logger.info('Added token_count column to chunks table');
} else {
this.logger.info('token_count column already exists in chunks table');
}
} catch (error) {
const errorObj = error instanceof Error ? error : new Error(String(error));
this.logger.warn('Migration 3 warning:', { error: errorObj.message });
}
await this.setSchemaVersion(3, 'Added missing columns to chunks table');
}
this.logger.info('Database migrations completed');
}
private async getSchemaVersion(): Promise<number> {
if (!this.db) return 0;
try {
const result = this.db.prepare('SELECT MAX(version) as version FROM schema_migrations').get() as any;
return result?.version || 0;
} catch {
return 0;
}
}
private async setSchemaVersion(version: number, description: string): Promise<void> {
if (!this.db) return;
try {
this.db.prepare(`
INSERT OR REPLACE INTO schema_migrations (version, description, applied_at)
VALUES (?, ?, CURRENT_TIMESTAMP)
`).run(version, description);
this.logger.info(`Schema version updated to ${version}: ${description}`);
} catch (error) {
const errorObj = error instanceof Error ? error : new Error(String(error));
this.logger.error('Failed to update schema version:', errorObj);
}
}
private async prepareStatements(): Promise<void> {
if (!this.db) return;
this.statements.getDocument = this.db.prepare(
'SELECT * FROM docs WHERE uri = ? LIMIT 1'
);
this.statements.addDocument = this.db.prepare(`
INSERT INTO docs (uri, title, mtime, hash, metadata)
VALUES (?, ?, ?, ?, ?)
`);
this.statements.updateDocument = this.db.prepare(`
UPDATE docs SET title = ?, mtime = ?, hash = ?, metadata = ?, updated_at = CURRENT_TIMESTAMP
WHERE uri = ?
`);
this.statements.deleteDocument = this.db.prepare(
'DELETE FROM docs WHERE uri = ?'
);
this.statements.getChunks = this.db.prepare(
'SELECT * FROM chunks WHERE doc_id = ? ORDER BY offset'
);
this.statements.searchFTS = this.db.prepare(`
SELECT
c.text,
c.section,
d.uri,
d.title,
bm25(chunks_fts) as score
FROM chunks_fts
JOIN chunks c ON chunks_fts.rowid = c.id
JOIN docs d ON c.doc_id = d.id
WHERE chunks_fts MATCH ?
-- bm25() returns more-negative values for better matches,
-- so ascending order ranks the best matches first
ORDER BY score
LIMIT ? OFFSET ?
`);
this.statements.searchSimple = this.db.prepare(`
SELECT
c.text,
c.section,
d.uri,
d.title,
1.0 as score
FROM chunks c
JOIN docs d ON c.doc_id = d.id
WHERE c.text LIKE ?
ORDER BY c.created_at DESC
LIMIT ? OFFSET ?
`);
}
private async performFTSSearch(query: string, limit: number, offset: number): Promise<SearchResult[]> {
try {
if (!this.statements.searchFTS) return [];
const results = this.statements.searchFTS.all(query, limit, offset) as any[];
return results.map(r => ({
text: r.text,
section: r.section,
uri: r.uri,
title: r.title,
score: Math.abs(r.score) // BM25 can be negative
}));
} catch (error) {
const errorObj = error instanceof Error ? error : new Error(String(error));
this.logger.warn('FTS search failed, falling back to simple search', { error: errorObj.message });
return [];
}
}
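// The MATCH argument above uses standard FTS5 query syntax, so callers can
// pass more than bare terms, e.g.:
//   client.search('sqlite AND (index OR btree)')  // boolean operators
//   client.search('"full text search"')           // exact phrase
//   client.search('embed*')                       // prefix query
// Malformed syntax (for example an unbalanced quote) makes FTS5 throw; the
// catch above turns that into an empty result so search() falls back to the
// LIKE-based simple search.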
private async performSimpleSearch(query: string, limit: number, offset: number): Promise<SearchResult[]> {
if (!this.statements.searchSimple) return [];
const results = this.statements.searchSimple.all(`%${query}%`, limit, offset) as any[];
return results.map(r => ({
text: r.text,
section: r.section,
uri: r.uri,
title: r.title,
score: r.score
}));
}
private chunkText(text: string, options: ChunkingOptions = {}): Array<{
text: string;
section: string;
offset: number;
}> {
const {
maxChunkSize = 1000,
overlapSize = 100,
preserveStructure = true,
minChunkSize = 100
} = options;
// Note: splitOnSentences is not a separate mode; both strategies below
// already prefer sentence/line boundaries when choosing break points
if (preserveStructure) {
// Smart chunking that preserves document structure (headings become sections)
return this.smartChunkText(text, maxChunkSize, overlapSize, minChunkSize);
}
// Simple sliding-window chunking
return this.simpleChunkText(text, maxChunkSize, overlapSize, minChunkSize);
}
private smartChunkText(text: string, maxChunkSize: number, overlapSize: number, minChunkSize: number) {
const chunks: Array<{ text: string; section: string; offset: number }> = [];
const lines = text.split('\n');
let currentChunk = '';
let currentSection = 'main';
let offset = 0;
for (const line of lines) {
const trimmedLine = line.trim();
// Detect section headers
if (trimmedLine.match(/^#{1,6}\s+/) || trimmedLine.match(/^[A-Z][^.]*:$/)) {
// Save current chunk if it's substantial
if (currentChunk.trim().length >= minChunkSize) {
chunks.push({
text: currentChunk.trim(),
section: currentSection,
offset: Math.max(0, offset - currentChunk.length) // clamp: overlap text can exceed the consumed input
});
}
currentSection = trimmedLine.replace(/^#+\s*/, '').replace(/:$/, '').toLowerCase();
currentChunk = trimmedLine + '\n';
} else if ((currentChunk + line + '\n').length > maxChunkSize && currentChunk.length >= minChunkSize) {
// Chunk is getting too large, save it
chunks.push({
text: currentChunk.trim(),
section: currentSection,
offset: Math.max(0, offset - currentChunk.length)
});
// Start new chunk with overlap
const overlapText = this.getOverlapText(currentChunk, overlapSize);
currentChunk = overlapText + line + '\n';
} else {
currentChunk += line + '\n';
}
offset += line.length + 1;
}
// Add final chunk
if (currentChunk.trim().length >= minChunkSize) {
chunks.push({
text: currentChunk.trim(),
section: currentSection,
offset: Math.max(0, offset - currentChunk.length)
});
}
return chunks;
}
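// Illustration (not executed): given markdown-style input such as
//   # Setup
//   Install the package...
//   # Usage
//   Call initialize() first...
// smartChunkText emits chunks whose section is the lower-cased heading text
// ('setup', 'usage'), so search results can report where a match came from.
// Chunks shorter than minChunkSize are dropped.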
private simpleChunkText(text: string, maxChunkSize: number, overlapSize: number, minChunkSize: number) {
const chunks: Array<{ text: string; section: string; offset: number }> = [];
let offset = 0;
while (offset < text.length) {
const chunkEnd = Math.min(offset + maxChunkSize, text.length);
let chunk = text.substring(offset, chunkEnd);
// Try to break at a sentence or line boundary; breakPoint is an index
// relative to the current chunk, not to the full text
if (chunkEnd < text.length) {
const lastSentence = chunk.lastIndexOf('.');
const lastNewline = chunk.lastIndexOf('\n');
const breakPoint = Math.max(lastSentence, lastNewline);
if (breakPoint > minChunkSize) {
chunk = chunk.substring(0, breakPoint + 1);
}
}
if (chunk.trim().length >= minChunkSize) {
chunks.push({
text: chunk.trim(),
section: 'main',
offset
});
}
// Advance by at least one character so a chunk shorter than the
// overlap cannot cause an infinite loop
offset += Math.max(chunk.length - overlapSize, 1);
}
return chunks;
}
private getOverlapText(text: string, overlapSize: number): string {
if (text.length <= overlapSize) return text;
const overlap = text.substring(text.length - overlapSize);
const lastSentence = overlap.lastIndexOf('.');
return lastSentence > 0 ? overlap.substring(lastSentence + 1) : overlap;
}
private generateSecureHash(text: string): string {
// SHA-256 truncated to 64 bits (16 hex chars): sufficient for change
// detection and dedup, not meant for adversarial collision resistance
return crypto.createHash('sha256').update(text, 'utf8').digest('hex').substring(0, 16);
}
private estimateTokenCount(text: string): number {
// Rough estimate: ~4 characters per token for English text
return Math.ceil(text.length / 4);
}
private generateSnippet(text: string, query: string, maxLength: number): string {
const queryTerms = query.toLowerCase().split(/\s+/);
const textLower = text.toLowerCase();
// Find the best position to start the snippet
let bestPosition = 0;
let bestScore = 0;
for (let i = 0; i <= text.length - maxLength; i += 50) {
const snippet = textLower.substring(i, i + maxLength);
const score = queryTerms.reduce((acc, term) => {
return acc + (snippet.includes(term) ? 1 : 0);
}, 0);
if (score > bestScore) {
bestScore = score;
bestPosition = i;
}
}
let snippet = text.substring(bestPosition, bestPosition + maxLength);
// Try to start and end at word boundaries
if (bestPosition > 0) {
const firstSpace = snippet.indexOf(' ');
if (firstSpace > 0) {
snippet = snippet.substring(firstSpace + 1);
}
}
const lastSpace = snippet.lastIndexOf(' ');
if (lastSpace > 0 && bestPosition + maxLength < text.length) {
snippet = snippet.substring(0, lastSpace);
}
return snippet + (bestPosition + snippet.length < text.length ? '...' : '');
}
private extractHighlights(text: string, query: string): string[] {
const queryTerms = query.toLowerCase().split(/\s+/);
const highlights: string[] = [];
const textLower = text.toLowerCase();
queryTerms.forEach(term => {
let startIndex = 0;
while (true) {
const index = textLower.indexOf(term, startIndex);
if (index === -1) break;
// Extract context around the term
const contextStart = Math.max(0, index - 30);
const contextEnd = Math.min(text.length, index + term.length + 30);
const context = text.substring(contextStart, contextEnd);
highlights.push(context);
startIndex = index + term.length;
}
});
return highlights;
}
private checkFTSEnabled(): boolean {
try {
if (!this.db) return false;
// get() returns undefined rather than throwing when no row matches,
// so the result must be checked explicitly
const row = this.db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='chunks_fts'").get();
return row !== undefined;
} catch {
return false;
}
}
}
// Supporting interfaces and classes
export interface Logger {
info(message: string, meta?: Record<string, unknown>): void;
warn(message: string, meta?: Record<string, unknown>): void;
error(message: string, error?: Error, meta?: Record<string, unknown>): void;
}
export interface MetricsCollector {
recordQuery(sql: string, duration: number): void;
recordOperation(operation: string): void;
recordError(operation: string): void;
}
export class ConsoleLogger implements Logger {
info(message: string, meta?: Record<string, unknown>): void {
console.log(`ℹ️ ${message}`, meta ? JSON.stringify(meta, null, 2) : '');
}
warn(message: string, meta?: Record<string, unknown>): void {
console.warn(`⚠️ ${message}`, meta ? JSON.stringify(meta, null, 2) : '');
}
error(message: string, error?: Error, meta?: Record<string, unknown>): void {
console.error(`❌ ${message}`, error?.message || error, meta ? JSON.stringify(meta, null, 2) : '');
}
}
export class NoOpMetrics implements MetricsCollector {
recordQuery(): void {}
recordOperation(): void {}
recordError(): void {}
}
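// A minimal sketch of a non-trivial MetricsCollector, assuming callers only
// need aggregate counters; the class name and snapshot() helper are
// illustrative, not part of the original API.
export class InMemoryMetrics implements MetricsCollector {
private queryCount = 0;
private totalQueryMs = 0;
private readonly operations = new Map<string, number>();
private readonly errors = new Map<string, number>();
recordQuery(_sql: string, duration: number): void {
this.queryCount++;
this.totalQueryMs += duration;
}
recordOperation(operation: string): void {
this.operations.set(operation, (this.operations.get(operation) ?? 0) + 1);
}
recordError(operation: string): void {
this.errors.set(operation, (this.errors.get(operation) ?? 0) + 1);
}
// Aggregate view, e.g. for a periodic stats log
snapshot() {
return {
queryCount: this.queryCount,
avgQueryMs: this.queryCount > 0 ? this.totalQueryMs / this.queryCount : 0,
operations: Object.fromEntries(this.operations),
errors: Object.fromEntries(this.errors)
};
}
}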
// Usage example:
/*
// Note: run inside an async function; the early `return`s below assume that context.
const client = new SQLiteClient('./data/rag.db');
// Initialize the client
const initResult = await client.initialize();
if (initResult.isErr()) {
console.error('Failed to initialize client:', initResult.error);
return;
}
// Add a document with smart chunking
const addResult = await client.addDocument(
'https://example.com/doc1',
'My Document',
'This is the content...',
JSON.stringify({ category: 'tech', author: 'John Doe' }),
{
maxChunkSize: 800,
overlapSize: 50,
preserveStructure: true,
splitOnSentences: true
}
);
if (addResult.isErr()) {
console.error('Failed to add document:', addResult.error);
return;
}
// Search with advanced options (note: metadata filters are applied by
// getDocuments(), not by search(), so none are passed here)
const searchResult = await client.search('example query', {
limit: 10,
includeSnippets: true,
highlightTerms: true,
minScore: 0.5
});
if (searchResult.isErr()) {
console.error('Search failed:', searchResult.error);
return;
}
console.log('Search results:', searchResult.value);
// Clean up
await client.close();
*/
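// A second sketch: wiring in the illustrative InMemoryMetrics defined above
// and checking database health. Everything else is the public API used in
// the example above.
/*
const metrics = new InMemoryMetrics();
const client = new SQLiteClient('./data/rag.db', new ConsoleLogger(), metrics);
await client.initialize();
if (await client.isConnected()) {
const stats = await client.getStats();
if (stats.isOk()) {
console.log('Documents:', stats.value.documents, 'Chunks:', stats.value.chunks);
}
console.log('Collected metrics:', metrics.snapshot());
}
await client.close();
*/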