sqlite-client.ts
// Type-only import of the instance type; the constructor itself is loaded
// dynamically in initialize() so a missing native dependency fails gracefully.
// (better-sqlite3 uses `export =`, so there is no named `Database` export.)
import type BetterSqlite3 from 'better-sqlite3';
import { z } from 'zod';
import { Result, ok, err } from 'neverthrow';
import crypto from 'crypto';
import path from 'path';
import fs from 'fs/promises';

// Type-safe schemas
const DocumentRecordSchema = z.object({
  id: z.number().int().positive(),
  uri: z.string().url(),
  title: z.string().min(1),
  mtime: z.number().int().positive(),
  hash: z.string().min(1),
  metadata: z.string().nullable().optional(),
  created_at: z.string(),
  updated_at: z.string().nullable().optional()
});

const ChunkRecordSchema = z.object({
  id: z.number().int().positive(),
  doc_id: z.number().int().positive(),
  text: z.string().min(1),
  section: z.string().default('main'),
  offset: z.number().int().min(0),
  lang: z.string().default('auto'),
  hash: z.string().min(1),
  created_at: z.string(),
  token_count: z.number().int().min(0).optional(),
  embedding_dim: z.number().int().positive().optional()
});

export type DocumentRecord = z.infer<typeof DocumentRecordSchema>;
export type ChunkRecord = z.infer<typeof ChunkRecordSchema>;

export interface SearchResult {
  readonly text: string;
  readonly section: string;
  readonly uri: string;
  readonly title: string;
  readonly score: number;
  snippet?: string;
  highlights?: string[];
}

export interface SearchOptions {
  readonly limit?: number;
  readonly offset?: number;
  readonly filters?: Record<string, unknown>;
  readonly includeSnippets?: boolean;
  readonly highlightTerms?: boolean;
  readonly minScore?: number;
}

export interface ChunkingOptions {
  readonly maxChunkSize?: number;
  readonly overlapSize?: number;
  readonly preserveStructure?: boolean;
  readonly splitOnSentences?: boolean;
  readonly minChunkSize?: number;
}

// Custom errors
export class SQLiteError extends Error {
  constructor(
    message: string,
    public readonly code: string,
    public readonly query?: string,
    public readonly cause?: Error
  ) {
    super(message);
    this.name = 'SQLiteError';
  }
}

export class SQLiteClient {
  private db: BetterSqlite3.Database | null = null;
  private readonly dbPath: string;
  private readonly logger: Logger;
  private readonly metrics: MetricsCollector;
  private isInitialized = false;

  // Prepared statements for better performance
  private statements = {
    getDocument: null as any,
    addDocument: null as any,
    updateDocument: null as any,
    deleteDocument: null as any,
    getChunks: null as any,
    addChunk: null as any,
    searchFTS: null as any,
    searchSimple: null as any
  };

  constructor(
    dbPath?: string,
    logger?: Logger,
    metrics?: MetricsCollector
  ) {
    this.dbPath = dbPath || './data/rag.db';
    this.logger = logger || new ConsoleLogger();
    this.metrics = metrics || new NoOpMetrics();
  }

  async initialize(): Promise<Result<void, SQLiteError>> {
    try {
      if (this.isInitialized) return ok(undefined);

      this.logger.info('Initializing SQLite database', { path: this.dbPath });

      // Ensure directory exists
      await fs.mkdir(path.dirname(this.dbPath), { recursive: true });

      // Import better-sqlite3 dynamically to handle a potentially missing dependency
      const { default: Database } = await import('better-sqlite3');
      this.db = new Database(this.dbPath, {
        verbose: (sql?: unknown) => this.logger.info('SQL Query', { sql }),
        fileMustExist: false
      });

      // Enable WAL mode for better concurrent access
      if (this.db) {
        this.db.pragma('journal_mode = WAL');
        this.db.pragma('synchronous = NORMAL');
        this.db.pragma('cache_size = 10000');
        this.db.pragma('temp_store = memory');
        this.db.pragma('mmap_size = 268435456'); // 256MB
        // SQLite leaves foreign key enforcement off by default; the schema
        // relies on ON DELETE CASCADE, so turn it on explicitly.
        this.db.pragma('foreign_keys = ON');
      }

      await this.createSchema();
      await this.prepareStatements();

      this.isInitialized = true;
      this.logger.info('SQLite database initialized successfully');
      return ok(undefined);
    } catch (error) {
      const errorObj = error instanceof Error ? error : new Error(String(error));
      this.logger.error('Failed to initialize SQLite database', errorObj);
      return err(new SQLiteError(
        `Failed to initialize database: ${errorObj.message}`,
        'INIT_ERROR',
        undefined,
        errorObj
      ));
    }
  }
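  // A note on the pragmas above: WAL lets readers proceed while a writer is
  // active, synchronous = NORMAL trades a small durability window for write
  // throughput, and mmap_size caps memory-mapped I/O at 256 MB. These are
  // common starting points for read-heavy workloads, not universal defaults;
  // tune them per deployment.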
  async close(): Promise<void> {
    if (this.db) {
      try {
        // Close prepared statements
        // (better-sqlite3 statements expose no finalize(); the guard keeps
        // this a no-op there and portable to drivers that do)
        Object.values(this.statements).forEach(stmt => {
          if (stmt && typeof stmt.finalize === 'function') {
            try {
              stmt.finalize();
            } catch (finalizeError) {
              // Ignore finalize errors
            }
          }
        });

        this.db.close();
        this.db = null;
        this.isInitialized = false;
        this.logger.info('Database connection closed');
      } catch (error) {
        const errorObj = error instanceof Error ? error : new Error(String(error));
        this.logger.error('Error closing database', errorObj);
      }
    }
  }

  async query<T>(sql: string, params: any[] = []): Promise<Result<T[], SQLiteError>> {
    const startTime = Date.now();
    try {
      if (!this.db) {
        const initResult = await this.initialize();
        if (initResult.isErr()) return err(initResult.error);
      }

      this.logger.info('Executing query', { sql: sql.substring(0, 100), paramCount: params.length });

      const stmt = this.db!.prepare(sql);
      const results = stmt.all(...params) as T[];

      this.metrics.recordQuery(sql, Date.now() - startTime);
      return ok(results);
    } catch (error) {
      const errorObj = error instanceof Error ? error : new Error(String(error));
      this.metrics.recordError('query');
      this.logger.error('Query execution failed', errorObj, { sql, params });
      return err(new SQLiteError(
        `Query failed: ${errorObj.message}`,
        'QUERY_ERROR',
        sql,
        errorObj
      ));
    }
  }

  async execute(sql: string, params: any[] = []): Promise<Result<void, SQLiteError>> {
    const startTime = Date.now();
    try {
      if (!this.db) {
        const initResult = await this.initialize();
        if (initResult.isErr()) return err(initResult.error);
      }

      this.logger.info('Executing statement', { sql: sql.substring(0, 100), paramCount: params.length });

      const stmt = this.db!.prepare(sql);
      stmt.run(...params);

      this.metrics.recordQuery(sql, Date.now() - startTime);
      return ok(undefined);
    } catch (error) {
      const errorObj = error instanceof Error ? error : new Error(String(error));
      this.metrics.recordError('execute');
      this.logger.error('Statement execution failed', errorObj, { sql, params });
      return err(new SQLiteError(
        `Execution failed: ${errorObj.message}`,
        'EXECUTE_ERROR',
        sql,
        errorObj
      ));
    }
  }

  async getDocuments(filters?: Record<string, unknown>): Promise<Result<DocumentRecord[], SQLiteError>> {
    try {
      let sql = `
        SELECT id, uri, title, mtime, hash, metadata, created_at, updated_at
        FROM docs
      `;
      const params: any[] = [];

      if (filters && Object.keys(filters).length > 0) {
        const conditions: string[] = [];
        Object.entries(filters).forEach(([key, value]) => {
          conditions.push(`JSON_EXTRACT(metadata, '$.${key}') = ?`);
          params.push(value);
        });
        sql += ` WHERE ${conditions.join(' AND ')}`;
      }

      sql += ' ORDER BY created_at DESC';

      const result = await this.query<DocumentRecord>(sql, params);
      if (result.isErr()) return result;

      // Validate results
      const validatedResults = result.value.map(doc => {
        const validation = DocumentRecordSchema.safeParse(doc);
        if (!validation.success) {
          this.logger.warn('Invalid document record', { doc, errors: validation.error.issues });
          return null;
        }
        return validation.data;
      }).filter((doc): doc is DocumentRecord => doc !== null);

      return ok(validatedResults);
    } catch (error) {
      return err(new SQLiteError(
        `Failed to get documents: ${error instanceof Error ? error.message : String(error)}`,
        'GET_DOCUMENTS_ERROR',
        undefined,
        error instanceof Error ? error : undefined
      ));
    }
  }
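  // getDocuments() builds its WHERE clause from metadata keys, e.g.
  // getDocuments({ author: 'Jane' }) becomes
  // JSON_EXTRACT(metadata, '$.author') = ?. Values are bound as parameters,
  // but the key itself is interpolated into the JSON path string, so only
  // trusted filter keys should be passed in.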
  async getDocument(uri: string): Promise<Result<DocumentRecord | null, SQLiteError>> {
    try {
      if (!this.statements.getDocument) {
        await this.prepareStatements();
      }
      if (!this.statements.getDocument) {
        return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
      }

      const results = this.statements.getDocument.all(uri) as DocumentRecord[];
      if (results.length === 0) {
        return ok(null);
      }

      const validation = DocumentRecordSchema.safeParse(results[0]);
      if (!validation.success) {
        return err(new SQLiteError(
          `Invalid document data: ${validation.error.message}`,
          'VALIDATION_ERROR'
        ));
      }

      return ok(validation.data);
    } catch (error) {
      return err(new SQLiteError(
        `Failed to get document: ${error instanceof Error ? error.message : String(error)}`,
        'GET_DOCUMENT_ERROR',
        undefined,
        error instanceof Error ? error : undefined
      ));
    }
  }

  async addDocument(
    uri: string,
    title: string,
    content: string,
    metadata?: string,
    options?: ChunkingOptions
  ): Promise<Result<number, SQLiteError>> {
    try {
      if (!this.db) {
        return err(new SQLiteError('Database not initialized', 'DB_NOT_INITIALIZED'));
      }
      if (!this.statements.addDocument) {
        await this.prepareStatements();
      }
      if (!this.statements.addDocument) {
        return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
      }

      const mtime = Math.floor(Date.now() / 1000);
      const hash = this.generateSecureHash(content);

      // Insert document
      const insertResult = this.statements.addDocument.run(uri, title, mtime, hash, metadata || null);
      const docId = insertResult.lastInsertRowid as number;

      // Chunk the content
      const chunks = this.chunkText(content, options);

      // Insert chunks
      const addChunkStmt = this.db.prepare(`
        INSERT INTO chunks (doc_id, text, section, offset, lang, hash, token_count, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
      `);

      chunks.forEach(chunk => {
        const chunkHash = this.generateSecureHash(chunk.text);
        const tokenCount = this.estimateTokenCount(chunk.text);
        addChunkStmt.run(docId, chunk.text, chunk.section, chunk.offset, 'auto', chunkHash, tokenCount);
      });

      this.metrics.recordOperation('addDocument');
      this.logger.info('Document added successfully', { uri, docId });
      return ok(docId);
    } catch (error) {
      this.metrics.recordError('addDocument');
      return err(new SQLiteError(
        `Failed to add document: ${error instanceof Error ? error.message : String(error)}`,
        'ADD_DOCUMENT_ERROR',
        undefined,
        error instanceof Error ? error : undefined
      ));
    }
  }
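  // addDocument() runs one INSERT per chunk without an explicit transaction,
  // so a failure mid-way can leave a partially chunked document. Wrapping the
  // document and chunk inserts in better-sqlite3's db.transaction() would
  // make the write atomic.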
  async updateDocument(
    uri: string,
    title: string,
    content: string,
    metadata?: string,
    options?: ChunkingOptions
  ): Promise<Result<void, SQLiteError>> {
    try {
      if (!this.db) {
        return err(new SQLiteError('Database not initialized', 'DB_NOT_INITIALIZED'));
      }
      if (!this.statements.getDocument || !this.statements.updateDocument) {
        await this.prepareStatements();
      }
      if (!this.statements.getDocument || !this.statements.updateDocument) {
        return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
      }

      // Get existing document
      const existingDoc = this.statements.getDocument.get(uri) as DocumentRecord | undefined;
      if (!existingDoc) {
        return err(new SQLiteError('Document not found', 'DOCUMENT_NOT_FOUND'));
      }

      const mtime = Math.floor(Date.now() / 1000);
      const hash = this.generateSecureHash(content);

      // Update document
      this.statements.updateDocument.run(title, mtime, hash, metadata || null, uri);

      // Delete old chunks and their embeddings
      this.db.prepare('DELETE FROM chunk_vecs WHERE chunk_id IN (SELECT id FROM chunks WHERE doc_id = ?)').run(existingDoc.id);
      this.db.prepare('DELETE FROM chunks WHERE doc_id = ?').run(existingDoc.id);

      // Add new chunks
      const chunks = this.chunkText(content, options);
      const addChunkStmt = this.db.prepare(`
        INSERT INTO chunks (doc_id, text, section, offset, lang, hash, token_count, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))
      `);

      chunks.forEach(chunk => {
        const chunkHash = this.generateSecureHash(chunk.text);
        const tokenCount = this.estimateTokenCount(chunk.text);
        addChunkStmt.run(existingDoc.id, chunk.text, chunk.section, chunk.offset, 'auto', chunkHash, tokenCount);
      });

      this.metrics.recordOperation('updateDocument');
      this.logger.info('Document updated successfully', { uri });
      return ok(undefined);
    } catch (error) {
      this.metrics.recordError('updateDocument');
      return err(new SQLiteError(
        `Failed to update document: ${error instanceof Error ? error.message : String(error)}`,
        'UPDATE_DOCUMENT_ERROR',
        undefined,
        error instanceof Error ? error : undefined
      ));
    }
  }

  async deleteDocument(uri: string): Promise<Result<void, SQLiteError>> {
    try {
      if (!this.statements.deleteDocument) {
        await this.prepareStatements();
      }
      if (!this.statements.deleteDocument) {
        return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
      }

      const result = this.statements.deleteDocument.run(uri);
      if (result.changes === 0) {
        return err(new SQLiteError('Document not found', 'DOCUMENT_NOT_FOUND'));
      }

      // Only the docs row is deleted here; chunks and chunk_vecs are cleaned
      // up by the ON DELETE CASCADE constraints, which depend on the
      // foreign_keys pragma enabled in initialize().
      this.metrics.recordOperation('deleteDocument');
      this.logger.info('Document deleted successfully', { uri });
      return ok(undefined);
    } catch (error) {
      this.metrics.recordError('deleteDocument');
      return err(new SQLiteError(
        `Failed to delete document: ${error instanceof Error ? error.message : String(error)}`,
        'DELETE_DOCUMENT_ERROR',
        undefined,
        error instanceof Error ? error : undefined
      ));
    }
  }
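  // Search flow: FTS5 (bm25-ranked) first, then a LIKE fallback when FTS
  // returns nothing. bm25() yields negative scores (more negative = better),
  // so the FTS query orders ascending and performFTSSearch() takes the
  // absolute value before minScore filtering. Note that SearchOptions.filters
  // is accepted but not currently applied to either search path.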
  async search(query: string, options: SearchOptions = {}): Promise<Result<SearchResult[], SQLiteError>> {
    const {
      limit = 10,
      offset = 0,
      includeSnippets = true,
      highlightTerms = true,
      minScore = 0.1
    } = options;

    try {
      // Try FTS search first
      let results = await this.performFTSSearch(query, limit, offset);
      if (results.length === 0) {
        // Fallback to simple search
        results = await this.performSimpleSearch(query, limit, offset);
      }

      // Filter by minimum score
      const filteredResults = results.filter(r => r.score >= minScore);

      // Add snippets and highlights if requested
      if (includeSnippets || highlightTerms) {
        filteredResults.forEach(result => {
          if (includeSnippets) {
            result.snippet = this.generateSnippet(result.text, query, 200);
          }
          if (highlightTerms) {
            result.highlights = this.extractHighlights(result.text, query);
          }
        });
      }

      this.metrics.recordOperation('search');
      this.logger.info('Search completed', { query, resultsCount: filteredResults.length });
      return ok(filteredResults);
    } catch (error) {
      this.metrics.recordError('search');
      return err(new SQLiteError(
        `Search failed: ${error instanceof Error ? error.message : String(error)}`,
        'SEARCH_ERROR',
        undefined,
        error instanceof Error ? error : undefined
      ));
    }
  }

  async getDocumentChunks(docId: number): Promise<Result<ChunkRecord[], SQLiteError>> {
    try {
      if (!this.statements.getChunks) {
        await this.prepareStatements();
      }
      if (!this.statements.getChunks) {
        return err(new SQLiteError('Failed to prepare statements', 'STATEMENT_PREPARE_ERROR'));
      }

      const results = this.statements.getChunks.all(docId) as ChunkRecord[];

      // Validate results
      const validatedResults = results.map(chunk => {
        const validation = ChunkRecordSchema.safeParse(chunk);
        if (!validation.success) {
          this.logger.warn('Invalid chunk record', { chunk, errors: validation.error.issues });
          return null;
        }
        return validation.data;
      }).filter((chunk): chunk is ChunkRecord => chunk !== null);

      return ok(validatedResults);
    } catch (error) {
      return err(new SQLiteError(
        `Failed to get document chunks: ${error instanceof Error ? error.message : String(error)}`,
        'GET_CHUNKS_ERROR',
        undefined,
        error instanceof Error ? error : undefined
      ));
    }
  }

  async isConnected(): Promise<boolean> {
    try {
      if (!this.db) return false;
      this.db.prepare('SELECT 1').get();
      return true;
    } catch {
      return false;
    }
  }

  async getStats(): Promise<Result<{
    documents: number;
    chunks: number;
    embeddings: number;
    dbSize: number;
    ftsEnabled: boolean;
  }, SQLiteError>> {
    try {
      if (!this.db) {
        return err(new SQLiteError('Database not initialized', 'DB_NOT_INITIALIZED'));
      }

      const stats = {
        documents: (this.db.prepare('SELECT COUNT(*) as count FROM docs').get() as any).count,
        chunks: (this.db.prepare('SELECT COUNT(*) as count FROM chunks').get() as any).count,
        embeddings: (this.db.prepare('SELECT COUNT(*) as count FROM chunk_vecs').get() as any).count,
        dbSize: (await fs.stat(this.dbPath)).size,
        ftsEnabled: this.checkFTSEnabled()
      };

      return ok(stats);
    } catch (error) {
      return err(new SQLiteError(
        `Failed to get stats: ${error instanceof Error ? error.message : String(error)}`,
        'GET_STATS_ERROR',
        undefined,
        error instanceof Error ? error : undefined
      ));
    }
  }
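  // getStats() combines row counts with the on-disk size of the main database
  // file; under WAL journaling this understates total disk usage, since the
  // -wal and -shm side files are not included in the stat() call.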
  // Private helper methods
  private async createSchema(): Promise<void> {
    if (!this.db) return;

    this.logger.info('Initializing database schema with migrations...');

    // Create the table that tracks applied migrations
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS schema_migrations (
        version INTEGER PRIMARY KEY,
        applied_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        description TEXT
      );
    `);

    // Apply migrations
    await this.migrate();
  }

  private async migrate(): Promise<void> {
    if (!this.db) return;

    const currentVersion = await this.getSchemaVersion();
    this.logger.info(`Current schema version: ${currentVersion}`);

    // Migration 1: base schema
    if (currentVersion < 1) {
      this.logger.info('Applying migration 1: Base schema');
      this.db.exec(`
        -- Documents table
        CREATE TABLE IF NOT EXISTS docs (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          uri TEXT UNIQUE NOT NULL,
          title TEXT NOT NULL,
          mtime INTEGER NOT NULL,
          hash TEXT NOT NULL,
          metadata TEXT,
          created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
          updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
        );

        -- Chunks table
        CREATE TABLE IF NOT EXISTS chunks (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          doc_id INTEGER NOT NULL,
          text TEXT NOT NULL,
          section TEXT DEFAULT 'main',
          offset INTEGER NOT NULL,
          lang TEXT DEFAULT 'auto',
          hash TEXT NOT NULL,
          token_count INTEGER DEFAULT 0,
          created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
          FOREIGN KEY (doc_id) REFERENCES docs(id) ON DELETE CASCADE
        );

        -- Vector embeddings table
        CREATE TABLE IF NOT EXISTS chunk_vecs (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          chunk_id INTEGER NOT NULL,
          dim INTEGER NOT NULL,
          vec BLOB NOT NULL,
          model TEXT NOT NULL,
          created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
          UNIQUE(chunk_id),
          FOREIGN KEY (chunk_id) REFERENCES chunks(id) ON DELETE CASCADE
        );

        -- FTS5 virtual table (external content backed by chunks)
        CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
          text,
          content='chunks',
          content_rowid='id',
          tokenize='porter unicode61'
        );

        -- Indexes
        CREATE INDEX IF NOT EXISTS idx_docs_uri ON docs(uri);
        CREATE INDEX IF NOT EXISTS idx_docs_hash ON docs(hash);
        CREATE INDEX IF NOT EXISTS idx_docs_created_at ON docs(created_at);
        CREATE INDEX IF NOT EXISTS idx_chunks_doc_id ON chunks(doc_id);
        CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(hash);
        CREATE INDEX IF NOT EXISTS idx_chunks_section ON chunks(section);
        CREATE INDEX IF NOT EXISTS idx_chunk_vecs_model ON chunk_vecs(model);

        -- Triggers keeping the FTS index in sync. External-content FTS5
        -- tables must be updated with the special 'delete' command (which
        -- carries the old values) rather than plain DELETE statements,
        -- because by the time the trigger fires the content row is gone.
        CREATE TRIGGER IF NOT EXISTS chunks_fts_insert AFTER INSERT ON chunks BEGIN
          INSERT INTO chunks_fts(rowid, text) VALUES (new.id, new.text);
        END;

        CREATE TRIGGER IF NOT EXISTS chunks_fts_delete AFTER DELETE ON chunks BEGIN
          INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES ('delete', old.id, old.text);
        END;

        CREATE TRIGGER IF NOT EXISTS chunks_fts_update AFTER UPDATE ON chunks BEGIN
          INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES ('delete', old.id, old.text);
          INSERT INTO chunks_fts(rowid, text) VALUES (new.id, new.text);
        END;

        CREATE TRIGGER IF NOT EXISTS docs_updated_at AFTER UPDATE ON docs BEGIN
          UPDATE docs SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
        END;
      `);

      await this.setSchemaVersion(1, 'Base schema with docs, chunks, embeddings, and FTS');
    }
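    // Migrations 2 and 3 below are idempotent backfills: they matter only for
    // databases created before migration 1 included these columns. Each one
    // checks PRAGMA table_info first because ALTER TABLE ADD COLUMN fails if
    // the column already exists.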
    // Migration 2: add the updated_at column if it is missing
    if (currentVersion < 2) {
      this.logger.info('Applying migration 2: Add updated_at column to docs');
      try {
        // Check whether the updated_at column already exists
        const columnCheck = this.db.prepare('PRAGMA table_info(docs)').all();
        const hasUpdatedAt = columnCheck.some((col: any) => col.name === 'updated_at');

        if (!hasUpdatedAt) {
          this.db.exec('ALTER TABLE docs ADD COLUMN updated_at DATETIME');
          // Backfill existing rows
          this.db.exec('UPDATE docs SET updated_at = created_at WHERE updated_at IS NULL');
          this.logger.info('Added updated_at column to docs table');
        } else {
          this.logger.info('updated_at column already exists in docs table');
        }
      } catch (error) {
        const errorObj = error instanceof Error ? error : new Error(String(error));
        this.logger.warn('Migration 2 warning:', { error: errorObj.message });
      }
      await this.setSchemaVersion(2, 'Added updated_at column to docs table');
    }

    // Migration 3: add missing columns to the chunks table
    if (currentVersion < 3) {
      this.logger.info('Applying migration 3: Add missing columns to chunks table');
      try {
        // Check whether the token_count column already exists
        const columnCheck = this.db.prepare('PRAGMA table_info(chunks)').all();
        const hasTokenCount = columnCheck.some((col: any) => col.name === 'token_count');

        if (!hasTokenCount) {
          this.db.exec('ALTER TABLE chunks ADD COLUMN token_count INTEGER DEFAULT 0');
          this.logger.info('Added token_count column to chunks table');
        } else {
          this.logger.info('token_count column already exists in chunks table');
        }
      } catch (error) {
        const errorObj = error instanceof Error ? error : new Error(String(error));
        this.logger.warn('Migration 3 warning:', { error: errorObj.message });
      }
      await this.setSchemaVersion(3, 'Added missing columns to chunks table');
    }

    this.logger.info('Database migrations completed');
  }

  private async getSchemaVersion(): Promise<number> {
    if (!this.db) return 0;
    try {
      const result = this.db.prepare('SELECT MAX(version) as version FROM schema_migrations').get() as any;
      return result?.version || 0;
    } catch {
      return 0;
    }
  }

  private async setSchemaVersion(version: number, description: string): Promise<void> {
    if (!this.db) return;
    try {
      this.db.prepare(`
        INSERT OR REPLACE INTO schema_migrations (version, description, applied_at)
        VALUES (?, ?, CURRENT_TIMESTAMP)
      `).run(version, description);
      this.logger.info(`Schema version updated to ${version}: ${description}`);
    } catch (error) {
      const errorObj = error instanceof Error ? error : new Error(String(error));
      this.logger.error('Failed to update schema version:', errorObj);
    }
  }

  private async prepareStatements(): Promise<void> {
    if (!this.db) return;

    this.statements.getDocument = this.db.prepare(
      'SELECT * FROM docs WHERE uri = ? LIMIT 1'
    );

    this.statements.addDocument = this.db.prepare(`
      INSERT INTO docs (uri, title, mtime, hash, metadata)
      VALUES (?, ?, ?, ?, ?)
    `);

    this.statements.updateDocument = this.db.prepare(`
      UPDATE docs
      SET title = ?, mtime = ?, hash = ?, metadata = ?, updated_at = CURRENT_TIMESTAMP
      WHERE uri = ?
    `);

    this.statements.deleteDocument = this.db.prepare(
      'DELETE FROM docs WHERE uri = ?'
    );

    this.statements.getChunks = this.db.prepare(
      'SELECT * FROM chunks WHERE doc_id = ? ORDER BY offset'
    );

    this.statements.searchFTS = this.db.prepare(`
      SELECT c.text, c.section, d.uri, d.title, bm25(chunks_fts) as score
      FROM chunks_fts
      JOIN chunks c ON chunks_fts.rowid = c.id
      JOIN docs d ON c.doc_id = d.id
      WHERE chunks_fts MATCH ?
      ORDER BY score
      LIMIT ? OFFSET ?
    `);

    this.statements.searchSimple = this.db.prepare(`
      SELECT c.text, c.section, d.uri, d.title, 1.0 as score
      FROM chunks c
      JOIN docs d ON c.doc_id = d.id
      WHERE c.text LIKE ?
      ORDER BY c.created_at DESC
      LIMIT ? OFFSET ?
    `);
  }
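  // The user query below is passed straight to FTS5 MATCH, so operators such
  // as AND, OR, NEAR and quoted phrases work as documented for FTS5; a
  // syntactically invalid query throws, which is why performFTSSearch()
  // catches the error and lets search() fall back to the LIKE path.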
  private async performFTSSearch(query: string, limit: number, offset: number): Promise<SearchResult[]> {
    try {
      const results = this.statements.searchFTS.all(query, limit, offset) as any[];
      return results.map(r => ({
        text: r.text,
        section: r.section,
        uri: r.uri,
        title: r.title,
        score: Math.abs(r.score) // BM25 can be negative
      }));
    } catch (error) {
      const errorObj = error instanceof Error ? error : new Error(String(error));
      this.logger.warn('FTS search failed, falling back to simple search', { error: errorObj.message });
      return [];
    }
  }

  private async performSimpleSearch(query: string, limit: number, offset: number): Promise<SearchResult[]> {
    if (!this.statements.searchSimple) return [];

    const results = this.statements.searchSimple.all(`%${query}%`, limit, offset) as any[];
    return results.map(r => ({
      text: r.text,
      section: r.section,
      uri: r.uri,
      title: r.title,
      score: r.score
    }));
  }

  private chunkText(text: string, options: ChunkingOptions = {}): Array<{
    text: string;
    section: string;
    offset: number;
  }> {
    const {
      maxChunkSize = 1000,
      overlapSize = 100,
      preserveStructure = true,
      minChunkSize = 100
    } = options;

    if (preserveStructure) {
      // Smart chunking that preserves document structure
      return this.smartChunkText(text, maxChunkSize, overlapSize, minChunkSize);
    } else {
      // Simple sliding-window chunking
      return this.simpleChunkText(text, maxChunkSize, overlapSize, minChunkSize);
    }
  }

  private smartChunkText(text: string, maxChunkSize: number, overlapSize: number, minChunkSize: number) {
    const chunks: Array<{ text: string; section: string; offset: number }> = [];
    const lines = text.split('\n');
    let currentChunk = '';
    let currentSection = 'main';
    let offset = 0;

    for (const line of lines) {
      const trimmedLine = line.trim();

      // Detect section headers (markdown headings or "Title:"-style lines)
      if (trimmedLine.match(/^#{1,6}\s+/) || trimmedLine.match(/^[A-Z][^.]*:$/)) {
        // Save current chunk if it's substantial
        if (currentChunk.trim().length >= minChunkSize) {
          chunks.push({
            text: currentChunk.trim(),
            section: currentSection,
            offset: offset - currentChunk.length
          });
        }
        currentSection = trimmedLine.replace(/^#+\s*/, '').replace(/:$/, '').toLowerCase();
        currentChunk = trimmedLine + '\n';
      } else if ((currentChunk + line + '\n').length > maxChunkSize && currentChunk.length >= minChunkSize) {
        // Chunk is getting too large, save it
        chunks.push({
          text: currentChunk.trim(),
          section: currentSection,
          offset: offset - currentChunk.length
        });
        // Start new chunk with overlap
        const overlapText = this.getOverlapText(currentChunk, overlapSize);
        currentChunk = overlapText + line + '\n';
      } else {
        currentChunk += line + '\n';
      }

      offset += line.length + 1;
    }

    // Add final chunk
    if (currentChunk.trim().length >= minChunkSize) {
      chunks.push({
        text: currentChunk.trim(),
        section: currentSection,
        offset: offset - currentChunk.length
      });
    }

    return chunks;
  }
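  // Sliding-window illustration (hypothetical numbers): with maxChunkSize =
  // 1000 and overlapSize = 100, a 2,500-character text with no sentence
  // breaks yields windows starting near offsets 0, 900 and 1800, so
  // consecutive chunks share roughly 100 characters of context.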
  private simpleChunkText(text: string, maxChunkSize: number, overlapSize: number, minChunkSize: number) {
    const chunks: Array<{ text: string; section: string; offset: number }> = [];
    let offset = 0;

    while (offset < text.length) {
      const chunkEnd = Math.min(offset + maxChunkSize, text.length);
      let chunk = text.substring(offset, chunkEnd);

      // Try to break at a sentence boundary (indices are relative to the chunk)
      if (chunkEnd < text.length) {
        const lastSentence = chunk.lastIndexOf('.');
        const lastNewline = chunk.lastIndexOf('\n');
        const breakPoint = Math.max(lastSentence, lastNewline);
        if (breakPoint > minChunkSize) {
          chunk = text.substring(offset, offset + breakPoint + 1);
        }
      }

      if (chunk.trim().length >= minChunkSize) {
        chunks.push({
          text: chunk.trim(),
          section: 'main',
          offset
        });
      }

      // Advance by at least one character so the loop terminates even when
      // the chunk is shorter than the configured overlap
      offset += Math.max(chunk.length - overlapSize, 1);
    }

    return chunks;
  }

  private getOverlapText(text: string, overlapSize: number): string {
    if (text.length <= overlapSize) return text;
    const overlap = text.substring(text.length - overlapSize);
    const lastSentence = overlap.lastIndexOf('.');
    return lastSentence > 0 ? overlap.substring(lastSentence + 1) : overlap;
  }

  private generateSecureHash(text: string): string {
    return crypto.createHash('sha256').update(text, 'utf8').digest('hex').substring(0, 16);
  }

  private estimateTokenCount(text: string): number {
    // Rough estimate: ~4 characters per token for English text
    return Math.ceil(text.length / 4);
  }

  private generateSnippet(text: string, query: string, maxLength: number): string {
    const queryTerms = query.toLowerCase().split(/\s+/);
    const textLower = text.toLowerCase();

    // Find the best position to start the snippet
    let bestPosition = 0;
    let bestScore = 0;
    for (let i = 0; i <= text.length - maxLength; i += 50) {
      const snippet = textLower.substring(i, i + maxLength);
      const score = queryTerms.reduce((acc, term) => {
        return acc + (snippet.includes(term) ? 1 : 0);
      }, 0);
      if (score > bestScore) {
        bestScore = score;
        bestPosition = i;
      }
    }

    let snippet = text.substring(bestPosition, bestPosition + maxLength);

    // Try to start and end at word boundaries
    if (bestPosition > 0) {
      const firstSpace = snippet.indexOf(' ');
      if (firstSpace > 0) {
        snippet = snippet.substring(firstSpace + 1);
      }
    }
    const lastSpace = snippet.lastIndexOf(' ');
    if (lastSpace > 0 && bestPosition + maxLength < text.length) {
      snippet = snippet.substring(0, lastSpace);
    }

    return snippet + (bestPosition + snippet.length < text.length ? '...' : '');
  }

  private extractHighlights(text: string, query: string): string[] {
    const queryTerms = query.toLowerCase().split(/\s+/);
    const highlights: string[] = [];
    const textLower = text.toLowerCase();

    queryTerms.forEach(term => {
      let startIndex = 0;
      while (true) {
        const index = textLower.indexOf(term, startIndex);
        if (index === -1) break;

        // Extract context around the term
        const contextStart = Math.max(0, index - 30);
        const contextEnd = Math.min(text.length, index + term.length + 30);
        highlights.push(text.substring(contextStart, contextEnd));

        startIndex = index + term.length;
      }
    });

    return highlights;
  }

  private checkFTSEnabled(): boolean {
    try {
      if (!this.db) return false;
      // get() returns undefined when the table does not exist, so inspect the
      // row itself rather than relying on an exception
      const row = this.db
        .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='chunks_fts'")
        .get();
      return row !== undefined;
    } catch {
      return false;
    }
  }
}

// Supporting interfaces and classes
export interface Logger {
  info(message: string, meta?: Record<string, unknown>): void;
  warn(message: string, meta?: Record<string, unknown>): void;
  error(message: string, error?: Error, meta?: Record<string, unknown>): void;
}

export interface MetricsCollector {
  recordQuery(sql: string, duration: number): void;
  recordOperation(operation: string): void;
  recordError(operation: string): void;
}

export class ConsoleLogger implements Logger {
  info(message: string, meta?: Record<string, unknown>): void {
    console.log(`ℹ️ ${message}`, meta ? JSON.stringify(meta, null, 2) : '');
  }
  warn(message: string, meta?: Record<string, unknown>): void {
    console.warn(`⚠️ ${message}`, meta ? JSON.stringify(meta, null, 2) : '');
  }
  error(message: string, error?: Error, meta?: Record<string, unknown>): void {
    console.error(`❌ ${message}`, error?.message || error, meta ? JSON.stringify(meta, null, 2) : '');
  }
}
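// ConsoleLogger and NoOpMetrics are zero-dependency defaults; production code
// would typically inject a structured logger and a real metrics backend
// through the SQLiteClient constructor instead.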
export class NoOpMetrics implements MetricsCollector {
  recordQuery(): void {}
  recordOperation(): void {}
  recordError(): void {}
}

// Usage example:
/*
const client = new SQLiteClient('./data/rag.db');

// Initialize the client
const initResult = await client.initialize();
if (initResult.isErr()) {
  console.error('Failed to initialize client:', initResult.error);
  return;
}

// Add a document with smart chunking
const addResult = await client.addDocument(
  'https://example.com/doc1',
  'My Document',
  'This is the content...',
  JSON.stringify({ category: 'tech', author: 'John Doe' }),
  {
    maxChunkSize: 800,
    overlapSize: 50,
    preserveStructure: true,
    splitOnSentences: true
  }
);
if (addResult.isErr()) {
  console.error('Failed to add document:', addResult.error);
  return;
}

// Search with advanced options
const searchResult = await client.search('example query', {
  limit: 10,
  includeSnippets: true,
  highlightTerms: true,
  minScore: 0.5,
  filters: { category: 'tech' }
});
if (searchResult.isErr()) {
  console.error('Search failed:', searchResult.error);
  return;
}
console.log('Search results:', searchResult.value);

// Clean up
await client.close();
*/
