Skip to main content
Glama
index.ts (9.57 kB)
// VectorStore implementation with LanceDB integration

import { type Connection, type Table, connect } from '@lancedb/lancedb'

// ============================================
// Type Definitions
// ============================================

/**
 * VectorStore configuration
 */
export interface VectorStoreConfig {
  /** LanceDB database path */
  dbPath: string
  /** Table name */
  tableName: string
}

/**
 * Document metadata
 */
export interface DocumentMetadata {
  /** File name */
  fileName: string
  /** File size in bytes */
  fileSize: number
  /** File type (extension) */
  fileType: string
}

/**
 * Vector chunk
 */
export interface VectorChunk {
  /** Chunk ID (UUID) */
  id: string
  /** File path (absolute) */
  filePath: string
  /** Chunk index (zero-based) */
  chunkIndex: number
  /** Chunk text */
  text: string
  /** Embedding vector (384 dimensions) */
  vector: number[]
  /** Metadata */
  metadata: DocumentMetadata
  /** Ingestion timestamp (ISO 8601 format) */
  timestamp: string
}

/**
 * Search result
 */
export interface SearchResult {
  /** File path */
  filePath: string
  /** Chunk index */
  chunkIndex: number
  /** Chunk text */
  text: string
  /**
   * Distance reported by LanceDB for this row (`_distance`).
   * Closer to 0 means more similar; this is NOT a "higher is better" similarity.
   */
  score: number
  /** Metadata */
  metadata: DocumentMetadata
}

// ============================================
// Internal Constants and Helpers
// ============================================

/** Number of dimensions a query vector must have. */
const QUERY_VECTOR_DIMENSION = 384

/** Inclusive lower bound accepted for the `limit` argument of `search`. */
const MIN_SEARCH_LIMIT = 1

/** Inclusive upper bound accepted for the `limit` argument of `search`. */
const MAX_SEARCH_LIMIT = 20

/**
 * Lower-cased message fragments that mark a delete failure as harmless
 * (nothing matched, or nothing to delete from).
 */
const BENIGN_DELETE_ERROR_MARKERS = ['not found', 'does not exist', 'no matching'] as const

/**
 * Normalize an arbitrary thrown value into an Error instance.
 *
 * JavaScript allows throwing any value, so a `catch` binding is not guaranteed
 * to be an Error. Error instances are returned unchanged.
 *
 * @param value - Value received by a `catch` clause
 * @returns The value itself if it is an Error, otherwise a new Error wrapping its string form
 */
function toError(value: unknown): Error {
  return value instanceof Error ? value : new Error(String(value))
}

// ============================================
// Error Classes
// ============================================

/**
 * Database error
 *
 * Raised for every failure surfaced by VectorStore. The underlying failure,
 * when there is one, is available via `cause`.
 */
export class DatabaseError extends Error {
  constructor(
    message: string,
    public readonly cause?: Error
  ) {
    super(message)
    this.name = 'DatabaseError'
  }
}

// ============================================
// VectorStore Class
// ============================================

/**
 * Vector storage class using LanceDB
 *
 * Responsibilities:
 * - LanceDB operations (insert, delete, search)
 * - Metadata management (file listing, status)
 *
 * Note: `deleteChunks` and `insertChunks` are independent operations; this class
 * does not wrap a delete→insert sequence in a transaction, so callers that need
 * atomic replacement must handle failures between the two calls themselves.
 *
 * Informational messages are written with `console.error`, i.e. to stderr
 * (presumably to keep stdout free for the host protocol — confirm with the caller).
 */
export class VectorStore {
  private db: Connection | null = null
  private table: Table | null = null
  private readonly config: VectorStoreConfig

  constructor(config: VectorStoreConfig) {
    this.config = config
  }

  /**
   * Initialize LanceDB and open the configured table if it already exists.
   *
   * When the table does not exist yet it is NOT created here; creation is
   * deferred to the first `insertChunks` call.
   *
   * @throws {DatabaseError} If connecting or opening the table fails
   */
  async initialize(): Promise<void> {
    try {
      // Connect to LanceDB
      this.db = await connect(this.config.dbPath)

      // Check table existence and open it if present
      const tableNames = await this.db.tableNames()
      if (tableNames.includes(this.config.tableName)) {
        this.table = await this.db.openTable(this.config.tableName)
        console.error(`VectorStore: Opened existing table "${this.config.tableName}"`)
      } else {
        // Schema is derived from the first inserted batch, so nothing to create yet
        console.error(
          `VectorStore: Table "${this.config.tableName}" will be created on first data insertion`
        )
      }

      console.error(`VectorStore initialized: ${this.config.dbPath}`)
    } catch (error) {
      throw new DatabaseError('Failed to initialize VectorStore', toError(error))
    }
  }

  /**
   * Delete all chunks for specified file path
   *
   * Returns normally when the table does not exist or when the failure message
   * indicates there was nothing to delete.
   *
   * @param filePath - File path (absolute)
   * @throws {DatabaseError} If deletion fails for any other reason
   */
  async deleteChunks(filePath: string): Promise<void> {
    if (!this.table) {
      // If table doesn't exist, no deletion targets, return normally
      console.error('VectorStore: Skipping deletion as table does not exist')
      return
    }

    try {
      // Escape single quotes so the path cannot terminate the SQL string literal
      const escapedFilePath = filePath.replace(/'/g, "''")

      // Field names are case-sensitive; backticks are required for camelCase fields
      await this.table.delete(`\`filePath\` = '${escapedFilePath}'`)
      console.error(`VectorStore: Deleted chunks for file "${filePath}"`)
    } catch (error) {
      console.warn(`VectorStore: Error occurred while deleting file "${filePath}":`, error)

      // Normalize first: a non-Error throwable has no `.message` to inspect
      const cause = toError(error)
      const errorMessage = cause.message.toLowerCase()
      const isBenign = BENIGN_DELETE_ERROR_MARKERS.some((marker) => errorMessage.includes(marker))

      // Missing targets / empty table are not failures; everything else is
      if (!isBenign) {
        throw new DatabaseError(`Failed to delete chunks for file: ${filePath}`, cause)
      }
    }
  }

  /**
   * Batch insert vector chunks
   *
   * Creates the table from this batch when it does not exist yet. An empty
   * array is a no-op.
   *
   * @param chunks - Array of vector chunks
   * @throws {DatabaseError} If the store is not initialized, or if LanceDB rejects the insert
   */
  async insertChunks(chunks: VectorChunk[]): Promise<void> {
    if (chunks.length === 0) {
      return
    }

    // Checked outside the try block so this specific message reaches the caller
    // instead of being re-wrapped as a generic insert failure.
    const { db } = this
    if (!this.table && !db) {
      throw new DatabaseError('VectorStore is not initialized. Call initialize() first.')
    }

    // LanceDB's createTable/add APIs accept data as Record<string, unknown>[]
    const records: Record<string, unknown>[] = chunks.map((chunk) => ({ ...chunk }))

    try {
      if (this.table) {
        // Add data to existing table
        await this.table.add(records)
      } else if (db) {
        // Create table on first insertion (schema is inferred from the records)
        this.table = await db.createTable(this.config.tableName, records)
        console.error(`VectorStore: Created table "${this.config.tableName}"`)
      }

      console.error(`VectorStore: Inserted ${chunks.length} chunks`)
    } catch (error) {
      throw new DatabaseError('Failed to insert chunks', toError(error))
    }
  }

  /**
   * Execute vector search
   *
   * Returns an empty array, without validating the arguments, when the table
   * does not exist.
   *
   * @param queryVector - Query vector (384 dimensions)
   * @param limit - Number of results to retrieve: an integer from 1 to 20 (default 5)
   * @returns Array of search results in the order LanceDB returns them; `score`
   *          holds the LanceDB distance (closer to 0 means more similar)
   * @throws {DatabaseError} If an argument is invalid or the search fails
   */
  async search(queryVector: number[], limit = 5): Promise<SearchResult[]> {
    if (!this.table) {
      // Return empty array if table doesn't exist
      console.error('VectorStore: Returning empty results as table does not exist')
      return []
    }

    if (queryVector.length !== QUERY_VECTOR_DIMENSION) {
      throw new DatabaseError(
        `Invalid query vector dimension: expected ${QUERY_VECTOR_DIMENSION}, got ${queryVector.length}`
      )
    }

    // Number.isInteger also rejects NaN and Infinity, which plain range
    // comparisons would let through.
    if (!Number.isInteger(limit) || limit < MIN_SEARCH_LIMIT || limit > MAX_SEARCH_LIMIT) {
      throw new DatabaseError(
        `Invalid limit: expected ${MIN_SEARCH_LIMIT}-${MAX_SEARCH_LIMIT}, got ${limit}`
      )
    }

    try {
      // Use LanceDB's vector search API
      const results = await this.table.vectorSearch(queryVector).limit(limit).toArray()

      // Convert to SearchResult format
      return results.map((result) => ({
        filePath: result.filePath as string,
        chunkIndex: result.chunkIndex as number,
        text: result.text as string,
        // LanceDB returns a distance (closer to 0 means more similar)
        score: result._distance as number,
        metadata: result.metadata as DocumentMetadata,
      }))
    } catch (error) {
      throw new DatabaseError('Failed to search vectors', toError(error))
    }
  }

  /**
   * Get list of ingested files
   *
   * Reads every record in the table and groups them by file path.
   *
   * @returns One entry per file path with its chunk count and the greatest
   *          timestamp string seen among its chunks; empty if the table does not exist
   * @throws {DatabaseError} If reading the table fails
   */
  async listFiles(): Promise<{ filePath: string; chunkCount: number; timestamp: string }[]> {
    if (!this.table) {
      return [] // Return empty array if table doesn't exist
    }

    try {
      // Retrieve all records
      const allRecords = await this.table.query().toArray()

      // Group by file path
      const fileMap = new Map<string, { chunkCount: number; timestamp: string }>()
      for (const record of allRecords) {
        const filePath = record.filePath as string
        const timestamp = record.timestamp as string

        const fileInfo = fileMap.get(filePath)
        if (!fileInfo) {
          fileMap.set(filePath, { chunkCount: 1, timestamp })
          continue
        }

        fileInfo.chunkCount += 1
        // String comparison; yields the most recent timestamp assuming all
        // values share one ISO 8601 format — TODO confirm against the ingester
        if (timestamp > fileInfo.timestamp) {
          fileInfo.timestamp = timestamp
        }
      }

      // Convert Map to array of objects
      return Array.from(fileMap, ([filePath, info]) => ({
        filePath,
        chunkCount: info.chunkCount,
        timestamp: info.timestamp,
      }))
    } catch (error) {
      throw new DatabaseError('Failed to list files', toError(error))
    }
  }

  /**
   * Get system status
   *
   * Reads every record in the table to count chunks and distinct file paths.
   *
   * @returns Document count (distinct file paths), chunk count, heap usage in MB,
   *          and process uptime in seconds. When the table does not exist the
   *          counts and `memoryUsage` are reported as 0.
   * @throws {DatabaseError} If reading the table fails
   */
  async getStatus(): Promise<{
    documentCount: number
    chunkCount: number
    memoryUsage: number
    uptime: number
  }> {
    if (!this.table) {
      return {
        documentCount: 0,
        chunkCount: 0,
        memoryUsage: 0,
        uptime: process.uptime(),
      }
    }

    try {
      // Retrieve all records
      const allRecords = await this.table.query().toArray()
      const chunkCount = allRecords.length

      // Count unique file paths
      const uniqueFilePaths = new Set(allRecords.map((record) => record.filePath as string))
      const documentCount = uniqueFilePaths.size

      // Get memory usage (bytes → MB)
      const memoryUsage = process.memoryUsage().heapUsed / 1024 / 1024

      // Get uptime (in seconds)
      const uptime = process.uptime()

      return {
        documentCount,
        chunkCount,
        memoryUsage,
        uptime,
      }
    } catch (error) {
      throw new DatabaseError('Failed to get status', toError(error))
    }
  }
}

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/shinpr/mcp-local-rag'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.