Knowledge Base MCP Server

  • src
// FaissIndexManager.ts import * as fsp from 'fs/promises'; import * as fs from 'fs'; import * as path from 'path'; import { HuggingFaceInferenceEmbeddings } from "@langchain/community/embeddings/hf"; import { FaissStore } from "@langchain/community/vectorstores/faiss"; import { Document } from "@langchain/core/documents"; import { MarkdownTextSplitter } from "langchain/text_splitter"; import { calculateSHA256 } from './utils.js'; import { KNOWLEDGE_BASES_ROOT_DIR, FAISS_INDEX_PATH, HUGGINGFACE_MODEL_NAME } from './config.js'; const MODEL_NAME_FILE = path.join(FAISS_INDEX_PATH, 'model_name.txt'); export class FaissIndexManager { private faissIndex: FaissStore | null = null; private embeddings: HuggingFaceInferenceEmbeddings; private modelName: string; constructor() { const huggingFaceApiKey = process.env.HUGGINGFACE_API_KEY; if (!huggingFaceApiKey) { throw new Error('HUGGINGFACE_API_KEY environment variable is required'); } this.modelName = HUGGINGFACE_MODEL_NAME; this.embeddings = new HuggingFaceInferenceEmbeddings({ apiKey: huggingFaceApiKey, model: this.modelName, }); console.log("Initializing FaissIndexManager"); } async initialize(): Promise<void> { try { if (!fs.existsSync(FAISS_INDEX_PATH)) { await fsp.mkdir(FAISS_INDEX_PATH, { recursive: true }); } const indexFilePath = path.join(FAISS_INDEX_PATH, "faiss.index"); let storedModelName: string | null = null; try { storedModelName = fs.existsSync(MODEL_NAME_FILE) ? (await fsp.readFile(MODEL_NAME_FILE, 'utf-8')) : null; } catch (error) { console.warn("Error reading stored model name:", error); } if (storedModelName && storedModelName !== this.modelName) { console.warn(`Model name has changed from ${storedModelName} to ${this.modelName}. Recreating index.`); if (fs.existsSync(indexFilePath)) { await fsp.unlink(indexFilePath); console.log("Existing FAISS index deleted."); } this.faissIndex = null; // Ensure index is recreated } if (fs.existsSync(indexFilePath)) { console.log("Loading existing FAISS index from:", indexFilePath); this.faissIndex = await FaissStore.load(indexFilePath, this.embeddings); console.log("FAISS index loaded."); } else { console.log("FAISS index file not found at", indexFilePath, ". It will be created if documents are available."); this.faissIndex = null; } // Save the current model name for future checks await fsp.writeFile(MODEL_NAME_FILE, this.modelName, 'utf-8'); } catch (error: any) { console.error("Error initializing FAISS index:", error); console.error(error.stack); throw error; } } /** * Updates the FAISS index. * If `specificKnowledgeBase` is provided, only files from that knowledge base will be checked and updated. * If no update occurs (and the FAISS index remains uninitialized) but there are documents, * then the index is built from all available files. */ async updateIndex(specificKnowledgeBase?: string): Promise<void> { console.log("Updating FAISS index..."); try { let knowledgeBases: string[] = []; if (specificKnowledgeBase) { knowledgeBases.push(specificKnowledgeBase); } else { knowledgeBases = await fsp.readdir(KNOWLEDGE_BASES_ROOT_DIR); } let anyFileProcessed = false; // Process each knowledge base directory. for (const knowledgeBaseName of knowledgeBases) { if (knowledgeBaseName.startsWith('.')) { console.log(`Skipping dot folder: ${knowledgeBaseName}`); continue; } const knowledgeBasePath = path.join(KNOWLEDGE_BASES_ROOT_DIR, knowledgeBaseName); const files = await fsp.readdir(knowledgeBasePath); for (const file of files) { // Skip hidden files (starting with '.') and dot-folders. if (file.startsWith('.')) { console.log(`Skipping dot file/folder: ${file}`); continue; } const filePath = path.join(knowledgeBasePath, file); let stats; try { stats = await fsp.stat(filePath); } catch (error) { console.error(`Error getting stats for ${filePath}: ${error}`); continue; } if (!stats.isFile()) { console.log(`Skipping non-file: ${filePath}`); continue; } anyFileProcessed = true; const fileHash = await calculateSHA256(filePath); const indexDirPath = path.join(knowledgeBasePath, ".index"); const indexFilePath = path.join(indexDirPath, file); if (!fs.existsSync(indexDirPath)) { await fsp.mkdir(indexDirPath, { recursive: true }); } let storedHash: string | null = null; try { const buffer = await fsp.readFile(indexFilePath); storedHash = buffer.toString('utf-8'); } catch (error) { // The hash file may not exist yet; that's fine. } // If the file is new or has changed, process it. if (fileHash !== storedHash) { console.log(`File ${filePath} has changed. Updating index...`); let content = ""; try { content = await fsp.readFile(filePath, 'utf-8'); } catch (error: any) { console.error(`Error reading file ${filePath}:`, error); continue; } let documentsToAdd: Document[] = []; if (path.extname(file).toLowerCase() === '.md') { const splitter = new MarkdownTextSplitter({ chunkSize: 1000, chunkOverlap: 200, keepSeparator: false, }); documentsToAdd = await splitter.createDocuments([content], [{ source: filePath }]); } else { documentsToAdd = [ new Document({ pageContent: content, metadata: { source: filePath }, }), ]; } if (documentsToAdd.length > 0) { if (this.faissIndex === null) { console.log("Creating new FAISS index from texts..."); this.faissIndex = await FaissStore.fromTexts( documentsToAdd.map(doc => doc.pageContent), documentsToAdd.map(doc => doc.metadata), this.embeddings ); } else { await this.faissIndex.addDocuments(documentsToAdd); } const indexFileSavePath = path.join(FAISS_INDEX_PATH, "faiss.index"); try { await this.faissIndex.save(indexFileSavePath); console.log("FAISS index saved successfully to", indexFileSavePath); } catch (saveError: any) { if (saveError.code === 'EISDIR') { console.error(`Error: Attempted to save FAISS index to a directory (${FAISS_INDEX_PATH}) instead of a file.`); } else { console.error("Error saving FAISS index:", saveError); } throw saveError; } await fsp.writeFile(indexFilePath, fileHash, { encoding: 'utf-8' }); console.log(`Index updated for ${filePath}.`); } else { console.log(`No documents generated from ${filePath}. Skipping index update.`); } } else { console.log(`File ${filePath} unchanged, skipping.`); } } } // If at least one file was processed but no changes triggered index creation, // then attempt to build the FAISS index from all available documents. if (this.faissIndex === null && anyFileProcessed) { console.log("No updates detected but FAISS index is not initialized. Building index from all available documents..."); let allDocuments: Document[] = []; for (const knowledgeBaseName of knowledgeBases) { const knowledgeBasePath = path.join(KNOWLEDGE_BASES_ROOT_DIR, knowledgeBaseName); const files = await fsp.readdir(knowledgeBasePath); for (const file of files) { if (file.startsWith('.')) continue; const filePath = path.join(knowledgeBasePath, file); let stats; try { stats = await fsp.stat(filePath); } catch (error) { console.error(`Error getting stats for ${filePath}: ${error}`); continue; } if (!stats.isFile()) continue; let content = ""; try { content = await fsp.readFile(filePath, 'utf-8'); } catch (error) { console.error(`Error reading file ${filePath}:`, error); continue; } let documents: Document[]; if (path.extname(file).toLowerCase() === '.md') { const splitter = new MarkdownTextSplitter({ chunkSize: 1000, chunkOverlap: 200, keepSeparator: false, }); documents = await splitter.createDocuments([content], [{ source: filePath }]); } else { documents = [ new Document({ pageContent: content, metadata: { source: filePath }, }), ]; } if (documents.length > 0) { allDocuments.push(...documents); } } } if (allDocuments.length > 0) { this.faissIndex = await FaissStore.fromTexts( allDocuments.map(doc => doc.pageContent), allDocuments.map(doc => doc.metadata), this.embeddings ); const indexFileSavePath = path.join(FAISS_INDEX_PATH, "faiss.index"); try { await this.faissIndex.save(indexFileSavePath); console.log("FAISS index saved successfully to", indexFileSavePath); } catch (saveError: any) { console.error("Error saving FAISS index:", saveError); throw saveError; } } } console.log("FAISS index update process completed."); } catch (error: any) { console.error("Error updating FAISS index:", error); console.error(error.stack); throw error; } } /** * Performs a similarity search and returns the results with their similarity scores. */ async similaritySearch(query: string, k: number, threshold: number = 2) { if (!this.faissIndex) { throw new Error("FAISS index is not initialized"); } const filter = { score: { $lte: threshold } }; // Use the vector store's method that returns [DocumentInterface, number] tuples. const resultsWithScore = await this.faissIndex.similaritySearchWithScore(query, k, filter); // Map the tuple into an object that includes the score. return resultsWithScore.map(([doc, score]) => ({ ...doc, score, })); } }