DocuMCP

Overview Schema Related Servers Score Discussions

IndexDirTool.ts•12.6 KiB

import path from "path";
import chalk from "chalk";
import * as filesystem from "../services/filesystem.js";
import {
  createEmbedding,
  getEmbeddingDimension,
} from "../services/embeddings.js";
import { logger } from "../services/logger.js";
import {
  collectionExists,
  createCollection,
  createPoint,
  upsertPoints,
} from "../services/vectordb.js";
import { createSmartChunks } from "../helper/chunk.js";

/** Default chunk size used when the caller does not supply one. */
const DEFAULT_CHUNK_SIZE = 512;
/** Default overlap between consecutive chunks. */
const DEFAULT_CHUNK_OVERLAP = 50;
/** Default vector collection that indexed chunks are written to. */
const DEFAULT_COLLECTION_NAME = "codebase";
/** Rough estimate used to convert a character budget into a paragraph count. */
const CHARS_PER_PARAGRAPH = 150;

/**
 * Result interface for indexed directory
 */
interface IndexDirectoryResult {
  success: boolean;
  dirPath: string;
  recursive: boolean;
  processedFiles: number;
  totalChunks: number;
  totalEmbeddingsGenerated: number;
  totalErrors: number;
  // Holds per-file results, or a single summary string when more than 10
  // files were processed, hence the loose element type.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  fileResults: any[];
  embeddingDimension: number;
  filteredExtensions: string[] | "all";
  collectionName: string;
  includeHidden: boolean;
  message: string;
}

/**
 * Outcome of indexing a single file.
 *
 * Failures carry `error` and `success: false`; every other outcome carries the
 * chunk/embedding counters instead.
 */
interface FileIndexResult {
  filePath: string;
  success?: boolean;
  error?: string;
  totalChunks?: number;
  embeddingsGenerated?: number;
  embeddingErrors?: number;
  storedInDatabase?: boolean;
  sizeInBytes?: number;
  message?: string;
}

/** Last point ID handed out by {@link nextPointId} in this process. */
let lastPointId = 0;

/**
 * Returns a numeric point ID that is strictly greater than every ID previously
 * returned in this process.
 *
 * The former `Date.now() + chunkIndex` scheme could hand the same ID to chunks
 * of different files processed a few milliseconds apart, which made the later
 * upsert overwrite the earlier point. Seeding from the clock in microsecond
 * units keeps IDs roughly time-ordered while staying well below
 * `Number.MAX_SAFE_INTEGER`.
 */
function nextPointId(): number {
  lastPointId = Math.max(lastPointId + 1, Date.now() * 1000);
  return lastPointId;
}

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Builds the tool response returned to the client when indexing fails. */
function toErrorResponse(error: unknown) {
  return {
    content: [
      {
        type: "text",
        text: JSON.stringify(
          {
            error: errorMessage(error),
            status: "failed",
          },
          null,
          2,
        ),
      },
    ],
    isError: true,
  };
}

/** Builds the per-file result reported for a file that yields no chunks. */
function emptyFileResult(
  filePath: string,
  sizeInBytes: number,
): FileIndexResult {
  return {
    filePath,
    totalChunks: 0,
    embeddingsGenerated: 0,
    embeddingErrors: 0,
    storedInDatabase: false,
    sizeInBytes,
    message: "File is empty or contains only whitespace",
  };
}

/**
 * Tool for recursively indexing all files in a directory
 */
class IndexDirTool {
  /**
   * Process a single file and store its chunks in VectorDB.
   *
   * Never throws: read failures and unexpected exceptions are reported through
   * the `error` field of the returned result.
   *
   * @param filePath Absolute path of the file to index.
   * @param collectionName Vector collection the points are upserted into.
   * @param chunkSize Chunk size forwarded to the smart chunker.
   * @param chunkOverlap Overlap between consecutive chunks.
   * @returns Per-file counters, or `{ filePath, error, success: false }`.
   */
  private async processFile(
    filePath: string,
    collectionName: string,
    chunkSize: number,
    chunkOverlap: number,
  ): Promise<FileIndexResult> {
    try {
      // Read file content
      const readResult = await filesystem.readFile(filePath, "utf-8");

      if (!readResult.success) {
        return {
          filePath,
          error: readResult.message,
          success: false,
        };
      }

      if (!readResult.data || !readResult.data.content) {
        // Report the file as empty instead of embedding placeholder text,
        // which would otherwise end up as a searchable chunk in the database.
        logger.warn("File content is empty or not found");
        return emptyFileResult(filePath, 0);
      }

      const { content, metadata: fileMetadata } = readResult.data;
      const sizeInBytes = Buffer.byteLength(content, "utf-8");

      // Create chunks using smart chunking based on file type
      const fileExtension = path.extname(filePath);
      const chunks = createSmartChunks(content, fileExtension, {
        chunkSize,
        overlapSize: chunkOverlap,
        // Rough estimate: ~150 chars per paragraph. Clamped to at least one
        // paragraph so that small chunk sizes do not request empty chunks.
        paragraphsPerChunk: Math.max(
          1,
          Math.floor(chunkSize / CHARS_PER_PARAGRAPH),
        ),
        overlapParagraphs: Math.max(
          1,
          Math.floor(chunkOverlap / CHARS_PER_PARAGRAPH),
        ),
      });

      if (chunks.length === 0) {
        return emptyFileResult(filePath, sizeInBytes);
      }

      /*logger.info(
        chalk.gray(
          `Processing: ${path.basename(filePath)} (${chunks.length} chunks)`
        )
      );*/

      // Generate embeddings for chunks
      const points: ReturnType<typeof createPoint>[] = [];
      let successCount = 0;
      let errorCount = 0;

      const filename = path.basename(filePath);
      const fileNameIdPrefix = filename.replace(/[^a-z0-9]/gi, "_");

      for (const [i, chunk] of chunks.entries()) {
        try {
          const embeddingResult = await createEmbedding(chunk);

          if (embeddingResult.error) {
            errorCount++;
            logger.warn(
              chalk.yellow(
                `Warning: Embedding failed for chunk ${i} in ${filePath}: ${String(embeddingResult.error)}`,
              ),
            );
            continue;
          }

          // Create point with metadata.
          // Use a numeric ID as required by Qdrant.
          const point = createPoint(nextPointId(), embeddingResult.embedding, {
            // Store the original filename-based ID in the payload for reference
            fileNameId: `${fileNameIdPrefix}-${i}`,
            filePath,
            chunkIndex: i,
            content: chunk,
            // Estimate that assumes uniformly sized chunks.
            // NOTE(review): smart chunking may produce variable-sized chunks,
            // so this may not be an exact offset; confirm against the chunker.
            startPosition: i * (chunkSize - chunkOverlap),
            fileType: fileExtension.slice(1),
            filename,
            extension: fileExtension,
            directory: path.dirname(filePath),
            // Spread last, so metadata from the read result takes precedence
            // over same-named fields above.
            ...fileMetadata,
          });

          points.push(point);
          successCount++;
        } catch (error) {
          errorCount++;
          logger.warn(
            chalk.yellow(
              `Warning: Error generating embedding for chunk ${i} in ${filePath}: ${errorMessage(error)}`,
            ),
          );
        }
      }

      // Store points in VectorDB
      const storedInDatabase =
        points.length > 0 ? await upsertPoints(collectionName, points) : false;

      return {
        filePath,
        totalChunks: chunks.length,
        embeddingsGenerated: successCount,
        embeddingErrors: errorCount,
        storedInDatabase,
        sizeInBytes,
      };
    } catch (error) {
      return {
        filePath,
        error: errorMessage(error),
        success: false,
      };
    }
  }

  /**
   * Indexes all files in a directory for RAG retrieval.
   *
   * Files are processed one after another; a failure in one file is counted in
   * `totalErrors` and does not stop the remaining files.
   *
   * @param dirPath Directory to index; resolved via `filesystem.resolvePath`.
   * @param recursive Whether subdirectories are listed as well.
   * @param fileExtensions Extensions to include; all files when omitted.
   * @param includeHidden Whether hidden entries are listed.
   * @param chunkSize Chunk size forwarded to the chunker.
   * @param chunkOverlap Overlap between consecutive chunks.
   * @param collectionName Vector collection to write to; created if missing.
   * @returns Aggregated counters. `fileResults` holds the per-file results, or
   *   a single summary string when more than 10 files were processed.
   * @throws Error prefixed with "Failed to index directory:" when the
   *   directory is missing, cannot be listed, or the collection cannot be
   *   created.
   */
  async indexDirectory(
    dirPath: string,
    recursive: boolean = true,
    fileExtensions?: string[],
    includeHidden: boolean = false,
    chunkSize: number = DEFAULT_CHUNK_SIZE,
    chunkOverlap: number = DEFAULT_CHUNK_OVERLAP,
    collectionName: string = DEFAULT_COLLECTION_NAME,
  ): Promise<IndexDirectoryResult> {
    try {
      const absolutePath = filesystem.resolvePath(dirPath);

      // Check if directory exists
      const dirExists = await filesystem.directoryExists(absolutePath);
      if (!dirExists) {
        throw new Error(`Directory not found: ${absolutePath}`);
      }

      // Get all files in the directory
      const readResult = await filesystem.readDirectory(absolutePath, {
        recursive,
        includeHidden,
        extensions: fileExtensions ?? [],
      });

      if (!readResult.success) {
        throw new Error(readResult.message);
      }

      if (!readResult.data || !Array.isArray(readResult.data.entries)) {
        throw new Error("Invalid directory listing result");
      }

      // Filter to only include files (not directories)
      const files = readResult.data.entries
        .filter((entry) => entry.type === "file")
        .map((entry) => path.join(absolutePath, entry.path));

      if (files.length === 0) {
        return {
          success: true,
          dirPath: absolutePath,
          recursive,
          processedFiles: 0,
          totalChunks: 0,
          totalEmbeddingsGenerated: 0,
          totalErrors: 0,
          fileResults: [],
          embeddingDimension: 0,
          filteredExtensions: fileExtensions ?? "all",
          collectionName,
          includeHidden,
          message: `No files found to index in directory: ${absolutePath}`,
        };
      }

      /*logger.info(
        chalk.blue(`Found ${files.length} files to process in ${absolutePath}`)
      );*/

      // Get embedding dimension from configuration
      const embeddingDimension = getEmbeddingDimension();

      // Ensure collection exists
      if (!(await collectionExists(collectionName))) {
        const created = await createCollection(
          collectionName,
          embeddingDimension,
        );
        if (!created) {
          throw new Error(`Failed to create collection ${collectionName}`);
        }
      }

      // Process files
      const fileResults: FileIndexResult[] = [];
      let totalChunks = 0;
      let totalEmbeddingsGenerated = 0;
      let totalErrors = 0;

      for (const filePath of files) {
        const result = await this.processFile(
          filePath,
          collectionName,
          chunkSize,
          chunkOverlap,
        );

        fileResults.push(result);

        totalChunks += result.totalChunks ?? 0;
        totalEmbeddingsGenerated += result.embeddingsGenerated ?? 0;
        totalErrors += result.embeddingErrors ?? 0;
        // A file-level failure counts as one additional error.
        if (result.error) totalErrors++;
      }

      return {
        success: true,
        dirPath: absolutePath,
        recursive,
        processedFiles: files.length,
        totalChunks,
        totalEmbeddingsGenerated,
        totalErrors,
        // Keep the response small: list per-file details only for short runs.
        fileResults:
          fileResults.length <= 10
            ? fileResults
            : [`${fileResults.length} files processed`],
        embeddingDimension,
        filteredExtensions: fileExtensions ?? "all",
        collectionName,
        includeHidden,
        message: `Directory indexed successfully: ${files.length} files with ${totalChunks} chunks and ${totalEmbeddingsGenerated} embeddings`,
      };
    } catch (error) {
      logger.error(
        chalk.red(`Error indexing directory ${dirPath}:`),
        error as Error,
      );
      throw new Error(`Failed to index directory: ${errorMessage(error)}`);
    }
  }

  /**
   * Process index directory request - main entry point for the tool.
   *
   * Validates the raw tool input, logs a summary of the request and runs
   * {@link IndexDirTool.indexDirectory}.
   *
   * @param input Raw tool arguments: `dirPath` (required) plus optional
   *   `recursive`, `fileExtensions`, `includeHidden`, `chunkSize`,
   *   `chunkOverlap` and `collectionName`.
   * @returns A promise of the tool response on the normal path, or, when the
   *   input is rejected before indexing starts, the error response itself
   *   (not wrapped in a promise). Failures are reported with `isError: true`
   *   rather than thrown.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  processIndexDirectory(input: any) {
    try {
      const {
        dirPath,
        recursive = true,
        fileExtensions,
        includeHidden = false,
        chunkSize = DEFAULT_CHUNK_SIZE,
        chunkOverlap = DEFAULT_CHUNK_OVERLAP,
        collectionName = DEFAULT_COLLECTION_NAME,
      } = input;

      if (!dirPath || typeof dirPath !== "string") {
        throw new Error("Invalid dirPath: must be a string");
      }

      if (fileExtensions != null && !Array.isArray(fileExtensions)) {
        throw new Error("Invalid fileExtensions: must be an array of strings");
      }

      if (
        typeof chunkSize !== "number" ||
        !Number.isFinite(chunkSize) ||
        chunkSize <= 0
      ) {
        throw new Error("Invalid chunkSize: must be a positive number");
      }

      // An overlap of chunkSize or more would give a non-positive stride.
      if (
        typeof chunkOverlap !== "number" ||
        !Number.isFinite(chunkOverlap) ||
        chunkOverlap < 0 ||
        chunkOverlap >= chunkSize
      ) {
        throw new Error(
          "Invalid chunkOverlap: must be a non-negative number smaller than chunkSize",
        );
      }

      if (!collectionName || typeof collectionName !== "string") {
        throw new Error("Invalid collectionName: must be a non-empty string");
      }

      // Log formatted information
      logger.info(`
  ${chalk.blue("🔍 Indexing Directory:")} ${dirPath}
  ${chalk.gray("├─")} Recursive: ${recursive ? chalk.green("✓") : chalk.red("✗")}
  ${chalk.gray("├─")} Include Hidden: ${
    includeHidden ? chalk.green("✓") : chalk.red("✗")
  }
  ${chalk.gray("├─")} Chunk Size: ${chalk.yellow(chunkSize)}
  ${chalk.gray("├─")} Chunk Overlap: ${chalk.yellow(chunkOverlap)}
  ${chalk.gray("├─")} Collection: ${chalk.yellow(collectionName)}
  ${chalk.gray("└─")} Extensions: ${
    fileExtensions
      ? chalk.yellow(fileExtensions.join(", "))
      : chalk.gray("all")
  }
      `);

      // Execute the directory indexing operation
      return this.indexDirectory(
        dirPath,
        recursive,
        fileExtensions ?? undefined,
        includeHidden,
        chunkSize,
        chunkOverlap,
        collectionName,
      )
        .then((result) => ({
          content: [
            {
              type: "text",
              text: JSON.stringify(result, null, 2),
            },
          ],
        }))
        .catch(toErrorResponse);
    } catch (error) {
      return toErrorResponse(error);
    }
  }
}

// Tool definition with improved description
const INDEX_DIR_TOOL = {
  name: "index_directory",
  description: `Index all files in a directory for retrieval-augmented generation (RAG).
Recursively processes files in a directory, chunks their content, and stores in the vector database.

Key features:
- Recursive directory processing
- File type filtering by extension
- Automatic chunking with configurable parameters
- Progress tracking and error reporting
- Hidden file inclusion control

Use when you need to:
- Index an entire codebase or documentation
- Build a comprehensive knowledge base
- Prepare multiple files for semantic search

Parameters explained:
- dirPath: Path to the directory to index (absolute or relative to project root)
- recursive: Whether to recursively process subdirectories (default: true)
- fileExtensions: Optional array of file extensions to include (e.g. [".js", ".ts"])
- includeHidden: Whether to include hidden files and directories (default: false)
- chunkSize: Size of each chunk (default: ${DEFAULT_CHUNK_SIZE})
- chunkOverlap: Overlap between consecutive chunks, must be smaller than chunkSize (default: ${DEFAULT_CHUNK_OVERLAP})
- collectionName: Vector database collection to store the chunks in (default: "${DEFAULT_COLLECTION_NAME}")`,
  inputSchema: {
    type: "object",
    properties: {
      dirPath: {
        type: "string",
        description:
          "Directory path to index (absolute or relative to project root)",
      },
      recursive: {
        type: "boolean",
        description:
          "Whether to recursively index subdirectories (default: true)",
        default: true,
      },
      fileExtensions: {
        type: "array",
        items: { type: "string" },
        description:
          'Array of file extensions to include (e.g., [".js", ".ts"]). Indexes all files if omitted.',
      },
      includeHidden: {
        type: "boolean",
        description:
          "Whether to include hidden files and directories (default: false)",
        default: false,
      },
      chunkSize: {
        type: "number",
        description: `Size of each chunk (default: ${DEFAULT_CHUNK_SIZE})`,
        default: DEFAULT_CHUNK_SIZE,
      },
      chunkOverlap: {
        type: "number",
        description: `Overlap between consecutive chunks; must be smaller than chunkSize (default: ${DEFAULT_CHUNK_OVERLAP})`,
        default: DEFAULT_CHUNK_OVERLAP,
      },
      collectionName: {
        type: "string",
        description: `Vector database collection to store the chunks in (default: "${DEFAULT_COLLECTION_NAME}")`,
        default: DEFAULT_COLLECTION_NAME,
      },
    },
    required: ["dirPath"],
  },
};

export { IndexDirTool, INDEX_DIR_TOOL };

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/YannickTM/docu-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

IndexDirTool.ts•12.6 KiB