Skip to main content
Glama
lex-tools

@lex-tools/codebase-context-dumper

Official
by lex-tools

dump_codebase_context

Extract and structure codebase content by scanning directories, respecting .gitignore, and organizing files with headers for LLM context analysis.

Instructions

Recursively reads text files from a specified directory, respecting .gitignore rules and skipping binary files. Concatenates content with file path headers/footers. Supports chunking the output for large codebases.

Input Schema

Input format: JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| base_path | Yes | The absolute path to the project directory to scan. | — |
| num_chunks | No | Optional total number of chunks to divide the output into (default: 1). | 1 |
| chunk_index | No | Optional 1-based index of the chunk to return (default: 1). Requires num_chunks > 1. | 1 |

Implementation Reference

  • The primary handler function for executing the 'dump_codebase_context' tool. It validates input parameters, recursively finds files while respecting .gitignore, skips binary files, supports output chunking, and concatenates file contents with path headers and footers.
    // Handler for the 'dump_codebase_context' tool call. Validates arguments,
    // walks the directory tree (respecting .gitignore, skipping binary files),
    // optionally slices the concatenated output into byte-based chunks, and
    // returns the requested chunk as a single text content item.
    server.setRequestHandler(CallToolRequestSchema, async (request): Promise<ToolHandlerReturnType> => {
      if (request.params.name !== "dump_codebase_context") {
        throw new McpError(
          ErrorCode.MethodNotFound,
          `Unknown tool: ${request.params.name}`
        );
      }
    
      // Declared outside the try block so the summary/catch logic can see them.
      let combinedContent = "";
      let filesProcessed = 0;
      let filesSkippedBinary = 0;
    
      const args = request.params.arguments;
    
      // Validate arguments: base_path is required and must be a non-empty string.
      if (typeof args?.base_path !== "string" || args.base_path.trim() === "") {
        throw new McpError(
          ErrorCode.InvalidParams,
          "Missing or invalid required parameter: base_path (must be a non-empty string)"
        );
      }
      const basePath = path.resolve(args.base_path); // Ensure absolute path
    
      // Coerce chunking parameters to positive integers; anything else defaults to 1.
      const numChunks = typeof args?.num_chunks === 'number' && args.num_chunks > 0 ? Math.floor(args.num_chunks) : 1;
      const chunkIndex = typeof args?.chunk_index === 'number' && args.chunk_index > 0 ? Math.floor(args.chunk_index) : 1;
    
      if (chunkIndex > numChunks) {
         throw new McpError(ErrorCode.InvalidParams, `chunk_index (${chunkIndex}) cannot be greater than num_chunks (${numChunks})`);
      }
    
      try {
        // base_path must exist and be a directory.
        const stats = await fs.stat(basePath);
        if (!stats.isDirectory()) {
          throw new McpError(
            ErrorCode.InvalidParams,
            `Provided base_path is not a directory: ${basePath}`
          );
        }
    
        const initialIg = getInitialIgnoreInstance();
        // Start the recursive search from the base path with the initial ignore instance.
        const allFiles = await findFiles(basePath, initialIg, basePath);
    
        // --- Pass 1: compute per-file wrapped content and the total byte size ---
        let totalSize = 0;
        const fileDetails: Array<{ path: string; size: number; content: string | null }> = [];
        for (const filePath of allFiles) {
            const details = await calculateFileSizeWithOverhead(filePath, basePath);
            if (details.content !== null) { // Only include non-binary, readable files
                totalSize += details.size;
                fileDetails.push({ path: filePath, size: details.size, content: details.content });
            } else {
                filesSkippedBinary++; // Count binary/unreadable files skipped
            }
        }
    
        // --- Determine chunk boundaries (byte offsets into the virtual concatenation) ---
        let targetChunkSize = totalSize;
        let startByte = 0;
        let endByte = totalSize;
    
        if (numChunks > 1) {
            targetChunkSize = Math.ceil(totalSize / numChunks); // ceil ensures full coverage
            startByte = (chunkIndex - 1) * targetChunkSize;
            endByte = chunkIndex * targetChunkSize;
            console.info(`Chunking: ${numChunks} chunks, returning chunk ${chunkIndex} (bytes ${startByte}-${Math.min(endByte, totalSize)} of ${totalSize})`);
        } else {
             console.info(`Not chunking. Total size: ${totalSize} bytes.`);
        }
    
        // --- Pass 2: assemble the requested chunk ---
        let currentCumulativeSize = 0;
        let currentChunkSize = 0;
    
        for (const detail of fileDetails) {
            const fileStartByte = currentCumulativeSize;
            const fileEndByte = currentCumulativeSize + detail.size;
    
            // A file belongs to the chunk in which it *starts*; files are never split.
            let includeFile = false;
            if (numChunks > 1) {
                if (fileStartByte >= startByte && fileStartByte < endByte) {
                     includeFile = true;
                }
            } else {
                // No chunking: include every non-binary/readable file.
                includeFile = true;
            }
    
            if (includeFile && detail.content) {
                combinedContent += detail.content;
                currentChunkSize += detail.size;
                filesProcessed++;
            }
    
            currentCumulativeSize = fileEndByte; // Always advance cumulative size
    
            // When chunking, stop once past this chunk's end (the last chunk keeps
            // the remainder, so never break for chunkIndex === numChunks).
            if (numChunks > 1 && currentCumulativeSize >= endByte && chunkIndex < numChunks) {
                 break;
            }
        }
    
        const summary = `Processed ${filesProcessed} files for chunk ${chunkIndex}/${numChunks}. Skipped ${filesSkippedBinary} binary/unreadable files initially. Total chunk size: ${currentChunkSize} bytes.`;
        console.info(summary); // Log summary to server console for debugging
    
        // Ensure the return value matches the ToolHandlerReturnType.
        return {
          content: [
            {
              type: "text",
              text: combinedContent,
            },
          ],
        };
      } catch (error: unknown) {
        // Fixed: previously logged/reported the stale tool name 'get_codebase_context'.
        console.error("Error in dump_codebase_context:", error);
        if (error instanceof McpError) {
          throw error; // Re-throw known MCP errors
        }
        // Wrap anything unexpected in a generic internal error.
        const message = error instanceof Error ? error.message : String(error);
        throw new McpError(
          ErrorCode.InternalError,
          `Failed to dump codebase context: ${message}`
        );
      }
    });
  • src/index.ts:43-76 (registration)
    Tool registration in the ListTools handler, defining the name, description, and input schema for 'dump_codebase_context'.
    // Registers the ListTools handler so clients can discover the single
    // 'dump_codebase_context' tool and its JSON Schema.
    server.setRequestHandler(ListToolsRequestSchema, async () => {
      const dumpCodebaseContextTool = {
        name: "dump_codebase_context",
        description: "Recursively reads text files from a specified directory, respecting .gitignore rules and skipping binary files. Concatenates content with file path headers/footers. Supports chunking the output for large codebases.",
        inputSchema: {
          type: "object",
          properties: {
            // The only required parameter: where to start scanning.
            base_path: {
              type: "string",
              description:
                "The absolute path to the project directory to scan.",
            },
            // Optional chunking controls; both default to 1 (no chunking).
            num_chunks: {
              type: "integer",
              description: "Optional total number of chunks to divide the output into (default: 1).",
              minimum: 1,
              default: 1,
            },
            chunk_index: {
              type: "integer",
              description: "Optional 1-based index of the chunk to return (default: 1). Requires num_chunks > 1.",
              minimum: 1,
              default: 1,
            },
          },
          required: ["base_path"],
        },
      };
      return { tools: [dumpCodebaseContextTool] };
    });
  • Input schema definition for the 'dump_codebase_context' tool, specifying parameters like base_path, num_chunks, and chunk_index.
    inputSchema: {
      type: "object",
      properties: {
        base_path: {
          type: "string",
          description:
            "The absolute path to the project directory to scan.",
        },
        // max_total_size_bytes removed
        num_chunks: {
          type: "integer",
          description: "Optional total number of chunks to divide the output into (default: 1).",
          minimum: 1,
          default: 1,
        },
        chunk_index: {
          type: "integer",
          description: "Optional 1-based index of the chunk to return (default: 1). Requires num_chunks > 1.",
          minimum: 1,
          default: 1,
        },
      },
      required: ["base_path"],
    },
  • Helper function to recursively find all non-ignored files in the directory tree, respecting nested .gitignore rules.
    // Recursively collects every non-ignored file under currentDir, layering
    // each directory's .gitignore rules on top of those inherited from above.
    // Returns absolute paths of all files that survive the ignore filters.
    async function findFiles(
      currentDir: string, // Current directory being scanned
      parentIg: ignore.Ignore, // ignore object from parent (or initial)
      basePath: string // The original base path requested by the user
    ): Promise<string[]> {
      let results: string[] = [];
      let currentIg = parentIg; // Start with parent's ignore rules
    
      // Check for a .gitignore in the current directory and layer it on.
      const gitignorePath = path.join(currentDir, ".gitignore");
      try {
        const content = await fs.readFile(gitignorePath, "utf-8");
        // New ignore instance for this level: parent patterns first, then local ones.
        currentIg = ignore().add(parentIg);
        currentIg.add(content);
      } catch (error: unknown) {
        const code = (error as NodeJS.ErrnoException)?.code;
        if (code !== "ENOENT") {
          // Log errors other than 'file not found'
          console.error(`Error reading .gitignore at ${gitignorePath}:`, error);
        }
        // If no .gitignore here (or it is unreadable), currentIg stays parentIg.
      }
    
      const list = await fs.readdir(currentDir, { withFileTypes: true });
    
      for (const dirent of list) {
        const fullPath = path.join(currentDir, dirent.name);
        // Fixed: directory-only gitignore patterns (e.g. "node_modules/") only
        // match paths with a trailing slash in the 'ignore' package, so append
        // one when testing directories; previously such rules never matched.
        const relativeToCurrentDir = dirent.isDirectory()
          ? `${dirent.name}/`
          : dirent.name;
    
        // Check ignore rules using the *current* effective ignore instance.
        if (currentIg.ignores(relativeToCurrentDir)) {
          continue; // Skip ignored files/directories
        }
    
        if (dirent.isDirectory()) {
          // Pass the potentially updated currentIg down recursively.
          results = results.concat(await findFiles(fullPath, currentIg, basePath));
        } else {
          // It's a file that's not ignored; record its full path.
          results.push(fullPath);
        }
      }
      return results;
    }
  • Helper to calculate the size of file content including headers/footers and detect binary files.
    // Reads a file, wraps its text in START/END path-marker lines, and reports
    // the UTF-8 byte size of the wrapped result. Binary or unreadable files
    // yield { size: 0, content: null } so callers can skip them.
    async function calculateFileSizeWithOverhead(filePath: string, basePath: string): Promise<{ size: number; content: string | null }> {
      try {
        const raw = await fs.readFile(filePath);
        if (await isBinaryFile(raw)) {
          // Binary data is excluded from the dump entirely.
          return { size: 0, content: null };
        }
        const relativePath = path.relative(basePath, filePath);
        const wrapped =
          `--- START: ${relativePath} ---\n` +
          raw.toString("utf-8") +
          `\n--- END: ${relativePath} ---\n\n`;
        return { size: Buffer.byteLength(wrapped, "utf-8"), content: wrapped };
      } catch (e) {
        console.error(`Error reading file ${filePath} for size calculation:`, e);
        return { size: 0, content: null }; // Unreadable files are skipped too.
      }
    }
Install Server

Other Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/lex-tools/codebase-context-dumper'

If you have feedback or need assistance with the MCP directory API, please join our Discord server