Skip to main content
Glama
lex-tools

@lex-tools/codebase-context-dumper

Official
by lex-tools

dump_codebase_context

Extract and structure codebase content by scanning directories, respecting .gitignore, and organizing files with headers for LLM context analysis.

Instructions

Recursively reads text files from a specified directory, respecting .gitignore rules and skipping binary files. Concatenates content with file path headers/footers. Supports chunking the output for large codebases.

Input Schema

Table / JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| base_path | Yes | The absolute path to the project directory to scan. | — |
| num_chunks | No | Optional total number of chunks to divide the output into. | 1 |
| chunk_index | No | Optional 1-based index of the chunk to return. Requires num_chunks > 1. | 1 |

Implementation Reference

  • The primary handler function for executing the 'dump_codebase_context' tool. It validates input parameters, recursively finds files while respecting .gitignore, skips binary files, supports output chunking, and concatenates file contents with path headers and footers.
    /**
     * CallTool handler for the 'dump_codebase_context' tool.
     *
     * Validates arguments, recursively collects non-ignored text files,
     * skips binary/unreadable files, optionally slices the concatenated
     * output into byte-based chunks, and returns the requested chunk as a
     * single text content item.
     */
    server.setRequestHandler(CallToolRequestSchema, async (request): Promise<ToolHandlerReturnType> => {
      if (request.params.name !== "dump_codebase_context") {
        throw new McpError(
          ErrorCode.MethodNotFound,
          `Unknown tool: ${request.params.name}`
        );
      }

      // Accumulators declared outside the try block so they are in scope for logging.
      let combinedContent = "";
      let filesProcessed = 0;
      let filesSkippedBinary = 0;

      const args = request.params.arguments;

      // base_path is required and must be a non-empty string.
      if (typeof args?.base_path !== "string" || args.base_path.trim() === "") {
        throw new McpError(
          ErrorCode.InvalidParams,
          "Missing or invalid required parameter: base_path (must be a non-empty string)"
        );
      }
      const basePath = path.resolve(args.base_path); // Normalize to an absolute path

      // Both chunk parameters default to 1; positive numbers are floored to integers.
      const numChunks = typeof args?.num_chunks === 'number' && args.num_chunks > 0 ? Math.floor(args.num_chunks) : 1;
      const chunkIndex = typeof args?.chunk_index === 'number' && args.chunk_index > 0 ? Math.floor(args.chunk_index) : 1;

      if (chunkIndex > numChunks) {
         throw new McpError(ErrorCode.InvalidParams, `chunk_index (${chunkIndex}) cannot be greater than num_chunks (${numChunks})`);
      }

      try {
        // base_path must exist and be a directory.
        const stats = await fs.stat(basePath);
        if (!stats.isDirectory()) {
          throw new McpError(
            ErrorCode.InvalidParams,
            `Provided base_path is not a directory: ${basePath}`
          );
        }

        const initialIg = getInitialIgnoreInstance();
        // Recursively gather candidate files, honoring .gitignore rules.
        const allFiles = await findFiles(basePath, initialIg, basePath);

        // --- First pass: compute per-file sizes (including header/footer overhead) ---
        let totalSize = 0;
        const fileDetails: Array<{ path: string; size: number; content: string | null }> = [];
        for (const filePath of allFiles) {
            const details = await calculateFileSizeWithOverhead(filePath, basePath);
            if (details.content !== null) { // Only include non-binary, readable files
                totalSize += details.size;
                fileDetails.push({ path: filePath, size: details.size, content: details.content });
            } else {
                filesSkippedBinary++; // Binary or unreadable — excluded entirely
            }
        }

        // --- Determine the byte range covered by the requested chunk ---
        let targetChunkSize = totalSize;
        let startByte = 0;
        let endByte = totalSize;

        if (numChunks > 1) {
            targetChunkSize = Math.ceil(totalSize / numChunks); // ceil so the chunks together cover totalSize
            startByte = (chunkIndex - 1) * targetChunkSize;
            endByte = chunkIndex * targetChunkSize;
            console.info(`Chunking: ${numChunks} chunks, returning chunk ${chunkIndex} (bytes ${startByte}-${Math.min(endByte, totalSize)} of ${totalSize})`);
        } else {
             console.info(`Not chunking. Total size: ${totalSize} bytes.`);
        }

        // --- Second pass: emit files whose start offset falls inside the chunk ---
        // Files are never split: a file belongs to the chunk its first byte lands in,
        // so actual chunk sizes can deviate somewhat from targetChunkSize.
        let currentCumulativeSize = 0;
        let currentChunkSize = 0;

        for (const detail of fileDetails) {
            const fileStartByte = currentCumulativeSize;
            const fileEndByte = currentCumulativeSize + detail.size;

            let includeFile = false;
            if (numChunks > 1) {
                // Include if the file *starts* within the target chunk range
                if (fileStartByte >= startByte && fileStartByte < endByte) {
                     includeFile = true;
                }
            } else {
                // No chunking: include every readable, non-binary file
                includeFile = true;
            }

            if (includeFile && detail.content) {
                combinedContent += detail.content;
                currentChunkSize += detail.size;
                filesProcessed++;
            }

            currentCumulativeSize = fileEndByte; // Always advance, even for excluded files

            // Once past the chunk's end byte, later files cannot start inside it —
            // but the final chunk must absorb any ceil() remainder, so never break there.
            if (numChunks > 1 && currentCumulativeSize >= endByte && chunkIndex < numChunks) {
                 break;
            }
        }

        const summary = `Processed ${filesProcessed} files for chunk ${chunkIndex}/${numChunks}. Skipped ${filesSkippedBinary} binary/unreadable files initially. Total chunk size: ${currentChunkSize} bytes.`;
        console.info(summary); // Log summary to server console for debugging

        // Shape must match ToolHandlerReturnType: a single text content item.
        return {
          content: [
            {
              type: "text",
              text: combinedContent,
            },
          ],
        };
      } catch (error: unknown) {
        // Fix: log under the actual tool name (was "get_codebase_context").
        console.error("Error in dump_codebase_context:", error);
        if (error instanceof McpError) {
          throw error; // Re-throw known MCP errors
        }
        // Wrap unexpected failures in a generic internal error
        const message = error instanceof Error ? error.message : String(error);
        throw new McpError(
          ErrorCode.InternalError,
          `Failed to dump codebase context: ${message}`
        );
      }
    });
  • src/index.ts:43-76 (registration)
    Tool registration in the ListTools handler, defining the name, description, and input schema for 'dump_codebase_context'.
    /**
     * ListTools handler: advertises the single 'dump_codebase_context' tool
     * together with its JSON input schema.
     */
    server.setRequestHandler(ListToolsRequestSchema, async () => {
      const dumpCodebaseContextTool = {
        name: "dump_codebase_context",
        description: "Recursively reads text files from a specified directory, respecting .gitignore rules and skipping binary files. Concatenates content with file path headers/footers. Supports chunking the output for large codebases.",
        inputSchema: {
          type: "object",
          properties: {
            // Required: directory to dump.
            base_path: {
              type: "string",
              description: "The absolute path to the project directory to scan.",
            },
            // Optional chunking controls; both default to 1 (no chunking).
            num_chunks: {
              type: "integer",
              description: "Optional total number of chunks to divide the output into (default: 1).",
              minimum: 1,
              default: 1,
            },
            chunk_index: {
              type: "integer",
              description: "Optional 1-based index of the chunk to return (default: 1). Requires num_chunks > 1.",
              minimum: 1,
              default: 1,
            },
          },
          required: ["base_path"],
        },
      };
      return { tools: [dumpCodebaseContextTool] };
    });
  • Input schema definition for the 'dump_codebase_context' tool, specifying parameters like base_path, num_chunks, and chunk_index.
    // JSON Schema for the tool's arguments. Only base_path is required;
    // num_chunks/chunk_index opt into splitting large outputs into pieces.
    inputSchema: {
      type: "object",
      properties: {
        base_path: {
          type: "string",
          description:
            "The absolute path to the project directory to scan.",
        },
        // max_total_size_bytes removed
        num_chunks: {
          type: "integer",
          description: "Optional total number of chunks to divide the output into (default: 1).",
          minimum: 1,
          default: 1,
        },
        chunk_index: {
          type: "integer",
          description: "Optional 1-based index of the chunk to return (default: 1). Requires num_chunks > 1.",
          minimum: 1,
          default: 1,
        },
      },
      required: ["base_path"],
    },
  • Helper function to recursively find all non-ignored files in the directory tree, respecting nested .gitignore rules.
    /**
     * Recursively collects all non-ignored file paths under currentDir.
     *
     * Each directory's .gitignore (if present) is layered on top of the
     * rules inherited from its parent.
     *
     * @param currentDir Directory currently being scanned.
     * @param parentIg Ignore instance inherited from the parent (or initial) level.
     * @param basePath Original base path requested by the user (kept for parity with callers).
     * @returns Absolute paths of every file that survives the ignore rules.
     */
    async function findFiles(
      currentDir: string,
      parentIg: ignore.Ignore,
      basePath: string
    ): Promise<string[]> {
      let results: string[] = [];
      let currentIg = parentIg; // Effective ignore rules for this level

      // Layer this directory's .gitignore (if any) on top of the inherited rules.
      const gitignorePath = path.join(currentDir, ".gitignore");
      try {
        const content = await fs.readFile(gitignorePath, "utf-8");
        currentIg = ignore().add(parentIg); // Inherit parent patterns
        currentIg.add(content); // Then apply this directory's patterns
      } catch (error: any) {
        if (error.code !== "ENOENT") {
          // A missing .gitignore is normal; report anything else.
          console.error(`Error reading .gitignore at ${gitignorePath}:`, error);
        }
        // On ENOENT (or read error), keep using the inherited rules.
      }

      const list = await fs.readdir(currentDir, { withFileTypes: true });

      for (const dirent of list) {
        const fullPath = path.join(currentDir, dirent.name);
        // Fix: the 'ignore' package matches directory-only patterns (e.g. "build/")
        // only against paths that end with a slash, so append one for directories.
        // NOTE(review): only the entry's basename is tested here, so merged parent
        // patterns that contain slashes (e.g. "docs/build") will not match inside
        // subdirectories — confirm whether that case matters for this project.
        const candidate = dirent.isDirectory() ? `${dirent.name}/` : dirent.name;

        if (currentIg.ignores(candidate)) {
          continue; // Ignored entry — directories are pruned entirely here
        }

        if (dirent.isDirectory()) {
          // Recurse with the (possibly extended) ignore rules.
          results = results.concat(await findFiles(fullPath, currentIg, basePath));
        } else {
          // Non-ignored file: record its full path.
          results.push(fullPath);
        }
      }
      return results;
    }
  • Helper to calculate the size of file content including headers/footers and detect binary files.
    /**
     * Reads a file, wraps its text in START/END path-marker lines, and reports
     * the UTF-8 byte size of the wrapped result. Binary or unreadable files
     * yield { size: 0, content: null } so callers can skip them.
     */
    async function calculateFileSizeWithOverhead(filePath: string, basePath: string): Promise<{ size: number; content: string | null }> {
      try {
        const raw = await fs.readFile(filePath);
        if (await isBinaryFile(raw)) {
          // Binary content is excluded from the dump entirely.
          return { size: 0, content: null };
        }
        const relativePath = path.relative(basePath, filePath);
        const wrapped =
          `--- START: ${relativePath} ---\n` +
          raw.toString("utf-8") +
          `\n--- END: ${relativePath} ---\n\n`;
        return { size: Buffer.byteLength(wrapped, "utf-8"), content: wrapped };
      } catch (e) {
        console.error(`Error reading file ${filePath} for size calculation:`, e);
        // Unreadable files are treated the same as binary ones: skipped.
        return { size: 0, content: null };
      }
    }

Tool Definition Quality

Score is being calculated. Check back soon.

Install Server

Other Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/lex-tools/codebase-context-dumper'

If you have feedback or need assistance with the MCP directory API, please join our Discord server