AutoDev Codebase MCP Server

autodev-codebase
src
tree-sitter

index.ts•13.6 KiB

import { LanguageParser, loadRequiredLanguageParsers } from "./languageParser"
import { parseMarkdown } from "./markdownParser"
import { IFileSystem } from "../abstractions/core"
import { IWorkspace, IPathUtils } from "../abstractions/workspace"

/**
 * Dependencies for tree-sitter parsing functions
 */
export interface TreeSitterDependencies {
	fileSystem: IFileSystem
	workspace: IWorkspace
	pathUtils: IPathUtils
}

// Private constant
const DEFAULT_MIN_COMPONENT_LINES_VALUE = 4

// Upper bound on how many files a single directory summary will parse.
const MAX_FILES_TO_PARSE = 50

// Extensions (with leading dot, lowercase) that are handled by the markdown
// parser instead of a tree-sitter grammar.
const MARKDOWN_EXTENSIONS: readonly string[] = [".md", ".markdown"]

// Mutable threshold behind getMinComponentLines/setMinComponentLines (for easier testing)
let currentMinComponentLines = DEFAULT_MIN_COMPONENT_LINES_VALUE

/**
 * Get the current minimum number of lines for a component to be included
 */
export function getMinComponentLines(): number {
	return currentMinComponentLines
}

/**
 * Set the minimum number of lines for a component (for testing)
 */
export function setMinComponentLines(value: number): void {
	currentMinComponentLines = value
}

const extensions = [
	"tla",
	"js",
	"mjs",
	"jsx",
	"ts",
	"vue",
	"tsx",
	"py",
	// Rust
	"rs",
	"go",
	// C
	"c",
	"h",
	// C++
	"cpp",
	"hpp",
	// C#
	"cs",
	// Ruby
	"rb",
	"java",
	"php",
	"swift",
	// Solidity
	"sol",
	// Kotlin
	"kt",
	"kts",
	// Elixir
	"ex",
	"exs",
	// Elisp
	"el",
	// HTML
	"html",
	"htm",
	// Markdown
	"md",
	"markdown",
	// JSON
	"json",
	// CSS
	"css",
	// SystemRDL
	"rdl",
	// OCaml
	"ml",
	"mli",
	// Lua
	"lua",
	// Scala
	"scala",
	// TOML
	"toml",
	// Zig
	"zig",
	// Elm
	"elm",
	// Embedded Template
	"ejs",
	"erb",
	// Visual Basic .NET
	"vb",
].map((e) => `.${e}`)

export { extensions }

/**
 * Tell whether a (lowercased, dot-prefixed) extension is a markdown extension.
 */
function isMarkdownExtension(ext: string): boolean {
	return MARKDOWN_EXTENSIONS.includes(ext)
}

/**
 * Read a markdown file and format its captures as a definition listing.
 *
 * Does NOT check `workspace.shouldIgnore` and does NOT catch read/parse
 * errors; callers decide how to handle both.
 *
 * @param filePath - Path to the markdown file
 * @param dependencies - Dependencies for file system, workspace, and path operations
 * @returns The formatted listing, or null when nothing qualifies
 */
async function extractMarkdownDefinitions(
	filePath: string,
	dependencies: TreeSitterDependencies,
): Promise<string | null> {
	// Read file content
	const fileContentArray = await dependencies.fileSystem.readFile(filePath)
	const fileContent = new TextDecoder().decode(fileContentArray)

	// Split the file content into individual lines
	const lines = fileContent.split("\n")

	// Parse markdown content to get captures, then format them
	const markdownCaptures = parseMarkdown(fileContent)
	return processCaptures(markdownCaptures, lines, "markdown")
}

/**
 * Summarize the definitions found in a single source file.
 *
 * @param filePath - Path to the file to summarize
 * @param dependencies - Dependencies for file system, workspace, and path operations
 * @returns A listing headed by `# <basename>`; an explanatory message when the
 *   file does not exist; or undefined when the extension is unsupported, the
 *   file is ignored by the workspace, or no definitions were found
 */
export async function parseSourceCodeDefinitionsForFile(
	filePath: string,
	dependencies: TreeSitterDependencies,
): Promise<string | undefined> {
	// check if the file exists
	const fileExists = await dependencies.fileSystem.exists(filePath)
	if (!fileExists) {
		return "This file does not exist or you do not have permission to access it."
	}

	// Get file extension to determine parser
	const ext = dependencies.pathUtils.extname(filePath).toLowerCase()

	// Check if the file extension is supported
	if (!extensions.includes(ext)) {
		return undefined
	}

	let definitions: string | null
	if (isMarkdownExtension(ext)) {
		// Special case for markdown files: check access here because the
		// markdown helper does not.
		if (await dependencies.workspace.shouldIgnore(filePath)) {
			return undefined
		}
		definitions = await extractMarkdownDefinitions(filePath, dependencies)
	} else {
		// For other file types, load parser and use tree-sitter
		// (parseFile performs its own shouldIgnore check).
		const languageParsers = await loadRequiredLanguageParsers([filePath])
		definitions = await parseFile(filePath, languageParsers, dependencies)
	}

	if (definitions) {
		return `# ${dependencies.pathUtils.basename(filePath)}\n${definitions}`
	}
	return undefined
}

// TODO: implement caching behavior to avoid having to keep analyzing project for new tasks.
/**
 * Summarize the definitions of the supported source files reported by the workspace.
 *
 * At most 50 supported files are parsed. Markdown files are listed first,
 * followed by files parsed with tree-sitter; each entry is headed by the file
 * path relative to `dirPath`.
 *
 * @param dirPath - Directory used for the existence check and for relative paths
 * @param dependencies - Dependencies for file system, workspace, and path operations
 * @returns The combined listing, or an explanatory message when the directory
 *   does not exist or nothing was found
 */
export async function parseSourceCodeForDefinitionsTopLevel(
	dirPath: string,
	dependencies: TreeSitterDependencies,
): Promise<string> {
	// check if the path exists
	const dirExists = await dependencies.fileSystem.exists(dirPath)
	if (!dirExists) {
		return "This directory does not exist or you do not have permission to access it."
	}

	// Get candidate files from the workspace.
	// NOTE(review): the pattern is "**/*" and dirPath is not passed to findFiles,
	// so whether this is limited to the top level of dirPath depends on the
	// IWorkspace implementation - confirm.
	const allFiles = await dependencies.workspace.findFiles("**/*", undefined)

	let result = ""

	// Keep only supported files (capped)
	const { filesToParse } = separateFiles(allFiles, dependencies.pathUtils)

	// Filter filepaths for access using workspace
	const allowedFilesToParse: string[] = []
	for (const file of filesToParse) {
		if (!(await dependencies.workspace.shouldIgnore(file))) {
			allowedFilesToParse.push(file)
		}
	}

	// Separate markdown files from other files
	const markdownFiles: string[] = []
	const otherFiles: string[] = []
	for (const file of allowedFilesToParse) {
		const ext = dependencies.pathUtils.extname(file).toLowerCase()
		if (isMarkdownExtension(ext)) {
			markdownFiles.push(file)
		} else {
			otherFiles.push(file)
		}
	}

	// Load language parsers only for non-markdown files
	const languageParsers = await loadRequiredLanguageParsers(otherFiles)

	// Process markdown files
	for (const file of markdownFiles) {
		// Check if we have permission to access this file
		if (await dependencies.workspace.shouldIgnore(file)) {
			continue
		}

		try {
			const markdownDefinitions = await extractMarkdownDefinitions(file, dependencies)
			if (markdownDefinitions) {
				const relativePath = dependencies.pathUtils.relative(dirPath, file)
				result += `# ${relativePath}\n${markdownDefinitions}\n`
			}
		} catch (error) {
			// Best effort: one unreadable markdown file must not abort the summary.
			console.log(`Error parsing markdown file: ${error}\n`)
		}
	}

	// Process other files using tree-sitter
	for (const file of otherFiles) {
		const definitions = await parseFile(file, languageParsers, dependencies)
		if (definitions) {
			const relativePath = dependencies.pathUtils.relative(dirPath, file)
			result += `# ${relativePath}\n${definitions}\n`
		}
	}

	return result ? result : "No source code definitions found."
}

/**
 * Split a file list into the supported files that will be parsed and the rest.
 *
 * The extension comparison is case-insensitive, consistent with the other
 * extension checks in this module, so e.g. `README.MD` is treated like
 * `README.md`.
 *
 * @param allFiles - Candidate file paths
 * @param pathUtils - Path helper used to extract extensions
 * @returns `filesToParse` (supported files, capped at 50) and `remainingFiles`
 *   (every path not selected for parsing)
 */
function separateFiles(allFiles: string[], pathUtils: IPathUtils): { filesToParse: string[]; remainingFiles: string[] } {
	const filesToParse = allFiles
		.filter((file) => extensions.includes(pathUtils.extname(file).toLowerCase()))
		.slice(0, MAX_FILES_TO_PARSE) // 50 files max

	// Set lookup keeps this linear instead of scanning filesToParse per file.
	const selected = new Set(filesToParse)
	const remainingFiles = allFiles.filter((file) => !selected.has(file))

	return { filesToParse, remainingFiles }
}

/*
Parsing files using tree-sitter

1. Parse the file content into an AST (Abstract Syntax Tree) using the appropriate language grammar (set of rules that define how the components of a language like keywords, expressions, and statements can be combined to create valid programs).
2. Create a query using a language-specific query string, and run it against the AST's root node to capture specific syntax elements.
    - We use tag queries to identify named entities in a program, and then use a syntax capture to label the entity and its name. A notable example of this is GitHub's search-based code navigation.
    - Our custom tag queries are based on tree-sitter's default tag queries, but modified to only capture definitions.
3. Sort the captures by their position in the file, output the name of the definition, and format by i.e. adding "|----\n" for gaps between captured sections.

This approach allows us to focus on the most relevant parts of the code (defined by our language-specific queries) and provides a concise yet informative view of the file's structure and key elements.

- https://github.com/tree-sitter/node-tree-sitter/blob/master/test/query_test.js
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/query-test.js
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/helper.js
- https://tree-sitter.github.io/tree-sitter/code-navigation-systems
*/

/**
 * Walk up from a node to the highest ancestor that starts on the same row.
 *
 * This typically maps `name.definition.*` captures back to their containing
 * definition node while keeping the output anchored to the correct line.
 * Nodes without a numeric start row are returned unchanged.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- nodes come from tree-sitter or the markdown parser; no shared type is visible here
function promoteToLineStartAncestor(node: any): any {
	let current = node
	const startRow = current?.startPosition?.row
	if (typeof startRow !== "number") return current

	while (
		current?.parent &&
		typeof current.parent.startPosition?.row === "number" &&
		current.parent.startPosition.row === startRow
	) {
		current = current.parent
	}
	return current
}

/**
 * Process captures from tree-sitter or markdown parser
 *
 * Each qualifying capture becomes one output line of the form
 * `<start>--<end> | <source line>` with 1-based line numbers. Captures that
 * span fewer lines than `getMinComponentLines()` are dropped, and a given
 * line range is emitted at most once. The input array is not modified.
 *
 * @param captures - The captures to process (objects with `name` and `node`)
 * @param lines - The lines of the file
 * @param language - Language identifier of the file; currently not used by the formatting logic
 * @returns A formatted string with definitions, or null if none qualify
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- captures come from tree-sitter or the markdown parser; no shared type is visible here
function processCaptures(captures: any[], lines: string[], language: string): string | null {
	// No definitions found
	if (captures.length === 0) {
		return null
	}

	let formattedOutput = ""

	// Sort a copy by start row so the caller's array is left untouched.
	const orderedCaptures = [...captures].sort((a, b) => a.node.startPosition.row - b.node.startPosition.row)

	// Track already processed line ranges to avoid duplicates
	const processedLines = new Set<string>()

	// The threshold cannot change during this synchronous pass; read it once.
	const minComponentLines = getMinComponentLines()

	for (const { node, name } of orderedCaptures) {
		// Skip captures that don't represent definitions or docstrings
		if (!name.includes("definition") && !name.includes("name") && name !== "docstring") {
			continue
		}

		// For name captures (e.g. `name.definition.*`), promote to the containing
		// node that starts on the same line so we can show the full construct.
		// Docstrings and plain definition captures use the captured node itself.
		const definitionNode = name.includes("name.definition") ? promoteToLineStartAncestor(node) : node
		if (!definitionNode) continue

		// Get the start and end lines of the definition
		const startLine: number = definitionNode.startPosition.row
		const endLine: number = definitionNode.endPosition.row
		const lineCount = endLine - startLine + 1

		// Prefer showing the first non-empty line within the captured range.
		// This avoids outputting blank lines while keeping the original end range.
		let displayStartLine = startLine
		while (displayStartLine <= endLine && (lines[displayStartLine] ?? "").trim() === "") {
			displayStartLine++
		}
		if (displayStartLine > endLine) {
			// The whole range is blank (or lies outside `lines`).
			continue
		}

		// Skip components that don't span enough lines
		if (lineCount < minComponentLines) {
			continue
		}

		// Create unique key for this definition based on line range
		// This ensures we don't output the same line range multiple times
		const lineKey = `${displayStartLine}-${endLine}`
		if (processedLines.has(lineKey)) {
			continue
		}

		// Special handling for docstrings: they are anchored at the raw start row
		// of the capture rather than at the first non-empty line.
		if (name === "docstring") {
			const docstringKey = `${startLine}-${endLine}`
			if (!processedLines.has(docstringKey)) {
				formattedOutput += `${startLine + 1}--${endLine + 1} | ${lines[startLine]}\n`
				processedLines.add(docstringKey)
			}
			continue
		}

		// For other component definitions (classes, functions, etc.)
		formattedOutput += `${displayStartLine + 1}--${endLine + 1} | ${lines[displayStartLine]}\n`
		processedLines.add(lineKey)
	}

	if (formattedOutput.length > 0) {
		return formattedOutput
	}
	return null
}

/**
 * Parse a file and extract code definitions using tree-sitter
 *
 * @param filePath - Path to the file to parse
 * @param languageParsers - Map of language parsers
 * @param dependencies - Dependencies for file system, workspace, and path operations
 * @returns A formatted string with code definitions; an "Unsupported file type"
 *   message when no parser/query is loaded for the extension; or null when the
 *   file is ignored, no definitions are found, or parsing throws
 */
async function parseFile(
	filePath: string,
	languageParsers: LanguageParser,
	dependencies: TreeSitterDependencies,
): Promise<string | null> {
	// Check if we have permission to access this file
	if (await dependencies.workspace.shouldIgnore(filePath)) {
		return null
	}

	// Read file content
	const fileContentArray = await dependencies.fileSystem.readFile(filePath)
	const fileContent = new TextDecoder().decode(fileContentArray)

	// Parsers are looked up by lowercase extension without the leading dot.
	const extLang = dependencies.pathUtils.extname(filePath).toLowerCase().slice(1)

	// Check if we have a parser for this file type
	const { parser, query } = languageParsers[extLang] || {}
	if (!parser || !query) {
		return `Unsupported file type: ${filePath}`
	}

	try {
		// Parse the file content into an Abstract Syntax Tree (AST)
		const tree = parser.parse(fileContent)

		// Apply the query to the AST and get the captures
		const captures = query.captures(tree.rootNode)

		// Split the file content into individual lines
		const lines = fileContent.split("\n")

		// Process the captures
		return processCaptures(captures, lines, extLang)
	} catch (error) {
		console.log(`Error parsing file: ${error}\n`)
		// Return null on parsing error to avoid showing error messages in the output
		return null
	}
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/anrgct/autodev-codebase'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

index.ts•13.6 KiB