AutoDev Codebase MCP Server

autodev-codebase
src
tree-sitter

index.ts•13.4 kB

import { LanguageParser, loadRequiredLanguageParsers } from "./languageParser"
import { parseMarkdown } from "./markdownParser"
import { IFileSystem } from "../abstractions/core"
import { IWorkspace, IPathUtils } from "../abstractions/workspace"

/**
 * Dependencies for tree-sitter parsing functions
 */
export interface TreeSitterDependencies {
	fileSystem: IFileSystem
	workspace: IWorkspace
	pathUtils: IPathUtils
}

// Private constant
const DEFAULT_MIN_COMPONENT_LINES_VALUE = 4

// Maximum number of files parsed by a single directory-level listing
const MAX_FILES_TO_PARSE = 50

// Extensions (lower-case, with leading dot) that are handled by the markdown parser
// instead of tree-sitter
const MARKDOWN_EXTENSIONS = [".md", ".markdown"]

// Languages whose captures may start on a plain HTML tag that must be filtered out
const HTML_FILTERED_LANGUAGES = ["jsx", "tsx"]

// Common HTML elements pattern (hoisted so it is compiled once, not on every call)
const HTML_ELEMENT_PATTERN = /^[^A-Z]*<\/?(?:div|span|button|input|h[1-6]|p|a|img|ul|li|form)\b/

// Mutable so tests can override the threshold through setMinComponentLines
let currentMinComponentLines = DEFAULT_MIN_COMPONENT_LINES_VALUE

/**
 * Get the current minimum number of lines for a component to be included
 */
export function getMinComponentLines(): number {
	return currentMinComponentLines
}

/**
 * Set the minimum number of lines for a component (for testing)
 */
export function setMinComponentLines(value: number): void {
	currentMinComponentLines = value
}

const extensions = [
	"tla",
	"js",
	"mjs",
	"jsx",
	"ts",
	"vue",
	"tsx",
	"py",
	// Rust
	"rs",
	"go",
	// C
	"c",
	"h",
	// C++
	"cpp",
	"hpp",
	// C#
	"cs",
	// Ruby
	"rb",
	"java",
	"php",
	"swift",
	// Solidity
	"sol",
	// Kotlin
	"kt",
	"kts",
	// Elixir
	"ex",
	"exs",
	// Elisp
	"el",
	// HTML
	"html",
	"htm",
	// Markdown
	"md",
	"markdown",
	// JSON
	"json",
	// CSS
	"css",
	// SystemRDL
	"rdl",
	// OCaml
	"ml",
	"mli",
	// Lua
	"lua",
	// Scala
	"scala",
	// TOML
	"toml",
	// Zig
	"zig",
	// Elm
	"elm",
	// Embedded Template
	"ejs",
	"erb",
].map((e) => `.${e}`)

export { extensions }

/**
 * Tell whether a lower-cased, dot-prefixed extension is handled by the markdown parser.
 */
function isMarkdownExtension(ext: string): boolean {
	return MARKDOWN_EXTENSIONS.includes(ext)
}

/**
 * Split file content into lines, accepting both LF and CRLF line endings so that
 * emitted definition lines never carry a trailing "\r".
 */
function splitLines(content: string): string[] {
	return content.split(/\r?\n/)
}

/**
 * Read a file through the injected file system and decode it with TextDecoder's
 * default encoding.
 */
async function readFileText(filePath: string, dependencies: TreeSitterDependencies): Promise<string> {
	const fileContentArray = await dependencies.fileSystem.readFile(filePath)
	return new TextDecoder().decode(fileContentArray)
}

/**
 * Read a markdown file and format its captures.
 *
 * Access checks (workspace.shouldIgnore) are the caller's responsibility.
 *
 * @returns The formatted definitions, or null when nothing qualifies
 */
async function parseMarkdownFile(filePath: string, dependencies: TreeSitterDependencies): Promise<string | null> {
	const fileContent = await readFileText(filePath, dependencies)
	// Parse markdown content to get captures, then format them like tree-sitter captures
	const markdownCaptures = parseMarkdown(fileContent)
	return processCaptures(markdownCaptures, splitLines(fileContent), "markdown")
}

/**
 * List the code definitions found in a single file.
 *
 * @param filePath - Path to the file to parse
 * @param dependencies - Dependencies for file system, workspace, and path operations
 * @returns A "# <basename>" header followed by the definitions; a human-readable message
 * when the file does not exist; or undefined when the extension is unsupported, the
 * file is ignored by the workspace, or no definitions were found
 */
export async function parseSourceCodeDefinitionsForFile(
	filePath: string,
	dependencies: TreeSitterDependencies,
): Promise<string | undefined> {
	// check if the file exists
	const fileExists = await dependencies.fileSystem.exists(filePath)
	if (!fileExists) {
		return "This file does not exist or you do not have permission to access it."
	}

	// Get file extension to determine parser
	const ext = dependencies.pathUtils.extname(filePath).toLowerCase()

	// Check if the file extension is supported
	if (!extensions.includes(ext)) {
		return undefined
	}

	let definitions: string | null
	if (isMarkdownExtension(ext)) {
		// Special case for markdown files: check access here because the markdown path
		// does not go through parseFile, which performs the check itself
		if (await dependencies.workspace.shouldIgnore(filePath)) {
			return undefined
		}
		definitions = await parseMarkdownFile(filePath, dependencies)
	} else {
		// For other file types, load parser and use tree-sitter
		const languageParsers = await loadRequiredLanguageParsers([filePath])
		definitions = await parseFile(filePath, languageParsers, dependencies)
	}

	if (definitions) {
		return `# ${dependencies.pathUtils.basename(filePath)}\n${definitions}`
	}
	return undefined
}

// TODO: implement caching behavior to avoid having to keep analyzing project for new tasks.
/**
 * List the code definitions of the supported files reported by the workspace.
 *
 * At most 50 supported files are parsed. Markdown files are emitted first, then the
 * files handled by tree-sitter. A failure on one file is logged and does not abort the
 * listing.
 *
 * @param dirPath - Directory whose existence is checked and against which output paths
 * are made relative
 * @param dependencies - Dependencies for file system, workspace, and path operations
 * @returns One "# <relative path>" section per file with definitions, or a
 * human-readable message when the directory is missing or nothing was found
 */
export async function parseSourceCodeForDefinitionsTopLevel(
	dirPath: string,
	dependencies: TreeSitterDependencies,
): Promise<string> {
	// check if the path exists
	const dirExists = await dependencies.fileSystem.exists(dirPath)
	if (!dirExists) {
		return "This directory does not exist or you do not have permission to access it."
	}

	// NOTE(review): the glob is not scoped to dirPath here; presumably findFiles
	// searches from the workspace root - confirm against the IWorkspace implementation.
	const allFiles = await dependencies.workspace.findFiles("**/*", undefined)

	// Separate files to parse and remaining files
	const { filesToParse } = separateFiles(allFiles, dependencies.pathUtils)

	// Filter filepaths for access using workspace
	const allowedFilesToParse: string[] = []
	for (const file of filesToParse) {
		if (!(await dependencies.workspace.shouldIgnore(file))) {
			allowedFilesToParse.push(file)
		}
	}

	// Separate markdown files from other files
	const markdownFiles: string[] = []
	const otherFiles: string[] = []
	for (const file of allowedFilesToParse) {
		const ext = dependencies.pathUtils.extname(file).toLowerCase()
		if (isMarkdownExtension(ext)) {
			markdownFiles.push(file)
		} else {
			otherFiles.push(file)
		}
	}

	// Load language parsers only for non-markdown files
	const languageParsers = await loadRequiredLanguageParsers(otherFiles)

	let result = ""

	// Process markdown files (already filtered through shouldIgnore above)
	for (const file of markdownFiles) {
		try {
			const markdownDefinitions = await parseMarkdownFile(file, dependencies)
			if (markdownDefinitions) {
				const relativePath = dependencies.pathUtils.relative(dirPath, file)
				result += `# ${relativePath}\n${markdownDefinitions}\n`
			}
		} catch (error) {
			console.log(`Error parsing markdown file: ${error}\n`)
		}
	}

	// Process other files using tree-sitter
	for (const file of otherFiles) {
		try {
			const definitions = await parseFile(file, languageParsers, dependencies)
			if (definitions) {
				const relativePath = dependencies.pathUtils.relative(dirPath, file)
				result += `# ${relativePath}\n${definitions}\n`
			}
		} catch (error) {
			// parseFile reads the file outside its own try/catch; keep one unreadable
			// file from aborting the whole listing, as the markdown loop already does
			console.log(`Error parsing file: ${error}\n`)
		}
	}

	return result ? result : "No source code definitions found."
}

/**
 * Split a file list into the supported files that will be parsed (capped at 50) and
 * everything else.
 *
 * Extensions are compared case-insensitively, consistent with the other extension
 * checks in this module.
 */
function separateFiles(allFiles: string[], pathUtils: IPathUtils): { filesToParse: string[]; remainingFiles: string[] } {
	const filesToParse = allFiles
		.filter((file) => extensions.includes(pathUtils.extname(file).toLowerCase()))
		.slice(0, MAX_FILES_TO_PARSE)
	const selected = new Set(filesToParse)
	const remainingFiles = allFiles.filter((file) => !selected.has(file))
	return { filesToParse, remainingFiles }
}

/*
Parsing files using tree-sitter

1. Parse the file content into an AST (Abstract Syntax Tree) using the appropriate language grammar (set of rules that define how the components of a language like keywords, expressions, and statements can be combined to create valid programs).
2. Create a query using a language-specific query string, and run it against the AST's root node to capture specific syntax elements.
    - We use tag queries to identify named entities in a program, and then use a syntax capture to label the entity and its name. A notable example of this is GitHub's search-based code navigation.
    - Our custom tag queries are based on tree-sitter's default tag queries, but modified to only capture definitions.
3. Sort the captures by their position in the file, and output one "<start>--<end> | <first line>" entry per captured definition.

This approach allows us to focus on the most relevant parts of the code (defined by our language-specific queries) and provides a concise yet informative view of the file's structure and key elements.

- https://github.com/tree-sitter/node-tree-sitter/blob/master/test/query_test.js
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/query-test.js
- https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/test/helper.js
- https://tree-sitter.github.io/tree-sitter/code-navigation-systems
*/

/**
 * Process captures from tree-sitter or markdown parser
 *
 * Each retained definition is emitted as "<start>--<end> | <first line>" with 1-based
 * line numbers. Definitions spanning fewer lines than getMinComponentLines() are
 * skipped, and each line range is emitted at most once.
 *
 * @param captures - The captures to process (not mutated)
 * @param lines - The lines of the file
 * @param language - Language key of the file; "jsx" and "tsx" enable HTML element filtering
 * @returns A formatted string with definitions, or null when nothing qualifies
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- captures come from both tree-sitter and the markdown parser, whose node types are declared elsewhere
function processCaptures(captures: any[], lines: string[], language: string): string | null {
	// No definitions found
	if (captures.length === 0) {
		return null
	}

	// Determine if HTML filtering is needed for this language
	const needsHtmlFiltering = HTML_FILTERED_LANGUAGES.includes(language)
	const isNotHtmlElement = (line: string): boolean => !needsHtmlFiltering || !HTML_ELEMENT_PATTERN.test(line.trim())

	const minComponentLines = getMinComponentLines()

	// Sort a copy by start position so the caller's array is left untouched
	const sortedCaptures = [...captures].sort((a, b) => a.node.startPosition.row - b.node.startPosition.row)

	// Track already processed line ranges to avoid duplicates
	const processedLines = new Set<string>()
	let formattedOutput = ""

	for (const { node, name } of sortedCaptures) {
		// Skip captures that don't represent definitions
		if (!name.includes("definition") && !name.includes("name")) {
			continue
		}

		// A "name" capture points at the identifier; its parent holds the full definition
		const definitionNode = name.includes("name") ? node.parent : node
		if (!definitionNode) {
			continue
		}

		// Get the start and end lines of the full definition
		const startLine = definitionNode.startPosition.row
		const endLine = definitionNode.endPosition.row
		const lineCount = endLine - startLine + 1

		// Skip components that don't span enough lines
		if (lineCount < minComponentLines) {
			continue
		}

		// Unique key per line range so the same range is never output twice
		const lineKey = `${startLine}-${endLine}`
		if (processedLines.has(lineKey)) {
			continue
		}

		// Guard against captures whose row lies outside the provided lines
		const firstLine = lines[startLine]
		if (firstLine === undefined) {
			continue
		}

		if (name.includes("name.definition")) {
			// Named definitions are emitted regardless of HTML filtering
			const componentName = node.text
			if (componentName) {
				formattedOutput += `${startLine + 1}--${endLine + 1} | ${firstLine}\n`
				processedLines.add(lineKey)
			}
		} else if (isNotHtmlElement(firstLine.trim())) {
			// Other definitions are emitted only when they do not start on an HTML element
			formattedOutput += `${startLine + 1}--${endLine + 1} | ${firstLine}\n`
			processedLines.add(lineKey)

			// If this is part of a larger definition, include its enclosing range too
			if (node.parent && node.parent.lastChild) {
				const contextStart = node.parent.startPosition.row
				const contextEnd = node.parent.lastChild.endPosition.row
				const contextSpan = contextEnd - contextStart + 1

				// Only include context if it spans enough lines
				if (contextSpan >= minComponentLines) {
					const rangeKey = `${contextStart}-${contextEnd}`
					const contextLine = lines[contextStart]
					if (!processedLines.has(rangeKey) && contextLine !== undefined) {
						formattedOutput += `${contextStart + 1}--${contextEnd + 1} | ${contextLine}\n`
						processedLines.add(rangeKey)
					}
				}
			}
		}
	}

	return formattedOutput.length > 0 ? formattedOutput : null
}

/**
 * Parse a file and extract code definitions using tree-sitter
 *
 * @param filePath - Path to the file to parse
 * @param languageParsers - Map of language parsers
 * @param dependencies - Dependencies for file system, workspace, and path operations
 * @returns A formatted string with code definitions, an "Unsupported file type" message
 * when no parser is loaded for the extension, or null when the file is ignored, parsing
 * fails, or no definitions were found
 */
async function parseFile(
	filePath: string,
	languageParsers: LanguageParser,
	dependencies: TreeSitterDependencies,
): Promise<string | null> {
	// Check if we have permission to access this file
	if (await dependencies.workspace.shouldIgnore(filePath)) {
		return null
	}

	// Read file content
	const fileContent = await readFileText(filePath, dependencies)
	const extLang = dependencies.pathUtils.extname(filePath).toLowerCase().slice(1)

	// Check if we have a parser for this file type
	const { parser, query } = languageParsers[extLang] || {}
	if (!parser || !query) {
		return `Unsupported file type: ${filePath}`
	}

	try {
		// Parse the file content into an Abstract Syntax Tree (AST)
		const tree = parser.parse(fileContent)

		// Apply the query to the AST and get the captures
		const captures = query.captures(tree.rootNode)

		// Process the captures
		return processCaptures(captures, splitLines(fileContent), extLang)
	} catch (error) {
		console.log(`Error parsing file: ${error}\n`)
		// Return null on parsing error to avoid showing error messages in the output
		return null
	}
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/anrgct/autodev-codebase'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.