
search_codebase

Searches codebase files using query patterns and returns relevant code with relevance scores. Supports file scoping and optional content inclusion for analysis.

Instructions

Atomic tool for searching codebase files based on query patterns. Returns raw file matches with relevance scores. Extracted from ResearchOrchestrator per ADR-018.

Input Schema

| Name | Required | Description | Default |
|------|----------|-------------|---------|
| query | Yes | Search query (e.g., "Docker configuration", "authentication") | |
| projectPath | No | Path to project root | `process.cwd()` |
| scope | No | Optional file scope patterns (e.g., `["src/**", "config/**"]`) | |
| includeContent | No | Include file content in results | `false` |
| maxFiles | No | Maximum files to return | `20` |
| enableTreeSitter | No | Use tree-sitter for enhanced analysis | `true` |
| relevanceThreshold | No | Minimum relevance threshold (0-1) | `DEFAULT_RELEVANCE_THRESHOLD` |
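
For example, a client might call the tool with arguments like these (values are illustrative):

```json
{
  "query": "Docker configuration",
  "scope": ["src/**", "infra/**"],
  "includeContent": true,
  "maxFiles": 10,
  "relevanceThreshold": 0.3
}
```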

Implementation Reference

  • MCP tool handler for 'search_codebase'. Wraps searchCodebase core function, formats results as markdown CallToolResult.
```typescript
export async function searchCodebaseTool(args: {
  query: string;
  projectPath?: string;
  scope?: string[];
  includeContent?: boolean;
  maxFiles?: number;
  enableTreeSitter?: boolean;
}): Promise<CallToolResult> {
  try {
    const result = await searchCodebase(args);

    // Format response
    let output = `# Codebase Search Results\n\n`;
    output += `**Query**: ${args.query}\n`;
    output += `**Project**: ${result.projectPath}\n`;
    output += `**Matches**: ${result.matches.length} of ${result.totalFiles} files\n`;
    output += `**Duration**: ${result.duration}ms\n`;
    output += `**Keywords**: ${result.keywords.join(', ')}\n\n`;

    if (result.matches.length === 0) {
      output += `No files found matching the query.\n`;
    } else {
      output += `## Matches\n\n`;
      result.matches.forEach((match, index) => {
        output += `### ${index + 1}. ${match.path}\n`;
        output += `**Relevance**: ${(match.relevance * 100).toFixed(1)}%\n`;
        if (match.parseAnalysis) {
          output += `**Language**: ${match.parseAnalysis.language}\n`;
          output += `**Functions**: ${match.parseAnalysis.functionCount}\n`;
          output += `**Imports**: ${match.parseAnalysis.importCount}\n`;
          if (match.parseAnalysis.hasInfrastructure) {
            output += `**Infrastructure**: Yes\n`;
          }
        }
        if (match.content) {
          const preview = match.content.substring(0, 500);
          output += `\n**Content Preview**:\n\`\`\`\n${preview}${match.content.length > 500 ? '...' : ''}\n\`\`\`\n`;
        }
        output += `\n`;
      });
    }

    return {
      content: [
        {
          type: 'text',
          text: output,
        },
      ],
    };
  } catch (error) {
    return {
      content: [
        {
          type: 'text',
          text: `❌ Search failed: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
```
  • Core helper function implementing the codebase search logic: keyword extraction, multi-phase file discovery, relevance scoring, tree-sitter parsing, result filtering/sorting.
```typescript
export async function searchCodebase(
  args: {
    query: string;
    projectPath?: string;
    scope?: string[];
    includeContent?: boolean;
    maxFiles?: number;
    enableTreeSitter?: boolean;
    relevanceThreshold?: number;
  },
  deps: SearchCodebaseDependencies = defaultDeps
): Promise<CodebaseSearchResult> {
  const {
    query,
    projectPath = process.cwd(),
    scope,
    includeContent = false,
    maxFiles = 20,
    enableTreeSitter = true,
    relevanceThreshold = DEFAULT_RELEVANCE_THRESHOLD,
  } = args;

  if (!query || query.trim().length === 0) {
    throw new McpAdrError('Search query is required', 'INVALID_INPUT');
  }

  const startTime = Date.now();
  const keywords = extractKeywords(query);
  const queryLower = query.toLowerCase();

  const relevanceMap: Map<string, number> = new Map();
  const contentMap: Map<string, string> = new Map();
  const parseAnalysisMap: Map<
    string,
    {
      language: string;
      hasInfrastructure: boolean;
      functionCount: number;
      importCount: number;
    }
  > = new Map();
  const discoveredFiles = new Set<string>();

  try {
    // PHASE 1: Intent-based file discovery using scanProjectStructure
    const projectStructure = await scanProjectStructure(projectPath, {
      readContent: false,
      maxFileSize: 100000,
      includeHidden: false,
    });

    // Match relevant file categories based on query intent
    if (queryLower.match(/docker|container/i)) {
      projectStructure.dockerFiles.forEach(f => discoveredFiles.add(f.path));
    }
    if (queryLower.match(/kubernetes|k8s|pod|deployment/i)) {
      projectStructure.kubernetesFiles.forEach(f => discoveredFiles.add(f.path));
    }
    if (queryLower.match(/dependency|package|library/i)) {
      projectStructure.packageFiles.forEach(f => discoveredFiles.add(f.path));
    }
    if (queryLower.match(/config|configuration|environment|env/i)) {
      projectStructure.configFiles.forEach(f => discoveredFiles.add(f.path));
      projectStructure.environmentFiles.forEach(f => discoveredFiles.add(f.path));
    }
    if (queryLower.match(/build|ci|cd|pipeline/i)) {
      projectStructure.buildFiles.forEach(f => discoveredFiles.add(f.path));
      projectStructure.ciFiles.forEach(f => discoveredFiles.add(f.path));
    }
    if (queryLower.match(/test|testing|spec/i)) {
      const testResults = await findFiles(projectPath, [
        '**/*.test.ts',
        '**/*.spec.ts',
        '**/*.test.js',
        '**/*.spec.js',
        '**/tests/**',
        '**/test/**',
      ]);
      testResults.files.forEach(f => discoveredFiles.add(f.path));
    }

    // PHASE 2: Keyword-based file discovery
    if (keywords.length > 0) {
      try {
        const keywordPatterns = keywords.slice(0, 5).map(k => `**/*${k}*`);
        const keywordResults = await findFiles(projectPath, keywordPatterns, { limit: 50 });
        keywordResults.files.forEach(f => discoveredFiles.add(f.path));
      } catch {
        // Keyword discovery failed, continue with other methods
      }
    }

    // PHASE 3: Apply custom scope if provided
    if (scope && scope.length > 0) {
      const scopedResults = await findFiles(projectPath, scope, { limit: 100 });
      scopedResults.files.forEach(f => discoveredFiles.add(f.path));
    }

    // PHASE 4: Read and score file relevance
    const fileArray = Array.from(discoveredFiles).slice(0, 50); // Limit to 50 files for performance

    // Import analyzer if tree-sitter is enabled
    let analyzer: TreeSitterAnalyzer | undefined;
    if (enableTreeSitter) {
      try {
        const { TreeSitterAnalyzer: TSAnalyzer } = await import(
          '../utils/tree-sitter-analyzer.js'
        );
        analyzer = new TSAnalyzer();
      } catch {
        // Tree-sitter not available (common in test environments), continue without it
        // Not logging as this is expected behavior in some environments
      }
    }

    // Process files in parallel for better performance
    const fileProcessingPromises = fileArray.map(async filePath => {
      try {
        const fullPath = path.isAbsolute(filePath) ? filePath : path.join(projectPath, filePath);
        const content = await deps.fs.readFile(fullPath, 'utf-8');

        // Calculate text-based relevance
        let relevance = calculateTextRelevance(content, query, keywords);

        if (includeContent) {
          contentMap.set(filePath, content);
        }

        // PHASE 5: Enhance with tree-sitter analysis if available
        if (analyzer && shouldParse(filePath)) {
          try {
            const analysis = await analyzer.analyzeFile(fullPath, content);

            // Enhance relevance based on AST analysis
            let astRelevance = relevance;

            // Check for infrastructure references
            if (analysis.infraStructure && analysis.infraStructure.length > 0) {
              const infraProviders = analysis.infraStructure.map(i => i.provider);
              const infraResources = analysis.infraStructure.map(i => i.name);
              const matchingProviders = keywords.filter(k =>
                infraProviders.some(p => p.toLowerCase().includes(k.toLowerCase()))
              );
              const matchingResources = keywords.filter(k =>
                infraResources.some(r => r.toLowerCase().includes(k.toLowerCase()))
              );
              const totalMatches = matchingProviders.length + matchingResources.length;
              if (totalMatches > 0) {
                astRelevance += 0.2 * Math.min(totalMatches, 3); // Cap bonus at 0.6
              }
            }

            // Check for imports/dependencies
            if (analysis.imports && keywords.some(k => k.match(/import|require|dependency/i))) {
              astRelevance += 0.1;
            }

            relevance = Math.min(astRelevance, 1.0);

            parseAnalysisMap.set(filePath, {
              language: analysis.language,
              hasInfrastructure: !!analysis.infraStructure,
              functionCount: analysis.functions?.length || 0,
              importCount: analysis.imports?.length || 0,
            });
          } catch {
            // Tree-sitter parsing failed, use text-based relevance
          }
        }

        relevanceMap.set(filePath, relevance);
        return { filePath, success: true };
      } catch {
        // File read failed, skip
        return { filePath, success: false };
      }
    });

    // Wait for all file processing to complete
    await Promise.allSettled(fileProcessingPromises);

    // PHASE 6: Build and sort results
    const matches: FileMatch[] = Array.from(relevanceMap.entries())
      .filter(([, relevance]) => relevance > relevanceThreshold)
      .sort((a, b) => b[1] - a[1]) // Sort by relevance descending
      .slice(0, maxFiles)
      .map(([filePath, relevance]) => {
        const match: FileMatch = {
          path: filePath,
          relevance,
        };
        const content = contentMap.get(filePath);
        if (content !== undefined) {
          match.content = content;
        }
        const parseAnalysis = parseAnalysisMap.get(filePath);
        if (parseAnalysis !== undefined) {
          match.parseAnalysis = parseAnalysis;
        }
        return match;
      });

    return {
      matches,
      totalFiles: discoveredFiles.size,
      keywords,
      projectPath,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    throw new McpAdrError(
      `Codebase search failed: ${error instanceof Error ? error.message : String(error)}`,
      'SEARCH_ERROR',
      { query, projectPath }
    );
  }
}
```
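
The per-file text score comes from `calculateTextRelevance`, whose implementation is not shown on this page. A minimal sketch of what such a scorer could look like, assuming a phrase-match bonus plus capped per-keyword bonuses (the function's role is taken from the code above; the name `scoreTextRelevance`, weights, and caps are invented for illustration):

```typescript
// Hypothetical text-relevance scorer (not the server's actual implementation).
// Returns a score in [0, 1]: an exact query-phrase match is the strongest
// signal, and each keyword hit adds a smaller, capped bonus.
function scoreTextRelevance(content: string, query: string, keywords: string[]): number {
  const text = content.toLowerCase();
  let score = 0;

  // Exact phrase match on the full query.
  if (text.includes(query.toLowerCase())) {
    score += 0.5;
  }

  // Per-keyword bonus, capped so keyword hits alone cannot
  // outweigh a phrase match.
  const hits = keywords.filter(k => text.includes(k.toLowerCase())).length;
  score += Math.min(hits * 0.15, 0.5);

  return Math.min(score, 1.0);
}
```

A file containing the literal query phrase plus both keywords would score around 0.8 under these assumed weights, while an unrelated file scores 0.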
  • Type definitions for input parameters (inline in functions), FileMatch results, and CodebaseSearchResult output schema.
```typescript
export interface FileMatch {
  /** Full path to the file */
  path: string;
  /** File content (if includeContent was true) */
  content?: string;
  /** Relevance score 0-1 */
  relevance: number;
  /** Tree-sitter parse analysis (if available) */
  parseAnalysis?: {
    language: string;
    hasInfrastructure: boolean;
    functionCount: number;
    importCount: number;
  };
}

/**
 * Codebase search result
 */
export interface CodebaseSearchResult {
  /** Array of matching files with relevance scores */
  matches: FileMatch[];
  /** Total number of files discovered */
  totalFiles: number;
  /** Keywords extracted from query */
  keywords: string[];
  /** Project path searched */
  projectPath: string;
  /** Search duration in milliseconds */
  duration: number;
}
```
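
The search function also relies on an `extractKeywords` helper that is referenced but not defined on this page. A plausible sketch under assumed tokenization rules (the stop-word list, minimum token length, and de-duplication strategy are illustrative, not the server's actual logic):

```typescript
// Hypothetical keyword extractor (the real extractKeywords is not shown here).
// Lowercases the query, splits on non-alphanumeric runs, drops short tokens
// and common stop words, and de-duplicates while preserving order.
const STOP_WORDS = new Set(['the', 'a', 'an', 'and', 'or', 'for', 'of', 'in', 'to', 'is', 'how']);

function extractKeywords(query: string): string[] {
  return query
    .toLowerCase()
    .split(/[^a-z0-9]+/)                           // split on non-alphanumeric runs
    .filter(w => w.length > 2)                     // drop very short tokens
    .filter(w => !STOP_WORDS.has(w))               // drop common stop words
    .filter((w, i, arr) => arr.indexOf(w) === i);  // de-duplicate, keep first occurrence
}
```

Under these assumptions, a query like "How is the Docker configuration loaded?" would yield `['docker', 'configuration', 'loaded']`, which the search phases above would turn into glob patterns such as `**/*docker*`.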
