/**
* Search Codebase Tool
*
* Atomic tool for searching codebase files based on query patterns.
* Extracted from ResearchOrchestrator per ADR-018 (Atomic Tools Architecture).
*
* This tool provides dependency-injected codebase search functionality:
* - Returns raw data (matches, files) without analysis/conclusions
* - Supports configurable file system and analyzer dependencies
* - No LLM calls or orchestration logic
* - Testable without complex ESM mocking
*
* @see ResearchOrchestrator (deprecated) - Full multi-source orchestration
* @since 3.0.0
* @category Tools
* @category Research
*/
import { promises as fs } from 'fs';
import * as path from 'path';
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import { McpAdrError } from '../types/index.js';
import type { TreeSitterAnalyzer } from '../utils/tree-sitter-analyzer.js';
import { findFiles } from '../utils/file-system.js';
import { scanProjectStructure } from '../utils/actual-file-operations.js';
/**
* File match result with relevance scoring
*/
export interface FileMatch {
/** Full path to the file */
path: string;
/** File content (if includeContent was true) */
content?: string;
/** Relevance score 0-1 */
relevance: number;
/** Tree-sitter parse analysis (if available) */
parseAnalysis?: {
language: string;
hasInfrastructure: boolean;
functionCount: number;
importCount: number;
};
}
/**
* Codebase search result
*/
export interface CodebaseSearchResult {
/** Array of matching files with relevance scores */
matches: FileMatch[];
/** Total number of files discovered */
totalFiles: number;
/** Keywords extracted from query */
keywords: string[];
/** Project path searched */
projectPath: string;
/** Search duration in milliseconds */
duration: number;
}
/**
* Dependencies for search_codebase function (injectable for testing)
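 *
 * @example
 * ```typescript
 * // Illustrative test-time injection (a sketch, not a prescribed pattern):
 * // searchCodebase only calls deps.fs.readFile, so a minimal stub suffices.
 * // Note that file discovery itself still uses the real file system helpers.
 * const stubDeps: SearchCodebaseDependencies = {
 *   fs: { readFile: async () => "FROM node:20" } as unknown as typeof fs,
 * };
 * const result = await searchCodebase({ query: "Docker configuration" }, stubDeps);
 * ```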
*/
export interface SearchCodebaseDependencies {
/** File system operations */
fs: typeof fs;
  /** Tree-sitter analyzer (optional; takes precedence over the lazily imported default) */
  analyzer?: TreeSitterAnalyzer;
}
/**
* Default dependencies for production use
*/
export const defaultDeps: SearchCodebaseDependencies = {
fs,
};
/**
 * Relevance threshold for including files in results (0-1)
 * Files scoring at or below this threshold are filtered out
*/
export const DEFAULT_RELEVANCE_THRESHOLD = 0.2;
/**
* Search codebase for files matching query
*
* @description Atomic tool that searches project files based on query patterns.
* Returns raw file matches with relevance scores. Does NOT perform LLM analysis
* or synthesis - returns structured data only.
*
* Search strategy:
* 1. Extract keywords from query
* 2. Use scanProjectStructure for intent-based file discovery (Docker, K8s, etc.)
* 3. Use findFiles for glob-based keyword matching
* 4. Read and score file relevance (optional tree-sitter enhancement)
* 5. Return sorted results
*
* @param args - Search parameters
* @param args.query - Search query (e.g., "Docker configuration", "authentication")
* @param args.projectPath - Path to project root (defaults to cwd)
* @param args.scope - Optional file scope patterns (e.g., ["src/**", "config/**"])
* @param args.includeContent - Include file content in results (default: false)
* @param args.maxFiles - Maximum files to return (default: 20)
 * @param args.enableTreeSitter - Use tree-sitter for enhanced analysis (default: true)
 * @param args.relevanceThreshold - Relevance cutoff; files scoring at or below it are excluded (default: DEFAULT_RELEVANCE_THRESHOLD)
* @param deps - Injectable dependencies (for testing)
*
* @returns Promise<CodebaseSearchResult> Raw search results with matches and scores
*
* @throws {McpAdrError} When query is empty or search fails
*
* @example
* ```typescript
* // Basic search
* const result = await searchCodebase({
* query: "Docker configuration",
* projectPath: "/path/to/project"
* });
*
* console.log(`Found ${result.matches.length} files`);
* result.matches.forEach(match => {
* console.log(`${match.path}: ${(match.relevance * 100).toFixed(1)}%`);
* });
* ```
*
* @example
* ```typescript
* // Search with content and custom scope
* const result = await searchCodebase({
* query: "authentication methods",
* scope: ["src/**", "lib/**"],
* includeContent: true,
* maxFiles: 10
* });
* ```
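 *
 * @example
 * ```typescript
 * // Stricter filtering without tree-sitter analysis (usage sketch; the 0.5
 * // cutoff is arbitrary and only illustrates the relevanceThreshold option)
 * const strict = await searchCodebase({
 *   query: "terraform infrastructure",
 *   relevanceThreshold: 0.5,
 *   enableTreeSitter: false
 * });
 * ```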
*
* @since 3.0.0
* @category Research
* @category Atomic Tools
*/
export async function searchCodebase(
args: {
query: string;
projectPath?: string;
scope?: string[];
includeContent?: boolean;
maxFiles?: number;
enableTreeSitter?: boolean;
relevanceThreshold?: number;
},
deps: SearchCodebaseDependencies = defaultDeps
): Promise<CodebaseSearchResult> {
const {
query,
projectPath = process.cwd(),
scope,
includeContent = false,
maxFiles = 20,
enableTreeSitter = true,
relevanceThreshold = DEFAULT_RELEVANCE_THRESHOLD,
} = args;
if (!query || query.trim().length === 0) {
throw new McpAdrError('Search query is required', 'INVALID_INPUT');
}
const startTime = Date.now();
const keywords = extractKeywords(query);
const queryLower = query.toLowerCase();
const relevanceMap: Map<string, number> = new Map();
const contentMap: Map<string, string> = new Map();
const parseAnalysisMap: Map<
string,
{
language: string;
hasInfrastructure: boolean;
functionCount: number;
importCount: number;
}
> = new Map();
const discoveredFiles = new Set<string>();
try {
// PHASE 1: Intent-based file discovery using scanProjectStructure
const projectStructure = await scanProjectStructure(projectPath, {
readContent: false,
maxFileSize: 100000,
includeHidden: false,
});
// Match relevant file categories based on query intent
if (queryLower.match(/docker|container/i)) {
projectStructure.dockerFiles.forEach(f => discoveredFiles.add(f.path));
}
if (queryLower.match(/kubernetes|k8s|pod|deployment/i)) {
projectStructure.kubernetesFiles.forEach(f => discoveredFiles.add(f.path));
}
if (queryLower.match(/dependency|package|library/i)) {
projectStructure.packageFiles.forEach(f => discoveredFiles.add(f.path));
}
if (queryLower.match(/config|configuration|environment|env/i)) {
projectStructure.configFiles.forEach(f => discoveredFiles.add(f.path));
projectStructure.environmentFiles.forEach(f => discoveredFiles.add(f.path));
}
if (queryLower.match(/build|ci|cd|pipeline/i)) {
projectStructure.buildFiles.forEach(f => discoveredFiles.add(f.path));
projectStructure.ciFiles.forEach(f => discoveredFiles.add(f.path));
}
if (queryLower.match(/test|testing|spec/i)) {
const testResults = await findFiles(projectPath, [
'**/*.test.ts',
'**/*.spec.ts',
'**/*.test.js',
'**/*.spec.js',
'**/tests/**',
'**/test/**',
]);
testResults.files.forEach(f => discoveredFiles.add(f.path));
}
// PHASE 2: Keyword-based file discovery
if (keywords.length > 0) {
try {
const keywordPatterns = keywords.slice(0, 5).map(k => `**/*${k}*`);
const keywordResults = await findFiles(projectPath, keywordPatterns, { limit: 50 });
keywordResults.files.forEach(f => discoveredFiles.add(f.path));
} catch {
// Keyword discovery failed, continue with other methods
}
}
// PHASE 3: Apply custom scope if provided
if (scope && scope.length > 0) {
const scopedResults = await findFiles(projectPath, scope, { limit: 100 });
scopedResults.files.forEach(f => discoveredFiles.add(f.path));
}
// PHASE 4: Read and score file relevance
const fileArray = Array.from(discoveredFiles).slice(0, 50); // Limit to 50 files for performance
    // Resolve the tree-sitter analyzer: prefer an injected one, otherwise import lazily
    let analyzer: TreeSitterAnalyzer | undefined;
    if (enableTreeSitter) {
      analyzer = deps.analyzer;
      if (!analyzer) {
        try {
          const { TreeSitterAnalyzer: TSAnalyzer } = await import(
            '../utils/tree-sitter-analyzer.js'
          );
          analyzer = new TSAnalyzer();
        } catch {
          // Tree-sitter not available (common in test environments), continue without it
          // Not logging as this is expected behavior in some environments
        }
      }
    }
// Process files in parallel for better performance
const fileProcessingPromises = fileArray.map(async filePath => {
try {
const fullPath = path.isAbsolute(filePath) ? filePath : path.join(projectPath, filePath);
const content = await deps.fs.readFile(fullPath, 'utf-8');
// Calculate text-based relevance
let relevance = calculateTextRelevance(content, query, keywords);
if (includeContent) {
contentMap.set(filePath, content);
}
// PHASE 5: Enhance with tree-sitter analysis if available
if (analyzer && shouldParse(filePath)) {
try {
const analysis = await analyzer.analyzeFile(fullPath, content);
// Enhance relevance based on AST analysis
let astRelevance = relevance;
// Check for infrastructure references
if (analysis.infraStructure && analysis.infraStructure.length > 0) {
const infraProviders = analysis.infraStructure.map(i => i.provider);
const infraResources = analysis.infraStructure.map(i => i.name);
const matchingProviders = keywords.filter(k =>
infraProviders.some(p => p.toLowerCase().includes(k.toLowerCase()))
);
const matchingResources = keywords.filter(k =>
infraResources.some(r => r.toLowerCase().includes(k.toLowerCase()))
);
const totalMatches = matchingProviders.length + matchingResources.length;
if (totalMatches > 0) {
astRelevance += 0.2 * Math.min(totalMatches, 3); // Cap bonus at 0.6
}
}
// Check for imports/dependencies
if (analysis.imports && keywords.some(k => k.match(/import|require|dependency/i))) {
astRelevance += 0.1;
}
relevance = Math.min(astRelevance, 1.0);
parseAnalysisMap.set(filePath, {
language: analysis.language,
              hasInfrastructure: (analysis.infraStructure?.length ?? 0) > 0,
functionCount: analysis.functions?.length || 0,
importCount: analysis.imports?.length || 0,
});
} catch {
// Tree-sitter parsing failed, use text-based relevance
}
}
relevanceMap.set(filePath, relevance);
return { filePath, success: true };
} catch {
// File read failed, skip
return { filePath, success: false };
}
});
// Wait for all file processing to complete
await Promise.allSettled(fileProcessingPromises);
// PHASE 6: Build and sort results
const matches: FileMatch[] = Array.from(relevanceMap.entries())
.filter(([, relevance]) => relevance > relevanceThreshold)
.sort((a, b) => b[1] - a[1]) // Sort by relevance descending
.slice(0, maxFiles)
.map(([filePath, relevance]) => {
const match: FileMatch = {
path: filePath,
relevance,
};
const content = contentMap.get(filePath);
if (content !== undefined) {
match.content = content;
}
const parseAnalysis = parseAnalysisMap.get(filePath);
if (parseAnalysis !== undefined) {
match.parseAnalysis = parseAnalysis;
}
return match;
});
return {
matches,
totalFiles: discoveredFiles.size,
keywords,
projectPath,
duration: Date.now() - startTime,
};
} catch (error) {
throw new McpAdrError(
`Codebase search failed: ${error instanceof Error ? error.message : String(error)}`,
'SEARCH_ERROR',
{ query, projectPath }
);
}
}
/**
* MCP tool wrapper for search_codebase
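 *
 * Formats the raw searchCodebase result as a markdown report inside a CallToolResult.
 *
 * @example
 * ```typescript
 * // Usage sketch: the report text is in content[0].text; isError is set on failure
 * const toolResult = await searchCodebaseTool({ query: "kubernetes deployment" });
 * const first = toolResult.content[0];
 * if (first?.type === 'text') {
 *   console.log(first.text);
 * }
 * ```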
*/
export async function searchCodebaseTool(
args: {
query: string;
projectPath?: string;
scope?: string[];
includeContent?: boolean;
maxFiles?: number;
enableTreeSitter?: boolean;
}
): Promise<CallToolResult> {
try {
const result = await searchCodebase(args);
// Format response
let output = `# Codebase Search Results\n\n`;
output += `**Query**: ${args.query}\n`;
output += `**Project**: ${result.projectPath}\n`;
output += `**Matches**: ${result.matches.length} of ${result.totalFiles} files\n`;
output += `**Duration**: ${result.duration}ms\n`;
output += `**Keywords**: ${result.keywords.join(', ')}\n\n`;
if (result.matches.length === 0) {
output += `No files found matching the query.\n`;
} else {
output += `## Matches\n\n`;
result.matches.forEach((match, index) => {
output += `### ${index + 1}. ${match.path}\n`;
output += `**Relevance**: ${(match.relevance * 100).toFixed(1)}%\n`;
if (match.parseAnalysis) {
output += `**Language**: ${match.parseAnalysis.language}\n`;
output += `**Functions**: ${match.parseAnalysis.functionCount}\n`;
output += `**Imports**: ${match.parseAnalysis.importCount}\n`;
if (match.parseAnalysis.hasInfrastructure) {
output += `**Infrastructure**: Yes\n`;
}
}
if (match.content) {
const preview = match.content.substring(0, 500);
output += `\n**Content Preview**:\n\`\`\`\n${preview}${match.content.length > 500 ? '...' : ''}\n\`\`\`\n`;
}
output += `\n`;
});
}
return {
content: [
{
type: 'text',
text: output,
},
],
};
} catch (error) {
return {
content: [
{
type: 'text',
text: `❌ Search failed: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
/**
* Extract keywords from search query
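 *
 * Lowercases the query, replaces punctuation with spaces, and drops stop words
 * and words shorter than three characters, returning the unique remainder.
 *
 * @example
 * ```typescript
 * extractKeywords("What is the Docker configuration?");
 * // => ["docker", "configuration"]
 * ```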
*/
function extractKeywords(query: string): string[] {
const stopWords = new Set([
'the',
'is',
'are',
'was',
'were',
'what',
'when',
'where',
'why',
'how',
'which',
'who',
'a',
'an',
'and',
'or',
'but',
'in',
'on',
'at',
'to',
'for',
'with',
'about',
'as',
'by',
'from',
'of',
'can',
'could',
'should',
'would',
'do',
'does',
'did',
'have',
'has',
'had',
'we',
'our',
'us',
]);
const words = query
.toLowerCase()
.replace(/[^\w\s-]/g, ' ')
.split(/\s+/)
.filter(w => w.length > 2 && !stopWords.has(w));
return [...new Set(words)];
}
/**
* Calculate text-based relevance score
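 *
 * Adds 0.2 per matched keyword and 0.1 per matched query word longer than three
 * characters, capped at 1.0, so a keyword that also passes the query-word filter
 * is counted in both passes.
 *
 * @example
 * ```typescript
 * // "docker" counts as a keyword (+0.2) and as a query word (+0.1);
 * // "configuration" is absent from the content and adds nothing.
 * calculateTextRelevance("# docker build stage", "Docker configuration", ["docker", "configuration"]);
 * // => 0.3
 * ```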
*/
function calculateTextRelevance(content: string, query: string, keywords: string[]): number {
const contentLower = content.toLowerCase();
const queryLower = query.toLowerCase();
let score = 0;
// Keyword matching
for (const keyword of keywords) {
if (contentLower.includes(keyword.toLowerCase())) {
score += 0.2;
}
}
// Query phrase matching
const queryWords = queryLower.split(/\s+/).filter(w => w.length > 3);
for (const word of queryWords) {
if (contentLower.includes(word)) {
score += 0.1;
}
}
return Math.min(score, 1.0);
}
/**
* Check if file should be parsed with tree-sitter
*/
function shouldParse(filePath: string): boolean {
const parsableExtensions = [
'.ts',
'.tsx',
'.js',
'.jsx',
'.py',
'.yaml',
'.yml',
'.json',
'.sh',
'.bash',
'.tf',
'.hcl',
];
return parsableExtensions.some(ext => filePath.endsWith(ext));
}