Skip to main content
Glama
search-codebase-tool.ts15.8 kB
/** * Search Codebase Tool * * Atomic tool for searching codebase files based on query patterns. * Extracted from ResearchOrchestrator per ADR-018 (Atomic Tools Architecture). * * This tool provides dependency-injected codebase search functionality: * - Returns raw data (matches, files) without analysis/conclusions * - Supports configurable file system and analyzer dependencies * - No LLM calls or orchestration logic * - Testable without complex ESM mocking * * @see ResearchOrchestrator (deprecated) - Full multi-source orchestration * @since 3.0.0 * @category Tools * @category Research */ import { promises as fs } from 'fs'; import * as path from 'path'; import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; import { McpAdrError } from '../types/index.js'; import type { TreeSitterAnalyzer } from '../utils/tree-sitter-analyzer.js'; import { findFiles } from '../utils/file-system.js'; import { scanProjectStructure } from '../utils/actual-file-operations.js'; /** * File match result with relevance scoring */ export interface FileMatch { /** Full path to the file */ path: string; /** File content (if includeContent was true) */ content?: string; /** Relevance score 0-1 */ relevance: number; /** Tree-sitter parse analysis (if available) */ parseAnalysis?: { language: string; hasInfrastructure: boolean; functionCount: number; importCount: number; }; } /** * Codebase search result */ export interface CodebaseSearchResult { /** Array of matching files with relevance scores */ matches: FileMatch[]; /** Total number of files discovered */ totalFiles: number; /** Keywords extracted from query */ keywords: string[]; /** Project path searched */ projectPath: string; /** Search duration in milliseconds */ duration: number; } /** * Dependencies for search_codebase function (injectable for testing) */ export interface SearchCodebaseDependencies { /** File system operations */ fs: typeof fs; /** Tree-sitter analyzer (optional) */ analyzer?: TreeSitterAnalyzer; } /** * Default dependencies for production use */ export const defaultDeps: SearchCodebaseDependencies = { fs, }; /** * Minimum relevance threshold for including files in results (0-1) * Files with relevance below this threshold will be filtered out */ export const DEFAULT_RELEVANCE_THRESHOLD = 0.2; /** * Search codebase for files matching query * * @description Atomic tool that searches project files based on query patterns. * Returns raw file matches with relevance scores. Does NOT perform LLM analysis * or synthesis - returns structured data only. * * Search strategy: * 1. Extract keywords from query * 2. Use scanProjectStructure for intent-based file discovery (Docker, K8s, etc.) * 3. Use findFiles for glob-based keyword matching * 4. Read and score file relevance (optional tree-sitter enhancement) * 5. Return sorted results * * @param args - Search parameters * @param args.query - Search query (e.g., "Docker configuration", "authentication") * @param args.projectPath - Path to project root (defaults to cwd) * @param args.scope - Optional file scope patterns (e.g., ["src/**", "config/**"]) * @param args.includeContent - Include file content in results (default: false) * @param args.maxFiles - Maximum files to return (default: 20) * @param args.enableTreeSitter - Use tree-sitter for enhanced analysis (default: true) * @param deps - Injectable dependencies (for testing) * * @returns Promise<CodebaseSearchResult> Raw search results with matches and scores * * @throws {McpAdrError} When query is empty or search fails * * @example * ```typescript * // Basic search * const result = await searchCodebase({ * query: "Docker configuration", * projectPath: "/path/to/project" * }); * * console.log(`Found ${result.matches.length} files`); * result.matches.forEach(match => { * console.log(`${match.path}: ${(match.relevance * 100).toFixed(1)}%`); * }); * ``` * * @example * ```typescript * // Search with content and custom scope * const result = await searchCodebase({ * query: "authentication methods", * scope: ["src/**", "lib/**"], * includeContent: true, * maxFiles: 10 * }); * ``` * * @since 3.0.0 * @category Research * @category Atomic Tools */ export async function searchCodebase( args: { query: string; projectPath?: string; scope?: string[]; includeContent?: boolean; maxFiles?: number; enableTreeSitter?: boolean; relevanceThreshold?: number; }, deps: SearchCodebaseDependencies = defaultDeps ): Promise<CodebaseSearchResult> { const { query, projectPath = process.cwd(), scope, includeContent = false, maxFiles = 20, enableTreeSitter = true, relevanceThreshold = DEFAULT_RELEVANCE_THRESHOLD, } = args; if (!query || query.trim().length === 0) { throw new McpAdrError('Search query is required', 'INVALID_INPUT'); } const startTime = Date.now(); const keywords = extractKeywords(query); const queryLower = query.toLowerCase(); const relevanceMap: Map<string, number> = new Map(); const contentMap: Map<string, string> = new Map(); const parseAnalysisMap: Map< string, { language: string; hasInfrastructure: boolean; functionCount: number; importCount: number; } > = new Map(); const discoveredFiles = new Set<string>(); try { // PHASE 1: Intent-based file discovery using scanProjectStructure const projectStructure = await scanProjectStructure(projectPath, { readContent: false, maxFileSize: 100000, includeHidden: false, }); // Match relevant file categories based on query intent if (queryLower.match(/docker|container/i)) { projectStructure.dockerFiles.forEach(f => discoveredFiles.add(f.path)); } if (queryLower.match(/kubernetes|k8s|pod|deployment/i)) { projectStructure.kubernetesFiles.forEach(f => discoveredFiles.add(f.path)); } if (queryLower.match(/dependency|package|library/i)) { projectStructure.packageFiles.forEach(f => discoveredFiles.add(f.path)); } if (queryLower.match(/config|configuration|environment|env/i)) { projectStructure.configFiles.forEach(f => discoveredFiles.add(f.path)); projectStructure.environmentFiles.forEach(f => discoveredFiles.add(f.path)); } if (queryLower.match(/build|ci|cd|pipeline/i)) { projectStructure.buildFiles.forEach(f => discoveredFiles.add(f.path)); projectStructure.ciFiles.forEach(f => discoveredFiles.add(f.path)); } if (queryLower.match(/test|testing|spec/i)) { const testResults = await findFiles(projectPath, [ '**/*.test.ts', '**/*.spec.ts', '**/*.test.js', '**/*.spec.js', '**/tests/**', '**/test/**', ]); testResults.files.forEach(f => discoveredFiles.add(f.path)); } // PHASE 2: Keyword-based file discovery if (keywords.length > 0) { try { const keywordPatterns = keywords.slice(0, 5).map(k => `**/*${k}*`); const keywordResults = await findFiles(projectPath, keywordPatterns, { limit: 50 }); keywordResults.files.forEach(f => discoveredFiles.add(f.path)); } catch { // Keyword discovery failed, continue with other methods } } // PHASE 3: Apply custom scope if provided if (scope && scope.length > 0) { const scopedResults = await findFiles(projectPath, scope, { limit: 100 }); scopedResults.files.forEach(f => discoveredFiles.add(f.path)); } // PHASE 4: Read and score file relevance const fileArray = Array.from(discoveredFiles).slice(0, 50); // Limit to 50 files for performance // Import analyzer if tree-sitter is enabled let analyzer: TreeSitterAnalyzer | undefined; if (enableTreeSitter) { try { const { TreeSitterAnalyzer: TSAnalyzer } = await import( '../utils/tree-sitter-analyzer.js' ); analyzer = new TSAnalyzer(); } catch { // Tree-sitter not available (common in test environments), continue without it // Not logging as this is expected behavior in some environments } } // Process files in parallel for better performance const fileProcessingPromises = fileArray.map(async filePath => { try { const fullPath = path.isAbsolute(filePath) ? filePath : path.join(projectPath, filePath); const content = await deps.fs.readFile(fullPath, 'utf-8'); // Calculate text-based relevance let relevance = calculateTextRelevance(content, query, keywords); if (includeContent) { contentMap.set(filePath, content); } // PHASE 5: Enhance with tree-sitter analysis if available if (analyzer && shouldParse(filePath)) { try { const analysis = await analyzer.analyzeFile(fullPath, content); // Enhance relevance based on AST analysis let astRelevance = relevance; // Check for infrastructure references if (analysis.infraStructure && analysis.infraStructure.length > 0) { const infraProviders = analysis.infraStructure.map(i => i.provider); const infraResources = analysis.infraStructure.map(i => i.name); const matchingProviders = keywords.filter(k => infraProviders.some(p => p.toLowerCase().includes(k.toLowerCase())) ); const matchingResources = keywords.filter(k => infraResources.some(r => r.toLowerCase().includes(k.toLowerCase())) ); const totalMatches = matchingProviders.length + matchingResources.length; if (totalMatches > 0) { astRelevance += 0.2 * Math.min(totalMatches, 3); // Cap bonus at 0.6 } } // Check for imports/dependencies if (analysis.imports && keywords.some(k => k.match(/import|require|dependency/i))) { astRelevance += 0.1; } relevance = Math.min(astRelevance, 1.0); parseAnalysisMap.set(filePath, { language: analysis.language, hasInfrastructure: !!analysis.infraStructure, functionCount: analysis.functions?.length || 0, importCount: analysis.imports?.length || 0, }); } catch { // Tree-sitter parsing failed, use text-based relevance } } relevanceMap.set(filePath, relevance); return { filePath, success: true }; } catch { // File read failed, skip return { filePath, success: false }; } }); // Wait for all file processing to complete await Promise.allSettled(fileProcessingPromises); // PHASE 6: Build and sort results const matches: FileMatch[] = Array.from(relevanceMap.entries()) .filter(([, relevance]) => relevance > relevanceThreshold) .sort((a, b) => b[1] - a[1]) // Sort by relevance descending .slice(0, maxFiles) .map(([filePath, relevance]) => { const match: FileMatch = { path: filePath, relevance, }; const content = contentMap.get(filePath); if (content !== undefined) { match.content = content; } const parseAnalysis = parseAnalysisMap.get(filePath); if (parseAnalysis !== undefined) { match.parseAnalysis = parseAnalysis; } return match; }); return { matches, totalFiles: discoveredFiles.size, keywords, projectPath, duration: Date.now() - startTime, }; } catch (error) { throw new McpAdrError( `Codebase search failed: ${error instanceof Error ? error.message : String(error)}`, 'SEARCH_ERROR', { query, projectPath } ); } } /** * MCP tool wrapper for search_codebase */ export async function searchCodebaseTool( args: { query: string; projectPath?: string; scope?: string[]; includeContent?: boolean; maxFiles?: number; enableTreeSitter?: boolean; } ): Promise<CallToolResult> { try { const result = await searchCodebase(args); // Format response let output = `# Codebase Search Results\n\n`; output += `**Query**: ${args.query}\n`; output += `**Project**: ${result.projectPath}\n`; output += `**Matches**: ${result.matches.length} of ${result.totalFiles} files\n`; output += `**Duration**: ${result.duration}ms\n`; output += `**Keywords**: ${result.keywords.join(', ')}\n\n`; if (result.matches.length === 0) { output += `No files found matching the query.\n`; } else { output += `## Matches\n\n`; result.matches.forEach((match, index) => { output += `### ${index + 1}. ${match.path}\n`; output += `**Relevance**: ${(match.relevance * 100).toFixed(1)}%\n`; if (match.parseAnalysis) { output += `**Language**: ${match.parseAnalysis.language}\n`; output += `**Functions**: ${match.parseAnalysis.functionCount}\n`; output += `**Imports**: ${match.parseAnalysis.importCount}\n`; if (match.parseAnalysis.hasInfrastructure) { output += `**Infrastructure**: Yes\n`; } } if (match.content) { const preview = match.content.substring(0, 500); output += `\n**Content Preview**:\n\`\`\`\n${preview}${match.content.length > 500 ? '...' : ''}\n\`\`\`\n`; } output += `\n`; }); } return { content: [ { type: 'text', text: output, }, ], }; } catch (error) { return { content: [ { type: 'text', text: `❌ Search failed: ${error instanceof Error ? error.message : String(error)}`, }, ], isError: true, }; } } /** * Extract keywords from search query */ function extractKeywords(query: string): string[] { const stopWords = new Set([ 'the', 'is', 'are', 'was', 'were', 'what', 'when', 'where', 'why', 'how', 'which', 'who', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'with', 'about', 'as', 'by', 'from', 'of', 'can', 'could', 'should', 'would', 'do', 'does', 'did', 'have', 'has', 'had', 'we', 'our', 'us', ]); const words = query .toLowerCase() .replace(/[^\w\s-]/g, ' ') .split(/\s+/) .filter(w => w.length > 2 && !stopWords.has(w)); return [...new Set(words)]; } /** * Calculate text-based relevance score */ function calculateTextRelevance(content: string, query: string, keywords: string[]): number { const contentLower = content.toLowerCase(); const queryLower = query.toLowerCase(); let score = 0; // Keyword matching for (const keyword of keywords) { if (contentLower.includes(keyword.toLowerCase())) { score += 0.2; } } // Query phrase matching const queryWords = queryLower.split(/\s+/).filter(w => w.length > 3); for (const word of queryWords) { if (contentLower.includes(word)) { score += 0.1; } } return Math.min(score, 1.0); } /** * Check if file should be parsed with tree-sitter */ function shouldParse(filePath: string): boolean { const parsableExtensions = [ '.ts', '.tsx', '.js', '.jsx', '.py', '.yaml', '.yml', '.json', '.sh', '.bash', '.tf', '.hcl', ]; return parsableExtensions.some(ext => filePath.endsWith(ext)); }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tosin2013/mcp-adr-analysis-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server