Skip to main content
Glama
readSymbol.ts11.8 kB
import fg from 'fast-glob' import fs from 'fs' import _ from 'lodash' import pLimit from 'p-limit' import { z } from 'zod' import env from '../env.js' import { defineTool } from '../tools.js' import util from '../util.js' const MAX_CONCURRENCY = 32 const MAX_FILE_SIZE = 10 * 1024 * 1024 // 10MB // Max number of files to scan const MAX_FILE_COUNT = 2000 // Max length of a block to consider const MAX_BLOCK_LENGTH = 30e3 // Max distance from the beginning of the line to the symbol const MAX_SYMBOL_OFFSET = 200 // Abort once we have this many matches const MAX_MATCHES = 20 const DEFAULT_MAX_RESULTS = 5 const DEFAULT_EXTENSIONS = ['d.ts', 'ts', 'tsx', 'js', 'jsx', 'mjs', 'cjs', 'cts', 'java', 'cs', 'cpp', 'c', 'h', 'hpp', 'cc', 'go', 'rs', 'php', 'swift', 'scss', 'css', 'less', 'graphql', 'gql', 'prisma', 'proto'] const IGNORED_DIRECTORIES = ['node_modules', '.git', 'test', 'tests', 'examples', 'runtime'] const IGNORED_ROOT_DIRECTORIES = ['dist', 'build', 'out'] const IGNORED_DEEP_DIRECTORIES = ['bin', 'scripts'] const IGNORED_FILES = ['*.test.*', '*.spec.*', '_*', '*.min.*'] const BONUS_KEYWORDS = /\b(class|interface|type|function|enum|namespace|module|model|declare|abstract|const|extends|implements)\b/gi const readSymbol = defineTool({ id: 'read_symbol', schema: z.object({ symbols: z.array(z.string().min(1)).describe('Symbol name(s) to find (functions, classes, types, etc.), case-sensitive, supports * for wildcard'), file_paths: z.array(z.string().min(1)).optional().describe('File paths to search (supports relative and glob). Defaults to "." (current directory). IMPORTANT: Be specific with paths when possible, minimize broad patterns like "node_modules/**" to avoid mismatches'), limit: z.number().optional().describe(`Maximum number of results to return. Defaults to ${DEFAULT_MAX_RESULTS}`), optimize: z.boolean().optional().describe('Unless explicitly false, this tool will strip comments and spacing to preserve AI\'s context window, omit unless you REALLY it unchanged (default: true)'), }), description: 'Find and extract symbol block by name from files, supports a lot of file formats (like TS, JS, GraphQL, CSS and most that use braces for blocks). Uses streaming with concurrency control for better performance', isReadOnly: true, fromArgs: ([symbols, ...paths]) => ({ symbols: symbols.split(','), file_paths: paths.length ? paths : undefined }), handler: async (args) => { const { symbols, file_paths: filePaths = [], limit = DEFAULT_MAX_RESULTS, optimize = true } = args if (!filePaths.length) { filePaths.push('.') } const patterns = filePaths.map(mapPattern) const results: Block[] = [] let totalFound = 0 const maxMatches = MAX_MATCHES * symbols.length try { for await (const result of scanForSymbol(symbols, patterns)) { totalFound++ results.push(result) if (results.length >= maxMatches) { break } } } catch (err) { if (!results.length) { throw err } } if (!results.length) { if (env.COLLECT_MISMATCHES) { collectMismatch({ symbols, file_paths: filePaths, limit }) } throw new Error(`Failed to find the \`${symbols.join(', ') }\` symbol(s) in any files`) } let output = results .sort((a, b) => b.score - a.score) .slice(0, limit * symbols.length) .map(block => formatResult(block, optimize)) .join('\n\n') if (totalFound > results.length) { output += `\n\n--- Showing ${results.length} matches out of ${totalFound} ---` } return output }, }) export function mapPattern(pattern: string) { const exts = `.{${DEFAULT_EXTENSIONS.join(',')}}` if (pattern === '*' || pattern === '*.*') { return `*${exts}` } // If pattern already has an extension, return as-is if (/[/*\w]\.\w+$/.test(pattern)) { return pattern } const hasGlobChars = /[*?[{]/.test(pattern) // Implicitly include @types/ packages pattern = pattern.replace(/node_modules\/(\w+)/g, 'node_modules/{@types/,}$1') // Trailing slash = explicit directory, search recursively if (pattern.endsWith('/')) { return `${pattern}**/*${exts}` } if (!hasGlobChars) { // No slash and no dot = simple directory name if (!/[/\\.]/.test(pattern)) { return `${pattern}/**/*${exts}` } // Otherwise trust as-is (file path) return pattern } return `${pattern}${exts}` } export function generateIgnorePatterns(patterns: string[]): string[] { const globalDirs = IGNORED_DIRECTORIES .filter(dir => !patterns.some(pattern => pattern.includes(`${dir}/`))) const rootDirs = IGNORED_ROOT_DIRECTORIES .filter(dir => !patterns.some(pattern => pattern.includes(`${dir}/`))) const filteredDeepDirs = IGNORED_DEEP_DIRECTORIES .filter(dir => !patterns.some(pattern => pattern.includes(`*/**/${dir}`))) .map(dir => `*/**/${dir}`) const ignore = [`!**/{${IGNORED_FILES.join(',')}}`] // Global directories: ignore everywhere if (globalDirs.length) { ignore.push(`!{${globalDirs.join(',')}}/**`) } // Root-only directories : only ignore at root level rootDirs.forEach(dir => { ignore.push(`!${dir}/**`) }) // Deep directories: ignore when nested deep if (filteredDeepDirs.length) { ignore.push(`!{${filteredDeepDirs.join(',')}}`) } return ignore } async function* scanForSymbol(symbols: string[], patterns: string[]): AsyncGenerator<Block> { const limit = pLimit(MAX_CONCURRENCY) let shouldStop = false const ignorePatterns = generateIgnorePatterns(patterns) const allPatterns = [...patterns, ...ignorePatterns] const entries = fg.stream(allPatterns, { cwd: util.CWD, onlyFiles: true, absolute: false, stats: true, suppressErrors: true, deep: 4, }) as AsyncIterable<fg.Entry> const pendingTasks = new Set<Promise<Block[]>>() let filesProcessed = 0 try { for await (const entry of entries) { if (++filesProcessed === MAX_FILE_COUNT) break if (shouldStop) break if (entry.stats && entry.stats.size > MAX_FILE_SIZE) continue const fileIndex = filesProcessed const task = limit(async () => { if (shouldStop) return [] try { const content = await fs.promises.readFile(util.resolve(entry.path), 'utf8') if (shouldStop) return [] return findBlocks(content, symbols, entry.path, fileIndex) } catch { return [] } }) pendingTasks.add(task) const taskResults = await task pendingTasks.delete(task) for (const result of taskResults) { yield result } } } finally { shouldStop = true await Promise.allSettled([...pendingTasks]) } } export interface Block { text: string startLine: number endLine: number path: string score: number index: number fileIndex: number } export function findBlocks(content: string, symbols: string[], path: string, fileIndex: number): Block[] { const results: Block[] = [] if (!content.includes('\n')) { // Quick escape for large minified files return results } for (const match of matchSymbol(content, symbols)) { const lines = content.substring(0, match.index).split('\n') const startLine = lines.length const [text] = match if (text.length > MAX_BLOCK_LENGTH) { continue } const endLine = startLine + text.split('\n').length - 1 const score = scoreSymbol(text, path) results.push({ text, startLine, endLine, path, score, index: match.index, fileIndex }) } return results } export function matchSymbol(content: string, symbols: string[]) { const regex = createRegex(symbols) regex.lastIndex = 0 return (content.matchAll(regex) || []) } // AI: NEVER change this regex without user approval const createRegex = _.memoize((symbols: string[]) => { const escapedSymbols = symbolsToString(symbols) return new RegExp(( // Comment line(s) '^(?:\\s*/[/*][^\n]*\n)*' + // Initial line and look behind for symbol `([ \t]*).{0,${MAX_SYMBOL_OFFSET}}(?<![([.\'"])` + // Symbol(s) with alternation `(?:${escapedSymbols})` + // Look ahead until brace '(?![.\'")\]]).*\\s*\\{' + // Indented lines '(?:\r?\n\\1\\s+.*)+' + // Until closing brace '[^}]*\\}' ), 'mg') }, symbolsToString) function symbolsToString(symbols: string[]) { return symbols.map(escapeSymbol).join('|') } function escapeSymbol(symbol: string) { let escaped = _.escapeRegExp(symbol) // Add word boundary only if alpha if (/^[\w*]/.test(symbol)) { escaped = `\\b${escaped}` } if (/[\w*]$/.test(symbol)) { escaped += '\\b' } return escaped.replace(/\\\*/g, '\\w*') } function scoreSymbol(text: string, path: string): number { let score = 0 const lines = text.split('\n') // Larger blocks are better score += lines.length * 2 const keywords = lines[0].match(BONUS_KEYWORDS) || [] score += keywords.length * 1e3 // Penalize depth const depth = path.split('/').length score -= depth * 10 if (path.endsWith('.d.ts')) { // They are good, pure declarations score += 100 } return Math.round(score) } function formatResult(block: Block, optimize = false): string { let header = `${block.startLine}:${block.endLine}:${block.path}` let { text } = block const { length } = text if (optimize) { text = optimizeCode(text) } if (env.CLI && env.DEBUG) { const elapsed = Math.round(process.uptime() * 1000) header += ` | Chars: ${length}->${text.length} | Index: ${block.index}-${block.index + length} | File: #${block.fileIndex} | Score: ${block.score} | Time: ${elapsed}ms` } return `=== ${header} ===\n${text}` } /** Optimize code by stripping comments and normalizing indentation */ export function optimizeCode(text: string): string { // Strip comments and clean up whitespace const cleaned = text .replace(/([^:])\/\/.*$/gm, '$1') // JS/TS line comments (avoid URLs with ://) .replace(/\/\*[\s\S]*?\*\//g, '') // JS/TS/CSS block comments .replace(/"""[\s\S]*?"""/g, '') // GraphQL/Python triple-quote strings .replace(/^\s*#.*/gm, '') // Bash/Python/GraphQL/YAML line comments .replace(/[ \t]+$/gm, '') // Trailing whitespace .replace(/^\s*$/gm, '') // Empty lines .replace(/\n\n+/g, '\n') // Multiple newlines const lines = cleaned.split('\n') const nonEmptyLines = lines.filter(line => line.trim()) if (!nonEmptyLines.length) return cleaned.trim() // Find base indentation and detect indentation token size const indentLengths = nonEmptyLines.map(line => line.match(/^\s*/)?.[0].length || 0) const minIndent = Math.min(...indentLengths) const adjustedLengths = indentLengths.map(len => len - minIndent).filter(len => len > 0) const indentSize = adjustedLengths.length ? Math.min(...adjustedLengths) : 2 // Convert to tab-based indentation return lines .map(line => { if (!line.trim()) return '' const spaces = line.match(/^\s*/)?.[0] || '' const content = line.slice(spaces.length) const remaining = spaces.length - minIndent return remaining > 0 ? '\t'.repeat(Math.floor(remaining / indentSize)) + content : content }) .join('\n') .trim() } interface MismatchEntry { symbols: string[] file_paths: string[] limit: number cwd: string timestamp: string } function collectMismatch(args: Pick<MismatchEntry, 'symbols' | 'file_paths' | 'limit'>): void { try { const mismatchFile = util.resolve('./mismatches.ndjson', util.REPO) const entry: MismatchEntry = { ...args, cwd: util.CWD, timestamp: new Date().toISOString(), } util.appendNdjson(mismatchFile, entry) } catch { // Silently fail to avoid breaking the main functionality } } export default readSymbol

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/flesler/mcp-tools'

If you have feedback or need assistance with the MCP directory API, please join our Discord server