Local Explorer MCP

Overview Schema Related Servers Score Discussions

local_ripgrep.ts•20 KiB

/**
 * Zod schema for local_ripgrep tool
 * Optimized ripgrep implementation with performance enhancements
 */

import { z } from 'zod';
import { BaseQuerySchema, createBulkQuerySchema } from './baseSchema.js';
import { TOOL_NAMES } from '../constants.js';

/**
 * Tool description for MCP registration
 */
export const LOCAL_RIPGREP_DESCRIPTION = `PRIMARY SEARCH - Fast ripgrep pattern matching (STRUCTURED OUTPUT)

PURPOSE: Find functions, classes, patterns in code. Returns structured matches with precise locations.

USE_WHEN: Know code patterns | Need fast discovery | Want advanced regex

DECISION_TREE:
2. UNDERSTAND CODE PATTERNS
   └─► RIPGREP (mode=discovery, pattern, type filter)
       ├─► Found → FETCH_CONTENT (use location.charOffset for precision)
       └─► Empty → VIEW_STRUCTURE (explore dirs)

WORKFLOW:
  Discovery (filesOnly) → Detailed (structured matches) → Read (FETCH_CONTENT with charOffset)

OUTPUT FORMAT (TWO-LEVEL PAGINATION):
  Returns structured matches grouped by file:
  {
    "files": [{
      "path": "/absolute/path",
      "matchCount": 25, // Total matches in this file
      "matches": [/* First 10 matches by default */],
      "modifiedTime": "2025-01-04T10:00:00Z",
      "pagination": { // Per-file pagination (if more than matchesPerPage)
        "currentPage": 1,
        "totalPages": 3,
        "matchesPerPage": 10,
        "hasMore": true
      }
    }],
    "totalMatches": 250,
    "totalFiles": 50,
    "pagination": { // File-level pagination
      "currentPage": 1,
      "totalPages": 5,
      "filesPerPage": 10,
      "totalFiles": 50,
      "hasMore": true
    }
  }

PAGINATION MODEL:
  - Files sorted by modification time (most recent first)
  - File-level: Control with filesPerPage (default 10) + filePageNumber
  - Match-level: Control with matchesPerPage (default 10) per file
  - Each file shows first matchesPerPage matches with pagination info

LARGE FILES/CODEBASES:
  Use filesOnly first, then FETCH_CONTENT with location.charOffset
  Pattern: Discovery → Structured matches → Precise extraction
  Match values automatically truncated (default 200 chars, configurable 1-800 via matchContentLength)

⚠️ BYTE OFFSETS:
  location.charOffset/charLength are BYTE offsets, not character offsets!
  - For ASCII files: byte offset = character offset ✓
  - For UTF-8 with multi-byte chars (é, 中, emoji): byte offset ≠ character offset
  - Example: "Hello 世界 World" → "World" is at byte 13 but character 9
  - FETCH_CONTENT uses byte offsets, so integration works directly

MODES:
  - discovery: filesOnly=true [fastest, minimal output]
  - paginated: filesPerPage=10, matchesPerPage=10
  - detailed: contextLines=3, filesPerPage=10, matchesPerPage=20

PATTERN_TYPES:
  - smartCase (default): lowercase=case-insensitive, mixed=case-sensitive
  - fixedString: literal string, no regex
  - perlRegex: PCRE2 with lookahead/backreferences

FILTERS:
  - type="ts": File type (preferred)
  - include=["*.{ts,tsx}"]: Globs (use {} for better performance)
  - excludeDir=["node_modules"]: Skip dirs

KEY_PARAMS:
  - filesOnly: Just paths [BEST for discovery]
  - maxFiles: Limit total files (1-1000, stops after this many files)
  - filesPerPage: Files per page (default 10, max 50)
  - filePageNumber: File page number (default 1)
  - matchesPerPage: Matches per file (default 10, max 100)
  - matchContentLength: Max chars per match (default 200, max 800)
  - contextLines: Context included in match value

GOTCHAS:
  - Files always sorted by modification time (most recent first)
  - Match values truncated automatically (default 200 chars, use matchContentLength to adjust)
  - Per-file pagination shows first matchesPerPage matches with info if more available
  - multiline mode → very slow
  - Separate globs ["*.ts","*.tsx"] → slower than ["*.{ts,tsx}"]

NEXT_STEP:
  hasResults → FETCH_CONTENT charOffset (STRONG) | FIND_FILES modifiedWithin (MODERATE)
  empty → VIEW_STRUCTURE (MODERATE) | Broaden: noIgnore, hidden

EXAMPLES:
  mode="discovery", pattern="validateUser", type="ts" # Find files (sorted by date)
  mode="paginated", pattern="TODO:", filePageNumber=2 # Page 2 of files
  pattern="export.*function", filesPerPage=5, matchesPerPage=15 # Custom pagination
  pattern="(?<=export )\\w+", perlRegex=true # Advanced regex
  pattern="import", path="/node_modules", filesOnly=true, maxFiles=50 # Limit large dirs`;

/**
 * Ripgrep search content query schema
 * Optimized based on performance research
 */
export const RipgrepQuerySchema = BaseQuerySchema.extend({
  // REQUIRED FIELDS
  pattern: z
    .string()
    .min(1)
    .describe('Regex pattern or string to search (use fixedString for literals)'),

  path: z
    .string()
    .describe('Root directory to search'),

  // WORKFLOW MODE (recommended presets)
  // Keep this text in sync with the presets in applyWorkflowMode below.
  mode: z
    .enum(['discovery', 'paginated', 'detailed'])
    .optional()
    .describe(
      'Search workflow mode (auto-configures optimal settings):\n' +
        ' - "discovery" - Fast file discovery (sets filesOnly=true, minimal output)\n' +
        ' - "paginated" - Content with pagination (sets filesPerPage=10, matchesPerPage=10)\n' +
        ' - "detailed" - Full matches with context (sets contextLines=3, filesPerPage=10, matchesPerPage=20)\n' +
        'NOTE: Manual parameters override mode settings.'
    ),

  // PATTERN MODES (mutually exclusive - validated at runtime)
  fixedString: z
    .boolean()
    .optional()
    .describe('Treat pattern as literal string (faster, prevents regex injection). When enabled, ripgrep uses -F flag which treats all characters literally - no regex interpretation, no escaping needed.'),

  perlRegex: z
    .boolean()
    .optional()
    .describe('Use PCRE2 regex engine (advanced: lookahead, backreferences, named groups)'),

  // CASE SENSITIVITY (smart case recommended)
  smartCase: z
    .boolean()
    .optional()
    .default(true)
    .describe('Smart case: lowercase pattern - case-insensitive, otherwise - case-sensitive (RECOMMENDED default)'),

  caseInsensitive: z
    .boolean()
    .optional()
    .describe('Always case-insensitive (overrides smartCase)'),

  caseSensitive: z
    .boolean()
    .optional()
    .describe('Always case-sensitive (overrides smartCase and caseInsensitive)'),

  // MATCH BEHAVIOR
  wholeWord: z
    .boolean()
    .optional()
    .describe('Match whole words only (equivalent to \\b boundaries)'),

  invertMatch: z
    .boolean()
    .optional()
    .describe('Invert matching: show lines that DON\'T match'),

  // FILE FILTERING (optimized strategies)
  type: z
    .string()
    .optional()
    .describe('File type filter (e.g., "ts", "js", "py", "rust") - PREFERRED over globs for known types. Use rg --type-list to see all types'),

  include: z
    .array(z.string())
    .optional()
    .describe('Include globs. TIP: Use alternatives ["*.{ts,tsx}"] instead of ["*.ts","*.tsx"] for better performance'),

  exclude: z
    .array(z.string())
    .optional()
    .describe('Exclude globs (e.g., ["*.test.*", "*.spec.*"])'),

  excludeDir: z
    .array(z.string())
    .optional()
    .describe('Exclude directories (e.g., ["node_modules", ".git", "dist"])'),

  // IGNORE CONTROL (gitignore behavior)
  noIgnore: z
    .boolean()
    .optional()
    .describe('Don\'t respect .gitignore files (search everything)'),

  hidden: z
    .boolean()
    .optional()
    .describe('Search hidden files and directories (starting with .)'),

  followSymlinks: z
    .boolean()
    .optional()
    .describe('Follow symbolic links (default: false for security)'),

  // OUTPUT CONTROL (critical for performance)
  filesOnly: z
    .boolean()
    .optional()
    .describe('List matching files only (RECOMMENDED for discovery: most token-efficient)'),

  filesWithoutMatch: z
    .boolean()
    .optional()
    .describe('List files WITHOUT matches (inverse of filesOnly)'),

  count: z
    .boolean()
    .optional()
    .describe('Count matches per file (shows "file:count" format)'),

  countMatches: z
    .boolean()
    .optional()
    .describe('Count total matches across all occurrences (vs count which is per-line)'),

  // CONTEXT & LINE CONTROL (semantic: defines WHAT to extract)
  contextLines: z
    .number()
    .int()
    .min(0)
    .max(50)
    .optional()
    .describe('Context lines around matches (0-50). Semantic: defines WHAT to extract. WARNING: Multiplies output significantly! Use charLength for pagination (defines HOW MUCH to return)'),

  beforeContext: z
    .number()
    .int()
    .min(0)
    .max(50)
    .optional()
    .describe('Lines before match (0-50). Semantic parameter.'),

  afterContext: z
    .number()
    .int()
    .min(0)
    .max(50)
    .optional()
    .describe('Lines after match (0-50). Semantic parameter.'),

  matchContentLength: z
    .number()
    .int()
    .min(1)
    .max(800)
    .optional()
    .default(200)
    .describe('Maximum characters per match value (1-800, default 200). Controls truncation of match content for token efficiency.'),

  lineNumbers: z
    .boolean()
    .optional()
    .default(true)
    .describe('Show line numbers (default: true)'),

  column: z
    .boolean()
    .optional()
    .describe('Show column numbers (useful for IDE integration)'),

  // MATCH LIMITING (prevents output explosion)
  maxMatchesPerFile: z
    .number()
    .int()
    .min(1)
    .max(100)
    .optional()
    .describe('Max matches per file (legacy, use matchesPerPage instead)'),

  maxFiles: z
    .number()
    .int()
    .min(1)
    .max(1000)
    .optional()
    .describe('Max files to search (1-1000, stops after this many files with matches)'),

  // TWO-LEVEL PAGINATION (file-level + per-file matches)
  filesPerPage: z
    .number()
    .int()
    .min(1)
    .max(50)
    .optional()
    .default(10)
    .describe('Number of files per page (default 10, max 50). Files are sorted by modification time (most recent first).'),

  filePageNumber: z
    .number()
    .int()
    .min(1)
    .optional()
    .default(1)
    .describe('File page number to retrieve (1-based, default 1). Use with filesPerPage for file pagination.'),

  matchesPerPage: z
    .number()
    .int()
    .min(1)
    .max(100)
    .optional()
    .default(10)
    .describe('Number of matches to show per file (default 10, max 100). Each file shows up to this many matches with pagination info.'),

  // ADVANCED FEATURES (use with caution)
  multiline: z
    .boolean()
    .optional()
    .describe('Enable multiline mode (WARNING: slower, memory-intensive, loads entire file into memory). Only use when pattern genuinely spans lines'),

  multilineDotall: z
    .boolean()
    .optional()
    .describe('Make . match newlines in multiline mode (use with multiline=true)'),

  binaryFiles: z
    .enum(['text', 'without-match', 'binary'])
    .optional()
    .default('without-match')
    .describe('Binary file handling: "text" (search as text), "without-match" (skip, default), "binary" (detect and continue)'),

  // OUTPUT FORMAT & METADATA
  includeStats: z
    .boolean()
    .optional()
    .default(true)
    .describe('Include search statistics (matches, files searched, bytes searched, time). Default: true'),

  jsonOutput: z
    .boolean()
    .optional()
    .describe('Output in JSON format (NDJSON - newline delimited, structured data for programmatic parsing)'),

  vimgrepFormat: z
    .boolean()
    .optional()
    .describe('Output in vim-compatible format (file:line:col:text)'),

  // STRUCTURED DATA (NEW - Enhanced response fields)
  parseStructured: z
    .boolean()
    .optional()
    .default(true)
    .describe('Parse JSON output into structured matches (requires jsonOutput). Default: true'),

  includeDistribution: z
    .boolean()
    .optional()
    .default(true)
    .describe('Calculate and include match distribution across files. Default: true'),

  includeStructured: z
    .boolean()
    .optional()
    .default(false)
    .describe('Include structuredMatches array (verbose, rarely needed). Default: false for efficiency'),

  // PERFORMANCE TUNING
  threads: z
    .number()
    .int()
    .min(1)
    .max(32)
    .optional()
    .describe('Number of threads to use (default: auto-detect based on CPU cores)'),

  mmap: z
    .boolean()
    .optional()
    .describe('Use memory mapping (default: true, faster on large files)'),

  noUnicode: z
    .boolean()
    .optional()
    .describe(
      'Disable Unicode mode for all patterns. ' +
        'PERFORMANCE: Faster searches, but \\w only matches ASCII [a-zA-Z0-9_], not Unicode letters. ' +
        'TRADEOFF: Won\'t match Unicode identifiers (café, 世界, etc.). ' +
        'Useful for pure ASCII codebases or when maximum performance is needed.'
    ),

  encoding: z
    .string()
    .optional()
    .describe(
      'Text encoding to use. ' +
        'Values: "auto" (default, BOM detection), "none" (no encoding detection, raw bytes), ' +
        'or specific encoding like "utf-8", "utf-16le", "iso-8859-1", etc. ' +
        'PERFORMANCE: Using "none" can be 10-30% faster on large files by skipping BOM detection. ' +
        'See: https://encoding.spec.whatwg.org/#names'
    ),

  // SORTING
  sort: z
    .enum(['path', 'modified', 'accessed', 'created'])
    .optional()
    .default('path')
    .describe(
      'Sort results for consistent output. ' +
        'Options: "path" (default), "modified", "accessed", "created". ' +
        '⚠️ PERFORMANCE: Sorting disables parallelism and can be 3-10x slower on large directories. ' +
        'Only use when result order matters (e.g., tests, deterministic output).'
    ),

  sortReverse: z
    .boolean()
    .optional()
    .describe('Reverse sort order'),

  // UTILITY FLAGS
  noMessages: z
    .boolean()
    .optional()
    .describe(
      'Suppress error messages (e.g., permission denied, file too large). ' +
        'Useful for automated scripts where errors are expected and should be silent.'
    ),

  lineRegexp: z
    .boolean()
    .optional()
    .describe(
      'Only show matches for entire lines (equivalent to wrapping pattern with ^...$). ' +
        'Example: pattern "foo" with lineRegexp=true only matches line "foo", not "foobar".'
    ),

  passthru: z
    .boolean()
    .optional()
    .describe(
      'Print all lines, whether they match or not, with matches highlighted. ' +
        'Useful for viewing context while highlighting matches. ' +
        'WARNING: Can produce very large output on large files. Conflicts with filesOnly.'
    ),

  debug: z
    .boolean()
    .optional()
    .describe(
      'Show debug information: why files were ignored, configuration loaded, ' +
        'search strategy used, and performance characteristics. ' +
        'Useful for troubleshooting unexpected results or performance issues. ' +
        'Debug output goes to stderr.'
    ),
});

/**
 * Bulk ripgrep search schema (1-10 queries per call)
 */
export const BulkRipgrepQuerySchema = createBulkQuerySchema(
  TOOL_NAMES.LOCAL_RIPGREP || 'local_ripgrep',
  RipgrepQuerySchema
);

export type RipgrepQuery = z.infer<typeof RipgrepQuerySchema>;
export type BulkRipgrepQuery = z.infer<typeof BulkRipgrepQuerySchema>;

/**
 * Apply workflow mode presets to a query.
 *
 * Each preset only fills in values the query leaves undefined, so explicit
 * parameters always win. A property that is present but `undefined` no longer
 * erases the preset (a plain object spread would copy the `undefined` over it).
 *
 * NOTE(review): `filesPerPage`, `matchesPerPage` and `smartCase` carry schema
 * defaults, so on a query that has already been parsed by RipgrepQuerySchema
 * they are always defined and the presets for those fields (e.g.
 * matchesPerPage=20 in "detailed") cannot take effect. Confirm whether callers
 * apply the mode before or after parsing.
 *
 * @param query - Query to complete with mode presets.
 * @returns The same object when no mode is set; otherwise a new object with
 *   the presets filled in.
 */
export function applyWorkflowMode(query: RipgrepQuery): RipgrepQuery {
  if (!query.mode) {
    return query;
  }

  switch (query.mode) {
    case 'discovery':
      // Workflow A: Fast file discovery
      return {
        ...query,
        filesOnly: query.filesOnly ?? true,
        smartCase: query.smartCase ?? true,
      };

    case 'paginated':
      // Workflow B: Paginated content with sensible limits
      return {
        ...query,
        filesPerPage: query.filesPerPage ?? 10,
        matchesPerPage: query.matchesPerPage ?? 10,
        smartCase: query.smartCase ?? true,
      };

    case 'detailed':
      // Full matches with context
      return {
        ...query,
        contextLines: query.contextLines ?? 3,
        filesPerPage: query.filesPerPage ?? 10,
        matchesPerPage: query.matchesPerPage ?? 20,
        smartCase: query.smartCase ?? true,
      };

    default:
      // Unrecognized mode (only reachable with unvalidated input): no presets.
      return { ...query };
  }
}

/** Matches a bare single-extension glob such as "*.ts" (no braces, no path). */
const SIMPLE_EXTENSION_GLOB = /^\*\.[a-zA-Z0-9]+$/;

/** Captures the lowercase extension of a bare glob such as "*.rs". */
const LOWERCASE_EXTENSION_GLOB = /^\*\.([a-z]+)$/;

/**
 * File extension → ripgrep `--type` name, used for the "use type= instead of
 * include" hint. A Map (not an object literal) so that extensions such as
 * "constructor" cannot hit inherited properties.
 */
const EXTENSION_TO_RG_TYPE: ReadonlyMap<string, string> = new Map([
  ['ts', 'ts'],
  ['js', 'js'],
  ['py', 'py'],
  ['rs', 'rust'],
  ['go', 'go'],
  ['java', 'java'],
  ['cpp', 'cpp'],
  ['c', 'c'],
]);

/**
 * Validation helper: Check for common misconfigurations.
 *
 * @param query - Query to inspect; it is not modified.
 * @returns `errors` for option combinations that cannot be honored,
 *   `warnings` for advisory notes, and `isValid`, which is true exactly when
 *   `errors` is empty.
 */
export function validateRipgrepQuery(query: RipgrepQuery): {
  isValid: boolean;
  warnings: string[];
  errors: string[];
} {
  const warnings: string[] = [];
  const errors: string[] = [];

  // Mutual exclusivity checks
  if (query.fixedString && query.perlRegex) {
    errors.push('fixedString and perlRegex are mutually exclusive. Choose one.');
  }

  if (query.filesOnly && query.count) {
    warnings.push('filesOnly and count are mutually exclusive. Using filesOnly.');
  }

  if (query.filesOnly && query.filesWithoutMatch) {
    errors.push('filesOnly and filesWithoutMatch are mutually exclusive. Choose one.');
  }

  if (query.passthru && query.filesOnly) {
    errors.push('passthru and filesOnly are mutually exclusive.');
  }

  if (query.passthru) {
    warnings.push(
      'passthru prints ALL lines from matched files. ' +
        'This can produce very large output. Consider using context lines instead.'
    );
  }

  if (query.lineRegexp && query.wholeWord) {
    warnings.push('lineRegexp and wholeWord both specified. lineRegexp takes precedence.');
  }

  // Case sensitivity
  const caseModes = [query.caseInsensitive, query.caseSensitive, query.smartCase].filter(Boolean);
  if (caseModes.length > 1) {
    warnings.push('Multiple case sensitivity modes specified. Priority: caseSensitive > caseInsensitive > smartCase');
  }

  // Context: warn when any context setting exceeds 2 lines, and report the
  // largest one so the message reflects the setting that triggered it.
  const largestContext = Math.max(
    query.contextLines ?? 0,
    query.beforeContext ?? 0,
    query.afterContext ?? 0
  );
  if (largestContext > 2) {
    const contentLength = query.matchContentLength || 200;
    warnings.push(
      `Context lines enabled (${largestContext} lines). ` +
        `Match values will include context and be truncated to ${contentLength} chars. Use matchesPerPage for pagination.`
    );
  }

  if (query.multiline) {
    warnings.push(
      'Multiline mode is memory-intensive and slower. ' +
        'Entire files are loaded into memory. Only use when pattern genuinely spans multiple lines.'
    );
  }

  if (query.perlRegex && !query.noUnicode && query.multiline) {
    warnings.push(
      'PERFORMANCE TIP: For fastest PCRE2 multiline searches on ASCII codebases, ' +
        'consider using noUnicode=true (2-3x faster).'
    );
  }

  // Output limiting: matchesPerPage is the current per-file limit and
  // maxMatchesPerFile its legacy equivalent; either one bounds the output.
  const hasOutputLimit =
    Boolean(query.filesOnly) ||
    Boolean(query.count) ||
    Boolean(query.maxMatchesPerFile) ||
    Boolean(query.matchesPerPage);
  if (!hasOutputLimit) {
    warnings.push(
      'No output limiting specified. Consider setting matchesPerPage to control output size.'
    );
  }

  // Several bare "*.ext" globs can be merged into one brace glob.
  if (query.include && query.include.length > 1) {
    const allSimpleGlobs = query.include.every(glob => SIMPLE_EXTENSION_GLOB.test(glob));
    if (allSimpleGlobs) {
      // Every glob is "*.<ext>", so dropping the 2-char prefix leaves the extension.
      const exts = query.include.map(glob => glob.slice(2)).join(',');
      warnings.push(
        `TIP: Consolidate globs for better performance: include=["*.{${exts}}"] instead of separate globs.`
      );
    }
  }

  // Suggest the built-in type filter when the first include glob is a known extension.
  if (query.include && !query.type) {
    const extension = query.include[0]?.match(LOWERCASE_EXTENSION_GLOB)?.[1];
    const suggestedType = extension === undefined ? undefined : EXTENSION_TO_RG_TYPE.get(extension);
    if (suggestedType !== undefined) {
      warnings.push(
        `TIP: Use type="${suggestedType}" instead of include glob for cleaner syntax.`
      );
    }
  }

  return {
    isValid: errors.length === 0,
    warnings,
    errors,
  };
}

/**
 * Helper: Estimate match count for pagination recommendations.
 *
 * The estimate is `fileCount` times `query.maxMatchesPerFile`, falling back
 * to 3 matches per file when that limit is not set. Pagination is recommended
 * once the estimate exceeds 100 matches.
 *
 * @param query - Query whose limits drive the estimate.
 * @param fileCount - Number of files expected to contain matches.
 * @returns The estimate, whether pagination is advised, and a recommendation
 *   string (empty when there is nothing to recommend).
 */
export function estimateMatchCount(query: RipgrepQuery, fileCount: number): {
  estimatedMatches: number;
  needsPagination: boolean;
  recommendation: string;
} {
  const matchesPerFile = query.maxMatchesPerFile || 3;
  const estimatedMatches = fileCount * matchesPerFile;
  const needsPagination = estimatedMatches > 100;

  let recommendation = '';
  if (needsPagination) {
    recommendation = `Estimated ${estimatedMatches} matches. Use matchesPerPage parameter for pagination.`;
  } else if (query.filesOnly) {
    recommendation = 'Using filesOnly mode - optimal for discovery (~25x more token-efficient)';
  }

  return {
    estimatedMatches,
    needsPagination,
    recommendation,
  };
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/bgauryy/local-explorer-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

local_ripgrep.ts•20 KiB