import { RipgrepCommandBuilder } from '../commands/RipgrepCommandBuilder.js';
import { safeExec } from '../utils/exec.js';
import { getToolHints, getLargeFileWorkflowHints } from './hints.js';
import {
validateRipgrepQuery,
applyWorkflowMode,
type RipgrepQuery,
} from '../scheme/local_ripgrep.js';
import {
validateToolPath,
createErrorResult,
} from '../utils/toolHelpers.js';
import { RESOURCE_LIMITS } from '../constants.js';
import type {
SearchContentResult,
SearchStats,
RipgrepFileMatches,
RipgrepMatch,
} from '../types.js';
import { promises as fs } from 'fs';
import { join } from 'path';
/**
 * Run a ripgrep content search for `query` and shape the JSON-lines output
 * into a paginated SearchContentResult.
 *
 * Pipeline: workflow-mode defaults -> query/path validation -> shallow
 * directory size estimate (advisory chunking warnings) -> command build +
 * exec -> JSON parse -> mtime enrichment and newest-first sort -> maxFiles
 * cap -> file-level pagination -> per-file match pagination -> hint assembly.
 *
 * @param query - Ripgrep query options (validated at runtime below).
 * @returns A result whose `status` is 'empty', 'hasResults', or an error
 *          shape; the promise itself never rejects — failures are mapped to
 *          error results in the catch block.
 */
export async function searchContentRipgrep(
  query: RipgrepQuery
): Promise<SearchContentResult> {
  // Resolve workflow-mode defaults first so validation, execution, and the
  // catch-block error reporting all see the same effective query.
  const configuredQuery = applyWorkflowMode(query);
  try {
    // Schema-level validation of the query itself.
    const validation = validateRipgrepQuery(configuredQuery);
    if (!validation.isValid) {
      return createErrorResult(
        new Error(`Query validation failed: ${validation.errors.join(', ')}`),
        'LOCAL_RIPGREP',
        configuredQuery,
        { warnings: validation.warnings }
      ) as SearchContentResult;
    }
    const pathValidation = validateToolPath(configuredQuery, 'LOCAL_RIPGREP');
    if (!pathValidation.isValid) {
      // Carry the schema warnings through even when the path is rejected.
      return {
        ...pathValidation.errorResult,
        warnings: validation.warnings,
      } as SearchContentResult;
    }
    // Shallow size estimate; only used to attach advisory chunking warnings,
    // never to block the search.
    const dirStats = await estimateDirectoryStats(configuredQuery.path);
    const chunkingWarnings: string[] = [];
    if (dirStats.isLarge && !configuredQuery.filesOnly) {
      chunkingWarnings.push(
        `Large directory detected (~${Math.round(dirStats.estimatedSizeMB)}MB, ~${dirStats.estimatedFileCount} files). Consider chunking workflow for better performance.`
      );
      chunkingWarnings.push(...getLargeFileWorkflowHints('search'));
    }
    const builder = new RipgrepCommandBuilder();
    const { command, args } = builder.fromQuery(configuredQuery).build();
    // Debug logging is opt-in via the DEBUG env var.
    if (process.env.DEBUG) {
      console.log(`[RIPGREP] Command: ${command} ${args.join(' ')}`);
      if (validation.warnings.length > 0) {
        console.warn('[RIPGREP] Warnings:', validation.warnings);
      }
    }
    const result = await safeExec(command, args);
    // A failed exec or blank stdout is treated as "no matches" rather than an
    // error (ripgrep conventionally exits non-zero when nothing matched).
    if (!result.success || !result.stdout.trim()) {
      return {
        status: 'empty',
        searchPath: configuredQuery.path,
        searchEngine: 'rg',
        warnings: [...validation.warnings, ...chunkingWarnings],
        researchGoal: configuredQuery.researchGoal,
        reasoning: configuredQuery.reasoning,
        hints: [
          ...getToolHints('LOCAL_RIPGREP', 'empty'),
          'π‘ Refine your search:',
          ' - Try broadening the pattern (e.g., use partial words)',
          ' - Check case sensitivity (use caseInsensitive=true)',
          ' - Use noIgnore=true to search ignored files',
          ' - Use hidden=true to search hidden files',
        ].filter(Boolean),
      };
    }
    // Parse stdout; the parser's stats are discarded here — only the per-file
    // matches are used.
    const { files: parsedFiles } = parseRipgrepJson(result.stdout, configuredQuery);
    // Enrich each file with its mtime (undefined if stat fails) so results
    // can be ordered newest-first.
    const filesWithMetadata = await Promise.all(
      parsedFiles.map(async (f) => ({
        ...f,
        modifiedTime: await getFileModifiedTime(f.path),
      }))
    );
    // Newest-first; pairs where either mtime is unknown compare equal and
    // keep their relative order.
    filesWithMetadata.sort((a, b) => {
      if (!a.modifiedTime || !b.modifiedTime) return 0;
      return new Date(b.modifiedTime).getTime() - new Date(a.modifiedTime).getTime();
    });
    // Apply maxFiles limit if specified
    let limitedFiles = filesWithMetadata;
    let wasLimited = false;
    if (configuredQuery.maxFiles && filesWithMetadata.length > configuredQuery.maxFiles) {
      limitedFiles = filesWithMetadata.slice(0, configuredQuery.maxFiles);
      wasLimited = true;
    }
    // Totals are computed AFTER the maxFiles cap, so they describe the capped
    // result set, not everything ripgrep found.
    const totalFiles = limitedFiles.length;
    const totalMatches = limitedFiles.reduce((sum, f) => sum + f.matchCount, 0);
    // File-level pagination.
    const filesPerPage = configuredQuery.filesPerPage || RESOURCE_LIMITS.DEFAULT_FILES_PER_PAGE;
    const filePageNumber = configuredQuery.filePageNumber || 1;
    const totalFilePages = Math.ceil(totalFiles / filesPerPage);
    const startIdx = (filePageNumber - 1) * filesPerPage;
    const endIdx = Math.min(startIdx + filesPerPage, totalFiles);
    const paginatedFiles = limitedFiles.slice(startIdx, endIdx);
    // Match-level pagination within each file. Only the first match page is
    // emitted here (currentPage is hard-coded to 1 below).
    const matchesPerPage = configuredQuery.matchesPerPage || RESOURCE_LIMITS.DEFAULT_MATCHES_PER_PAGE;
    const finalFiles: RipgrepFileMatches[] = paginatedFiles.map(file => {
      const totalFileMatches = file.matches.length;
      const totalMatchPages = Math.ceil(totalFileMatches / matchesPerPage);
      // filesOnly mode strips match bodies entirely; matchCount is still the
      // true total for the file.
      const paginatedMatches = configuredQuery.filesOnly ? [] : file.matches.slice(0, matchesPerPage);
      return {
        path: file.path,
        matchCount: totalFileMatches,
        matches: paginatedMatches,
        modifiedTime: file.modifiedTime,
        pagination: !configuredQuery.filesOnly && totalFileMatches > matchesPerPage ? {
          currentPage: 1,
          totalPages: totalMatchPages,
          matchesPerPage,
          totalMatches: totalFileMatches,
          hasMore: totalMatchPages > 1,
        } : undefined,
      };
    });
    const paginationHints = [
      `π File page ${filePageNumber} of ${totalFilePages} (showing ${finalFiles.length} of ${totalFiles} files)`,
      `π Total: ${totalMatches} matches across ${totalFiles} files`,
      filePageNumber < totalFilePages ? `βΆοΈ Next files: Use filePageNumber=${filePageNumber + 1}` : 'β Final file page',
    ];
    // Add hint if files were limited
    if (wasLimited) {
      paginationHints.push(`β οΈ Results limited to ${configuredQuery.maxFiles} files (found ${filesWithMetadata.length} matching files)`);
    }
    const filesWithMoreMatches = finalFiles.filter(f => f.pagination?.hasMore);
    if (filesWithMoreMatches.length > 0) {
      paginationHints.push(`π‘ ${filesWithMoreMatches.length} file(s) have more matches. Use matchesPerPage to see more per file.`);
    }
    const refinementHints = _getStructuredResultSizeHints(finalFiles, configuredQuery, totalMatches);
    return {
      status: 'hasResults',
      files: finalFiles,
      // filesOnly callers get a zeroed top-level match count by design.
      totalMatches: configuredQuery.filesOnly ? 0 : totalMatches,
      totalFiles,
      searchPath: configuredQuery.path,
      searchEngine: 'rg',
      pagination: {
        currentPage: filePageNumber,
        totalPages: totalFilePages,
        filesPerPage,
        totalFiles,
        hasMore: filePageNumber < totalFilePages,
      },
      warnings: [...validation.warnings, ...chunkingWarnings],
      researchGoal: configuredQuery.researchGoal,
      reasoning: configuredQuery.reasoning,
      hints: [...paginationHints, ...refinementHints, ...getToolHints('LOCAL_RIPGREP', 'hasResults')],
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    // Provide helpful hints for output size limit errors
    // NOTE(review): this matches on the error message text from safeExec —
    // presumably its output-cap error; confirm the message stays in sync.
    if (errorMessage.includes('Output size limit exceeded')) {
      return {
        status: 'error',
        error: 'Search output too large. Refine your search to reduce results.',
        searchPath: configuredQuery.path,
        searchEngine: 'rg',
        researchGoal: configuredQuery.researchGoal,
        reasoning: configuredQuery.reasoning,
        hints: [
          'π‘ Output exceeded 10MB limit. Try these refinements:',
          ' 1. Use more specific pattern (current pattern may match too many files)',
          ' 2. Add type filter: type="js" or type="ts" to reduce scope',
          ' 3. Narrow the search path to a specific subdirectory',
          ' 4. Use FIND_FILES instead for metadata-only searches',
          '',
          'π For node_modules searches:',
          ' - Search specific packages: path="node_modules/@scope/package"',
          ' - Use type filter + specific pattern: type="ts", pattern="MyClass"',
          ' - Alternative: Use FIND_FILES with name="*specific-file*"',
          '',
          'β οΈ Note: maxFiles limits results AFTER parsing, not raw output size.',
          ' For huge directories, narrow the search path instead.',
          '',
          'Original error: ' + errorMessage,
        ],
      };
    }
    // Any other failure goes through the shared error-result helper.
    return createErrorResult(error, 'LOCAL_RIPGREP', configuredQuery) as SearchContentResult;
  }
}
/**
 * Build advisory hints about the size of a structured result set.
 *
 * More than 100 matches or more than 20 files triggers refinement tips
 * (conditionally suggesting type filters, directory exclusions, or a longer
 * pattern); 1-100 matches earns a "good size" note; any non-empty result
 * appends integration tips about offsets and truncation.
 */
function _getStructuredResultSizeHints(
  files: RipgrepFileMatches[],
  query: RipgrepQuery,
  totalMatches: number
): string[] {
  const hints: string[] = [];

  const isLargeResultSet = totalMatches > 100 || files.length > 20;
  if (isLargeResultSet) {
    hints.push('', 'π‘ Large result set detected. Refine your search:');
    const hasTypeFilter = Boolean(query.type) || Boolean(query.include);
    if (!hasTypeFilter) {
      hints.push(' - Narrow by file type: type="ts" or include=["*.{ts,tsx}"]');
    }
    if (!query.excludeDir?.length) {
      hints.push(' - Exclude directories: excludeDir=["test", "vendor", "generated"]');
    }
    if (query.pattern.length < 5) {
      hints.push(' - Use more specific pattern (current pattern is very short)');
    }
  }

  const hasResults = totalMatches > 0;
  if (hasResults && totalMatches <= 100) {
    hints.push('', 'β¨ Good result size - easy to analyze!');
  }
  if (hasResults) {
    const contentLength = query.matchContentLength || RESOURCE_LIMITS.DEFAULT_MATCH_CONTENT_LENGTH;
    hints.push(
      '',
      'π Integration tips:',
      ' - Use location.charOffset with FETCH_CONTENT for precise extraction',
      ' - β οΈ charOffset/charLength are BYTE offsets, not character offsets (matters for UTF-8 multi-byte chars)',
      ` - Match values truncated to ${contentLength} chars (configurable via matchContentLength: 1-800)`,
      ' - Line/column numbers provided for human reference'
    );
  }

  return hints;
}
/**
 * Produce a cheap, shallow estimate of a directory's size and file count so
 * callers can warn before searching very large trees.
 *
 * Root-level files are stat'ed exactly; first-level non-hidden
 * subdirectories are sampled by file count and priced at
 * RESOURCE_LIMITS.ESTIMATED_AVG_FILE_SIZE_BYTES each. Deeper nesting is
 * deliberately ignored — this is an estimator, not an exact du(1).
 *
 * Fix: the original awaited each entry's stat/readdir sequentially; these
 * are independent I/O calls, so they now run concurrently via Promise.all.
 *
 * @param dirPath - Directory to estimate.
 * @returns Estimated size in MB, estimated file count, and whether either
 *          exceeds the configured "large" thresholds. Any failure reading
 *          the root directory yields the zero/not-large fallback.
 */
async function estimateDirectoryStats(dirPath: string): Promise<{
  estimatedSizeMB: number;
  estimatedFileCount: number;
  isLarge: boolean;
}> {
  try {
    const rootEntries = await fs.readdir(dirPath, { withFileTypes: true });
    // Each entry contributes an independent (files, bytes) tally; gather
    // them concurrently instead of awaiting one entry at a time.
    const perEntry = await Promise.all(
      rootEntries.map(async (entry): Promise<{ files: number; bytes: number }> => {
        if (entry.isFile()) {
          try {
            const stats = await fs.stat(join(dirPath, entry.name));
            return { files: 1, bytes: stats.size };
          } catch {
            // Skip inaccessible files
            return { files: 0, bytes: 0 };
          }
        }
        if (entry.isDirectory() && !entry.name.startsWith('.')) {
          try {
            const subEntries = await fs.readdir(join(dirPath, entry.name), { withFileTypes: true });
            const subFileCount = subEntries.filter((e) => e.isFile()).length;
            return {
              files: subFileCount,
              bytes: subFileCount * RESOURCE_LIMITS.ESTIMATED_AVG_FILE_SIZE_BYTES,
            };
          } catch {
            // Skip inaccessible directories
            return { files: 0, bytes: 0 };
          }
        }
        // Hidden directories, symlinks, sockets, etc. are not counted.
        return { files: 0, bytes: 0 };
      })
    );
    const fileCount = perEntry.reduce((sum, e) => sum + e.files, 0);
    const totalSize = perEntry.reduce((sum, e) => sum + e.bytes, 0);
    const estimatedSizeMB = totalSize / (1024 * 1024);
    const isLarge = estimatedSizeMB > RESOURCE_LIMITS.MAX_RIPGREP_DIRECTORY_SIZE_MB ||
      fileCount > RESOURCE_LIMITS.MAX_FILE_COUNT_FOR_SEARCH;
    return {
      estimatedSizeMB,
      estimatedFileCount: fileCount,
      isLarge,
    };
  } catch {
    // Unreadable root: report "not large" so the search itself still runs.
    return {
      estimatedSizeMB: 0,
      estimatedFileCount: 0,
      isLarge: false,
    };
  }
}
/**
 * One matched line, as emitted by `rg --json` (type: "match").
 * `absolute_offset` is a BYTE offset, not a character offset — see the note
 * inside parseRipgrepJson.
 */
interface RipgrepJsonMatch {
  type: 'match';
  data: {
    path: { text: string };
    // Full text of the matched line.
    lines: { text: string };
    line_number: number;
    absolute_offset: number;
    // Per-line submatch spans; start/end are offsets within the line
    // (used downstream as the column and match length).
    submatches: Array<{
      match: { text: string };
      start: number;
      end: number;
    }>;
  };
}
/** A context line around a match (type: "context"), emitted when -A/-B/-C flags are in effect. */
interface RipgrepJsonContext {
  type: 'context';
  data: {
    path: { text: string };
    lines: { text: string };
    line_number: number;
    absolute_offset: number;
  };
}
/** Marks the start of results for one file (type: "begin"). */
interface RipgrepJsonBegin {
  type: 'begin';
  data: {
    path: { text: string };
  };
}
/** Marks the end of results for one file (type: "end"), with optional per-file stats. */
interface RipgrepJsonEnd {
  type: 'end';
  data: {
    path: { text: string };
    stats?: {
      elapsed: { human: string };
      searches: number;
      searches_with_match: number;
    };
  };
}
/** Final whole-run summary (type: "summary"); source of the SearchStats fields. */
interface RipgrepJsonSummary {
  type: 'summary';
  data: {
    elapsed_total: { human: string };
    stats: {
      elapsed: { human: string };
      searches: number;
      searches_with_match: number;
      bytes_searched: number;
      bytes_printed: number;
      matched_lines: number;
      matches: number;
    };
  };
}
/** Discriminated union over every `rg --json` message kind this parser handles. */
type RipgrepJsonMessage =
  | RipgrepJsonMatch
  | RipgrepJsonContext
  | RipgrepJsonBegin
  | RipgrepJsonEnd
  | RipgrepJsonSummary;
/**
 * Parse `rg --json` line-delimited output into per-file match lists plus the
 * run summary stats.
 *
 * Messages are processed in stream order: context lines are cached per file,
 * and when a match arrives, previously-seen context lines (i.e. BEFORE
 * context only) are prepended to its value. Context lines that arrive after
 * a match are cached but never attached to that match.
 *
 * @param jsonOutput - Raw stdout from `rg --json` (one JSON object per line).
 * @param query - Used for context-line counts and match truncation length.
 * @returns Per-file matches (in first-seen file order) and summary stats
 *          (empty object if no summary message was present).
 */
function parseRipgrepJson(jsonOutput: string, query: RipgrepQuery): {
  files: RipgrepFileMatches[];
  stats: SearchStats;
} {
  const lines = jsonOutput.trim().split('\n').filter(Boolean);
  // path -> accumulated matches plus context lines keyed by line number.
  const fileMap = new Map<string, {
    matches: RipgrepMatch[];
    contexts: Map<number, string>;
  }>();
  let stats: SearchStats = {};
  for (const line of lines) {
    if (!line.trim()) continue;
    // ripgrep can interleave non-JSON diagnostics on stdout; skip anything
    // that doesn't look like a JSON object rather than failing the parse.
    if (!line.trim().startsWith('{')) {
      if (process.env.DEBUG) {
        console.warn(`[RIPGREP] Skipping non-JSON line: ${line.substring(0, 100)}`);
      }
      continue;
    }
    try {
      const msg: RipgrepJsonMessage = JSON.parse(line);
      if (msg.type === 'match') {
        const path = msg.data.path.text;
        const lineText = msg.data.lines.text;
        const lineNumber = msg.data.line_number;
        const absoluteOffset = msg.data.absolute_offset;
        if (!fileMap.has(path)) {
          fileMap.set(path, { matches: [], contexts: new Map() });
        }
        const fileEntry = fileMap.get(path)!;
        let matchValue = lineText;
        const contextLines: string[] = [];
        // Prepend up to N already-seen context lines (contextLines takes
        // precedence over beforeContext when both are set).
        if (query.contextLines || query.beforeContext) {
          const contextBefore = query.contextLines || query.beforeContext || 0;
          for (let i = contextBefore; i > 0; i--) {
            const contextLine = fileEntry.contexts.get(lineNumber - i);
            if (contextLine) contextLines.push(contextLine);
          }
        }
        contextLines.push(lineText);
        matchValue = contextLines.join('\n');
        // Truncate the (context + match) value to the configured length.
        const maxLength = query.matchContentLength || RESOURCE_LIMITS.DEFAULT_MATCH_CONTENT_LENGTH;
        if (matchValue.length > maxLength) {
          matchValue = matchValue.substring(0, maxLength) + '...';
        }
        // Column comes from the first submatch; 0 if none were reported.
        const column = msg.data.submatches.length > 0 ? msg.data.submatches[0].start : 0;
        // NOTE: absoluteOffset is a BYTE offset from ripgrep, not a character offset!
        // For UTF-8 files with multi-byte characters, byte offset β character offset.
        // We keep the name "charOffset" for backward compatibility, but it's actually bytes.
        // See type definition in src/types.ts for detailed warning.
        const charOffset = absoluteOffset;
        // Length of the first submatch; falls back to the whole line.
        const matchLength = msg.data.submatches.length > 0 ? msg.data.submatches[0].end - msg.data.submatches[0].start : lineText.length;
        fileEntry.matches.push({
          value: matchValue,
          location: { charOffset, charLength: matchLength },
          line: lineNumber,
          column,
        });
      } else if (msg.type === 'context') {
        // Cache context lines by line number for later prepending.
        const path = msg.data.path.text;
        const lineNumber = msg.data.line_number;
        const lineText = msg.data.lines.text;
        if (!fileMap.has(path)) {
          fileMap.set(path, { matches: [], contexts: new Map() });
        }
        fileMap.get(path)!.contexts.set(lineNumber, lineText);
      } else if (msg.type === 'summary') {
        // Whole-run stats; last summary wins if several appear.
        stats = {
          matchCount: msg.data.stats.matches,
          matchedLines: msg.data.stats.matched_lines,
          filesMatched: msg.data.stats.searches_with_match,
          filesSearched: msg.data.stats.searches,
          bytesSearched: msg.data.stats.bytes_searched,
          searchTime: msg.data.stats.elapsed.human,
        };
      }
      // 'begin' and 'end' messages carry nothing this parser needs.
    } catch (err) {
      // Malformed JSON lines are skipped; surfaced only under DEBUG.
      if (process.env.DEBUG) {
        console.warn('[RIPGREP] Failed to parse JSON line:', line.substring(0, 100), err);
      }
    }
  }
  // Flatten the map into the result shape; Map preserves insertion order,
  // so files appear in the order ripgrep first reported them.
  const files: RipgrepFileMatches[] = Array.from(fileMap.entries()).map(
    ([path, entry]) => ({
      path,
      matchCount: entry.matches.length,
      matches: entry.matches,
    })
  );
  return { files, stats };
}
/**
 * Probe for the `rg` binary by running `rg --version` with a short timeout.
 *
 * @returns true when the probe command succeeds; false on failure or throw.
 */
export async function isRipgrepAvailable(): Promise<boolean> {
  try {
    const { success } = await safeExec('rg', ['--version'], { timeout: 1000 });
    return success;
  } catch {
    return false;
  }
}
/**
 * Fetch the installed ripgrep version string (first line of `rg --version`).
 *
 * @returns The version line, or null when the command fails, times out,
 *          throws, or produces an empty first line.
 */
export async function getRipgrepVersion(): Promise<string | null> {
  try {
    const probe = await safeExec('rg', ['--version'], { timeout: 1000 });
    if (!probe.success) {
      return null;
    }
    const [firstLine] = probe.stdout.split('\n');
    return firstLine || null;
  } catch {
    return null;
  }
}
/**
 * Read a path's last-modified time as an ISO-8601 string.
 *
 * @param filePath - File (or directory) to stat.
 * @returns The mtime in ISO format, or undefined if the stat fails.
 */
async function getFileModifiedTime(filePath: string): Promise<string | undefined> {
  try {
    const { mtime } = await fs.stat(filePath);
    return mtime.toISOString();
  } catch {
    return undefined;
  }
}