import { spawn, ChildProcess } from 'child_process';
import path from 'path';
import fs from 'fs/promises';
import { validatePath } from './tools/filesystem.js';
import { capture } from './utils/capture.js';
import { getRipgrepPath } from './utils/ripgrep-resolver.js';
import { isExcelFile } from './utils/files/index.js';
export interface SearchResult {
file: string;
line?: number;
match?: string;
type: 'file' | 'content';
}
export interface SearchSession {
id: string;
process: ChildProcess;
results: SearchResult[];
isComplete: boolean;
isError: boolean;
error?: string;
startTime: number;
lastReadTime: number;
options: SearchSessionOptions;
buffer: string; // For processing incomplete JSON lines
totalMatches: number;
totalContextLines: number; // Track context lines separately
wasIncomplete?: boolean; // NEW: Track if search was incomplete due to permissions/access issues
}
export interface SearchSessionOptions {
rootPath: string;
pattern: string;
searchType: 'files' | 'content';
filePattern?: string;
ignoreCase?: boolean;
maxResults?: number;
includeHidden?: boolean;
contextLines?: number;
timeout?: number;
earlyTermination?: boolean; // Stop search early when exact filename match is found
literalSearch?: boolean; // Force literal string matching (-F flag) instead of regex
}
/**
* Search Session Manager - handles ripgrep processes like terminal sessions
* Supports both file search and content search with progressive results
*/export class SearchManager {
private sessions = new Map<string, SearchSession>();
private sessionCounter = 0;
/**
* Start a new search session (like start_process)
* Returns immediately with initial state and results
*/
async startSearch(options: SearchSessionOptions): Promise<{
sessionId: string;
isComplete: boolean;
isError: boolean;
results: SearchResult[];
totalResults: number;
runtime: number;
}> {
const sessionId = `search_${++this.sessionCounter}_${Date.now()}`;
// Validate path first
const validPath = await validatePath(options.rootPath);
// Build ripgrep arguments
const args = this.buildRipgrepArgs({ ...options, rootPath: validPath });
// Get ripgrep path with fallback resolution
let rgPath: string;
try {
rgPath = await getRipgrepPath();
} catch (err) {
throw new Error(`Failed to locate ripgrep binary: ${err instanceof Error ? err.message : String(err)}`);
}
// Start ripgrep process
const rgProcess = spawn(rgPath, args);
if (!rgProcess.pid) {
throw new Error('Failed to start ripgrep process');
}
// Create session
const session: SearchSession = {
id: sessionId,
process: rgProcess,
results: [],
isComplete: false,
isError: false,
startTime: Date.now(),
lastReadTime: Date.now(),
options,
buffer: '',
totalMatches: 0,
totalContextLines: 0
};
this.sessions.set(sessionId, session);
// Set up process event handlers
this.setupProcessHandlers(session);
// Start cleanup interval now that we have a session
startCleanupIfNeeded();
// Set up timeout if specified and auto-terminate
// For exact filename searches, use a shorter default timeout
const timeoutMs = options.timeout ?? (this.isExactFilename(options.pattern) ? 1500 : undefined);
let killTimer: NodeJS.Timeout | null = null;
if (timeoutMs) {
killTimer = setTimeout(() => {
if (!session.isComplete && !session.process.killed) {
session.process.kill('SIGTERM');
}
}, timeoutMs);
}
// Clear timer on process completion
session.process.once('close', () => {
if (killTimer) {
clearTimeout(killTimer);
killTimer = null;
}
});
session.process.once('error', () => {
if (killTimer) {
clearTimeout(killTimer);
killTimer = null;
}
});
capture('search_session_started', {
sessionId,
searchType: options.searchType,
hasTimeout: !!timeoutMs,
timeoutMs,
requestedPath: options.rootPath,
validatedPath: validPath
});
// For content searches, only search Excel files when contextually relevant:
// - filePattern explicitly targets Excel files (*.xlsx, *.xls, etc.)
// - or rootPath is an Excel file itself
const shouldSearchExcel = options.searchType === 'content' &&
this.shouldIncludeExcelSearch(options.filePattern, validPath);
if (shouldSearchExcel) {
this.searchExcelFiles(
validPath,
options.pattern,
options.ignoreCase !== false,
options.maxResults,
options.filePattern // Pass filePattern to filter Excel files too
).then(excelResults => {
// Add Excel results to session (merged after initial response)
for (const result of excelResults) {
session.results.push(result);
session.totalMatches++;
}
}).catch((err) => {
// Log Excel search errors but don't fail the whole search
capture('excel_search_error', { error: err instanceof Error ? err.message : String(err) });
});
}
// Wait for first chunk of data or early completion instead of fixed delay
// Excel search runs in background and results are merged via readSearchResults
const firstChunk = new Promise<void>(resolve => {
const onData = () => {
session.process.stdout?.off('data', onData);
resolve();
};
session.process.stdout?.once('data', onData);
setTimeout(resolve, 40); // cap at 40ms instead of 50-100ms
});
// Only wait for ripgrep first chunk - Excel results merge asynchronously
await firstChunk;
return {
sessionId,
isComplete: session.isComplete,
isError: session.isError,
results: [...session.results],
totalResults: session.totalMatches,
runtime: Date.now() - session.startTime
};
}
/**
* Read search results with offset-based pagination (like read_file)
* Supports both range reading and tail behavior
*/
readSearchResults(
sessionId: string,
offset: number = 0,
length: number = 100
): {
results: SearchResult[];
returnedCount: number; // Renamed from newResultsCount
totalResults: number;
totalMatches: number; // Actual matches (excluding context)
isComplete: boolean;
isError: boolean;
error?: string;
hasMoreResults: boolean; // New field
runtime: number;
wasIncomplete?: boolean; // NEW: Indicates if search was incomplete due to permissions
} {
const session = this.sessions.get(sessionId);
if (!session) {
throw new Error(`Search session ${sessionId} not found`);
}
// Get all results (excluding internal markers)
const allResults = session.results.filter(r => r.file !== '__LAST_READ_MARKER__');
// Handle negative offsets (tail behavior) - like file reading
if (offset < 0) {
const tailCount = Math.abs(offset);
const tailResults = allResults.slice(-tailCount);
return {
results: tailResults,
returnedCount: tailResults.length,
totalResults: session.totalMatches + session.totalContextLines,
totalMatches: session.totalMatches, // Actual matches only
isComplete: session.isComplete,
isError: session.isError && !!session.error?.trim(), // Only error if we have actual errors
error: session.error?.trim() || undefined,
hasMoreResults: false, // Tail always returns what's available
runtime: Date.now() - session.startTime,
wasIncomplete: session.wasIncomplete
};
}
// Handle positive offsets (range behavior) - like file reading
const slicedResults = allResults.slice(offset, offset + length);
const hasMoreResults = offset + length < allResults.length || !session.isComplete;
session.lastReadTime = Date.now();
return {
results: slicedResults,
returnedCount: slicedResults.length,
totalResults: session.totalMatches + session.totalContextLines,
totalMatches: session.totalMatches, // Actual matches only
isComplete: session.isComplete,
isError: session.isError && !!session.error?.trim(), // Only error if we have actual errors
error: session.error?.trim() || undefined,
hasMoreResults,
runtime: Date.now() - session.startTime,
wasIncomplete: session.wasIncomplete
};
}
/**
* Terminate a search session (like force_terminate)
*/
terminateSearch(sessionId: string): boolean {
const session = this.sessions.get(sessionId);
if (!session) {
return false;
}
if (!session.process.killed) {
session.process.kill('SIGTERM');
}
// Don't delete session immediately - let user read final results
// It will be cleaned up by cleanup process
return true;
}
/**
* Get list of active search sessions (like list_sessions)
*/
listSearchSessions(): Array<{
id: string;
searchType: string;
pattern: string;
isComplete: boolean;
isError: boolean;
runtime: number;
totalResults: number;
}> {
return Array.from(this.sessions.values()).map(session => ({
id: session.id,
searchType: session.options.searchType,
pattern: session.options.pattern,
isComplete: session.isComplete,
isError: session.isError,
runtime: Date.now() - session.startTime,
totalResults: session.totalMatches + session.totalContextLines
}));
}
/**
* Search Excel files for content matches
* Called during content search to include Excel files alongside text files
* Searches ALL sheets in each Excel file (row-wise for cross-column matching)
*
* TODO: Refactor - Extract Excel search logic to separate module (src/utils/search/excel-search.ts)
* and inject into SearchManager, similar to how file handlers are structured in src/utils/files/
* This would allow adding other file type searches (PDF, etc.) without bloating search-manager.ts
*/
private async searchExcelFiles(
rootPath: string,
pattern: string,
ignoreCase: boolean,
maxResults?: number,
filePattern?: string
): Promise<SearchResult[]> {
const results: SearchResult[] = [];
// Build regex for matching content
const flags = ignoreCase ? 'i' : '';
let regex: RegExp;
try {
regex = new RegExp(pattern, flags);
} catch {
// If pattern is not valid regex, escape it for literal matching
const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
regex = new RegExp(escaped, flags);
}
// Find Excel files recursively
let excelFiles = await this.findExcelFiles(rootPath);
// Filter by filePattern if provided
if (filePattern) {
const patterns = filePattern.split('|').map(p => p.trim()).filter(Boolean);
excelFiles = excelFiles.filter(filePath => {
const fileName = path.basename(filePath);
return patterns.some(pat => {
// Support glob-like patterns
if (pat.includes('*')) {
const regexPat = pat.replace(/\./g, '\\.').replace(/\*/g, '.*');
return new RegExp(`^${regexPat}$`, 'i').test(fileName);
}
// Exact match (case-insensitive)
return fileName.toLowerCase() === pat.toLowerCase();
});
});
}
// Dynamically import ExcelJS to search all sheets
const ExcelJS = await import('exceljs');
for (const filePath of excelFiles) {
if (maxResults && results.length >= maxResults) break;
try {
const workbook = new ExcelJS.default.Workbook();
await workbook.xlsx.readFile(filePath);
// Search ALL sheets in the workbook (row-wise for speed and cross-column matching)
for (const worksheet of workbook.worksheets) {
if (maxResults && results.length >= maxResults) break;
const sheetName = worksheet.name;
// Iterate through rows (faster than cell-by-cell)
worksheet.eachRow({ includeEmpty: false }, (row, rowNumber) => {
if (maxResults && results.length >= maxResults) return;
// Build a concatenated string of all cell values in the row
const rowValues: string[] = [];
row.eachCell({ includeEmpty: false }, (cell) => {
if (cell.value === null || cell.value === undefined) return;
let cellStr: string;
if (typeof cell.value === 'object') {
if ('result' in cell.value) {
cellStr = String(cell.value.result ?? '');
} else if ('richText' in cell.value) {
cellStr = (cell.value as any).richText.map((rt: any) => rt.text).join('');
} else if ('text' in cell.value) {
cellStr = String((cell.value as any).text);
} else {
cellStr = String(cell.value);
}
} else {
cellStr = String(cell.value);
}
if (cellStr.trim()) {
rowValues.push(cellStr);
}
});
// Join all cell values with space for cross-column matching
const rowText = rowValues.join(' ');
if (regex.test(rowText)) {
// Extract the matching portion for display
const match = rowText.match(regex);
const matchContext = match
? this.getMatchContext(rowText, match.index || 0, match[0].length)
: rowText.substring(0, 150);
results.push({
file: `${filePath}:${sheetName}!Row${rowNumber}`,
line: rowNumber,
match: matchContext,
type: 'content'
});
}
});
}
} catch (error) {
// Skip files that can't be read (permission issues, corrupted, etc.)
continue;
}
}
return results;
}
/**
* Find all Excel files in a directory recursively
*/
private async findExcelFiles(rootPath: string): Promise<string[]> {
const excelFiles: string[] = [];
async function walk(dir: string): Promise<void> {
try {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
if (entry.isDirectory()) {
// Skip node_modules, .git, etc.
if (!entry.name.startsWith('.') && entry.name !== 'node_modules') {
await walk(fullPath);
}
} else if (entry.isFile() && isExcelFile(entry.name)) {
excelFiles.push(fullPath);
}
}
} catch {
// Skip directories we can't read
}
}
// Check if rootPath is a file or directory
try {
const stats = await fs.stat(rootPath);
if (stats.isFile() && isExcelFile(rootPath)) {
return [rootPath];
} else if (stats.isDirectory()) {
await walk(rootPath);
}
} catch {
// Path doesn't exist or can't be accessed
}
return excelFiles;
}
/**
* Extract context around a match for display (show surrounding text)
*/
private getMatchContext(text: string, matchStart: number, matchLength: number): string {
const contextChars = 50; // chars before and after match
const start = Math.max(0, matchStart - contextChars);
const end = Math.min(text.length, matchStart + matchLength + contextChars);
let context = text.substring(start, end);
// Add ellipsis if truncated
if (start > 0) context = '...' + context;
if (end < text.length) context = context + '...';
return context;
}
/**
* Clean up completed sessions older than specified time
* Called automatically by cleanup interval
*/
cleanupSessions(maxAge: number = 5 * 60 * 1000): void {
const cutoffTime = Date.now() - maxAge;
for (const [sessionId, session] of this.sessions) {
if (session.isComplete && session.lastReadTime < cutoffTime) {
this.sessions.delete(sessionId);
}
}
}
/**
* Get total number of active sessions (excluding completed ones)
*/
getActiveSessionCount(): number {
return Array.from(this.sessions.values()).filter(session => !session.isComplete).length;
}
/**
* Detect if pattern looks like an exact filename
* (has file extension and no glob wildcards)
*/
private isExactFilename(pattern: string): boolean {
return /\.[a-zA-Z0-9]+$/.test(pattern) &&
!this.isGlobPattern(pattern);
}
/**
* Detect if pattern contains glob wildcards
*/
private isGlobPattern(pattern: string): boolean {
return pattern.includes('*') ||
pattern.includes('?') ||
pattern.includes('[') ||
pattern.includes('{') ||
pattern.includes(']') ||
pattern.includes('}');
}
/**
* Determine if Excel search should be included based on context
* Only searches Excel files when:
* - filePattern explicitly targets Excel files (*.xlsx, *.xls, *.xlsm, *.xlsb)
* - or the rootPath itself is an Excel file
*/
private shouldIncludeExcelSearch(filePattern?: string, rootPath?: string): boolean {
const excelExtensions = ['.xlsx', '.xls', '.xlsm', '.xlsb'];
// Check if rootPath is an Excel file
if (rootPath) {
const lowerPath = rootPath.toLowerCase();
if (excelExtensions.some(ext => lowerPath.endsWith(ext))) {
return true;
}
}
// Check if filePattern targets Excel files
if (filePattern) {
const lowerPattern = filePattern.toLowerCase();
// Check for patterns like *.xlsx, *.xls, or explicit Excel extensions
if (excelExtensions.some(ext =>
lowerPattern.includes(`*${ext}`) ||
lowerPattern.endsWith(ext)
)) {
return true;
}
}
return false;
}
private buildRipgrepArgs(options: SearchSessionOptions): string[] {
const args: string[] = [];
if (options.searchType === 'content') {
// Content search mode
args.push('--json', '--line-number');
// Add literal search support for content searches
if (options.literalSearch) {
args.push('-F'); // Fixed string matching (literal)
}
if (options.contextLines && options.contextLines > 0) {
args.push('-C', options.contextLines.toString());
}
} else {
// File search mode
args.push('--files');
}
// Case-insensitive: content searches use -i flag, file searches use --iglob
if (options.searchType === 'content' && options.ignoreCase !== false) {
args.push('-i');
}
if (options.includeHidden) {
args.push('--hidden');
}
if (options.maxResults && options.maxResults > 0) {
args.push('-m', options.maxResults.toString());
}
// File pattern filtering (for file type restrictions like *.js, *.d.ts)
if (options.filePattern) {
const patterns = options.filePattern
.split('|')
.map(p => p.trim())
.filter(Boolean);
for (const p of patterns) {
if (options.searchType === 'content') {
args.push('-g', p);
} else {
// For file search: use --iglob for case-insensitive or --glob for case-sensitive
if (options.ignoreCase !== false) {
args.push('--iglob', p);
} else {
args.push('--glob', p);
}
}
}
}
// Handle the main search pattern
if (options.searchType === 'files') {
// For file search: determine how to treat the pattern
const globFlag = options.ignoreCase !== false ? '--iglob' : '--glob';
if (this.isExactFilename(options.pattern)) {
// Exact filename: use appropriate glob flag with the exact pattern
args.push(globFlag, options.pattern);
} else if (this.isGlobPattern(options.pattern)) {
// Already a glob pattern: use appropriate glob flag as-is
args.push(globFlag, options.pattern);
} else {
// Substring/fuzzy search: wrap with wildcards
args.push(globFlag, `*${options.pattern}*`);
}
// Add the root path for file mode
args.push(options.rootPath);
} else {
// Content search: terminate options before the pattern to prevent
// patterns starting with '-' being interpreted as flags
args.push('--', options.pattern, options.rootPath);
}
return args;
}
private setupProcessHandlers(session: SearchSession): void {
const { process } = session;
process.stdout?.on('data', (data: Buffer) => {
session.buffer += data.toString();
this.processBufferedOutput(session);
});
process.stderr?.on('data', (data: Buffer) => {
const errorText = data.toString();
// Store error text for potential user display, but don't capture individual errors
// We'll capture incomplete search status in the completion event instead
session.error = (session.error || '') + errorText;
// Filter meaningful errors
const filteredErrors = errorText
.split('\n')
.filter(line => {
const trimmed = line.trim();
// Skip empty lines and lines with just symbols/numbers/colons
if (!trimmed || trimmed.match(/^[\)\(\s\d:]*$/)) return false;
// Skip all ripgrep system errors that start with "rg:"
if (trimmed.startsWith('rg:')) return false;
return true;
});
// Only add to session.error if there are actual meaningful errors after filtering
if (filteredErrors.length > 0) {
const meaningfulErrors = filteredErrors.join('\n').trim();
if (meaningfulErrors) {
session.error = (session.error || '') + meaningfulErrors + '\n';
capture('search_session_error', {
sessionId: session.id,
error: meaningfulErrors.substring(0, 200)
});
}
}
});
process.on('close', (code: number) => {
// Process any remaining buffer content
if (session.buffer.trim()) {
this.processBufferedOutput(session, true);
}
session.isComplete = true;
// Track if search was incomplete due to access issues
// Ripgrep exit code 2 means "some files couldn't be searched"
if (code === 2) {
session.wasIncomplete = true;
}
// Only treat as error if:
// 1. Unexpected exit code (not 0, 1, or 2) AND
// 2. We have meaningful errors after filtering AND
// 3. We found no results at all
if (code !== 0 && code !== 1 && code !== 2) {
// Codes 0=success, 1=no matches, 2=some files couldn't be searched
if (session.error?.trim() && session.totalMatches === 0) {
session.isError = true;
session.error = session.error || `ripgrep exited with code ${code}`;
}
}
// If we have results, don't mark as error even if there were permission issues
if (session.totalMatches > 0) {
session.isError = false;
}
capture('search_session_completed', {
sessionId: session.id,
exitCode: code,
totalResults: session.totalMatches + session.totalContextLines,
totalMatches: session.totalMatches,
runtime: Date.now() - session.startTime,
wasIncomplete: session.wasIncomplete || false // NEW: Track incomplete searches
});
// Rely on cleanupSessions(maxAge) only; no per-session timer
});
process.on('error', (error: Error) => {
session.isComplete = true;
session.isError = true;
session.error = `Process error: ${error.message}`;
// Rely on cleanupSessions(maxAge) only; no per-session timer
});
}
private processBufferedOutput(session: SearchSession, isFinal: boolean = false): void {
const lines = session.buffer.split('\n');
// Keep the last incomplete line in the buffer unless this is final processing
if (!isFinal) {
session.buffer = lines.pop() || '';
} else {
session.buffer = '';
}
for (const line of lines) {
if (!line.trim()) continue;
const result = this.parseLine(line, session.options.searchType);
if (result) {
session.results.push(result);
// Separate counting of matches vs context lines
if (result.type === 'content' && line.includes('"type":"context"')) {
session.totalContextLines++;
} else {
session.totalMatches++;
}
// Early termination for exact filename matches (if enabled)
if (session.options.earlyTermination !== false && // Default to true
session.options.searchType === 'files' &&
this.isExactFilename(session.options.pattern)) {
const pat = path.normalize(session.options.pattern);
const filePath = path.normalize(result.file);
const ignoreCase = session.options.ignoreCase !== false;
const ends = ignoreCase
? filePath.toLowerCase().endsWith(pat.toLowerCase())
: filePath.endsWith(pat);
if (ends) {
// Found exact match, terminate search early
setTimeout(() => {
if (!session.process.killed) {
session.process.kill('SIGTERM');
}
}, 100); // Small delay to allow any remaining results
break;
}
}
}
}
}
private parseLine(line: string, searchType: 'files' | 'content'): SearchResult | null {
if (searchType === 'content') {
// Parse JSON output from content search
try {
const parsed = JSON.parse(line);
if (parsed.type === 'match') {
// Handle multiple submatches per line - return first submatch
const submatch = parsed.data?.submatches?.[0];
return {
file: parsed.data.path.text,
line: parsed.data.line_number,
match: submatch?.match?.text || parsed.data.lines.text,
type: 'content'
};
}
if (parsed.type === 'context') {
return {
file: parsed.data.path.text,
line: parsed.data.line_number,
match: parsed.data.lines.text.trim(),
type: 'content'
};
}
// Handle summary to reconcile totals
if (parsed.type === 'summary') {
// Optional: could reconcile totalMatches with parsed.data.stats?.matchedLines
return null;
}
return null;
} catch (error) {
// Skip invalid JSON lines
return null;
}
} else {
// File search - each line is a file path
return {
file: line.trim(),
type: 'file'
};
}
}
}
// Global search manager instance
export const searchManager = new SearchManager();
// Cleanup management - run on fixed schedule
let cleanupInterval: NodeJS.Timeout | null = null;
/**
* Start cleanup interval - now runs on fixed schedule
*/
function startCleanupIfNeeded(): void {
if (!cleanupInterval) {
cleanupInterval = setInterval(() => {
searchManager.cleanupSessions();
}, 5 * 60 * 1000);
// Also check immediately after a short delay (let search process finish)
setTimeout(() => {
searchManager.cleanupSessions();
}, 1000);
}
}