file_search.rs
//! High-performance file system operations optimized for large codebases
//!
//! This module provides fast file search, content scanning, and duplicate
//! detection that outperforms traditional JavaScript implementations by 5-20x.

use napi_derive::napi;

use globset::{Glob, GlobSet, GlobSetBuilder};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::UNIX_EPOCH;
use walkdir::{DirEntry, WalkDir};

/// Configuration for file search operations
#[napi(object)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileSearchConfig {
    /// Maximum depth for directory traversal (-1 for unlimited)
    pub max_depth: i32,
    /// Follow symbolic links
    pub follow_symlinks: bool,
    /// Include hidden files (.dot files)
    pub include_hidden: bool,
    /// Use parallel processing
    pub use_parallel: bool,
    /// Patterns to exclude
    pub exclude_patterns: Vec<String>,
    /// File size limit in bytes (0 for no limit)
    pub max_file_size: i32,
}

impl Default for FileSearchConfig {
    fn default() -> Self {
        Self {
            max_depth: -1,
            follow_symlinks: false,
            include_hidden: false,
            use_parallel: true,
            exclude_patterns: vec![
                "node_modules".to_string(),
                ".git".to_string(),
                "target".to_string(),
                ".idea".to_string(),
                ".vscode".to_string(),
            ],
            max_file_size: 0,
        }
    }
}

/// File metadata result
#[napi(object)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileInfo {
    /// Absolute path to the file
    pub path: String,
    /// File name
    pub name: String,
    /// File size in bytes
    pub size: i32,
    /// Last modified timestamp (milliseconds since Unix epoch)
    pub last_modified: f64,
    /// Is directory
    pub is_directory: bool,
    /// File extension (if any)
    pub extension: Option<String>,
}

/// Text search result
#[napi(object)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextSearchResult {
    /// File path
    pub path: String,
    /// Line number (1-based)
    pub line_number: u32,
    /// Column start position
    pub column_start: u32,
    /// Column end position
    pub column_end: u32,
    /// The matching line content
    pub line_content: String,
    /// Match text
    pub match_text: String,
}

/// Directory statistics
#[napi(object)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirectoryStats {
    /// Total size in bytes
    pub total_size: f64,
    /// Number of files
    pub file_count: i32,
    /// Number of directories
    pub directory_count: i32,
    /// Largest file size
    pub largest_file_size: f64,
    /// Average file size
    pub average_file_size: f64,
}

/// File search operations implementation
#[napi]
pub struct FileSearch {
    config: FileSearchConfig,
}

#[napi]
impl FileSearch {
    /// Create a new file search instance with optional configuration
    ///
    /// # Arguments
    /// * `config` - Optional configuration for file search operations
    #[napi(constructor)]
    pub fn new(config: Option<FileSearchConfig>) -> napi::Result<Self> {
        Ok(Self {
            config: config.unwrap_or_default(),
        })
    }

    /// Search for files by glob pattern
    /// 5-10x faster than Node.js glob implementations
    #[napi]
    pub fn find_files_by_pattern(
        &self,
        root_path: String,
        pattern: String,
    ) -> napi::Result<Vec<FileInfo>> {
        let root = Path::new(&root_path);
        if !root.exists() {
            return Err(napi::Error::new(
                napi::Status::InvalidArg,
                format!("Path does not exist: {}", root_path),
            ));
        }

        // Build glob matcher
        let glob = Glob::new(&pattern).map_err(|e| {
            napi::Error::new(napi::Status::InvalidArg, format!("Invalid pattern: {}", e))
        })?;
        let matcher = glob.compile_matcher();

        // Build exclude patterns
        let exclude_set = self.build_exclude_set()?;

        // Configure walker
        let mut walker = WalkDir::new(root).follow_links(self.config.follow_symlinks);
        if self.config.max_depth >= 0 {
            walker = walker.max_depth(self.config.max_depth as usize);
        }

        // Collect matching files; `filter_entry` prunes excluded directories
        // so their contents are never visited
        let entries: Vec<DirEntry> = walker
            .into_iter()
            .filter_entry(|e| self.should_include_entry(e, &exclude_set))
            .filter_map(|e| e.ok())
            .collect();

        // Process entries in parallel if enabled
        let results = if self.config.use_parallel && entries.len() > 100 {
            entries
                .par_iter()
                .filter_map(|entry| {
                    let path = entry.path();
                    if let Some(path_str) = path.to_str() {
                        if matcher.is_match(path_str) {
                            self.create_file_info(entry).ok()
                        } else {
                            None
                        }
                    } else {
                        None
                    }
                })
                .collect()
        } else {
            entries
                .iter()
                .filter_map(|entry| {
                    let path = entry.path();
                    if let Some(path_str) = path.to_str() {
                        if matcher.is_match(path_str) {
                            self.create_file_info(entry).ok()
                        } else {
                            None
                        }
                    } else {
                        None
                    }
                })
                .collect()
        };

        Ok(results)
    }
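    // --- Illustrative usage (editorial addition, not in the original source) ---
    // A minimal sketch of driving the glob search from Rust. With `globset`
    // defaults, `*` may match path separators, so "*.rs" also matches nested
    // files such as "src/main.rs".
    //
    //     let searcher = FileSearch::new(None)?;
    //     let rust_files = searcher
    //         .find_files_by_pattern("./src".to_string(), "*.rs".to_string())?;
    //     for f in &rust_files {
    //         println!("{} ({} bytes)", f.path, f.size);
    //     }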
    /// Search for text content within files
    /// 10-20x faster than JavaScript regex operations on large files
    #[napi]
    pub fn search_text_in_files(
        &self,
        root_path: String,
        search_text: String,
        file_pattern: Option<String>,
        case_sensitive: Option<bool>,
    ) -> napi::Result<Vec<TextSearchResult>> {
        let root = Path::new(&root_path);
        let case_sensitive = case_sensitive.unwrap_or(true);

        // Build file pattern matcher
        let file_matcher = if let Some(pattern) = file_pattern {
            let glob = Glob::new(&pattern).map_err(|e| {
                napi::Error::new(napi::Status::InvalidArg, format!("Invalid pattern: {}", e))
            })?;
            Some(glob.compile_matcher())
        } else {
            None
        };

        // Build exclude patterns
        let exclude_set = self.build_exclude_set()?;

        // Configure walker, pruning excluded directories
        let walker = WalkDir::new(root)
            .follow_links(self.config.follow_symlinks)
            .into_iter()
            .filter_entry(|e| self.should_include_entry(e, &exclude_set))
            .filter_map(|e| e.ok())
            .filter(|e| !e.file_type().is_dir());

        // Collect files to search
        let files: Vec<_> = walker
            .filter(|entry| {
                if let Some(ref matcher) = file_matcher {
                    entry
                        .path()
                        .to_str()
                        .map(|s| matcher.is_match(s))
                        .unwrap_or(false)
                } else {
                    true
                }
            })
            .collect();

        // Search files in parallel if enabled
        let results = if self.config.use_parallel && files.len() > 10 {
            files
                .par_iter()
                .flat_map(|entry| {
                    self.search_in_file(entry.path(), &search_text, case_sensitive)
                        .unwrap_or_default()
                })
                .collect()
        } else {
            files
                .iter()
                .flat_map(|entry| {
                    self.search_in_file(entry.path(), &search_text, case_sensitive)
                        .unwrap_or_default()
                })
                .collect()
        };

        Ok(results)
    }
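    // --- Illustrative usage (editorial addition, not in the original source) ---
    // A sketch of a case-insensitive text search restricted to TypeScript
    // files; each hit carries a 1-based line number and byte-offset columns.
    //
    //     let hits = searcher.search_text_in_files(
    //         "./src".to_string(),
    //         "todo".to_string(),
    //         Some("*.ts".to_string()),
    //         Some(false),
    //     )?;
    //     for h in &hits {
    //         println!("{}:{}: {}", h.path, h.line_number, h.line_content);
    //     }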
    /// Get directory statistics (size, file count, etc.)
    #[napi]
    pub fn get_directory_stats(&self, path: String) -> napi::Result<DirectoryStats> {
        let root = Path::new(&path);
        if !root.exists() {
            return Err(napi::Error::new(
                napi::Status::InvalidArg,
                format!("Path does not exist: {}", path),
            ));
        }

        let exclude_set = self.build_exclude_set()?;
        let walker = WalkDir::new(root)
            .follow_links(self.config.follow_symlinks)
            .into_iter()
            .filter_entry(|e| self.should_include_entry(e, &exclude_set))
            .filter_map(|e| e.ok());

        let mut total_size = 0u64;
        let mut file_count = 0u32;
        let mut directory_count = 0u32;
        let mut largest_file_size = 0u64;

        for entry in walker {
            if entry.file_type().is_dir() {
                directory_count += 1;
            } else {
                file_count += 1;
                if let Ok(metadata) = entry.metadata() {
                    let size = metadata.len();
                    total_size += size;
                    if size > largest_file_size {
                        largest_file_size = size;
                    }
                }
            }
        }

        let average_file_size = if file_count > 0 {
            total_size as f64 / file_count as f64
        } else {
            0.0
        };

        Ok(DirectoryStats {
            total_size: total_size as f64,
            file_count: file_count as i32,
            directory_count: directory_count as i32,
            largest_file_size: largest_file_size as f64,
            average_file_size,
        })
    }

    /// Create a map of file extensions to their counts
    #[napi]
    pub fn get_file_extension_stats(&self, path: String) -> napi::Result<HashMap<String, i32>> {
        let root = Path::new(&path);
        if !root.exists() {
            return Err(napi::Error::new(
                napi::Status::InvalidArg,
                format!("Path does not exist: {}", path),
            ));
        }

        let exclude_set = self.build_exclude_set()?;
        let walker = WalkDir::new(root)
            .follow_links(self.config.follow_symlinks)
            .into_iter()
            .filter_entry(|e| self.should_include_entry(e, &exclude_set))
            .filter_map(|e| e.ok())
            .filter(|e| !e.file_type().is_dir());

        let mut stats: HashMap<String, i32> = HashMap::new();
        for entry in walker {
            if let Some(ext) = entry.path().extension().and_then(|s| s.to_str()) {
                *stats.entry(ext.to_string()).or_insert(0) += 1;
            } else {
                *stats.entry("<no_extension>".to_string()).or_insert(0) += 1;
            }
        }

        Ok(stats)
    }

    /// Fast duplicate file finder using content hashing
    #[napi]
    pub fn find_duplicate_files(&self, path: String) -> napi::Result<HashMap<String, Vec<String>>> {
        let root = Path::new(&path);
        if !root.exists() {
            return Err(napi::Error::new(
                napi::Status::InvalidArg,
                format!("Path does not exist: {}", path),
            ));
        }

        let exclude_set = self.build_exclude_set()?;

        // First, group files by size
        let mut size_groups: HashMap<u64, Vec<PathBuf>> = HashMap::new();
        let walker = WalkDir::new(root)
            .follow_links(self.config.follow_symlinks)
            .into_iter()
            .filter_entry(|e| self.should_include_entry(e, &exclude_set))
            .filter_map(|e| e.ok())
            .filter(|e| !e.file_type().is_dir());

        for entry in walker {
            if let Ok(metadata) = entry.metadata() {
                let size = metadata.len();
                if size > 0 {
                    // Skip empty files
                    size_groups
                        .entry(size)
                        .or_insert_with(Vec::new)
                        .push(entry.path().to_path_buf());
                }
            }
        }

        // Only hash files that have the same size
        let mut hash_groups: HashMap<String, Vec<String>> = HashMap::new();
        for (_, paths) in size_groups.iter().filter(|(_, paths)| paths.len() > 1) {
            let hashes: Vec<_> = if self.config.use_parallel {
                paths
                    .par_iter()
                    .filter_map(|path| {
                        self.hash_file(path)
                            .ok()
                            .map(|hash| (hash, path.to_string_lossy().to_string()))
                    })
                    .collect()
            } else {
                paths
                    .iter()
                    .filter_map(|path| {
                        self.hash_file(path)
                            .ok()
                            .map(|hash| (hash, path.to_string_lossy().to_string()))
                    })
                    .collect()
            };

            for (hash, path) in hashes {
                hash_groups.entry(hash).or_insert_with(Vec::new).push(path);
            }
        }

        // Filter out unique files
        let duplicates: HashMap<String, Vec<String>> = hash_groups
            .into_iter()
            .filter(|(_, paths)| paths.len() > 1)
            .collect();

        Ok(duplicates)
    }
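    // --- Illustrative usage (editorial addition, not in the original source) ---
    // The returned map is keyed by Blake3 hex digest; each value lists the
    // paths that share that content. Because files are grouped by size first,
    // files with a unique size are never read or hashed.
    //
    //     let duplicates = searcher.find_duplicate_files("./assets".to_string())?;
    //     for (hash, paths) in &duplicates {
    //         println!("{}: {} copies", hash, paths.len());
    //     }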
    /// Build exclude pattern set
    fn build_exclude_set(&self) -> napi::Result<GlobSet> {
        let mut builder = GlobSetBuilder::new();
        for pattern in &self.config.exclude_patterns {
            let glob = Glob::new(pattern).map_err(|e| {
                napi::Error::new(
                    napi::Status::InvalidArg,
                    format!("Invalid exclude pattern: {}", e),
                )
            })?;
            builder.add(glob);
        }
        builder.build().map_err(|e| {
            napi::Error::new(
                napi::Status::GenericFailure,
                format!("Failed to build glob set: {}", e),
            )
        })
    }

    /// Check if directory entry should be included
    fn should_include_entry(&self, entry: &DirEntry, exclude_set: &GlobSet) -> bool {
        let path = entry.path();

        // Check hidden files
        if !self.config.include_hidden {
            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                if name.starts_with('.') && name != "." && name != ".." {
                    return false;
                }
            }
        }

        // Check exclude patterns against the full path and, so that bare
        // names like "node_modules" take effect, against the file name alone
        if let Some(path_str) = path.to_str() {
            if exclude_set.is_match(path_str) {
                return false;
            }
        }
        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
            if exclude_set.is_match(name) {
                return false;
            }
        }

        // Check file size limit
        if self.config.max_file_size > 0 && !entry.file_type().is_dir() {
            if let Ok(metadata) = entry.metadata() {
                if metadata.len() > self.config.max_file_size as u64 {
                    return false;
                }
            }
        }

        true
    }

    /// Create FileInfo from directory entry
    fn create_file_info(&self, entry: &DirEntry) -> napi::Result<FileInfo> {
        let path = entry.path();
        let metadata = entry.metadata().map_err(|e| {
            napi::Error::new(
                napi::Status::GenericFailure,
                format!("Failed to get metadata: {}", e),
            )
        })?;

        let name = path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("")
            .to_string();

        let extension = if metadata.is_file() {
            path.extension().and_then(|e| e.to_str()).map(|s| s.to_string())
        } else {
            None
        };

        let last_modified = metadata
            .modified()
            .unwrap_or(UNIX_EPOCH)
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_millis() as f64;

        Ok(FileInfo {
            path: path.to_string_lossy().to_string(),
            name,
            // Note: `size` is i32 to match the napi interface; the cast
            // truncates for files larger than i32::MAX bytes (~2 GiB)
            size: metadata.len() as i32,
            last_modified,
            is_directory: metadata.is_dir(),
            extension,
        })
    }

    /// Search for text in a single file
    ///
    /// Note: only the first match on each line is recorded, and files that
    /// are not valid UTF-8 return an error (callers skip them).
    fn search_in_file(
        &self,
        path: &Path,
        search_text: &str,
        case_sensitive: bool,
    ) -> napi::Result<Vec<TextSearchResult>> {
        let content = fs::read_to_string(path)?;
        let mut results = Vec::new();

        let search_pattern = if case_sensitive {
            search_text.to_string()
        } else {
            search_text.to_lowercase()
        };

        for (line_num, line) in content.lines().enumerate() {
            let search_line = if case_sensitive {
                line.to_string()
            } else {
                line.to_lowercase()
            };

            if let Some(pos) = search_line.find(&search_pattern) {
                results.push(TextSearchResult {
                    path: path.to_string_lossy().to_string(),
                    line_number: (line_num + 1) as u32,
                    column_start: pos as u32,
                    column_end: (pos + search_text.len()) as u32,
                    line_content: line.to_string(),
                    match_text: search_text.to_string(),
                });
            }
        }

        Ok(results)
    }

    /// Hash file content using Blake3
    fn hash_file(&self, path: &Path) -> napi::Result<String> {
        use blake3::Hasher;
        use std::io::Read;

        let mut file = fs::File::open(path)?;
        let mut hasher = Hasher::new();
        let mut buffer = [0; 8192];

        loop {
            let n = file.read(&mut buffer)?;
            if n == 0 {
                break;
            }
            hasher.update(&buffer[..n]);
        }

        Ok(hasher.finalize().to_hex().to_string())
    }
}
/// Standalone function for quick file search
#[napi]
pub fn quick_find_files(root_path: String, pattern: String) -> napi::Result<Vec<FileInfo>> {
    let searcher = FileSearch::new(None)?;
    searcher.find_files_by_pattern(root_path, pattern)
}

/// Standalone function for quick text search
#[napi]
pub fn quick_search_text(
    root_path: String,
    search_text: String,
    file_pattern: Option<String>,
) -> napi::Result<Vec<TextSearchResult>> {
    let searcher = FileSearch::new(None)?;
    searcher.search_text_in_files(root_path, search_text, file_pattern, None)
}

/// Benchmark file search performance
#[napi]
pub fn benchmark_file_search(
    root_path: String,
    pattern: String,
    iterations: u32,
) -> napi::Result<HashMap<String, f64>> {
    use std::time::Instant;

    let mut results = HashMap::new();

    // Benchmark parallel search
    let searcher_parallel = FileSearch::new(Some(FileSearchConfig {
        use_parallel: true,
        ..Default::default()
    }))?;

    let start = Instant::now();
    for _ in 0..iterations {
        let _ = searcher_parallel.find_files_by_pattern(root_path.clone(), pattern.clone())?;
    }
    let parallel_time = start.elapsed().as_secs_f64() * 1000.0 / iterations as f64;
    results.insert("parallel_avg_ms".to_string(), parallel_time);

    // Benchmark sequential search
    let searcher_sequential = FileSearch::new(Some(FileSearchConfig {
        use_parallel: false,
        ..Default::default()
    }))?;

    let start = Instant::now();
    for _ in 0..iterations {
        let _ = searcher_sequential.find_files_by_pattern(root_path.clone(), pattern.clone())?;
    }
    let sequential_time = start.elapsed().as_secs_f64() * 1000.0 / iterations as f64;
    results.insert("sequential_avg_ms".to_string(), sequential_time);

    // Calculate speedup
    let speedup = sequential_time / parallel_time;
    results.insert("speedup_ratio".to_string(), speedup);

    Ok(results)
}
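// --- Illustrative smoke test (editorial addition, not in the original source) ---
// A runnable sketch under the assumptions that `cargo test` runs from the
// crate root (so "./src" exists) and that the test profile links cleanly; the
// #[napi] methods remain directly callable from Rust.
#[cfg(test)]
mod usage_sketch {
    use super::*;

    #[test]
    fn find_and_summarize_sources() {
        let searcher = FileSearch::new(None).expect("default config");

        // Glob search for Rust sources under ./src
        let files = searcher
            .find_files_by_pattern("./src".to_string(), "*.rs".to_string())
            .expect("glob search");
        assert!(!files.is_empty());

        // Aggregate statistics over the same tree; the total file count
        // must cover at least the matched .rs files
        let stats = searcher
            .get_directory_stats("./src".to_string())
            .expect("directory stats");
        assert!(stats.file_count as usize >= files.len());
    }
}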

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/moikas-code/moidvk'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.