pub mod file_info;
pub mod utils;
use crate::{
error::{ServiceError, ServiceResult},
fs_service::utils::is_system_metadata_file,
tools::EditOperation,
};
use async_zip::tokio::{read::seek::ZipFileReader, write::ZipFileWriter};
use base64::{engine::general_purpose, write::EncoderWriter};
use file_info::FileInfo;
use futures::{StreamExt, stream};
use glob_match::glob_match;
use grep::{
matcher::{Match, Matcher},
regex::RegexMatcherBuilder,
searcher::{BinaryDetection, Searcher, sinks::UTF8},
};
use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
use rust_mcp_sdk::schema::RpcError;
use serde_json::{Value, json};
use sha2::{Digest, Sha256};
use similar::TextDiff;
use std::{
collections::{HashMap, HashSet},
env,
fs::{self},
io::{SeekFrom, Write},
path::{Path, PathBuf},
sync::Arc,
};
use tokio::{
fs::{File, metadata},
io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader},
sync::RwLock,
};
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use utils::{
contains_symlink, expand_home, format_bytes, normalize_line_endings, normalize_path,
write_zip_entry,
};
use walkdir::WalkDir;
const SNIPPET_MAX_LENGTH: usize = 200;
const SNIPPET_BACKWARD_CHARS: usize = 30;
const MAX_CONCURRENT_FILE_READ: usize = 5;
#[cfg(windows)]
pub const OS_LINE_ENDING: &str = "\r\n";
#[cfg(not(windows))]
pub const OS_LINE_ENDING: &str = "\n";
type PathResultList = Vec<Result<PathBuf, ServiceError>>;
pub struct FileSystemService {
allowed_path: RwLock<Arc<Vec<PathBuf>>>,
}
/// Represents a single match found in a file's content.
#[derive(Debug, Clone)]
pub struct ContentMatchResult {
/// The line number where the match occurred (1-based).
pub line_number: u64,
    /// The byte offset at which the match starts within the line.
    pub start_pos: usize,
    /// The line of text containing the match.
    /// If the line is long, only a truncated snippet of up to `SNIPPET_MAX_LENGTH`
    /// characters around the match will be shown.
    pub line_text: String,
}
/// Represents all matches found in a specific file.
#[derive(Debug, Clone)]
pub struct FileSearchResult {
/// The path to the file where matches were found.
pub file_path: PathBuf,
/// All individual match results within the file.
pub matches: Vec<ContentMatchResult>,
}
/// Works around an issue with the DockerHub mcp-registry & mcp-gateway where tool discovery
/// fails to resolve references to 'example' or 'default' values when running the run->command
/// from the server.yaml file.
/// Should be removed once mcp-gateway is more mature.
/// reference: https://github.com/docker/mcp-registry/blob/7d815fac2f3b7a9717eebc3f3db215de3ce3c3c7/internal/mcp/client.go#L170-L173
#[allow(clippy::ptr_arg)]
fn fix_dockerhub_mcp_registry_gateway(input: &String) -> &str {
if input.contains("{{rust-mcp-filesystem.allowed_directories|volume-target|into}}") {
"."
} else {
input
}
}
impl FileSystemService {
pub fn try_new(allowed_directories: &[String]) -> ServiceResult<Self> {
        let normalized_dirs: Vec<PathBuf> = allowed_directories
            .iter()
            .map(fix_dockerhub_mcp_registry_gateway)
            .map(|dir| {
                let expand_result = expand_home(dir.into());
                if !expand_result.is_dir() {
                    panic!("Error: {dir} is not a directory");
                }
                expand_result
            })
            .collect();
Ok(Self {
allowed_path: RwLock::new(Arc::new(normalized_dirs)),
})
}
pub async fn allowed_directories(&self) -> Arc<Vec<PathBuf>> {
let guard = self.allowed_path.read().await;
guard.clone()
}
}
impl FileSystemService {
pub fn valid_roots(&self, roots: Vec<&str>) -> ServiceResult<(Vec<PathBuf>, Option<String>)> {
        let paths: PathResultList = roots.iter().map(|p| self.parse_file_path(p)).collect();
// Partition into Ok and Err results
let (ok_paths, err_paths): (PathResultList, PathResultList) =
paths.into_iter().partition(|p| p.is_ok());
// using HashSet to remove duplicates
let (valid_roots, no_dir_roots): (HashSet<PathBuf>, HashSet<PathBuf>) = ok_paths
.into_iter()
.collect::<Result<Vec<_>, _>>()?
.into_iter()
.map(expand_home)
.partition(|path| path.is_dir());
let skipped_roots = if !err_paths.is_empty() || !no_dir_roots.is_empty() {
Some(format!(
"Warning: skipped {} invalid roots.",
err_paths.len() + no_dir_roots.len()
))
} else {
None
};
let valid_roots = valid_roots.into_iter().collect();
Ok((valid_roots, skipped_roots))
}
pub async fn update_allowed_paths(&self, valid_roots: Vec<PathBuf>) {
let mut guard = self.allowed_path.write().await;
*guard = Arc::new(valid_roots)
}
/// Converts a string to a `PathBuf`, supporting both raw paths and `file://` URIs.
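    ///
    /// A sketch of the accepted inputs (hypothetical values):
    /// ```ignore
    /// // `file://` prefixes are stripped; raw paths pass through unchanged.
    /// let a = service.parse_file_path("file:///home/user/docs")?; // -> /home/user/docs
    /// let b = service.parse_file_path("./notes.txt")?;            // -> ./notes.txt
    /// ```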
fn parse_file_path(&self, input: &str) -> ServiceResult<PathBuf> {
Ok(PathBuf::from(
input.strip_prefix("file://").unwrap_or(input).trim(),
))
}
pub fn validate_path(
&self,
requested_path: &Path,
allowed_directories: Arc<Vec<PathBuf>>,
) -> ServiceResult<PathBuf> {
if allowed_directories.is_empty() {
return Err(ServiceError::FromString(
"Allowed directories list is empty. Client did not provide any valid root directories.".to_string()
));
}
// Expand ~ to home directory
let expanded_path = expand_home(requested_path.to_path_buf());
// Resolve the absolute path
let absolute_path = if expanded_path.as_path().is_absolute() {
expanded_path.clone()
} else {
            env::current_dir()?.join(&expanded_path)
};
// Normalize the path
let normalized_requested = normalize_path(&absolute_path);
// Check if path is within allowed directories
if !allowed_directories.iter().any(|dir| {
            // Check both the raw and normalized directory, since the requested path may not exist yet, making canonicalization impossible.
normalized_requested.starts_with(dir)
|| normalized_requested.starts_with(normalize_path(dir))
}) {
let symlink_target = if contains_symlink(&absolute_path)? {
"a symlink target path"
} else {
"path"
};
return Err(ServiceError::FromString(format!(
"Access denied - {} is outside allowed directories: {} not in {}",
symlink_target,
absolute_path.display(),
allowed_directories
.iter()
.map(|p| p.display().to_string())
.collect::<Vec<_>>()
.join(",\n"),
)));
}
Ok(absolute_path)
}
// Get file stats
pub async fn get_file_stats(&self, file_path: &Path) -> ServiceResult<FileInfo> {
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(file_path, allowed_directories)?;
let metadata = fs::metadata(valid_path)?;
let size = metadata.len();
let created = metadata.created().ok();
let modified = metadata.modified().ok();
let accessed = metadata.accessed().ok();
let is_directory = metadata.is_dir();
let is_file = metadata.is_file();
Ok(FileInfo {
size,
created,
modified,
accessed,
is_directory,
is_file,
metadata,
})
}
fn detect_line_ending(&self, text: &str) -> &str {
if text.contains("\r\n") {
"\r\n"
} else if text.contains('\r') {
"\r"
} else {
"\n"
}
}
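    /// Compresses files under `input_dir` whose paths match `pattern` into `target_zip_file`.
    ///
    /// A usage sketch, assuming the paths below fall inside an allowed directory
    /// (hypothetical values):
    /// ```ignore
    /// let msg = service
    ///     .zip_directory("./logs".into(), "**/*.log".into(), "./logs.zip".into())
    ///     .await?;
    /// println!("{msg}");
    /// ```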
pub async fn zip_directory(
&self,
input_dir: String,
pattern: String,
target_zip_file: String,
) -> ServiceResult<String> {
let allowed_directories = self.allowed_directories().await;
let valid_dir_path =
self.validate_path(Path::new(&input_dir), allowed_directories.clone())?;
let input_dir_str = &valid_dir_path
.as_os_str()
.to_str()
.ok_or(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Invalid UTF-8 in file name",
))?;
let target_path =
self.validate_path(Path::new(&target_zip_file), allowed_directories.clone())?;
if target_path.exists() {
return Err(std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
format!("'{target_zip_file}' already exists!"),
)
.into());
}
let updated_pattern = if pattern.contains('*') {
pattern.to_lowercase()
} else {
format!("*{}*", &pattern.to_lowercase())
};
let glob_pattern = &updated_pattern;
let entries: Vec<_> = WalkDir::new(&valid_dir_path)
.follow_links(true)
.into_iter()
.filter_map(|entry| entry.ok())
.filter_map(|entry| {
let full_path = entry.path();
self.validate_path(full_path, allowed_directories.clone())
.ok()
.and_then(|path| {
                        if path != valid_dir_path
                            && glob_match(
                                glob_pattern,
                                // Lowercase the path to match the lowercased pattern
                                path.display().to_string().to_lowercase().as_ref(),
                            )
                        {
Some(path)
} else {
None
}
})
})
.collect();
let zip_file = File::create(&target_path).await?;
let mut zip_writer = ZipFileWriter::new(zip_file.compat());
for entry_path_buf in &entries {
if entry_path_buf.is_dir() {
continue;
}
let entry_path = entry_path_buf.as_path();
let entry_str = entry_path.as_os_str().to_str().ok_or(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Invalid UTF-8 in file name",
))?;
if !entry_str.starts_with(input_dir_str) {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Entry file path does not start with base input directory path.",
)
.into());
}
let entry_str = &entry_str[input_dir_str.len() + 1..];
write_zip_entry(entry_str, entry_path, &mut zip_writer).await?;
}
let z_file = zip_writer.close().await?;
let zip_file_size = if let Ok(meta_data) = z_file.into_inner().metadata().await {
format_bytes(meta_data.len())
} else {
"unknown".to_string()
};
let result_message = format!(
"Successfully compressed '{}' directory into '{}' ({}).",
input_dir,
target_path.display(),
zip_file_size
);
Ok(result_message)
}
pub async fn zip_files(
&self,
input_files: Vec<String>,
target_zip_file: String,
) -> ServiceResult<String> {
let file_count = input_files.len();
if file_count == 0 {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"No file(s) to zip. The input files array is empty.",
)
.into());
}
let allowed_directories = self.allowed_directories().await;
let target_path =
self.validate_path(Path::new(&target_zip_file), allowed_directories.clone())?;
if target_path.exists() {
return Err(std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
format!("'{target_zip_file}' already exists!"),
)
.into());
}
let source_paths = input_files
.iter()
.map(|p| self.validate_path(Path::new(p), allowed_directories.clone()))
.collect::<Result<Vec<_>, _>>()?;
let zip_file = File::create(&target_path).await?;
let mut zip_writer = ZipFileWriter::new(zip_file.compat());
for path in source_paths {
let filename = path.file_name().ok_or(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Invalid path!",
))?;
let filename = filename.to_str().ok_or(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Invalid UTF-8 in file name",
))?;
write_zip_entry(filename, &path, &mut zip_writer).await?;
}
let z_file = zip_writer.close().await?;
let zip_file_size = if let Ok(meta_data) = z_file.into_inner().metadata().await {
format_bytes(meta_data.len())
} else {
"unknown".to_string()
};
let result_message = format!(
"Successfully compressed {} {} into '{}' ({}).",
file_count,
if file_count == 1 { "file" } else { "files" },
target_path.display(),
zip_file_size
);
Ok(result_message)
}
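    /// Extracts `zip_file` into a new directory at `target_dir` (which must not already exist).
    ///
    /// A usage sketch (hypothetical paths):
    /// ```ignore
    /// let msg = service.unzip_file("./logs.zip", "./logs-restored").await?;
    /// ```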
pub async fn unzip_file(&self, zip_file: &str, target_dir: &str) -> ServiceResult<String> {
let allowed_directories = self.allowed_directories().await;
let zip_file = self.validate_path(Path::new(&zip_file), allowed_directories.clone())?;
let target_dir_path = self.validate_path(Path::new(target_dir), allowed_directories)?;
if !zip_file.exists() {
return Err(std::io::Error::new(
std::io::ErrorKind::NotFound,
"Zip file does not exists.",
)
.into());
}
if target_dir_path.exists() {
return Err(std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
format!("'{target_dir}' directory already exists!"),
)
.into());
}
let file = BufReader::new(File::open(zip_file).await?);
let mut zip = ZipFileReader::with_tokio(file).await?;
let file_count = zip.file().entries().len();
for index in 0..file_count {
let entry = zip.file().entries().get(index).unwrap();
            let entry_path = target_dir_path.join(entry.filename().as_str()?);
            // Guard against zip-slip: the extracted path must stay inside the target directory
            if !normalize_path(&entry_path).starts_with(normalize_path(&target_dir_path)) {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    "Zip entry path escapes the target directory.",
                )
                .into());
            }
// Ensure the parent directory exists
if let Some(parent) = entry_path.parent() {
tokio::fs::create_dir_all(parent).await?;
}
// Extract the file
let reader = zip.reader_without_entry(index).await?;
let mut compat_reader = reader.compat();
let mut output_file = File::create(&entry_path).await?;
tokio::io::copy(&mut compat_reader, &mut output_file).await?;
output_file.flush().await?;
}
let result_message = format!(
"Successfully extracted {} {} into '{}'.",
file_count,
if file_count == 1 { "file" } else { "files" },
target_dir_path.display()
);
Ok(result_message)
}
pub fn mime_from_path(&self, path: &Path) -> ServiceResult<infer::Type> {
let is_svg = path
.extension()
.is_some_and(|e| e.to_str().is_some_and(|s| s == "svg"));
        // Treat it as an SVG file, since SVG cannot be detected from byte patterns
        if is_svg {
            return Ok(infer::Type::new(
                infer::MatcherType::Image,
                "image/svg+xml",
                "svg",
                |_: &[u8]| true,
            ));
        }
        let kind = infer::get_from_path(path)?.ok_or(ServiceError::FromString(
            "File type is unknown!".to_string(),
        ))?;
Ok(kind)
}
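    /// Returns `true` when `file_size` falls within the optional `min_bytes`/`max_bytes`
    /// bounds; an unset bound is treated as unbounded.
    ///
    /// A quick sketch:
    /// ```ignore
    /// assert!(service.filesize_in_range(1_024, None, None));       // no bounds
    /// assert!(service.filesize_in_range(1_024, Some(512), None));  // above min
    /// assert!(!service.filesize_in_range(1_024, None, Some(512))); // above max
    /// ```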
pub fn filesize_in_range(
&self,
file_size: u64,
min_bytes: Option<u64>,
max_bytes: Option<u64>,
) -> bool {
if min_bytes.is_none() && max_bytes.is_none() {
return true;
}
match (min_bytes, max_bytes) {
(_, Some(max)) if file_size > max => false,
(Some(min), _) if file_size < min => false,
_ => true,
}
}
pub async fn validate_file_size<P: AsRef<Path>>(
&self,
path: P,
min_bytes: Option<usize>,
max_bytes: Option<usize>,
) -> ServiceResult<()> {
if min_bytes.is_none() && max_bytes.is_none() {
return Ok(());
}
let file_size = metadata(&path).await?.len() as usize;
match (min_bytes, max_bytes) {
(_, Some(max)) if file_size > max => Err(ServiceError::FileTooLarge(max)),
(Some(min), _) if file_size < min => Err(ServiceError::FileTooSmall(min)),
_ => Ok(()),
}
}
pub async fn read_media_files(
&self,
paths: Vec<String>,
max_bytes: Option<usize>,
) -> ServiceResult<Vec<(infer::Type, String)>> {
let results = stream::iter(paths)
.map(|path| async {
self.read_media_file(Path::new(&path), max_bytes)
.await
.map_err(|e| (path, e))
})
.buffer_unordered(MAX_CONCURRENT_FILE_READ) // Process up to MAX_CONCURRENT_FILE_READ files concurrently
.filter_map(|result| async move { result.ok() })
.collect::<Vec<_>>()
.await;
Ok(results)
}
pub async fn read_media_file(
&self,
file_path: &Path,
max_bytes: Option<usize>,
) -> ServiceResult<(infer::Type, String)> {
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(file_path, allowed_directories)?;
self.validate_file_size(&valid_path, None, max_bytes)
.await?;
let kind = self.mime_from_path(&valid_path)?;
let content = self.read_file_as_base64(&valid_path).await?;
Ok((kind, content))
}
// reads file as base64 efficiently in a streaming manner
async fn read_file_as_base64(&self, file_path: &Path) -> ServiceResult<String> {
let file = File::open(file_path).await?;
let mut reader = BufReader::new(file);
let mut output = Vec::new();
{
// Wrap output Vec<u8> in a Base64 encoder writer
let mut encoder = EncoderWriter::new(&mut output, &general_purpose::STANDARD);
let mut buffer = [0u8; 8192];
loop {
let n = reader.read(&mut buffer).await?;
if n == 0 {
break;
}
// Write raw bytes to the Base64 encoder
encoder.write_all(&buffer[..n])?;
}
// Make sure to flush any remaining bytes
encoder.flush()?;
} // drop encoder before consuming output
// Convert the Base64 bytes to String (safe UTF-8)
let base64_string =
String::from_utf8(output).map_err(|err| ServiceError::FromString(format!("{err}")))?;
Ok(base64_string)
}
pub async fn read_text_file(&self, file_path: &Path) -> ServiceResult<String> {
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(file_path, allowed_directories)?;
let content = tokio::fs::read_to_string(valid_path).await?;
Ok(content)
}
pub async fn create_directory(&self, file_path: &Path) -> ServiceResult<()> {
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(file_path, allowed_directories)?;
tokio::fs::create_dir_all(valid_path).await?;
Ok(())
}
pub async fn move_file(&self, src_path: &Path, dest_path: &Path) -> ServiceResult<()> {
let allowed_directories = self.allowed_directories().await;
let valid_src_path = self.validate_path(src_path, allowed_directories.clone())?;
let valid_dest_path = self.validate_path(dest_path, allowed_directories)?;
tokio::fs::rename(valid_src_path, valid_dest_path).await?;
Ok(())
}
pub async fn list_directory(&self, dir_path: &Path) -> ServiceResult<Vec<tokio::fs::DirEntry>> {
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(dir_path, allowed_directories)?;
let mut dir = tokio::fs::read_dir(valid_path).await?;
let mut entries = Vec::new();
// Use a loop to collect the directory entries
while let Some(entry) = dir.next_entry().await? {
entries.push(entry);
}
Ok(entries)
}
    pub async fn write_file(&self, file_path: &Path, content: &str) -> ServiceResult<()> {
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(file_path, allowed_directories)?;
tokio::fs::write(valid_path, content).await?;
Ok(())
}
/// Searches for files in the directory tree starting at `root_path` that match the given `pattern`,
/// excluding paths that match any of the `exclude_patterns`.
///
/// # Arguments
/// * `root_path` - The root directory to start the search from.
    /// * `pattern` - A glob pattern to match file names (case-insensitive). If no wildcards are provided,
    ///   the pattern is wrapped in `**/*{pattern}*` for partial matching.
    /// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive).
    ///
    /// # Returns
    /// A `ServiceResult` containing a vector of `walkdir::DirEntry` objects for matching files,
    /// or a `ServiceError` if an error occurs.
pub async fn search_files(
&self,
root_path: &Path,
pattern: String,
exclude_patterns: Vec<String>,
min_bytes: Option<u64>,
max_bytes: Option<u64>,
) -> ServiceResult<Vec<walkdir::DirEntry>> {
let result = self
.search_files_iter(root_path, pattern, exclude_patterns, min_bytes, max_bytes)
.await?;
Ok(result.collect::<Vec<walkdir::DirEntry>>())
}
/// Returns an iterator over files in the directory tree starting at `root_path` that match
/// the given `pattern`, excluding paths that match any of the `exclude_patterns`.
///
/// # Arguments
/// * `root_path` - The root directory to start the search from.
/// * `pattern` - A glob pattern to match file names. If no wildcards are provided, the pattern is wrapped in `**/*{pattern}*` for partial matching.
/// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive).
///
/// # Returns
/// A `ServiceResult` containing an iterator yielding `walkdir::DirEntry` objects for matching files,
/// or a `ServiceError` if an error occurs.
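    ///
    /// A usage sketch (hypothetical paths):
    /// ```ignore
    /// let entries = service
    ///     .search_files_iter(Path::new("./src"), "*.rs".into(), vec!["target".into()], None, None)
    ///     .await?;
    /// for entry in entries {
    ///     println!("{}", entry.path().display());
    /// }
    /// ```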
pub async fn search_files_iter<'a>(
&'a self,
root_path: &'a Path,
pattern: String,
exclude_patterns: Vec<String>,
min_bytes: Option<u64>,
max_bytes: Option<u64>,
) -> ServiceResult<impl Iterator<Item = walkdir::DirEntry> + 'a> {
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(root_path, allowed_directories.clone())?;
let updated_pattern = if pattern.contains('*') {
pattern.to_lowercase()
} else {
format!("**/*{}*", &pattern.to_lowercase())
};
let glob_pattern = updated_pattern;
let result = WalkDir::new(valid_path)
.follow_links(true)
.into_iter()
.filter_entry(move |dir_entry| {
let full_path = dir_entry.path();
// Validate each path before processing
let validated_path = self
.validate_path(full_path, allowed_directories.clone())
.ok();
if validated_path.is_none() {
// Skip invalid paths during search
return false;
}
// Get the relative path from the root_path
let relative_path = full_path.strip_prefix(root_path).unwrap_or(full_path);
let mut should_exclude = exclude_patterns.iter().any(|pattern| {
let glob_pattern = if pattern.contains('*') {
pattern.strip_prefix("/").unwrap_or(pattern).to_owned()
} else {
format!("*{pattern}*")
};
glob_match(&glob_pattern, relative_path.to_str().unwrap_or(""))
});
                // Enforce min/max bytes on files when at least one bound is set
                if !should_exclude
                    && dir_entry.file_type().is_file()
                    && (min_bytes.is_some() || max_bytes.is_some())
                {
                    match dir_entry.metadata().ok() {
                        Some(metadata) => {
                            if !self.filesize_in_range(metadata.len(), min_bytes, max_bytes) {
                                should_exclude = true;
                            }
                        }
                        None => {
                            should_exclude = true;
                        }
                    }
                }
!should_exclude
})
.filter_map(|v| v.ok())
.filter(move |entry| {
if root_path == entry.path() {
return false;
}
glob_match(
&glob_pattern,
&entry.file_name().to_str().unwrap_or("").to_lowercase(),
)
});
Ok(result)
}
/// Generates a JSON representation of a directory tree starting at the given path.
///
/// This function recursively builds a JSON array object representing the directory structure,
/// where each entry includes a `name` (file or directory name), `type` ("file" or "directory"),
/// and for directories, a `children` array containing their contents. Files do not have a
/// `children` field.
///
/// The function supports optional constraints to limit the tree size:
/// - `max_depth`: Limits the depth of directory traversal.
/// - `max_files`: Limits the total number of entries (files and directories).
///
    /// # IMPORTANT NOTE
    ///
    /// Using `max_depth` or `max_files` can lead to a partial or skewed representation of the actual directory tree.
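    ///
    /// A sketch of the call and the produced JSON shape (hypothetical listing):
    /// ```ignore
    /// let mut count = 0;
    /// let dirs = service.allowed_directories().await;
    /// let (tree, truncated) = service.directory_tree("./project", Some(2), None, &mut count, dirs)?;
    /// // tree:
    /// // [
    /// //   { "name": "src", "type": "directory", "children": [
    /// //       { "name": "main.rs", "type": "file" }
    /// //   ] },
    /// //   { "name": "README.md", "type": "file" }
    /// // ]
    /// ```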
pub fn directory_tree<P: AsRef<Path>>(
&self,
root_path: P,
max_depth: Option<usize>,
max_files: Option<usize>,
current_count: &mut usize,
allowed_directories: Arc<Vec<PathBuf>>,
) -> ServiceResult<(Value, bool)> {
let valid_path = self.validate_path(root_path.as_ref(), allowed_directories.clone())?;
let metadata = fs::metadata(&valid_path)?;
if !metadata.is_dir() {
return Err(ServiceError::FromString(
"Root path must be a directory".into(),
));
}
let mut children = Vec::new();
let mut reached_max_depth = false;
if max_depth != Some(0) {
for entry in WalkDir::new(valid_path)
.min_depth(1)
.max_depth(1)
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
{
let child_path = entry.path();
let metadata = fs::metadata(child_path)?;
let entry_name = child_path
.file_name()
.ok_or(ServiceError::FromString("Invalid path".to_string()))?
.to_string_lossy()
.into_owned();
// Increment the count for this entry
*current_count += 1;
// Check if we've exceeded max_files (if set)
if let Some(max) = max_files {
if *current_count > max {
continue; // Skip this entry but continue processing others
}
}
let mut json_entry = json!({
"name": entry_name,
"type": if metadata.is_dir() { "directory" } else { "file" }
});
if metadata.is_dir() {
let next_depth = max_depth.map(|d| d - 1);
let (child_children, child_reached_max_depth) = self.directory_tree(
child_path,
next_depth,
max_files,
current_count,
allowed_directories.clone(),
)?;
json_entry
.as_object_mut()
.unwrap()
.insert("children".to_string(), child_children);
reached_max_depth |= child_reached_max_depth;
}
children.push(json_entry);
}
} else {
// If max_depth is 0, we skip processing this directory's children
reached_max_depth = true;
}
Ok((Value::Array(children), reached_max_depth))
}
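    /// Builds a unified diff between `original_content` and `new_content`, prefixed
    /// with an `Index:` header line.
    ///
    /// A sketch of the output shape (hypothetical contents):
    /// ```ignore
    /// let patch = service.create_unified_diff("a\nb\n", "a\nc\n", Some("notes.txt".into()));
    /// // Index: notes.txt
    /// // ====================================================================
    /// // --- notes.txt	original
    /// // +++ notes.txt	modified
    /// // @@ ... @@
    /// ```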
pub fn create_unified_diff(
&self,
original_content: &str,
new_content: &str,
filepath: Option<String>,
) -> String {
// Ensure consistent line endings for diff
let normalized_original = normalize_line_endings(original_content);
let normalized_new = normalize_line_endings(new_content);
        // Generate the diff using TextDiff
let diff = TextDiff::from_lines(&normalized_original, &normalized_new);
let file_name = filepath.unwrap_or("file".to_string());
// Format the diff as a unified diff
let patch = diff
.unified_diff()
.header(
format!("{file_name}\toriginal").as_str(),
format!("{file_name}\tmodified").as_str(),
)
.context_radius(4)
.to_string();
format!("Index: {}\n{}\n{}", file_name, "=".repeat(68), patch)
}
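    /// Applies a sequence of `EditOperation`s to `file_path` and returns a unified diff
    /// of the changes. With `dry_run = Some(true)`, the diff is computed but nothing is written.
    ///
    /// A usage sketch (hypothetical edit values):
    /// ```ignore
    /// let diff = service
    ///     .apply_file_edits(
    ///         Path::new("./src/lib.rs"),
    ///         vec![EditOperation {
    ///             old_text: "fn old_name()".into(),
    ///             new_text: "fn new_name()".into(),
    ///         }],
    ///         Some(true), // dry run: report the diff without writing
    ///         None,
    ///     )
    ///     .await?;
    /// ```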
pub async fn apply_file_edits(
&self,
file_path: &Path,
edits: Vec<EditOperation>,
dry_run: Option<bool>,
save_to: Option<&Path>,
) -> ServiceResult<String> {
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(file_path, allowed_directories)?;
// Read file content and normalize line endings
let content_str = tokio::fs::read_to_string(&valid_path).await?;
let original_line_ending = self.detect_line_ending(&content_str);
let content_str = normalize_line_endings(&content_str);
// Apply edits sequentially
let mut modified_content = content_str.clone();
for edit in edits {
let normalized_old = normalize_line_endings(&edit.old_text);
let normalized_new = normalize_line_endings(&edit.new_text);
// If exact match exists, use it
if modified_content.contains(&normalized_old) {
modified_content = modified_content.replacen(&normalized_old, &normalized_new, 1);
continue;
}
// Otherwise, try line-by-line matching with flexibility for whitespace
let old_lines: Vec<String> = normalized_old
.trim_end()
.split('\n')
.map(|s| s.to_string())
.collect();
let content_lines: Vec<String> = modified_content
.trim_end()
.split('\n')
.map(|s| s.to_string())
.collect();
let mut match_found = false;
// skip when the match is impossible:
if old_lines.len() > content_lines.len() {
let error_message = format!(
"Cannot apply edit: the original text spans more lines ({}) than the file content ({}).",
old_lines.len(),
content_lines.len()
);
return Err(RpcError::internal_error()
.with_message(error_message)
.into());
}
let max_start = content_lines.len().saturating_sub(old_lines.len());
for i in 0..=max_start {
let potential_match = &content_lines[i..i + old_lines.len()];
// Compare lines with normalized whitespace
let is_match = old_lines.iter().enumerate().all(|(j, old_line)| {
let content_line = &potential_match[j];
old_line.trim() == content_line.trim()
});
if is_match {
// Preserve original indentation of first line
let original_indent = content_lines[i]
.chars()
.take_while(|&c| c.is_whitespace())
.collect::<String>();
let new_lines: Vec<String> = normalized_new
.split('\n')
.enumerate()
.map(|(j, line)| {
// Keep indentation of the first line
if j == 0 {
return format!("{}{}", original_indent, line.trim_start());
}
// For subsequent lines, preserve relative indentation and original whitespace type
let old_indent = old_lines
.get(j)
.map(|line| {
line.chars()
.take_while(|&c| c.is_whitespace())
.collect::<String>()
})
.unwrap_or_default();
let new_indent = line
.chars()
.take_while(|&c| c.is_whitespace())
.collect::<String>();
// Use the same whitespace character as original_indent (tabs or spaces)
let indent_char = if original_indent.contains('\t') {
"\t"
} else {
" "
};
let relative_indent = if new_indent.len() >= old_indent.len() {
new_indent.len() - old_indent.len()
} else {
0 // Don't reduce indentation below original
};
format!(
"{}{}{}",
&original_indent,
&indent_char.repeat(relative_indent),
line.trim_start()
)
})
.collect();
let mut content_lines = content_lines.clone();
content_lines.splice(i..i + old_lines.len(), new_lines);
modified_content = content_lines.join("\n");
match_found = true;
break;
}
}
if !match_found {
return Err(RpcError::internal_error()
.with_message(format!(
"Could not find exact match for edit:\n{}",
edit.old_text
))
.into());
}
}
let diff = self.create_unified_diff(
&content_str,
&modified_content,
Some(valid_path.display().to_string()),
);
// Format diff with appropriate number of backticks
let mut num_backticks = 3;
while diff.contains(&"`".repeat(num_backticks)) {
num_backticks += 1;
}
let formatted_diff = format!(
"{}diff\n{}{}\n\n",
"`".repeat(num_backticks),
diff,
"`".repeat(num_backticks)
);
let is_dry_run = dry_run.unwrap_or(false);
if !is_dry_run {
let target = save_to.unwrap_or(valid_path.as_path());
let modified_content = modified_content.replace("\n", original_line_ending);
tokio::fs::write(target, modified_content).await?;
}
Ok(formatted_diff)
}
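    /// Escapes regex metacharacters so `text` can be used as a literal pattern.
    ///
    /// A quick sketch:
    /// ```ignore
    /// assert_eq!(service.escape_regex("a.b*c"), r"a\.b\*c");
    /// ```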
pub fn escape_regex(&self, text: &str) -> String {
// Covers special characters in regex engines (RE2, PCRE, JS, Python)
const SPECIAL_CHARS: &[char] = &[
'.', '^', '$', '*', '+', '?', '(', ')', '[', ']', '{', '}', '\\', '|', '/',
];
let mut escaped = String::with_capacity(text.len());
for ch in text.chars() {
if SPECIAL_CHARS.contains(&ch) {
escaped.push('\\');
}
escaped.push(ch);
}
escaped
}
    /// Searches the content of a file for occurrences of the given query string.
    ///
    /// This method searches the file specified by `file_path` for lines matching the `query`.
    /// The search can be performed as a regular expression or as a literal string,
    /// depending on the `is_regex` flag.
    ///
    /// If a matched line is long, a snippet of up to `SNIPPET_MAX_LENGTH` characters is extracted around the matched text.
///
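    /// A usage sketch (hypothetical file):
    /// ```ignore
    /// if let Some(found) = service.content_search("TODO", "./src/main.rs", Some(false))? {
    ///     for m in found.matches {
    ///         println!("{}:{}: {}", found.file_path.display(), m.line_number, m.line_text);
    ///     }
    /// }
    /// ```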
pub fn content_search(
&self,
query: &str,
file_path: impl AsRef<Path>,
is_regex: Option<bool>,
) -> ServiceResult<Option<FileSearchResult>> {
let query = if is_regex.unwrap_or_default() {
query.to_string()
} else {
self.escape_regex(query)
};
let matcher = RegexMatcherBuilder::new()
.case_insensitive(true)
.build(query.as_str())?;
let mut searcher = Searcher::new();
let mut result = FileSearchResult {
file_path: file_path.as_ref().to_path_buf(),
matches: vec![],
};
searcher.set_binary_detection(BinaryDetection::quit(b'\x00'));
searcher.search_path(
&matcher,
file_path,
UTF8(|line_number, line| {
                // The searcher only reports matching lines, so a match should exist;
                // skip defensively if the matcher disagrees (e.g. on line boundaries)
                let Some(actual_match) = matcher.find(line.as_bytes())? else {
                    return Ok(true);
                };
result.matches.push(ContentMatchResult {
line_number,
start_pos: actual_match.start(),
line_text: self.extract_snippet(line, actual_match, None, None),
});
Ok(true)
}),
)?;
if result.matches.is_empty() {
return Ok(None);
}
Ok(Some(result))
}
/// Extracts a snippet from a given line of text around a match.
///
    /// It extracts a substring starting a fixed number of characters (`SNIPPET_BACKWARD_CHARS`)
    /// before the start position of the match, extending up to `max_length` characters.
    /// If the snippet does not include the beginning or end of the original line, ellipses (`"..."`)
    /// are added to indicate the truncation.
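    ///
    /// A sketch of the truncation behavior, assuming a `Match` built with
    /// `grep::matcher::Match::new` (hypothetical values):
    /// ```ignore
    /// let line = "x".repeat(300);
    /// // Match at bytes 250..254: the snippet starts SNIPPET_BACKWARD_CHARS (30)
    /// // characters before the match and is capped at SNIPPET_MAX_LENGTH (200),
    /// // with "..." marking the trimmed ends.
    /// let snippet = service.extract_snippet(&line, Match::new(250, 254), None, None);
    /// ```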
pub fn extract_snippet(
&self,
line: &str,
match_result: Match,
max_length: Option<usize>,
backward_chars: Option<usize>,
) -> String {
let max_length = max_length.unwrap_or(SNIPPET_MAX_LENGTH);
let backward_chars = backward_chars.unwrap_or(SNIPPET_BACKWARD_CHARS);
// Calculate the number of leading whitespace bytes to adjust for trimmed input
let start_pos = line.len() - line.trim_start().len();
// Trim leading and trailing whitespace from the input line
let line = line.trim();
        // Calculate the desired start byte index by adjusting the match start for trimming and backward chars
        // match_result.start() is the byte index in the original string; subtract start_pos to account for
        // trimmed leading whitespace (saturating, in case the match began inside it) and backward_chars to
        // include context before the match
        let desired_start = match_result
            .start()
            .saturating_sub(start_pos)
            .saturating_sub(backward_chars);
// Find the nearest valid UTF-8 character boundary at or after desired_start
// Prevents "byte index is not a char boundary" panic by ensuring the slice starts at a valid character (issue #37)
let snippet_start = line
.char_indices()
.map(|(i, _)| i)
.find(|&i| i >= desired_start)
.unwrap_or(desired_start.min(line.len()));
        // Determine the snippet end by walking max_length characters forward from
        // snippet_start; char_indices yields valid UTF-8 boundaries, which prevents
        // slicing issues with multi-byte characters
        let snippet_end = line[snippet_start..]
            .char_indices()
            .nth(max_length)
            .map(|(i, _)| snippet_start + i)
            .unwrap_or(line.len());
// Extract the snippet from the trimmed line using the calculated byte indices
let snippet = &line[snippet_start..snippet_end];
let mut result = String::new();
// Add leading ellipsis if the snippet doesn't start at the beginning of the trimmed line
if snippet_start > 0 {
result.push_str("...");
}
result.push_str(snippet);
// Add trailing ellipsis if the snippet doesn't reach the end of the trimmed line
if snippet_end < line.len() {
result.push_str("...");
}
result
}
#[allow(clippy::too_many_arguments)]
pub async fn search_files_content(
&self,
root_path: impl AsRef<Path>,
pattern: &str,
query: &str,
is_regex: bool,
exclude_patterns: Option<Vec<String>>,
min_bytes: Option<u64>,
max_bytes: Option<u64>,
) -> ServiceResult<Vec<FileSearchResult>> {
let files_iter = self
.search_files_iter(
root_path.as_ref(),
pattern.to_string(),
exclude_patterns.to_owned().unwrap_or_default(),
min_bytes,
max_bytes,
)
.await?;
let results: Vec<FileSearchResult> = files_iter
.filter_map(|entry| {
                self.content_search(query, entry.path(), Some(is_regex))
                    .ok()
                    .flatten()
})
.collect();
Ok(results)
}
/// Reads the first n lines from a text file, preserving line endings.
/// Args:
/// file_path: Path to the file
/// n: Number of lines to read
/// Returns a String containing the first n lines with original line endings or an error if the path is invalid or file cannot be read.
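    ///
    /// A usage sketch (hypothetical path):
    /// ```ignore
    /// let first_ten = service.head_file(Path::new("./logs/app.log"), 10).await?;
    /// ```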
pub async fn head_file(&self, file_path: &Path, n: usize) -> ServiceResult<String> {
// Validate file path against allowed directories
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(file_path, allowed_directories)?;
// Open file asynchronously and create a BufReader
let file = File::open(&valid_path).await?;
let mut reader = BufReader::new(file);
let mut result = String::with_capacity(n * 100); // Estimate capacity (avg 100 bytes/line)
let mut count = 0;
// Read lines asynchronously, preserving line endings
let mut line = Vec::new();
while count < n {
line.clear();
let bytes_read = reader.read_until(b'\n', &mut line).await?;
if bytes_read == 0 {
break; // Reached EOF
}
result.push_str(&String::from_utf8_lossy(&line));
count += 1;
}
Ok(result)
}
/// Reads the last n lines from a text file, preserving line endings.
/// Args:
/// file_path: Path to the file
/// n: Number of lines to read
/// Returns a String containing the last n lines with original line endings or an error if the path is invalid or file cannot be read.
pub async fn tail_file(&self, file_path: &Path, n: usize) -> ServiceResult<String> {
// Validate file path against allowed directories
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(file_path, allowed_directories)?;
// Open file asynchronously
let file = File::open(&valid_path).await?;
let file_size = file.metadata().await?.len();
// If file is empty or n is 0, return empty string
if file_size == 0 || n == 0 {
return Ok(String::new());
}
// Create a BufReader
let mut reader = BufReader::new(file);
let mut line_count = 0;
let mut pos = file_size;
let chunk_size = 8192; // 8KB chunks
let mut buffer = vec![0u8; chunk_size];
let mut newline_positions = Vec::new();
// Read backwards to collect all newline positions
while pos > 0 {
let read_size = chunk_size.min(pos as usize);
pos -= read_size as u64;
reader.seek(SeekFrom::Start(pos)).await?;
let read_bytes = reader.read_exact(&mut buffer[..read_size]).await?;
// Process chunk in reverse to find newlines
for (i, byte) in buffer[..read_bytes].iter().enumerate().rev() {
if *byte == b'\n' {
newline_positions.push(pos + i as u64);
line_count += 1;
}
}
        }
        // Positions were collected while scanning backwards; flip to ascending order
        newline_positions.reverse();
// Check if file ends with a non-newline character (partial last line)
if file_size > 0 {
let mut temp_reader = BufReader::new(File::open(&valid_path).await?);
temp_reader.seek(SeekFrom::End(-1)).await?;
let mut last_byte = [0u8; 1];
temp_reader.read_exact(&mut last_byte).await?;
if last_byte[0] != b'\n' {
line_count += 1;
}
}
        // Determine start position for reading the last n lines
        let start_pos = if line_count <= n {
            0 // Read from start if the file has no more than n lines
        } else {
            // Start just after the newline that terminates line (line_count - n)
            *newline_positions.get(line_count - n - 1).unwrap_or(&0) + 1
        };
// Read forward from start_pos
reader.seek(SeekFrom::Start(start_pos)).await?;
let mut result = String::with_capacity(n * 100); // Estimate capacity
let mut line = Vec::new();
let mut lines_read = 0;
while lines_read < n {
line.clear();
let bytes_read = reader.read_until(b'\n', &mut line).await?;
if bytes_read == 0 {
// Handle partial last line at EOF
if !line.is_empty() {
result.push_str(&String::from_utf8_lossy(&line));
}
break;
}
result.push_str(&String::from_utf8_lossy(&line));
lines_read += 1;
}
Ok(result)
}
/// Reads lines from a text file starting at the specified offset (0-based), preserving line endings.
/// Args:
/// path: Path to the file
/// offset: Number of lines to skip (0-based)
/// limit: Optional maximum number of lines to read
/// Returns a String containing the selected lines with original line endings or an error if the path is invalid or file cannot be read.
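    ///
    /// A usage sketch (hypothetical path): skip 100 lines, then read up to 50.
    /// ```ignore
    /// let chunk = service
    ///     .read_file_lines(Path::new("./logs/app.log"), 100, Some(50))
    ///     .await?;
    /// ```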
pub async fn read_file_lines(
&self,
path: &Path,
offset: usize,
limit: Option<usize>,
) -> ServiceResult<String> {
// Validate file path against allowed directories
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(path, allowed_directories)?;
// Open file and get metadata before moving into BufReader
let file = File::open(&valid_path).await?;
let file_size = file.metadata().await?.len();
let mut reader = BufReader::new(file);
// If file is empty or limit is 0, return empty string
if file_size == 0 || limit == Some(0) {
return Ok(String::new());
}
// Skip offset lines (0-based indexing)
let mut buffer = Vec::new();
for _ in 0..offset {
buffer.clear();
if reader.read_until(b'\n', &mut buffer).await? == 0 {
return Ok(String::new()); // EOF before offset
}
}
// Read lines up to limit (or all remaining if limit is None)
let mut result = String::with_capacity(limit.unwrap_or(100) * 100); // Estimate capacity
match limit {
Some(max_lines) => {
for _ in 0..max_lines {
buffer.clear();
let bytes_read = reader.read_until(b'\n', &mut buffer).await?;
if bytes_read == 0 {
break; // Reached EOF
}
result.push_str(&String::from_utf8_lossy(&buffer));
}
}
None => {
loop {
buffer.clear();
let bytes_read = reader.read_until(b'\n', &mut buffer).await?;
if bytes_read == 0 {
break; // Reached EOF
}
result.push_str(&String::from_utf8_lossy(&buffer));
}
}
}
Ok(result)
}
/// Calculates the total size (in bytes) of all files within a directory tree.
///
/// This function recursively searches the specified `root_path` for files,
/// filters out directories and non-file entries, and sums the sizes of all found files.
/// The size calculation is parallelized using Rayon for improved performance on large directories.
///
/// # Arguments
/// * `root_path` - The root directory path to start the size calculation.
///
/// # Returns
/// Returns a `ServiceResult<u64>` containing the total size in bytes of all files under the `root_path`.
///
/// # Notes
/// - Only files are included in the size calculation; directories and other non-file entries are ignored.
/// - The search pattern is `"**/*"` (all files) and no exclusions are applied.
/// - Parallel iteration is used to speed up the metadata fetching and summation.
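    ///
    /// A usage sketch (hypothetical path):
    /// ```ignore
    /// let total = service.calculate_directory_size(Path::new("./data")).await?;
    /// println!("total: {}", format_bytes(total));
    /// ```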
pub async fn calculate_directory_size(&self, root_path: &Path) -> ServiceResult<u64> {
let entries = self
.search_files_iter(root_path, "**/*".to_string(), vec![], None, None)
.await?
.filter(|e| e.file_type().is_file()); // Only process files
// Use rayon to parallelize size summation
let total_size: u64 = entries
.par_bridge() // Convert to parallel iterator
.filter_map(|entry| entry.metadata().ok().map(|meta| meta.len()))
.sum();
Ok(total_size)
}
/// Recursively finds all empty directories within the given root path.
///
    /// A directory is considered empty if it contains no files in itself or any of its subdirectories,
    /// except OS metadata files: `.DS_Store` (macOS) and `Thumbs.db` (Windows).
    /// Empty subdirectories are allowed. You can optionally provide a list of glob-style patterns in
/// `exclude_patterns` to ignore certain paths during the search (e.g., to skip system folders or hidden directories).
///
/// # Arguments
/// - `root_path`: The starting directory to search.
/// - `exclude_patterns`: Optional list of glob patterns to exclude from the search.
/// Directories matching these patterns will be ignored.
///
/// # Errors
/// Returns an error if the root path is invalid or inaccessible.
///
/// # Returns
/// A list of paths to empty directories, as strings, including parent directories that contain only empty subdirectories.
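    ///
    /// A usage sketch (hypothetical values):
    /// ```ignore
    /// let empty = service
    ///     .find_empty_directories(Path::new("./workspace"), Some(vec!["*.git*".into()]))
    ///     .await?;
    /// ```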
pub async fn find_empty_directories(
&self,
root_path: &Path,
exclude_patterns: Option<Vec<String>>,
) -> ServiceResult<Vec<String>> {
let walker = self
.search_files_iter(
root_path,
"**/*".to_string(),
exclude_patterns.unwrap_or_default(),
None,
None,
)
.await?
.filter(|e| e.file_type().is_dir()); // Only directories
let mut empty_dirs = Vec::new();
// Check each directory for emptiness
for entry in walker {
            let is_empty = WalkDir::new(entry.path())
                .into_iter()
                .filter_map(|e| e.ok())
                // The directory is empty if it contains no files other than OS metadata
                // files (e.g. ".DS_Store" on macOS) in itself or any subdirectory
                .all(|e| !e.file_type().is_file() || is_system_metadata_file(e.file_name()));
if is_empty {
if let Some(path_str) = entry.path().to_str() {
empty_dirs.push(path_str.to_string());
}
}
}
Ok(empty_dirs)
}
/// Finds groups of duplicate files within the given root path.
/// Returns a vector of vectors, where each inner vector contains paths to files with identical content.
/// Files are considered duplicates if they have the same size and SHA-256 hash.
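    ///
    /// A usage sketch (hypothetical path):
    /// ```ignore
    /// let groups = service
    ///     .find_duplicate_files(Path::new("./photos"), None, None, None, None)
    ///     .await?;
    /// for group in groups {
    ///     println!("duplicates: {group:?}");
    /// }
    /// ```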
pub async fn find_duplicate_files(
&self,
root_path: &Path,
pattern: Option<String>,
exclude_patterns: Option<Vec<String>>,
min_bytes: Option<u64>,
max_bytes: Option<u64>,
) -> ServiceResult<Vec<Vec<String>>> {
// Validate root path against allowed directories
let allowed_directories = self.allowed_directories().await;
let valid_path = self.validate_path(root_path, allowed_directories)?;
// Get Tokio runtime handle
let rt = tokio::runtime::Handle::current();
// Step 1: Collect files and group by size
let mut size_map: HashMap<u64, Vec<String>> = HashMap::new();
let entries = self
.search_files_iter(
&valid_path,
pattern.unwrap_or("**/*".to_string()),
exclude_patterns.unwrap_or_default(),
min_bytes,
max_bytes,
)
.await?
.filter(|e| e.file_type().is_file()); // Only files
for entry in entries {
if let Ok(metadata) = entry.metadata() {
if let Some(path_str) = entry.path().to_str() {
size_map
.entry(metadata.len())
.or_default()
.push(path_str.to_string());
}
}
}
// Filter out sizes with only one file (no duplicates possible)
let size_groups: Vec<Vec<String>> = size_map
.into_iter()
.collect::<Vec<_>>() // Collect into Vec to enable parallel iteration
.into_par_iter()
.filter(|(_, paths)| paths.len() > 1)
.map(|(_, paths)| paths)
.collect();
// Step 2: Group by quick hash (first 4KB)
let mut quick_hash_map: HashMap<Vec<u8>, Vec<String>> = HashMap::new();
for paths in size_groups.into_iter() {
let quick_hashes: Vec<(String, Vec<u8>)> = paths
.into_par_iter()
.filter_map(|path| {
let rt = rt.clone(); // Clone the runtime handle for this task
rt.block_on(async {
let file = File::open(&path).await.ok()?;
let mut reader = tokio::io::BufReader::new(file);
let mut buffer = vec![0u8; 4096]; // Read first 4KB
let bytes_read = reader.read(&mut buffer).await.ok()?;
let mut hasher = Sha256::new();
hasher.update(&buffer[..bytes_read]);
Some((path, hasher.finalize().to_vec()))
})
})
.collect();
for (path, hash) in quick_hashes {
quick_hash_map.entry(hash).or_default().push(path);
}
}
// Step 3: Group by full hash for groups with multiple files
let mut full_hash_map: HashMap<Vec<u8>, Vec<String>> = HashMap::new();
let filtered_quick_hashes: Vec<(Vec<u8>, Vec<String>)> = quick_hash_map
.into_iter()
.collect::<Vec<_>>()
.into_par_iter()
.filter(|(_, paths)| paths.len() > 1)
.collect();
for (_quick_hash, paths) in filtered_quick_hashes {
let full_hashes: Vec<(String, Vec<u8>)> = paths
.into_par_iter()
.filter_map(|path| {
let rt = rt.clone(); // Clone the runtime handle for this task
rt.block_on(async {
let file = File::open(&path).await.ok()?;
let mut reader = tokio::io::BufReader::new(file);
let mut hasher = Sha256::new();
let mut buffer = vec![0u8; 8192]; // 8KB chunks
loop {
let bytes_read = reader.read(&mut buffer).await.ok()?;
if bytes_read == 0 {
break;
}
hasher.update(&buffer[..bytes_read]);
}
Some((path, hasher.finalize().to_vec()))
})
})
.collect();
for (path, hash) in full_hashes {
full_hash_map.entry(hash).or_default().push(path);
}
}
// Collect groups of duplicates (only groups with more than one file)
let duplicates: Vec<Vec<String>> = full_hash_map
.into_values()
.filter(|group| group.len() > 1)
.collect();
Ok(duplicates)
}
}