find_large_files
Locate files exceeding a specified size within a directory on the MCP Filesystem Server. Specify the starting path and minimum size, plus options for recursive search, exclusion patterns, a result limit, and output format.
Instructions
Find files larger than the specified size.
Args:
- path: Starting directory
- min_size_mb: Minimum file size in megabytes
- recursive: Whether to search subdirectories
- max_results: Maximum number of results to return
- exclude_patterns: Optional patterns to exclude
- format: Output format ('text' or 'json')
- ctx: MCP context

Returns: Large file information, as text or a JSON string depending on `format`.
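As an illustration (paths and sizes are hypothetical), the default text format follows the pattern built by the handler shown under Implementation Reference:

```
Found 2 files larger than 100 MB:

/data/video/render_final.mov - 1325.40 MB
/data/backups/db_snapshot.sql - 412.07 MB
```

With `format='json'`, the same results are returned as `json.dumps` of the file-information dictionaries, which include at least the `path` and `size` fields used above.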
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| exclude_patterns | No | Regular-expression patterns to exclude from the search | None |
| format | No | Output format (`text` or `json`) | text |
| max_results | No | Maximum number of results to return | 100 |
| min_size_mb | No | Minimum file size in megabytes | 100 |
| path | Yes | Starting directory | |
| recursive | No | Whether to search subdirectories | True |
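For example, a request body matching this schema might look like the following (directory and patterns are illustrative):

```json
{
  "path": "/data/projects",
  "min_size_mb": 250,
  "recursive": true,
  "max_results": 50,
  "exclude_patterns": ["\\.git/", "node_modules"],
  "format": "json"
}
```

Note that `exclude_patterns` entries are regular expressions, not shell globs: the helper compiles each one with `re.compile` and tests it with `re.search` against the entry's full path, so `\.log$` excludes files ending in `.log` while `node_modules` excludes any path containing that substring. Patterns that fail to compile are logged as warnings and skipped rather than raising an error.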
Implementation Reference
- `mcp_filesystem/server.py:616-665` (handler): MCP tool handler and registration for `find_large_files`. This is the tool's entry point, decorated with `@mcp.tool()`; it validates inputs implicitly via type hints, calls the core implementation, and formats the output as text or JSON.

```python
@mcp.tool()
async def find_large_files(
    path: str,
    ctx: Context,
    min_size_mb: float = 100,
    recursive: bool = True,
    max_results: int = 100,
    exclude_patterns: Optional[List[str]] = None,
    format: str = "text",
) -> str:
    """Find files larger than the specified size.

    Args:
        path: Starting directory
        min_size_mb: Minimum file size in megabytes
        recursive: Whether to search subdirectories
        max_results: Maximum number of results to return
        exclude_patterns: Optional patterns to exclude
        format: Output format ('text' or 'json')
        ctx: MCP context

    Returns:
        Large file information
    """
    try:
        components = get_components()
        results = await components["advanced"].find_large_files(
            path, min_size_mb, recursive, max_results, exclude_patterns
        )

        if format.lower() == "json":
            return json.dumps(results, indent=2)

        # Format as text
        if not results:
            return f"No files larger than {min_size_mb} MB found"

        lines = []
        for file in results:
            size_mb = file["size"] / (1024 * 1024)
            lines.append(f"{file['path']} - {size_mb:.2f} MB")

        return (
            f"Found {len(results)} files larger than {min_size_mb} MB:\n\n"
            + "\n".join(lines)
        )
    except Exception as e:
        return f"Error finding large files: {str(e)}"
```
- `mcp_filesystem/advanced.py:585-678` (helper): Core helper implementing the file-scanning logic: path validation against allowed directories, recursive traversal with exclude-pattern filtering, size checks, and sorting of results by size.

```python
async def find_large_files(
    self,
    root_path: Union[str, Path],
    min_size_mb: float = 100,
    recursive: bool = True,
    max_results: int = 100,
    exclude_patterns: Optional[List[str]] = None,
) -> List[Dict]:
    """Find files larger than the specified size.

    Args:
        root_path: Starting directory
        min_size_mb: Minimum file size in megabytes
        recursive: Whether to search subdirectories
        max_results: Maximum number of results to return
        exclude_patterns: Optional patterns to exclude

    Returns:
        List of file information dictionaries for large files

    Raises:
        ValueError: If root_path is outside allowed directories
    """
    min_size_bytes = int(min_size_mb * 1024 * 1024)

    abs_path, allowed = await self.validator.validate_path(root_path)
    if not allowed:
        raise ValueError(f"Path outside allowed directories: {root_path}")
    if not abs_path.is_dir():
        raise ValueError(f"Not a directory: {root_path}")

    # Compile exclude patterns if provided
    exclude_regexes = []
    if exclude_patterns:
        for exclude in exclude_patterns:
            try:
                exclude_regexes.append(re.compile(exclude))
            except re.error:
                logger.warning(f"Invalid exclude pattern: {exclude}")

    # Find large files
    results: List[Dict[str, Any]] = []

    async def scan_for_large_files(dir_path: Path) -> None:
        if len(results) >= max_results:
            return

        try:
            entries = await anyio.to_thread.run_sync(list, dir_path.iterdir())

            for entry in entries:
                if len(results) >= max_results:
                    return

                # Skip if matched by exclude pattern
                path_str = str(entry)
                excluded = False
                for exclude_re in exclude_regexes:
                    if exclude_re.search(path_str):
                        excluded = True
                        break
                if excluded:
                    continue

                try:
                    if entry.is_file():
                        size = entry.stat().st_size
                        if size >= min_size_bytes:
                            info = FileInfo(entry)
                            results.append(info.to_dict())
                    elif entry.is_dir() and recursive:
                        # Check if this path is still allowed
                        (
                            entry_abs,
                            entry_allowed,
                        ) = await self.validator.validate_path(entry)
                        if entry_allowed:
                            await scan_for_large_files(entry)
                except (PermissionError, FileNotFoundError):
                    # Skip entries we can't access
                    pass
        except (PermissionError, FileNotFoundError):
            # Skip directories we can't access
            pass

    await scan_for_large_files(abs_path)

    # Sort by size (largest first)
    return sorted(results, key=lambda x: x["size"], reverse=True)
```
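A minimal sketch of invoking this tool from the official MCP Python SDK client. The launch command, allowed directory, and argument values below are assumptions for illustration, not taken from this page:

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Assumed launch command for the MCP Filesystem Server; adjust to your install.
server_params = StdioServerParameters(
    command="python", args=["-m", "mcp_filesystem", "/data"]
)


async def main() -> None:
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "find_large_files",
                arguments={
                    "path": "/data",  # hypothetical starting directory
                    "min_size_mb": 500,
                    "recursive": True,
                    # Regexes, not globs: matched with re.search on full paths.
                    "exclude_patterns": [r"\.git/", r"node_modules"],
                    "format": "text",
                },
            )
            # The handler returns a single text payload (plain text or JSON).
            for item in result.content:
                if item.type == "text":
                    print(item.text)


asyncio.run(main())
```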