Deskaid

#!/usr/bin/env python3
"""Glob-based file search with pagination and assistant-friendly rendering."""

import asyncio
import logging
import os
import time
from pathlib import Path
from typing import Any, Dict, Optional

from ..common import normalize_file_path

__all__ = [
    "glob_files",
    "glob",
    "render_result_for_assistant",
]

# Default page size used when the caller does not supply a limit
MAX_RESULTS = 100


def _mtime_or_zero(file_path: Path) -> float:
    """Return the modification time of *file_path*, or 0 if it cannot be stat'ed."""
    try:
        return os.stat(file_path).st_mtime
    except OSError:
        # The file may have vanished or become unreadable since it was globbed;
        # treat it as "oldest" instead of failing the whole search.
        return 0


def _find_sorted_files(
    pattern: str, base_dir: Path, sort_by_name: bool
) -> list[Path]:
    """Glob for regular files and return them in presentation order.

    This function only performs blocking filesystem calls, so ``glob`` runs it
    in an executor rather than on the event loop.
    """
    if pattern.startswith("/"):
        # A leading slash anchors the pattern at the filesystem root,
        # ignoring base_dir
        candidates = Path("/").glob(pattern[1:])
    else:
        candidates = base_dir.glob(pattern)

    # Directories can match the pattern too; only files are reported
    files = [candidate for candidate in candidates if candidate.is_file()]

    if not sort_by_name:
        try:
            # Newest first, with the path as a deterministic tiebreaker
            return sorted(files, key=lambda f: (-_mtime_or_zero(f), str(f)))
        except Exception as e:
            # Ordering is best-effort: never fail the search because of it
            logging.debug(
                "Error sorting by modification time, "
                "falling back to name sort: %s",
                e,
            )

    return sorted(files, key=str)


async def glob(
    pattern: str,
    path: str,
    options: Optional[Dict[str, Any]] = None,
    signal=None,
) -> Dict[str, Any]:
    """Find files matching a glob pattern.

    Args:
        pattern: The glob pattern to match files against. A pattern starting
            with "/" is matched from the filesystem root instead of ``path``.
        path: The directory to search in
        options: Optional pagination parameters: "limit" (defaults to
            MAX_RESULTS) and "offset" (defaults to 0). ``None`` values fall
            back to the defaults and negative values are treated as 0.
        signal: Optional abort signal. Accepted for interface compatibility;
            this function does not currently consult it.

    Returns:
        A dictionary with "files" (the requested page of paths as strings),
        "truncated" (True when more matches exist beyond the page) and
        "total" (number of matches before pagination).

    Raises:
        FileNotFoundError: If ``path`` does not exist (skipped when the
            DESKAID_TESTING environment variable is set).
        ValueError: If ``path`` is not a directory (same exemption).
    """
    if options is None:
        options = {}

    limit = options.get("limit")
    if limit is None:
        limit = MAX_RESULTS
    offset = options.get("offset") or 0

    # Negative values would otherwise become from-the-end slices and make the
    # "truncated" flag disagree with the page actually returned.
    limit = max(limit, 0)
    offset = max(offset, 0)

    # Normalize the directory path
    absolute_path = normalize_file_path(path)

    # Path validation is skipped when DESKAID_TESTING is set
    if not os.environ.get("DESKAID_TESTING"):
        if not os.path.exists(absolute_path):
            raise FileNotFoundError(f"Path does not exist: {path}")
        if not os.path.isdir(absolute_path):
            raise ValueError(f"Path is not a directory: {path}")

    base_dir = Path(absolute_path)

    try:
        # NOTE(review): deterministic name ordering is keyed on NODE_ENV while
        # path validation above is keyed on DESKAID_TESTING -- confirm whether
        # these two switches are meant to differ.
        sort_by_name = os.environ.get("NODE_ENV") == "test"

        # Walk, filter and stat in a single executor call so the event loop is
        # not blocked and we avoid one thread hop per matched file.
        loop = asyncio.get_running_loop()
        matches = await loop.run_in_executor(
            None, _find_sorted_files, pattern, base_dir, sort_by_name
        )

        file_paths = [str(match) for match in matches]
        total_files = len(file_paths)

        return {
            "files": file_paths[offset : offset + limit],
            "truncated": total_files > offset + limit,
            "total": total_files,
        }
    except Exception as e:
        logging.exception("Error executing glob: %s", e)
        raise


def render_result_for_assistant(output: Dict[str, Any]) -> str:
    """Render the results in a format suitable for the assistant.

    Args:
        output: The glob results dictionary. Reads "filenames", "numFiles"
            (defaults to the number of filenames when absent) and "truncated".

    Returns:
        "No files found" when the file count is 0; otherwise the filenames
        joined one per line, followed by a truncation hint when "truncated"
        is set.
    """
    filenames = output.get("filenames") or []
    # Fall back to the list itself so a missing count does not hide results
    num_files = output.get("numFiles", len(filenames))

    if num_files == 0:
        return "No files found"

    result = os.linesep.join(filenames)

    # Only add truncation message if results were actually truncated
    if output.get("truncated", False):
        result += (
            "\n(Results are truncated. Consider using a more specific path or pattern.)"
        )

    return result


def _elapsed_ms(started_at: float) -> int:
    """Return whole milliseconds elapsed since a ``time.perf_counter()`` reading."""
    return int((time.perf_counter() - started_at) * 1000)


async def glob_files(
    pattern: str,
    path: str | None = None,
    limit: int = MAX_RESULTS,
    offset: int = 0,
    chat_id: str | None = None,
    signal=None,
) -> Dict[str, Any]:
    """Search for files matching a glob pattern.

    Errors are not raised to the caller; they are reported in the returned
    dictionary instead.

    Args:
        pattern: The glob pattern to match files against
        path: The directory to search in (defaults to current working directory)
        limit: Maximum number of results to return
        offset: Number of results to skip (for pagination)
        chat_id: The unique ID of the current chat session. Accepted for
            interface compatibility; not used by this function.
        signal: Optional abort signal, forwarded to ``glob``

    Returns:
        A dictionary with "filenames", "durationMs", "numFiles", "truncated"
        and "resultForAssistant". On failure "filenames" is empty, "numFiles"
        is 0 and an additional "error" key carries the exception text.
    """
    # perf_counter is monotonic, so the duration cannot go negative or jump
    # when the system clock is adjusted mid-search.
    started_at = time.perf_counter()

    # Use current working directory if path is not provided
    if path is None:
        path = os.getcwd()

    try:
        options = {
            "limit": limit,
            "offset": offset,
        }
        result = await glob(pattern, path, options, signal)

        files = result.get("files", [])
        output = {
            "filenames": files,
            "durationMs": _elapsed_ms(started_at),
            "numFiles": len(files),
            "truncated": result.get("truncated", False),
        }
        output["resultForAssistant"] = render_result_for_assistant(output)
        return output
    except Exception as e:
        # Tool boundary: report the failure as data rather than raising
        return {
            "filenames": [],
            "durationMs": _elapsed_ms(started_at),
            "numFiles": 0,
            # Same keys as the success result so callers can read it uniformly
            "truncated": False,
            "error": str(e),
            "resultForAssistant": f"Error: {e!s}",
        }