Deskaid
by ezyang
- codemcp
- tools
#!/usr/bin/env python3
import asyncio
import logging
import os
import time
from pathlib import Path
from typing import Any, Dict, Optional
from ..common import normalize_file_path
__all__ = [
"glob_files",
"glob",
"render_result_for_assistant",
]
# Define constants
MAX_RESULTS = 100
async def glob(
pattern: str,
path: str,
options: Optional[Dict[str, Any]] = None,
signal=None,
) -> Dict[str, Any]:
"""Find files matching a glob pattern.
Args:
pattern: The glob pattern to match files against
path: The directory to search in
options: Optional parameters for pagination (limit, offset)
signal: Optional abort signal to terminate the operation
Returns:
A dictionary with matched files and metadata
"""
if options is None:
options = {}
limit = options.get("limit", MAX_RESULTS)
offset = options.get("offset", 0)
# Normalize the directory path
absolute_path = normalize_file_path(path)
# In non-test environment, verify the path exists
if not os.environ.get("DESKAID_TESTING"):
# Check if path exists
if not os.path.exists(absolute_path):
raise FileNotFoundError(f"Path does not exist: {path}")
# Check if it's a directory
if not os.path.isdir(absolute_path):
raise ValueError(f"Path is not a directory: {path}")
# Create Path object for the directory
path_obj = Path(absolute_path)
try:
# Use pathlib's glob functionality to find matching files
if pattern.startswith("/"):
# Treat as absolute path if it starts with /
matches = list(Path("/").glob(pattern[1:]))
else:
# Use relative path otherwise
matches = list(path_obj.glob(pattern))
# Filter out directories if they match the pattern
matches = [match for match in matches if match.is_file()]
# Sort matches by modification time (newest first)
try:
loop = asyncio.get_event_loop()
# Get file stats asynchronously
stats = []
for match in matches:
stat = await loop.run_in_executor(
None, lambda m=match: os.stat(m) if os.path.exists(m) else None
)
stats.append(stat)
matches_with_stats = list(zip(matches, stats, strict=False))
# In tests, sort by filename for deterministic results
if os.environ.get("NODE_ENV") == "test":
matches_with_stats.sort(key=lambda x: str(x[0]))
else:
# Sort by modification time (newest first), with filename as tiebreaker
matches_with_stats.sort(
key=lambda x: (-(x[1].st_mtime if x[1] else 0), str(x[0]))
)
matches = [match for match, _ in matches_with_stats]
except Exception as e:
# Fall back to sorting by name if there's an error
logging.debug(
f"Error sorting by modification time, falling back to name sort: {e!s}",
)
matches.sort(key=lambda x: str(x))
# Convert Path objects to strings
file_paths = [str(match) for match in matches]
# Apply pagination
total_files = len(file_paths)
if offset > 0:
file_paths = file_paths[offset:]
truncated = total_files > (offset + limit)
# Limit the number of results
file_paths = file_paths[:limit]
return {
"files": file_paths,
"truncated": truncated,
"total": total_files,
}
except Exception as e:
logging.exception(f"Error executing glob: {e!s}")
raise
def render_result_for_assistant(output: Dict[str, Any]) -> str:
"""Render the results in a format suitable for the assistant.
Args:
output: The glob results dictionary
Returns:
A formatted string representation of the results
"""
filenames = output.get("filenames", [])
num_files = output.get("numFiles", 0)
if num_files == 0:
return "No files found"
result = os.linesep.join(filenames)
# Only add truncation message if results were actually truncated
if output.get("truncated", False):
result += (
"\n(Results are truncated. Consider using a more specific path or pattern.)"
)
return result
async def glob_files(
pattern: str,
path: str | None = None,
limit: int = MAX_RESULTS,
offset: int = 0,
chat_id: str | None = None,
signal=None,
) -> Dict[str, Any]:
"""Search for files matching a glob pattern.
Args:
pattern: The glob pattern to match files against
path: The directory to search in (defaults to current working directory)
limit: Maximum number of results to return
offset: Number of results to skip (for pagination)
chat_id: The unique ID of the current chat session
signal: Optional abort signal to terminate the operation
Returns:
A dictionary with execution stats and matched files
"""
start_time = time.time()
# Use current working directory if path is not provided
if path is None:
path = os.getcwd()
try:
# Set up options for glob
options = {
"limit": limit,
"offset": offset,
}
# Execute glob
result = await glob(pattern, path, options, signal)
# Calculate execution time
execution_time = int(
(time.time() - start_time) * 1000
) # Convert to milliseconds
# Get matching files
files = result.get("files", [])
# Prepare output
output = {
"filenames": files,
"durationMs": execution_time,
"numFiles": len(files),
"truncated": result.get("truncated", False),
}
# Add formatted result for assistant
output["resultForAssistant"] = render_result_for_assistant(output)
return output
except Exception as e:
# Calculate execution time even on error
execution_time = int((time.time() - start_time) * 1000)
# Return empty results with error info
error_output = {
"filenames": [],
"durationMs": execution_time,
"numFiles": 0,
"error": str(e),
"resultForAssistant": f"Error: {e!s}",
}
return error_output