Deskaid

by ezyang
File Systems
Python
Apache 2.0
259
Reddit Discord
Overview Inspect Schema Related Servers Reviews Score
Need Help? View Source Code Report Issue
codemcp
tools
#!/usr/bin/env python3

import asyncio
import logging
import os
import time
from pathlib import Path
from typing import Any, Dict, Optional

from ..common import normalize_file_path

# Names exported by `from <this module> import *`.
__all__ = [
    "glob_files",
    "glob",
    "render_result_for_assistant",
]

# Default cap on the number of file paths returned per call; used as the
# default `limit` by both glob() and glob_files().
MAX_RESULTS = 100


def _collect_file_matches(base_dir: Path, pattern: str) -> list[tuple[Path, float]]:
    """Return ``(path, mtime)`` for every regular file matching ``pattern``.

    This helper does blocking filesystem work (directory walk plus stat), so
    ``glob`` runs it in an executor instead of on the event loop.
    """
    if pattern.startswith("/"):
        # Treat as absolute: glob from the root with the leading "/" removed.
        candidates = Path("/").glob(pattern[1:])
    else:
        candidates = base_dir.glob(pattern)

    found: list[tuple[Path, float]] = []
    for candidate in candidates:
        # Directories (and anything else that is not a file) are dropped.
        if not candidate.is_file():
            continue
        try:
            mtime = candidate.stat().st_mtime
        except OSError:
            # The file disappeared or became unreadable after is_file().
            # Keep it and sort it as the oldest entry rather than losing
            # mtime ordering for the whole result set.
            mtime = 0.0
        found.append((candidate, mtime))
    return found


async def glob(
    pattern: str,
    path: str,
    options: Optional[Dict[str, Any]] = None,
    signal=None,
) -> Dict[str, Any]:
    """Find files matching a glob pattern.

    Args:
        pattern: The glob pattern to match files against. A pattern starting
            with "/" is matched from the filesystem root instead of ``path``.
        path: The directory to search in
        options: Optional parameters for pagination (limit, offset). Missing
            or ``None`` values fall back to ``MAX_RESULTS`` / ``0``; negative
            values are clamped to ``0``.
        signal: Optional abort signal to terminate the operation (accepted
            for interface compatibility; not read by this function)

    Returns:
        A dictionary with ``files`` (the requested page of path strings),
        ``truncated`` (whether more matches exist past this page) and
        ``total`` (number of matching files before pagination).

    Raises:
        FileNotFoundError: If ``path`` does not exist (skipped when the
            DESKAID_TESTING environment variable is set).
        ValueError: If ``path`` is not a directory (same condition).
        Exception: Any error raised while globbing is logged and re-raised.
    """
    if options is None:
        options = {}

    # Sanitize pagination so slicing and the `truncated` flag stay consistent:
    # None falls back to the defaults, negatives are clamped to zero.
    limit = options.get("limit")
    limit = MAX_RESULTS if limit is None else max(limit, 0)
    offset = max(options.get("offset") or 0, 0)

    # Normalize the directory path
    absolute_path = normalize_file_path(path)

    # In non-test environment, verify the path exists
    if not os.environ.get("DESKAID_TESTING"):
        if not os.path.exists(absolute_path):
            raise FileNotFoundError(f"Path does not exist: {path}")

        if not os.path.isdir(absolute_path):
            raise ValueError(f"Path is not a directory: {path}")

    path_obj = Path(absolute_path)

    try:
        # Walk and stat in one executor call so the event loop is never
        # blocked on filesystem I/O.
        loop = asyncio.get_running_loop()
        matches_with_mtime = await loop.run_in_executor(
            None, _collect_file_matches, path_obj, pattern
        )

        # NOTE(review): test mode here is keyed on NODE_ENV while the path
        # check above is keyed on DESKAID_TESTING - confirm this is intended.
        if os.environ.get("NODE_ENV") == "test":
            # In tests, sort by filename for deterministic results
            matches_with_mtime.sort(key=lambda item: str(item[0]))
        else:
            # Newest first, with filename as tiebreaker
            matches_with_mtime.sort(key=lambda item: (-item[1], str(item[0])))

        file_paths = [str(match) for match, _ in matches_with_mtime]

        # Apply pagination
        total_files = len(file_paths)
        truncated = total_files > (offset + limit)
        file_paths = file_paths[offset : offset + limit]

        return {
            "files": file_paths,
            "truncated": truncated,
            "total": total_files,
        }
    except Exception as e:
        logging.exception(f"Error executing glob: {e!s}")
        raise


def render_result_for_assistant(output: Dict[str, Any]) -> str:
    """Render the results in a format suitable for the assistant.

    Args:
        output: The glob results dictionary. Reads ``filenames`` (list of
            path strings), ``numFiles`` (match count) and ``truncated``.

    Returns:
        "No files found" when the match count is zero; otherwise the file
        names, one per line, followed by a truncation notice when
        ``truncated`` is set.
    """
    filenames = output.get("filenames") or []
    # Trust an explicit count, but fall back to the list itself so a dict
    # that only carries "filenames" is not misreported as empty.
    num_files = output.get("numFiles", len(filenames))

    if num_files == 0:
        return "No files found"

    # NOTE(review): names are joined with os.linesep while the notice below
    # is prefixed with "\n"; these differ on platforms where os.linesep is
    # not "\n".
    result = os.linesep.join(filenames)

    # Only add truncation message if results were actually truncated
    if output.get("truncated", False):
        result += (
            "\n(Results are truncated. Consider using a more specific path or pattern.)"
        )

    return result


async def glob_files(
    pattern: str,
    path: str | None = None,
    limit: int = MAX_RESULTS,
    offset: int = 0,
    chat_id: str | None = None,
    signal=None,
) -> Dict[str, Any]:
    """Search for files matching a glob pattern.

    Errors from the search are not raised; they are reported in the
    returned dictionary instead.

    Args:
        pattern: The glob pattern to match files against
        path: The directory to search in (defaults to current working directory)
        limit: Maximum number of results to return
        offset: Number of results to skip (for pagination)
        chat_id: The unique ID of the current chat session (not read by
            this function)
        signal: Optional abort signal, forwarded to ``glob``

    Returns:
        A dictionary with ``filenames``, ``durationMs``, ``numFiles``,
        ``truncated`` and ``resultForAssistant``. On failure the list is
        empty, ``truncated`` is False, and an ``error`` key carries the
        exception message.
    """
    # Monotonic clock: the measured duration must not jump (or go negative)
    # if the wall clock is adjusted while the search runs.
    start_time = time.monotonic()

    # Use current working directory if path is not provided
    if path is None:
        path = os.getcwd()

    try:
        result = await glob(pattern, path, {"limit": limit, "offset": offset}, signal)

        # Convert elapsed seconds to whole milliseconds
        execution_time = int((time.monotonic() - start_time) * 1000)

        files = result.get("files", [])

        output = {
            "filenames": files,
            "durationMs": execution_time,
            "numFiles": len(files),
            "truncated": result.get("truncated", False),
        }

        # Add formatted result for assistant
        output["resultForAssistant"] = render_result_for_assistant(output)

        return output
    except Exception as e:
        # Report the failure in-band, with the same keys as a successful
        # result so consumers can read "truncated" unconditionally.
        return {
            "filenames": [],
            "durationMs": int((time.monotonic() - start_time) * 1000),
            "numFiles": 0,
            "truncated": False,
            "error": str(e),
            "resultForAssistant": f"Error: {e!s}",
        }