Skip to main content
Glama
knishioka

Treasure Data MCP Server

by knishioka

td_list_project_files

Extract and list all files and directories from a Treasure Data project archive to view project contents.

Instructions

List all files contained in a project archive.

This tool extracts and lists the content of a previously downloaded
project archive, showing all files and directories within the project.

Args:
    archive_path: The path to the downloaded project archive (.tar.gz file)

Input Schema

TableJSON Schema
| Name | Required | Description | Default |
| --- | --- | --- | --- |
| archive_path | Yes | | |

Implementation Reference

  • The primary handler function for the 'td_list_project_files' tool. It validates the archive path, opens the tar.gz file, iterates through members with security checks, categorizes files by extension, and returns a structured list of files.
    @mcp.tool()
    async def td_list_project_files(archive_path: str) -> dict[str, Any]:
        """List all files contained in a project archive.
    
        This tool extracts and lists the content of a previously downloaded
        project archive, showing all files and directories within the project.
    
        Args:
            archive_path: The path to the downloaded project archive (.tar.gz file)
        """
        # NOTE: the docstring above is left untouched on purpose; the tool
        # decorator presumably publishes it as the tool description (TODO confirm).

        # Extension -> human-readable label; anything not listed is "Other".
        labels_by_extension = {
            ".dig": "Digdag workflow",
            ".sql": "SQL query",
            ".py": "Python script",
            ".yml": "YAML configuration",
            ".yaml": "YAML configuration",
        }

        # Reject paths that fail the archive-path validation helper.
        if not _validate_archive_path(archive_path):
            return _format_error_response("Invalid archive path")

        try:
            if not os.path.exists(archive_path):
                return _format_error_response("Archive file not found")

            entries = []

            with tarfile.open(archive_path, "r:gz") as archive:
                for item in archive.getmembers():
                    # Members that fail the safety check are silently left out
                    # of the listing.
                    if not _safe_extract_member(item, "/tmp/validation"):
                        continue

                    is_directory = item.isdir()
                    entry = {
                        "name": item.name,
                        "type": "directory" if is_directory else "file",
                        "size": item.size,
                    }

                    # Only regular entries carry extension / file-type details.
                    if not is_directory:
                        suffix = Path(item.name).suffix.lower()
                        entry["extension"] = suffix
                        entry["file_type"] = labels_by_extension.get(suffix, "Other")

                    entries.append(entry)

            # Directories come first; within each group order by name.
            ordered = sorted(
                entries,
                key=lambda entry: (entry["type"] != "directory", entry["name"]),
            )

            return {
                "success": True,
                "archive_path": archive_path,
                "file_count": len(ordered),
                "files": ordered,
            }
        except (OSError, tarfile.ReadError) as e:
            return _format_error_response(f"Failed to list project files: {str(e)}")
        except Exception as e:
            return _format_error_response(
                f"Unexpected error while listing project files: {str(e)}"
            )
  • Helper function that validates the archive_path input parameter, ensuring it points to a .tar.gz file inside a temporary directory and rejecting path traversal attempts.
    def _validate_archive_path(archive_path: str) -> bool:
        """Return True when ``archive_path`` is an acceptable archive location.

        A path is accepted only when all of the following hold:

        * it is non-empty;
        * after normalization it lies inside the system temporary directory
          (``tempfile.gettempdir()``) or ``/tmp``;
        * the normalized path contains no ``..`` sequence;
        * the original string ends with ``.tar.gz``.

        Args:
            archive_path: Path supplied by the caller.

        Returns:
            True if every check passes, False otherwise.
        """
        if not archive_path:
            return False

        # Collapse "a/../b", duplicate separators and similar tricks.
        normalized_path = os.path.normpath(archive_path)

        # Normalize the allowed roots the same way so the comparison below is
        # like-for-like.
        allowed_roots = [
            os.path.normpath(root) for root in (tempfile.gettempdir(), "/tmp")
        ]

        def _is_inside(root: str) -> bool:
            # A bare startswith(root) would also accept sibling directories
            # such as "/tmp_evil/..."; require a separator boundary instead.
            boundary = root.rstrip(os.sep) + os.sep
            return normalized_path == root or normalized_path.startswith(boundary)

        if not any(_is_inside(root) for root in allowed_roots):
            return False

        # Defence in depth: refuse anything still containing "..".
        if ".." in normalized_path:
            return False

        return archive_path.endswith(".tar.gz")
  • Helper function to safely validate each tarfile member, preventing path traversal, absolute paths, and oversized files (zip bombs).
    def _safe_extract_member(member, extract_path: str) -> bool:
        """Decide whether a tar member is safe to process under ``extract_path``.

        Despite its name, this function extracts nothing: it only inspects the
        member's name and, when present, its size.

        A member is rejected when any of the following is true:

        * its normalized name starts with a forward slash or a backslash;
        * any path component (split on either slash style) is exactly ``..``;
        * joining it onto ``extract_path`` yields a location outside
          ``extract_path``;
        * it has a ``size`` attribute greater than ``MAX_FILE_SIZE``.

        Args:
            member: Object exposing a ``name`` attribute and, optionally, a
                ``size`` attribute (the caller passes tar archive members).
            extract_path: Directory the member would be placed under.

        Returns:
            True when every check passes, False otherwise.
        """
        member_path = os.path.normpath(member.name)

        # Prevent absolute paths (POSIX or Windows style).
        if member_path.startswith(("/", "\\")):
            return False

        # Compare whole components rather than substrings so that legitimate
        # names merely containing two dots (e.g. "v1..v2.sql") are not dropped.
        # Backslashes are treated as separators too, so Windows-style traversal
        # is still caught on POSIX hosts.
        if ".." in member_path.replace("\\", "/").split("/"):
            return False

        # The resolved location must be the base itself or sit below it; the
        # separator boundary stops "/base_evil" from matching "/base".
        base = os.path.normpath(extract_path)
        final_path = os.path.normpath(os.path.join(base, member_path))
        boundary = base.rstrip(os.sep) + os.sep
        if final_path != base and not final_path.startswith(boundary):
            return False

        # Check file size (prevent zip bombs); members without a size are
        # not subject to this check.
        size = getattr(member, "size", None)
        if size is not None and size > MAX_FILE_SIZE:
            return False

        return True
  • Helper function used to format consistent error responses across tools.
    def _format_error_response(error_msg: str) -> dict[str, str]:
        """Wrap an error message in the error payload shape used by the tools.

        Args:
            error_msg: Text to report to the caller.

        Returns:
            A single-entry dict mapping ``"error"`` to ``error_msg``.
        """
        payload: dict[str, str] = dict(error=error_msg)
        return payload

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/knishioka/td-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server