"""Security and safety utilities for filesystem operations."""
from pathlib import Path
from typing import Union
from .exceptions import (
PathSecurityError,
FileSizeLimitError,
DepthLimitError,
)
def normalize_path(path: Union[str, Path]) -> Path:
    """
    Return the fully resolved form of a path.

    Strings are wrapped in a Path first; Path objects are used as given.
    The result comes from Path.resolve(), so it is absolute, has its
    "." and ".." components collapsed, and has symlinks followed.

    Args:
        path: Path to normalize (string or Path)

    Returns:
        Normalized Path object
    """
    candidate = Path(path) if isinstance(path, str) else path
    return candidate.resolve()
def validate_path(path: Union[str, Path], root: Union[str, Path]) -> Path:
    """
    Validate that a path is within the sandbox root directory.

    The candidate path is joined onto the root when it is relative, then
    fully resolved (collapsing ".." components and following symlinks)
    before it is compared with the resolved root. Because the check is
    made on the resolved path, neither "../" traversal nor a symlink
    pointing outside the root can pass.

    Args:
        path: Path to validate (relative or absolute)
        root: Root directory to validate against

    Returns:
        Validated absolute Path object

    Raises:
        PathSecurityError: If path is outside root or cannot be resolved
    """
    # Resolve the root too, so both sides of the containment check are
    # compared in the same canonical form.
    root_path = normalize_path(root)

    if isinstance(path, str):
        path = Path(path)

    # Relative paths are interpreted relative to the sandbox root.
    full_path = path if path.is_absolute() else root_path / path

    try:
        resolved_path = full_path.resolve()
    except (OSError, RuntimeError, ValueError) as e:
        # ValueError is included because the underlying OS calls reject
        # malformed input (e.g. an embedded null byte) with it; such input
        # must surface as PathSecurityError like any other invalid path.
        raise PathSecurityError(f"Invalid path: {e}") from e

    try:
        # relative_to raises ValueError when resolved_path is not located
        # under root_path.
        resolved_path.relative_to(root_path)
    except ValueError:
        raise PathSecurityError(
            f"Path '{path}' is outside the sandbox root directory"
        ) from None

    return resolved_path
def check_file_size(path: Path, max_size_bytes: int) -> None:
    """
    Ensure a regular file does not exceed a size limit.

    Returns silently when the file's size is at most max_size_bytes.

    Args:
        path: Path to the file to check
        max_size_bytes: Maximum allowed file size in bytes

    Raises:
        FileNotFoundError: If nothing exists at path
        ValueError: If path exists but is not a regular file
        FileSizeLimitError: If the file is larger than max_size_bytes
    """
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")
    if not path.is_file():
        raise ValueError(f"Not a file: {path}")

    bytes_per_mb = 1024 * 1024
    size_in_bytes = path.stat().st_size
    if size_in_bytes > max_size_bytes:
        # Sizes are reported in megabytes to keep the message readable.
        max_mb = max_size_bytes / bytes_per_mb
        actual_mb = size_in_bytes / bytes_per_mb
        message = f"File size ({actual_mb:.2f} MB) exceeds maximum allowed size ({max_mb:.2f} MB)"
        raise FileSizeLimitError(message)
def check_depth(current_depth: int, max_depth: int) -> None:
    """
    Ensure the current search depth does not exceed the limit.

    A depth equal to max_depth is still allowed; only a strictly greater
    depth is rejected.

    Args:
        current_depth: Current recursion depth
        max_depth: Maximum allowed depth

    Raises:
        DepthLimitError: If current_depth is greater than max_depth
    """
    limit_exceeded = current_depth > max_depth
    if not limit_exceeded:
        return

    message = f"Search depth ({current_depth}) exceeds maximum allowed depth ({max_depth})"
    raise DepthLimitError(message)
def is_binary_file(data: bytes, sample_size: int = 8192) -> bool:
    """
    Detect if file content is binary or text.

    Heuristic applied to the first sample_size bytes:

    1. An empty sample is text.
    2. Any null byte means binary.
    3. Otherwise the content is binary when more than 30% of the sampled
       bytes are "non-text". ASCII control bytes other than tab, newline
       and carriage return (including DEL, 127) always count as non-text.
       Bytes >= 128 count as non-text only when the sample is not valid
       UTF-8, so UTF-8 text in non-Latin scripts is not mistaken for
       binary.

    Args:
        data: File content to check
        sample_size: Number of bytes to sample (default: 8192)

    Returns:
        True if file appears to be binary, False if text
    """
    # Local import: keeps the new dependency inside this function.
    import codecs

    # Check only the first sample_size bytes
    sample = data[:sample_size]

    # Empty files are considered text
    if not sample:
        return False

    # Files with null bytes are binary
    if b"\x00" in sample:
        return True

    control_bytes = 0
    high_bytes = 0
    for byte in sample:
        if byte >= 128:
            high_bytes += 1
        elif byte == 127 or (byte < 32 and byte not in (9, 10, 13)):
            # Non-printable, and not one of \t (9), \n (10), \r (13)
            control_bytes += 1

    non_text_bytes = control_bytes
    if high_bytes:
        # When the sample is a prefix of longer data, a multi-byte character
        # may be cut at the boundary; final=False makes the decoder accept
        # such an incomplete trailing sequence instead of raising.
        truncated = len(data) > len(sample)
        decoder = codecs.getincrementaldecoder("utf-8")()
        try:
            decoder.decode(sample, final=not truncated)
        except UnicodeDecodeError:
            # Not UTF-8: treat every high byte as non-text.
            non_text_bytes += high_bytes

    # If more than 30% non-text bytes, consider it binary
    return non_text_bytes > len(sample) * 0.3
def get_relative_path(absolute_path: Path, root: Path) -> str:
    """
    Express a path relative to the root directory, as a string.

    Args:
        absolute_path: Absolute path to convert
        root: Root directory

    Returns:
        The path relative to root, or just the final path component
        when absolute_path is not located under root
    """
    try:
        relative = absolute_path.relative_to(root)
    except ValueError:
        # Not under root: fall back to the bare name.
        return absolute_path.name
    return str(relative)