Claude Code MCP - Agent Orchestration Platform

filesystem.py•23.4 KiB

""" Filesystem Boundary Enforcement and Path Validation This module provides secure filesystem access control for the Agent Orchestration Platform, implementing directory jailing, path traversal protection, and resource monitoring. Architecture Integration: - Design Patterns: Strategy pattern for different boundary policies, Decorator for validation - Security Model: Defense-in-depth with path validation, symlink protection, and access control - Performance Profile: O(1) path validation with cached boundary checks Technical Decisions: - Path Canonicalization: Real path resolution to prevent symlink attacks - Whitelist Approach: Explicit allowed directories with recursive validation - Resource Monitoring: File descriptor and disk space tracking per session - Atomic Operations: Path validation and access as single atomic check Dependencies & Integration: - External: None beyond standard library for maximum reliability and performance - Internal: audit.py for security event logging, types for domain modeling Quality Assurance: - Test Coverage: Property-based testing for path traversal attempts and edge cases - Error Handling: Secure failure modes with comprehensive audit logging Author: Adder_4 | Created: 2025-06-26 | Last Modified: 2025-06-26 """ import os import shutil import stat import threading from contextlib import contextmanager from dataclasses import dataclass, field from datetime import datetime from pathlib import Path, PurePath from typing import Any, Dict, List, Optional, Set, Union import psutil from src.utils.contracts_shim import ensure, require from .audit import ( AuditCategory, AuditLevel, SecureAuditLogger, audit_security_violation, get_audit_logger, ) class FilesystemSecurityError(Exception): """Base exception for filesystem security violations.""" pass class PathTraversalError(FilesystemSecurityError): """Path traversal attack detected.""" pass class AccessDeniedError(FilesystemSecurityError): """Access denied due to security policy.""" pass 
class ResourceLimitError(FilesystemSecurityError):
    """Resource limit exceeded."""


@dataclass(frozen=True)
class ResourceLimits:
    """Resource limits for filesystem operations.

    The extension policy is evaluated by :meth:`is_file_allowed`: the block
    list always wins, and an empty allow list means "allow everything that
    is not blocked".
    """

    max_file_size: int = 100 * 1024 * 1024  # 100MB
    max_total_disk_usage: int = 1024 * 1024 * 1024  # 1GB
    max_open_files: int = 100
    max_directory_depth: int = 20
    max_files_per_directory: int = 1000
    allowed_file_extensions: Set[str] = field(
        default_factory=lambda: {
            # Code files
            ".py",
            ".js",
            ".ts",
            ".jsx",
            ".tsx",
            ".java",
            ".cpp",
            ".c",
            ".h",
            ".cs",
            ".go",
            ".rs",
            ".php",
            ".rb",
            ".swift",
            ".kt",
            # Config files
            ".json",
            ".yaml",
            ".yml",
            ".toml",
            ".ini",
            ".conf",
            ".cfg",
            # Documentation
            ".md",
            ".txt",
            ".rst",
            ".html",
            ".css",
            # Data files
            ".csv",
            ".xml",
            ".sql",
            # No extension (common for scripts and configs)
            "",
        }
    )
    blocked_file_extensions: Set[str] = field(
        default_factory=lambda: {
            # Executable files
            ".exe",
            ".bat",
            ".cmd",
            ".com",
            ".scr",
            ".pif",
            ".app",
            ".dmg",
            ".pkg",
            ".deb",
            ".rpm",
            # Archives that could contain executables
            ".zip",
            ".rar",
            ".7z",
            ".tar",
            ".gz",
            ".bz2",
            # Script files that could be dangerous
            ".ps1",
            ".vbs",
            ".vbe",
            ".ws",
            ".wsf",
            # System files
            ".dll",
            ".so",
            ".dylib",
            ".sys",
        }
    )

    def is_file_allowed(self, file_path: Path) -> bool:
        """Check if file extension is allowed.

        Args:
            file_path: Path whose final suffix is checked (case-insensitive).
                A plain string is accepted as well.

        Returns:
            False when the suffix is on the block list; otherwise True when
            the allow list is empty or contains the suffix.
        """
        # Only the last suffix is considered ("a.tar.gz" -> ".gz").
        extension = Path(file_path).suffix.lower()

        # The block list takes precedence over the allow list.
        if extension in self.blocked_file_extensions:
            return False

        # An empty allow list means every non-blocked extension is accepted.
        if self.allowed_file_extensions:
            return extension in self.allowed_file_extensions
        return True


@dataclass
class ResourceUsage:
    """Current resource usage tracking."""

    open_files: int = 0
    total_disk_usage: int = 0
    last_updated: datetime = field(default_factory=datetime.utcnow)

    def update_from_system(self, root_path: Path) -> None:
        """Update usage statistics from actual system state.

        Args:
            root_path: Directory whose whole tree is measured for disk usage.

        Failures reported by psutil or the OS leave the previous values in
        place; nothing is raised for them.
        """
        try:
            # Count open file descriptors of the current process; fall back
            # to open_files() where num_fds() is not available.
            process = psutil.Process()
            self.open_files = (
                process.num_fds()
                if hasattr(process, "num_fds")
                else len(process.open_files())
            )

            # Calculate disk usage for the directory tree.
            if root_path.exists():
                self.total_disk_usage = self._tree_size(root_path)

            self.last_updated = datetime.utcnow()
        except (psutil.Error, OSError):
            # Keep existing values if update fails
            pass

    @staticmethod
    def _tree_size(root_path: Path) -> int:
        """Sum the sizes of all regular files below ``root_path``."""
        total = 0
        for entry in root_path.rglob("*"):
            try:
                if entry.is_file():
                    total += entry.stat().st_size
            except OSError:
                # A file that vanished or became unreadable mid-walk must not
                # discard the whole measurement; skip just that entry.
                continue
        return total


class FilesystemBoundary:
    """
    Secure filesystem boundary enforcement with path validation and
    resource monitoring.

    Implements directory jailing to ensure agents can only access files
    within their assigned session boundaries while preventing various attack
    vectors.
    """

    def __init__(
        self,
        session_id: str,
        allowed_roots: Set[Path],
        resource_limits: Optional[ResourceLimits] = None,
        agent_id: Optional[str] = None,
    ):
        """Initialize filesystem boundary for session or agent.

        Args:
            session_id: Identifier of the owning session.
            allowed_roots: Directories the boundary grants access to. Entries
                may be ``Path`` objects or strings; roots that cannot be
                resolved or do not exist are skipped.
            resource_limits: Limits to enforce; defaults to ``ResourceLimits()``.
            agent_id: Identifier of the agent, when the boundary is
                agent-specific.

        Raises:
            ValueError: If no allowed root survives canonicalization.
        """
        self.session_id = session_id
        self.agent_id = agent_id
        self.resource_limits = resource_limits or ResourceLimits()

        # Canonicalize and validate allowed roots
        self.allowed_roots: Set[Path] = set()
        for root in allowed_roots:
            try:
                # Path(...) lets callers pass plain strings as well.
                canonical_root = Path(root).resolve()
                if canonical_root.exists():
                    self.allowed_roots.add(canonical_root)
            except (OSError, RuntimeError):
                # Skip invalid paths
                continue

        if not self.allowed_roots:
            raise ValueError("At least one valid allowed root must be specified")

        # Resource usage tracking
        self.resource_usage = ResourceUsage()
        self.usage_lock = threading.Lock()

        # Cache of boundary verdicts, keyed by canonical path string.
        self._validation_cache: Dict[str, bool] = {}
        self._cache_lock = threading.Lock()

        # Strong references to in-flight audit tasks so they are not
        # garbage-collected before they finish.
        self._pending_audit_tasks: Set[Any] = set()

    @require(lambda path: path is not None)
    @ensure(lambda result: isinstance(result, bool))
    def is_path_allowed(self, path: Union[str, Path]) -> bool:
        """
        Check if path is within allowed boundaries.

        The path is resolved on every call and the verdict is cached under
        the *canonical* path. Caching under the raw input would let a symlink
        that is retargeted after its first check (or a relative path after a
        working-directory change) keep a stale "allowed" answer.

        Args:
            path: Path to check; must not be None.

        Returns:
            True when the resolved path lies under one of the allowed roots,
            False otherwise, including when resolution or any other step
            fails.
        """
        try:
            # Resolve to canonical path to handle symlinks and .. properly
            try:
                canonical_path = Path(path).resolve()
            except (OSError, RuntimeError):
                # Path resolution failed - likely invalid
                return False

            cache_key = str(canonical_path)
            with self._cache_lock:
                cached = self._validation_cache.get(cache_key)
            if cached is not None:
                return cached

            # Check if canonical path is under any allowed root
            is_allowed = any(
                self._is_path_under_root(canonical_path, root)
                for root in self.allowed_roots
            )

            self._cache_result(cache_key, is_allowed)
            return is_allowed
        except Exception:
            # Any exception in path validation should deny access
            return False

    def _is_path_under_root(self, path: Path, root: Path) -> bool:
        """Check if path is under the specified root directory."""
        try:
            # relative_to() raises ValueError if path is not under root
            path.relative_to(root)
        except ValueError:
            return False
        return True

    def _cache_result(self, path_str: str, result: bool) -> None:
        """Cache path validation result with size limit.

        Once the cache holds more than 1000 entries, the 100 oldest (in dict
        insertion order) are dropped before the new entry is stored.
        """
        with self._cache_lock:
            if len(self._validation_cache) > 1000:
                # Remove oldest entries (simple FIFO)
                for stale_key in list(self._validation_cache)[:100]:
                    del self._validation_cache[stale_key]
            self._validation_cache[path_str] = result

    def _schedule_audit(self, coro: Any) -> None:
        """Run an audit coroutine without ever failing the caller.

        With an event loop running in the current thread the coroutine is
        scheduled as a task on it; otherwise it is run to completion
        synchronously. Any failure is swallowed because audit problems must
        not change the outcome of a security decision.
        """
        # Function-scope import keeps this helper self-contained.
        import asyncio

        try:
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # No loop in this thread: execute the audit call right here.
                asyncio.run(coro)
                return
            task = loop.create_task(coro)
            self._pending_audit_tasks.add(task)
            task.add_done_callback(self._finish_audit_task)
        except Exception:
            # Close a coroutine that never started to avoid a
            # "coroutine was never awaited" warning.
            if asyncio.iscoroutine(coro):
                coro.close()

    def _finish_audit_task(self, task: Any) -> None:
        """Release a finished audit task and mark its exception as retrieved."""
        self._pending_audit_tasks.discard(task)
        if not task.cancelled():
            task.exception()

    @require(lambda self, path: self.is_path_allowed(path))
    def validate_file_access(
        self, path: Union[str, Path], operation: str, check_resources: bool = True
    ) -> None:
        """
        Validate file access and check resource limits.

        Args:
            path: File the caller wants to touch.
            operation: Operation name (e.g. "read", "write", "create").
            check_resources: When True, resource limits are enforced as well.

        Raises:
            PathTraversalError: If the path is outside the allowed roots.
            AccessDeniedError: If the file extension is not permitted.
            ResourceLimitError: If a resource limit is exceeded.
        """
        path_obj = Path(path)

        # Enforce the boundary explicitly as well, so the check does not
        # depend on the precondition decorator being active.
        if not self.is_path_allowed(path_obj):
            error_msg = f"Path outside allowed boundaries: {path}"
            self._schedule_audit(
                self._log_security_violation(operation, str(path), error_msg)
            )
            raise PathTraversalError(error_msg)

        # Check file extension policy
        if not self.resource_limits.is_file_allowed(path_obj):
            error_msg = f"File type not allowed: {path_obj.suffix}"
            # _log_security_violation is a coroutine function; it has to be
            # scheduled, a bare call would never execute it.
            self._schedule_audit(
                self._log_security_violation(operation, str(path), error_msg)
            )
            raise AccessDeniedError(error_msg)

        # Check resource limits if requested
        if check_resources:
            self._check_resource_limits(path_obj, operation)

        # Log successful access
        try:
            audit_logger = get_audit_logger()
            self._schedule_audit(
                audit_logger.log_file_access(
                    file_path=str(path),
                    operation=operation,
                    agent_id=self.agent_id,
                    success=True,
                )
            )
        except Exception:
            # Don't fail operation if audit logging fails
            pass

    def _check_resource_limits(self, path: Path, operation: str) -> None:
        """Check if operation would violate resource limits.

        Raises:
            ResourceLimitError: If the file size, total disk usage, open file
                count or directory depth exceeds the configured limit.
        """
        with self.usage_lock:
            # Refresh usage from the system. Only one allowed root is
            # measured, and which one is arbitrary (set iteration order).
            self.resource_usage.update_from_system(next(iter(self.allowed_roots)))

            # Check file size limit for write operations
            if operation in ("write", "create", "append"):
                try:
                    if path.exists():
                        file_size = path.stat().st_size
                        if file_size > self.resource_limits.max_file_size:
                            raise ResourceLimitError(
                                f"File size exceeds limit: {file_size}"
                            )
                except OSError:
                    pass  # File might not exist yet

            # Check total disk usage
            if (
                self.resource_usage.total_disk_usage
                > self.resource_limits.max_total_disk_usage
            ):
                raise ResourceLimitError("Total disk usage exceeds limit")

            # Check open file limit
            if self.resource_usage.open_files > self.resource_limits.max_open_files:
                raise ResourceLimitError("Too many open files")

            # Check directory depth
            # NOTE(review): depth counts every component of the path as
            # given, not the depth below the allowed root - confirm intended.
            if operation in ("create", "mkdir"):
                depth = len(path.parts)
                if depth > self.resource_limits.max_directory_depth:
                    raise ResourceLimitError("Directory depth exceeds limit")

    async def _log_security_violation(
        self, operation: str, path: str, details: str
    ) -> None:
        """Log security violation asynchronously.

        Exceptions raised by the audit call are swallowed so that a logging
        failure never affects the security check itself.
        """
        try:
            await audit_security_violation(
                operation=operation,
                resource_type="file",
                resource_id=path,
                details=details,
                user_id=self.agent_id,
            )
        except Exception:
            # Don't fail the security check if audit logging fails
            pass

    @contextmanager
    def secure_file_operation(self, path: Union[str, Path], operation: str):
        """
        Context manager for secure file operations with automatic validation.

        Usage:
            with boundary.secure_file_operation('/path/to/file', 'read') as validated_path:
                with open(validated_path, 'r') as f:
                    content = f.read()

        Yields:
            The validated path as a ``Path`` object.

        Raises:
            PathTraversalError: If the path is outside the allowed roots.
            AccessDeniedError: If the file extension is not permitted.
            ResourceLimitError: If a resource limit is exceeded.
        """
        path_obj = Path(path)

        # Validate access before operation
        if not self.is_path_allowed(path_obj):
            error_msg = f"Path outside allowed boundaries: {path}"
            # Scheduling is best-effort so the caller always receives
            # PathTraversalError, even when no event loop is running.
            self._schedule_audit(
                self._log_security_violation(operation, str(path), error_msg)
            )
            raise PathTraversalError(error_msg)

        # Validate file access and resource limits
        self.validate_file_access(path_obj, operation)

        try:
            yield path_obj
        except Exception as e:
            # Log operation failure, then let the original error propagate.
            try:
                audit_logger = get_audit_logger()
                self._schedule_audit(
                    audit_logger.log_file_access(
                        file_path=str(path),
                        operation=operation,
                        agent_id=self.agent_id,
                        success=False,
                        error_message=str(e),
                    )
                )
            except Exception:
                pass
            raise

    def get_allowed_paths(self) -> List[str]:
        """Get list of allowed root paths for this boundary."""
        return [str(root) for root in self.allowed_roots]

    def get_resource_usage(self) -> Dict[str, Any]:
        """Get current resource usage statistics.

        Returns:
            A dict with the refreshed usage counters, the ISO-formatted time
            of the last refresh and the configured limits.
        """
        with self.usage_lock:
            self.resource_usage.update_from_system(next(iter(self.allowed_roots)))
            usage = self.resource_usage
            limits = self.resource_limits
            return {
                "open_files": usage.open_files,
                "total_disk_usage": usage.total_disk_usage,
                "last_updated": usage.last_updated.isoformat(),
                "limits": {
                    "max_file_size": limits.max_file_size,
                    "max_total_disk_usage": limits.max_total_disk_usage,
                    "max_open_files": limits.max_open_files,
                    "max_directory_depth": limits.max_directory_depth,
                },
            }


class FilesystemBoundaryManager:
    """
    Manager for multiple filesystem boundaries across sessions and agents.

    Provides centralized management of filesystem access control with
    session-level and agent-level boundary enforcement.
    """

    def __init__(self):
        """Initialize boundary manager."""
        self.session_boundaries: Dict[str, FilesystemBoundary] = {}
        self.agent_boundaries: Dict[str, FilesystemBoundary] = {}
        self.manager_lock = threading.Lock()

    def create_session_boundary(
        self,
        session_id: str,
        allowed_roots: Set[Path],
        resource_limits: Optional[ResourceLimits] = None,
    ) -> FilesystemBoundary:
        """Create filesystem boundary for session.

        Raises:
            ValueError: If a boundary for ``session_id`` already exists, or
                if none of ``allowed_roots`` is valid.
        """
        with self.manager_lock:
            if session_id in self.session_boundaries:
                raise ValueError(f"Session boundary already exists: {session_id}")

            boundary = FilesystemBoundary(
                session_id=session_id,
                allowed_roots=allowed_roots,
                resource_limits=resource_limits,
            )
            self.session_boundaries[session_id] = boundary
            return boundary

    def create_agent_boundary(
        self,
        agent_id: str,
        session_id: str,
        additional_roots: Optional[Set[Path]] = None,
        resource_limits: Optional[ResourceLimits] = None,
    ) -> FilesystemBoundary:
        """Create filesystem boundary for agent inheriting from session.

        The agent receives the session's roots plus ``additional_roots`` and,
        unless ``resource_limits`` is given, the session's limits.

        Raises:
            ValueError: If the agent already has a boundary or the session
                has none.
        """
        with self.manager_lock:
            if agent_id in self.agent_boundaries:
                raise ValueError(f"Agent boundary already exists: {agent_id}")

            # Get session boundary
            session_boundary = self.session_boundaries.get(session_id)
            if not session_boundary:
                raise ValueError(f"Session boundary not found: {session_id}")

            # Combine session roots with additional agent roots
            allowed_roots = session_boundary.allowed_roots.copy()
            if additional_roots:
                allowed_roots.update(additional_roots)

            boundary = FilesystemBoundary(
                session_id=session_id,
                allowed_roots=allowed_roots,
                resource_limits=resource_limits or session_boundary.resource_limits,
                agent_id=agent_id,
            )
            self.agent_boundaries[agent_id] = boundary
            return boundary

    def get_session_boundary(self, session_id: str) -> Optional[FilesystemBoundary]:
        """Get filesystem boundary for session."""
        return self.session_boundaries.get(session_id)

    def get_agent_boundary(self, agent_id: str) -> Optional[FilesystemBoundary]:
        """Get filesystem boundary for agent."""
        return self.agent_boundaries.get(agent_id)

    def remove_session_boundary(self, session_id: str) -> None:
        """Remove session boundary and all associated agent boundaries."""
        with self.manager_lock:
            # Remove session boundary
            self.session_boundaries.pop(session_id, None)

            # Remove all agent boundaries for this session
            agents_to_remove = [
                agent_id
                for agent_id, boundary in self.agent_boundaries.items()
                if boundary.session_id == session_id
            ]
            for agent_id in agents_to_remove:
                del self.agent_boundaries[agent_id]

    def remove_agent_boundary(self, agent_id: str) -> None:
        """Remove agent boundary."""
        with self.manager_lock:
            self.agent_boundaries.pop(agent_id, None)

    def get_all_boundaries_status(self) -> Dict[str, Any]:
        """Get status of all managed boundaries.

        Returns:
            A dict with "session_boundaries" and "agent_boundaries", each
            mapping an id to that boundary's allowed roots and resource
            usage (agent entries also carry their "session_id").
        """
        # Snapshot the registries under the lock, then measure outside it:
        # get_resource_usage() walks directory trees and would otherwise
        # block every other manager operation for the whole walk.
        with self.manager_lock:
            sessions = dict(self.session_boundaries)
            agents = dict(self.agent_boundaries)

        return {
            "session_boundaries": {
                session_id: {
                    "allowed_roots": boundary.get_allowed_paths(),
                    "resource_usage": boundary.get_resource_usage(),
                }
                for session_id, boundary in sessions.items()
            },
            "agent_boundaries": {
                agent_id: {
                    "session_id": boundary.session_id,
                    "allowed_roots": boundary.get_allowed_paths(),
                    "resource_usage": boundary.get_resource_usage(),
                }
                for agent_id, boundary in agents.items()
            },
        }


# Global boundary manager instance
_boundary_manager_instance: Optional[FilesystemBoundaryManager] = None
# Guards creation of the global instance so that concurrent first calls
# cannot end up with two different managers.
_boundary_manager_lock = threading.Lock()


def get_boundary_manager() -> FilesystemBoundaryManager:
    """Get global filesystem boundary manager, creating it on first use."""
    global _boundary_manager_instance
    with _boundary_manager_lock:
        if _boundary_manager_instance is None:
            _boundary_manager_instance = FilesystemBoundaryManager()
        return _boundary_manager_instance


# Convenience functions for boundary management
# Module-level convenience wrappers around the global boundary manager


def create_session_filesystem_boundary(
    session_id: str,
    allowed_roots: Set[Path],
    resource_limits: Optional[ResourceLimits] = None,
) -> FilesystemBoundary:
    """Create filesystem boundary for session.

    Delegates to ``create_session_boundary`` on the global manager and
    returns the boundary it creates.
    """
    manager = get_boundary_manager()
    return manager.create_session_boundary(session_id, allowed_roots, resource_limits)


def create_agent_filesystem_boundary(
    agent_id: str,
    session_id: str,
    additional_roots: Optional[Set[Path]] = None,
    resource_limits: Optional[ResourceLimits] = None,
) -> FilesystemBoundary:
    """Create filesystem boundary for agent.

    Delegates to ``create_agent_boundary`` on the global manager and returns
    the boundary it creates.
    """
    manager = get_boundary_manager()
    return manager.create_agent_boundary(
        agent_id, session_id, additional_roots, resource_limits
    )


def get_session_filesystem_boundary(session_id: str) -> Optional[FilesystemBoundary]:
    """Get filesystem boundary for session, or None if none is registered."""
    manager = get_boundary_manager()
    return manager.get_session_boundary(session_id)


def get_agent_filesystem_boundary(agent_id: str) -> Optional[FilesystemBoundary]:
    """Get filesystem boundary for agent, or None if none is registered."""
    manager = get_boundary_manager()
    return manager.get_agent_boundary(agent_id)


def validate_path_access(
    path: Union[str, Path], agent_id: str, operation: str = "access"
) -> None:
    """Validate path access for agent with comprehensive security checks.

    Raises:
        AccessDeniedError: If the agent has no registered boundary. Any
            error raised by the boundary's ``validate_file_access``
            propagates unchanged.
    """
    boundary = get_agent_filesystem_boundary(agent_id)
    if not boundary:
        raise AccessDeniedError(f"No filesystem boundary found for agent: {agent_id}")

    boundary.validate_file_access(path, operation)


def secure_file_operation(path: Union[str, Path], agent_id: str, operation: str):
    """Context manager for secure file operations with agent boundary validation.

    Returns:
        The context manager produced by the agent boundary's
        ``secure_file_operation``.

    Raises:
        AccessDeniedError: Immediately at call time (not on ``with`` entry)
            if the agent has no registered boundary.
    """
    boundary = get_agent_filesystem_boundary(agent_id)
    if not boundary:
        raise AccessDeniedError(f"No filesystem boundary found for agent: {agent_id}")

    return boundary.secure_file_operation(path, operation)


# Strong references to in-flight violation audit tasks so they are not
# garbage-collected before they finish.
_pending_violation_audits: Set[Any] = set()


def _finish_violation_audit(task: Any) -> None:
    """Release a finished audit task and mark its exception as retrieved."""
    _pending_violation_audits.discard(task)
    if not task.cancelled():
        task.exception()


def _report_boundary_violation(function_name: str, error: Exception) -> None:
    """Best-effort audit of a boundary violation; never raises."""
    # Function-scope import keeps this helper self-contained.
    import asyncio

    coro = None
    try:
        # Same keyword form as the other audit_security_violation call in
        # this module.
        # NOTE(review): resource_type/resource_id values chosen here -
        # confirm against the audit module's expectations.
        coro = audit_security_violation(
            operation="filesystem_boundary_violation",
            resource_type="file",
            resource_id=function_name,
            details=(
                f"Function {function_name} violated filesystem boundaries: {error}"
            ),
            user_id=None,
        )
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: execute the audit call right here.
            asyncio.run(coro)
            return
        task = loop.create_task(coro)
        _pending_violation_audits.add(task)
        task.add_done_callback(_finish_violation_audit)
    except Exception:
        # Audit failures must not mask the exception being reported.
        if asyncio.iscoroutine(coro):
            coro.close()


def enforce_directory_boundaries(func):
    """
    Decorator to enforce directory boundaries for filesystem operations.

    The wrapper currently only calls the decorated function. When it raises
    an exception that looks like a boundary problem (a
    ``FilesystemSecurityError``, or a message containing "boundary" or
    "access"), a security violation is reported to the audit log on a
    best-effort basis and the original exception is re-raised unchanged.
    """
    import functools

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        # For now, just call the function - boundary enforcement
        # would be implemented based on the specific use case
        try:
            return func(*args, **kwargs)
        except Exception as e:
            message = str(e).lower()
            if (
                isinstance(e, FilesystemSecurityError)
                or "boundary" in message
                or "access" in message
            ):
                _report_boundary_violation(func.__name__, e)
            raise

    return wrapper

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Nexus-Digital-Automations/Claude_Code_MCP_2'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

filesystem.py•23.4 KiB