Skip to main content
Glama

MCP Data Fetch Server

by undici77
security.py (6.01 kB)
"""
Security validation and sanitization utilities with sandbox support.
"""

import ipaddress
import re
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse

from bs4 import BeautifulSoup

from config import BLOCKED_DOMAINS, BLOCKED_EXTENSIONS


class SecurityValidator:
    """Comprehensive security validation for web fetching.

    Bundles URL screening, filename/path sandboxing, HTML scrubbing and a
    keyword-based prompt-injection heuristic. Path helpers are bound to the
    working directory given at construction; the remaining helpers are
    static.
    """

    # Patterns that might indicate injection attempts
    INJECTION_PATTERNS = [
        r'<script[^>]*>.*?</script>',
        r'javascript:',
        r'on\w+\s*=',
        r'eval\s*\(',
        r'expression\s*\(',
        r'vbscript:',
        r'data:text/html',
    ]

    # Path traversal patterns
    PATH_TRAVERSAL = [
        '..',
        '%2e%2e',
        '..%2f',
        '%2e%2e%2f',
        '..\\',
        '%5c%2e%2e',
    ]

    # Lower-case phrases whose presence flags a text as a possible
    # prompt-injection attempt (matched as plain substrings).
    _PROMPT_INJECTION_INDICATORS = (
        'ignore previous',
        'ignore all previous',
        'disregard previous',
        'forget previous',
        'new instructions',
        'system prompt',
        'you are now',
        'your new role',
        'sudo',
        'admin mode',
        'developer mode',
    )

    # Filename substituted when sanitization leaves nothing usable.
    _FALLBACK_FILENAME = 'unnamed'

    def __init__(self, working_dir: Path):
        """Initialize with sandboxed working directory.

        Args:
            working_dir: Directory that acts as the sandbox root. It is
                resolved once here so later containment checks compare
                resolved paths on both sides.
        """
        self.working_dir = working_dir.resolve()

    @staticmethod
    def _is_internal_ip(hostname: str) -> bool:
        """Return True if *hostname* is an IP literal for a non-public address.

        Hostnames that are not IP literals return False; they are left to the
        BLOCKED_DOMAINS prefix check in ``is_safe_url``.
        """
        try:
            address = ipaddress.ip_address(hostname)
        except ValueError:
            return False

        # Unwrap IPv4-mapped IPv6 (e.g. ::ffff:127.0.0.1) so the IPv4 rules
        # apply regardless of how the interpreter classifies mapped forms.
        mapped = getattr(address, 'ipv4_mapped', None)
        if mapped is not None:
            address = mapped

        return (
            address.is_private
            or address.is_loopback
            or address.is_link_local
            or address.is_multicast
            or address.is_reserved
            or address.is_unspecified
        )

    @staticmethod
    def is_safe_url(url: str) -> tuple[bool, str]:
        """Validate URL safety.

        Checks, in order: type/emptiness, length (max 2048), scheme
        (http/https only), presence of a hostname, blocked hosts
        (BLOCKED_DOMAINS prefixes plus loopback/private/link-local IP
        literals), path-traversal markers, and blocked file extensions.

        Args:
            url: The URL to validate.

        Returns:
            ``(True, "")`` when the URL passes every check, otherwise
            ``(False, reason)`` with a human-readable reason.
        """
        # Deliberately broad: any unexpected failure while validating must
        # reject the URL (fail closed) rather than propagate to the caller.
        try:
            if not url or not isinstance(url, str):
                return False, "Invalid URL format"

            # Check length
            if len(url) > 2048:
                return False, "URL too long"

            parsed = urlparse(url)

            # Check scheme
            if parsed.scheme not in ('http', 'https'):
                return False, f"Unsupported scheme: {parsed.scheme}"

            # Check hostname
            hostname = parsed.hostname
            if not hostname:
                return False, "Missing hostname"

            # Block local addresses: configured prefixes first, then any IP
            # literal that points at a non-public range (covers forms such
            # as [::1] or 169.254.x.x that a prefix list may not enumerate).
            matches_blocklist = any(
                hostname.startswith(blocked)
                or hostname == blocked.rstrip('.')
                for blocked in BLOCKED_DOMAINS
            )
            if matches_blocklist or SecurityValidator._is_internal_ip(hostname):
                return False, "Access to localhost/private networks blocked"

            # Check for path traversal. Both the parsed path and the raw URL
            # are inspected because urlparse may normalise characters away
            # from the path that are still present in the raw string.
            path = parsed.path.lower()
            url_lower = url.lower()
            if any(
                pattern in path or pattern in url_lower
                for pattern in SecurityValidator.PATH_TRAVERSAL
            ):
                return False, "Path traversal attempt detected"

            # Check file extension
            for ext in BLOCKED_EXTENSIONS:
                if path.endswith(ext):
                    return False, f"Blocked file type: {ext}"

            return True, ""

        except Exception as e:
            return False, f"URL validation error: {str(e)}"

    def sanitize_path(self, path: str) -> Path:
        """
        Reduce an arbitrary path string to a single safe filename.

        Traversal markers (``..``, ``~``) are stripped, only the final path
        component is kept, and every character outside ``[a-zA-Z0-9._-]`` is
        replaced with ``_``.

        Args:
            path: The requested path or filename.

        Returns:
            A relative, single-component ``Path``. It is never empty and
            never consists solely of dots; such inputs yield a fixed
            fallback filename instead.
        """
        # Remove any path traversal attempts
        stripped = path.replace('..', '').replace('~', '')

        # Get just the filename
        name = Path(stripped).name

        # Remove special characters except alphanumeric, dash, underscore, dot
        name = re.sub(r'[^a-zA-Z0-9._-]', '_', name)

        # Removing '~' can re-form '..' (e.g. '.~.'), and inputs such as ''
        # or '..' leave an empty name, which Path() treats as the current
        # directory. Neither is a usable filename, so substitute a fallback.
        if not name.strip('.'):
            name = self._FALLBACK_FILENAME

        return Path(name)

    def get_safe_path(
        self, relative_path: str, base_dir: Optional[Path] = None
    ) -> tuple[Path, bool]:
        """
        Get a safe absolute path within the working directory.

        Args:
            relative_path: The requested path (can be relative or absolute);
                only its sanitized final component is used.
            base_dir: Base directory to use (defaults to working_dir)

        Returns:
            Tuple of (safe_path, is_safe) where safe_path is the resolved
            path and is_safe indicates if the path is within the sandbox
        """
        if base_dir is None:
            base_dir = self.working_dir

        # Sanitize the path
        sanitized = self.sanitize_path(relative_path)

        # Resolve to absolute path
        target_path = (base_dir / sanitized).resolve()

        # Check if path is within working directory (sandbox check).
        # relative_to raises ValueError when target_path is not under it.
        try:
            target_path.relative_to(self.working_dir)
        except ValueError:
            # Path escapes working directory
            return target_path, False
        return target_path, True

    def get_relative_path(self, absolute_path: Path) -> str:
        """
        Get relative path from working directory.
        Useful for returning paths that other agents can use.

        Args:
            absolute_path: Path to express relative to the working directory.

        Returns:
            The path relative to the working directory as a string, or the
            path unchanged (as a string) when it lies outside of it.
        """
        try:
            return str(absolute_path.relative_to(self.working_dir))
        except ValueError:
            # Path is outside working directory
            return str(absolute_path)

    @staticmethod
    def sanitize_html_content(html: str) -> str:
        """Remove potentially dangerous HTML content.

        Drops script/style/embedding/metadata elements entirely and strips
        event-handler (``on*``) and form-target attributes from all
        remaining tags.

        Args:
            html: Raw HTML markup.

        Returns:
            The cleaned document serialized back to a string.
        """
        soup = BeautifulSoup(html, 'html.parser')

        # Remove dangerous elements
        dangerous_tags = [
            'script', 'style', 'iframe', 'object', 'embed',
            'applet', 'meta', 'link', 'base',
        ]
        for tag in soup(dangerous_tags):
            tag.decompose()

        # Remove event handlers
        for tag in soup.find_all(True):
            # Snapshot the keys: attributes are deleted while iterating.
            for attr in list(tag.attrs.keys()):
                if attr.startswith('on') or attr in ['formaction', 'action']:
                    del tag.attrs[attr]

        return str(soup)

    @staticmethod
    def is_prompt_injection(text: str) -> bool:
        """Detect potential prompt injection attempts.

        Performs a case-insensitive substring search for a fixed list of
        suspicious phrases.

        Args:
            text: Text to inspect.

        Returns:
            True if any indicator phrase occurs in *text*; False otherwise,
            including for empty input.
        """
        if not text:
            return False

        text_lower = text.lower()
        return any(
            indicator in text_lower
            for indicator in SecurityValidator._PROMPT_INJECTION_INDICATORS
        )

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/undici77/MCPDataFetchServer'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.