"""Validation utilities for Bug Bounty Hunter MCP"""
import re
import validators as val
from urllib.parse import urlparse
from typing import Optional, Dict, List
import subprocess
import shutil
def validate_url(url: str) -> bool:
    """
    Tell whether a string passes the URL validator

    Args:
        url: Candidate URL string
    Returns:
        True when the validator accepts the string, False otherwise
    """
    outcome = val.url(url)
    # Only a literal True counts as success; any other result means invalid.
    return outcome is True
def validate_domain(domain: str) -> bool:
    """
    Tell whether a string passes the domain validator

    Args:
        domain: Candidate domain name
    Returns:
        True when the validator accepts the string, False otherwise
    """
    outcome = val.domain(domain)
    # Only a literal True counts as success; any other result means invalid.
    return outcome is True
def validate_ip(ip: str) -> bool:
    """
    Tell whether a string is accepted as an IPv4 or IPv6 address

    Args:
        ip: Candidate IP address string
    Returns:
        True when either the IPv4 or the IPv6 validator accepts the string,
        False otherwise
    """
    # IPv4 is tried first; the IPv6 validator only runs when IPv4 is rejected.
    if val.ipv4(ip) is True:
        return True
    return val.ipv6(ip) is True
def validate_email(email: str) -> bool:
    """
    Tell whether a string passes the email validator

    Args:
        email: Candidate email address
    Returns:
        True when the validator accepts the string, False otherwise
    """
    outcome = val.email(email)
    # Only a literal True counts as success; any other result means invalid.
    return outcome is True
def is_in_scope(target: str, scope: List[str]) -> bool:
    """
    Check if target is in scope

    Matching is case-insensitive. A plain entry such as ``example.com``
    covers the domain itself and every subdomain of it. A wildcard entry such
    as ``*.example.com`` covers subdomains only, not the bare domain. Blank
    entries are ignored.

    Args:
        target: Target domain/URL
        scope: List of in-scope domains; an empty list places no restriction
    Returns:
        True if in scope, False otherwise
    """
    if not scope:
        return True
    domain = extract_domain(target).lower()
    for scope_item in scope:
        entry = scope_item.strip().lower()
        if not entry:
            # A blank entry would otherwise match any target ending in "."
            continue
        if entry.startswith("*."):
            # Keep the leading dot (".example.com") so that only names with
            # at least one extra label in front of it can match.
            suffix = entry[1:]
            if len(domain) > len(suffix) and domain.endswith(suffix):
                return True
            continue
        if domain == entry or domain.endswith(f".{entry}"):
            return True
    return False
def extract_domain(url_or_domain: str) -> str:
    """
    Extract domain from URL or return domain if already a domain

    For HTTP/HTTPS URLs the host name alone is returned: the port and any
    ``user:password@`` prefix are dropped and the host is lower-cased. The
    scheme is recognised case-insensitively. Any other input is returned
    unchanged.

    Args:
        url_or_domain: URL or domain string
    Returns:
        Extracted domain (empty string when the URL carries no host)
    """
    if url_or_domain.lower().startswith(('http://', 'https://')):
        parsed = urlparse(url_or_domain)
        # hostname, unlike netloc, excludes the port and credentials, which
        # would otherwise break comparisons against scope entries.
        return parsed.hostname or ''
    return url_or_domain
async def validate_tool_installation() -> Dict[str, object]:
    """
    Validate that required security tools are installed

    Every tool is probed with ``check_tool_installed``. Nothing is awaited
    here, so all probes run synchronously inside the calling task.

    Returns:
        Dictionary with three keys:
            "installed": list of tool names that were found
            "missing": list of tool names that were not found
            "details": mapping of every tool name to its installation status
    """
    # Maps the reported tool name to the executable name that is looked up.
    tools = {
        # Subdomain enumeration
        'subfinder': 'subfinder',
        'amass': 'amass',
        'assetfinder': 'assetfinder',
        # HTTP probing
        'httpx': 'httpx',
        'httprobe': 'httprobe',
        # Port scanning
        'nmap': 'nmap',
        'masscan': 'masscan',
        'naabu': 'naabu',
        # Web crawling
        'gospider': 'gospider',
        'katana': 'katana',
        'hakrawler': 'hakrawler',
        # Fuzzing
        'ffuf': 'ffuf',
        'gobuster': 'gobuster',
        'wfuzz': 'wfuzz',
        'feroxbuster': 'feroxbuster',
        # Vulnerability scanning
        'nuclei': 'nuclei',
        'nikto': 'nikto',
        'sqlmap': 'sqlmap',
        'dalfox': 'dalfox',
        # Content discovery
        'waybackurls': 'waybackurls',
        'gau': 'gau',
        'unfurl': 'unfurl',
        # DNS
        'dnsx': 'dnsx',
        'dnsgen': 'dnsgen',
        # Parameter discovery
        'arjun': 'arjun',
        'paramspider': 'paramspider',
        # JWT
        'jwt_tool': 'jwt_tool',
        # Cloud
        's3scanner': 's3scanner',
        # SSL/TLS
        'testssl': 'testssl.sh',
        # Git
        'git-dumper': 'git-dumper',
        # Other
        'whatweb': 'whatweb',
        'wpscan': 'wpscan',
        'joomscan': 'joomscan',
    }
    results = {
        tool_name: check_tool_installed(command)
        for tool_name, command in tools.items()
    }
    return {
        "installed": [name for name, found in results.items() if found],
        "missing": [name for name, found in results.items() if not found],
        "details": results,
    }
def check_tool_installed(tool: str) -> bool:
    """
    Tell whether a command can be located by ``shutil.which``

    Args:
        tool: Tool command name
    Returns:
        True if the command was located, False otherwise
    """
    resolved_path = shutil.which(tool)
    if resolved_path is None:
        return False
    return True
def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename for safe file system operations

    Reserved characters (``< > : " / \\ | ? *``) and ASCII control characters
    are replaced with underscores, leading/trailing dots and spaces are
    removed, and the result is limited to 255 characters.

    Args:
        filename: Original filename
    Returns:
        Sanitized filename; never empty (``"_"`` is returned when nothing
        usable remains)
    """
    # Replace reserved characters and control characters (including NUL,
    # which file system calls reject outright).
    filename = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename)
    # Remove leading/trailing spaces and dots
    filename = filename.strip('. ')
    # Limit length
    if len(filename) > 255:
        # Truncation can expose a new trailing dot or space, so trim again.
        filename = filename[:255].rstrip('. ')
    # An empty name is not usable as a path component.
    return filename or '_'
def is_valid_jwt(token: str) -> bool:
    """
    Check if string looks like a JWT token

    This is a shape check only: exactly three dot-separated, non-empty
    segments made of base64url characters (``A-Z a-z 0-9 _ -``). Nothing is
    decoded or verified.

    Args:
        token: Token string
    Returns:
        True if valid JWT format, False otherwise
    """
    parts = token.split('.')
    if len(parts) != 3:
        return False
    # fullmatch instead of match with "$": "$" also matches just before a
    # trailing newline, which would let "a.b.c\n" through.
    return all(re.fullmatch(r'[A-Za-z0-9_-]+', part) for part in parts)
def extract_urls_from_text(text: str) -> List[str]:
    """
    Extract URLs from text

    Trailing sentence punctuation (``. , ; : ! ?``) is removed from each
    match, so "see http://a.com." yields "http://a.com".

    Args:
        text: Text to search
    Returns:
        List of unique extracted URLs, in order of first appearance
    """
    url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
    # A dict keeps first-seen order while de-duplicating, so the result is
    # stable from run to run (a set's order is not).
    unique_urls = {}
    for candidate in re.findall(url_pattern, text):
        url = candidate.rstrip('.,;:!?')
        if not url.partition('://')[2]:
            # Nothing but the scheme is left once the punctuation is gone.
            continue
        unique_urls.setdefault(url, None)
    return list(unique_urls)
def extract_subdomains_from_text(text: str, base_domain: str) -> List[str]:
    """
    Extract subdomains from text

    Matching is case-insensitive and results are lower-cased. A candidate is
    rejected when the base domain is immediately followed by more host name
    characters, so neither "sub.example.community" nor
    "sub.example.com.evil.org" yields "sub.example.com".

    Args:
        text: Text to search
        base_domain: Base domain to search for
    Returns:
        List of unique extracted subdomains, in order of first appearance
        (the bare base domain itself is never included)
    """
    if not base_domain:
        # With an empty base the pattern would match any dotted label run.
        return []
    pattern = (
        # Do not start in the middle of an alphanumeric run.
        r'(?<![a-zA-Z0-9])'
        # One or more DNS labels (1-63 chars each), each followed by a dot.
        r'(?:[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+'
        + re.escape(base_domain)
        # The base domain must end the host name: no further label
        # characters and no ".<label>" continuation after it.
        + r'(?![a-zA-Z0-9\-]|\.[a-zA-Z0-9])'
    )
    # A dict keeps first-seen order while de-duplicating.
    unique_subdomains = {}
    for found in re.findall(pattern, text, flags=re.IGNORECASE):
        unique_subdomains.setdefault(found.lower(), None)
    return list(unique_subdomains)
def is_http_url(url: str) -> bool:
    """
    Check if URL is HTTP/HTTPS

    The scheme is compared case-insensitively, so "HTTP://host" is accepted.

    Args:
        url: URL to check
    Returns:
        True if HTTP/HTTPS, False otherwise
    """
    # URI schemes are case-insensitive, so normalise before comparing.
    return url.lower().startswith(('http://', 'https://'))
def parse_http_status(status_code: int) -> str:
    """
    Map an HTTP status code to the name of its class

    Args:
        status_code: HTTP status code
    Returns:
        One of "informational", "success", "redirection", "client_error",
        "server_error", or "unknown" for codes outside 100-599
    """
    # Each band covers one hundred codes starting at its lower bound.
    bands = (
        (100, "informational"),
        (200, "success"),
        (300, "redirection"),
        (400, "client_error"),
        (500, "server_error"),
    )
    for lower_bound, label in bands:
        if lower_bound <= status_code < lower_bound + 100:
            return label
    return "unknown"
def validate_scope_format(scope: str) -> bool:
    """
    Validate scope format (domain, CIDR, or wildcard)

    CIDR entries are parsed with the standard ``ipaddress`` module, so
    out-of-range octets or prefix lengths are rejected and IPv6 networks are
    accepted. The prefix must be a plain decimal length.

    Args:
        scope: Scope string
    Returns:
        True if valid, False otherwise
    """
    import ipaddress  # local import: no other code in this block's view needs it

    if '/' in scope:
        # Domain names cannot contain a slash, so such an entry is either
        # CIDR notation or invalid.
        _, _, prefix = scope.partition('/')
        if not (prefix.isascii() and prefix.isdigit()):
            return False
        try:
            # strict=False accepts entries with host bits set, such as
            # 192.168.1.10/24.
            ipaddress.ip_network(scope, strict=False)
        except ValueError:
            return False
        return True
    # Check if it's a valid domain
    if validate_domain(scope):
        return True
    # Check if it's a wildcard domain
    return scope.startswith('*.') and validate_domain(scope[2:])