import re
from pathlib import Path
from typing import List, Tuple, Optional
class SensitiveDataScanner:
    """Scan text for strings that look like leaked credentials.

    ``PATTERNS`` maps a human-readable label to a regular-expression source
    string. ``scan`` reports partially masked matches and ``sanitize``
    replaces every match with ``[REDACTED]``.
    """

    PATTERNS = {
        "AWS Access Key": r"AKIA[0-9A-Z]{16}",
        "AWS Secret Key": r"(?i)aws_secret_access_key\s*=\s*[a-zA-Z0-9/+]{40}",
        "Generic API Key": r"(?i)(api_key|apikey|secret|token)\s*[:=]\s*['\"][a-zA-Z0-9_\-]{16,}['\"]",
        "Private Key": r"-----BEGIN [A-Z ]+ PRIVATE KEY-----",
        # The token body is mandatory and may contain hyphens, so a
        # hyphen-separated token is matched (and redacted) in full and a
        # bare "xoxb-" prefix on its own is not reported as a finding.
        "Slack Token": r"xox[baprs]-[0-9a-zA-Z-]{10,}",
        "GitHub Token": r"(ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36}",
        "Stripe Key": r"sk_live_[0-9a-zA-Z_\-]+",
    }

    def __init__(self, patterns: Optional[dict] = None):
        """Create a scanner.

        Args:
            patterns: Mapping of label -> regex source string. A falsy value
                (``None`` or an empty dict) selects the built-in ``PATTERNS``.
        """
        # Copy the defaults so that editing one instance's ``patterns`` does
        # not silently change the class-level table shared by all instances.
        self.patterns = patterns if patterns else dict(self.PATTERNS)

    def scan(self, text: str) -> List[Tuple[str, str]]:
        """Scan text for sensitive data.

        Args:
            text: The text to inspect. Falsy input yields no findings.

        Returns:
            List of tuples (pattern_name, match_snippet), grouped by pattern
            in the iteration order of ``self.patterns``. The snippet is a
            masked form of the match, never the full matched text.
        """
        findings = []
        if not text:
            return findings
        for name, pattern in self.patterns.items():
            for match in re.finditer(pattern, text):
                match_str = match.group(0)
                # Mask the match in the report: keep only the first and last
                # four characters, or nothing at all for short matches.
                if len(match_str) > 8:
                    redacted = match_str[:4] + "..." + match_str[-4:]
                else:
                    redacted = "***"
                findings.append((name, redacted))
        return findings

    def sanitize(self, text: str) -> str:
        """Return text with sensitive data redacted.

        Args:
            text: The text to clean. Falsy input is returned unchanged.

        Returns:
            ``text`` with every match of every configured pattern replaced
            by the literal ``[REDACTED]``.
        """
        if not text:
            return text
        sanitized = text
        # Patterns are applied one after another, each on the output of the
        # previous substitution.
        for pattern in self.patterns.values():
            sanitized = re.sub(pattern, "[REDACTED]", sanitized)
        return sanitized
def validate_path_safety(path: str, project_root: str) -> bool:
    """
    Report whether ``path`` lies inside ``project_root``.

    Both arguments are resolved to absolute, normalized paths before the
    containment check, so ``..`` segments cannot be used to step outside
    the root. The root itself counts as being inside the root.

    Args:
        path: The path to check (relative or absolute)
        project_root: The directory that ``path`` must stay within

    Returns:
        True when the resolved path is the root or a descendant of it,
        False otherwise, including when resolution fails.

    Example:
        >>> validate_path_safety("/project/src/file.py", "/project")
        True
        >>> validate_path_safety("/project/../etc/passwd", "/project")
        False
    """
    try:
        candidate = Path(path).resolve()
        root_dir = Path(project_root).resolve()
        # relative_to() raises ValueError when candidate is not located
        # under root_dir; the computed relative path itself is not needed.
        candidate.relative_to(root_dir)
    except (ValueError, RuntimeError, OSError):
        # ValueError: candidate is outside root_dir
        # RuntimeError: symlink loop hit during resolution
        # OSError: filesystem-level failure while resolving
        return False
    else:
        return True