"""Logging configuration using loguru.
Provides structured logging with console and optional file output.
Includes PII/secrets sanitization to prevent credential leaks.
"""
import os
import re
import sys
import traceback
from pathlib import Path

from loguru import logger

from mcp_task_aggregator.config import get_settings
# Patterns for sensitive data sanitization (order matters - more specific first)
_SANITIZE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
# URLs with credentials (user:pass@host) - must come before email pattern
(re.compile(r"(https?://)([^/:@\s]+):([^/@\s]+)@"), r"\1<USER>:<PASS>@"),
# Bearer tokens
(re.compile(r"(Bearer\s+)[\w\-\.]+", re.I), r"\1<REDACTED>"),
# API keys and tokens (common formats) - handles key=value and key: value
(re.compile(r"(api[_-]?key|token|secret|auth)\s*[=:]\s*['\"]?[\w\-\.]+['\"]?", re.I), r"\1=<REDACTED>"),
# Email addresses
(re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"), "<EMAIL>"),
# Jira/Atlassian cloud IDs (UUIDs)
(re.compile(r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}", re.I), "<UUID>"),
]
def _get_username() -> str:
"""Get current username for path sanitization."""
return os.environ.get("USER", os.environ.get("USERNAME", "user"))
def sanitize_message(message: str) -> str:
    """Sanitize sensitive data from log messages.

    Home-directory prefixes (which embed the username) are collapsed to ``~``
    first, then every ``(pattern, replacement)`` pair in
    ``_SANITIZE_PATTERNS`` is applied in order.

    Args:
        message: Raw log message.

    Returns:
        Sanitized message with PII/secrets redacted.
    """
    username = _get_username()

    # Path.home() raises RuntimeError when no home directory can be resolved;
    # sanitization runs on every log record, so that must not propagate.
    try:
        home_path = Path.home()
    except RuntimeError:
        home_path = None

    prefixes: list[str] = []
    # Skip a home directory that is just the filesystem root: replacing "/"
    # would turn every path separator and URL slash in the message into "~".
    if home_path is not None and home_path != Path(home_path.anchor):
        prefixes.append(str(home_path))
    # Skip an empty username: "/Users/" and "/home/" alone would rewrite the
    # start of every other user's path as well.
    if username:
        prefixes.append(f"/Users/{username}")
        prefixes.append(f"/home/{username}")

    for prefix in prefixes:
        message = message.replace(prefix, "~")

    # Apply regex patterns
    for pattern, replacement in _SANITIZE_PATTERNS:
        message = pattern.sub(replacement, message)
    return message
def _sanitize_record(record: dict) -> None:
    """Store sanitized message and traceback text in ``record["extra"]``.

    Sets the two keys that the format templates in this module refer to:

    * ``sanitized_message`` - ``record["message"]`` passed through
      ``sanitize_message``.
    * ``sanitized_exception`` - the exception attached to the record, rendered
      with the standard ``traceback`` module and sanitized the same way, or
      ``""`` when the record carries no exception.

    Args:
        record: The loguru record dict handed to a format callable.
    """
    extra = record["extra"]
    extra["sanitized_message"] = sanitize_message(record["message"])

    exc_info = record.get("exception")
    exc_type, exc_value, exc_tb = exc_info if exc_info is not None else (None, None, None)
    if exc_type is None or exc_value is None:
        # Nothing attached (or an empty exc_info triple): emit no traceback.
        extra["sanitized_exception"] = ""
        return

    # Render the traceback ourselves so it can be sanitized like the message;
    # exception text frequently embeds URLs, tokens and home-directory paths.
    rendered = "".join(traceback.format_exception(exc_type, exc_value, exc_tb))
    extra["sanitized_exception"] = sanitize_message(rendered)


def _sanitizing_format(record: dict) -> str:
    """Format function that sanitizes the message before output.

    Args:
        record: The loguru record dict for the message being emitted.

    Returns:
        The colorized console format template. It refers to the sanitized
        values via ``extra`` so that braces in the message are never
        interpreted as format fields.
    """
    _sanitize_record(record)
    # With a callable format, loguru appends neither the line ending nor the
    # exception field, so both are spelled out here; without the trailing
    # field, tracebacks from logger.exception() would be dropped.
    return (
        "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
        "<level>{level: <8}</level> | "
        "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | "
        "<level>{extra[sanitized_message]}</level>\n"
        "{extra[sanitized_exception]}"
    )


def _sanitizing_file_format(record: dict) -> str:
    """Format function for file output with sanitization.

    Args:
        record: The loguru record dict for the message being emitted.

    Returns:
        The plain-text file format template (millisecond timestamps, no color
        markup), ending with the sanitized traceback field.
    """
    _sanitize_record(record)
    # Line ending and exception field are explicit for the same reason as in
    # the console format: loguru does not add them for callable formats.
    return (
        "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} | "
        "{extra[sanitized_message]}\n"
        "{extra[sanitized_exception]}"
    )
def setup_logging() -> None:
    """Install the application's loguru handlers from the current settings.

    Replaces any existing handlers with a colorized stderr handler and, when
    ``settings.log_file`` is set, a rotating file handler. Both handlers use
    the sanitizing format callables, so messages are redacted before output.
    """
    settings = get_settings()
    level = settings.log_level

    # Drop whatever handlers are installed (including loguru's default one).
    logger.remove()

    # Console output goes to stderr (for MCP compatibility).
    logger.add(sys.stderr, format=_sanitizing_format, level=level, colorize=True)

    # File output is optional and only enabled when a path is configured.
    log_file = settings.log_file
    if log_file:
        file_options = {
            "rotation": "10 MB",
            "retention": "7 days",
            "compression": "gz",
        }
        logger.add(log_file, format=_sanitizing_file_format, level=level, **file_options)

    logger.info(f"Logging initialized at level {settings.log_level}")
def get_logger(name: str) -> "logger":
    """Return the shared loguru logger with ``name`` bound to it.

    Args:
        name: Logger name, typically ``__name__`` of the calling module.

    Returns:
        The result of ``logger.bind(name=name)``.
    """
    # NOTE(review): bind() presumably stores ``name`` as contextual "extra"
    # data rather than changing the record's ``{name}`` field - confirm
    # against the loguru documentation.
    bound_logger = logger.bind(name=name)
    return bound_logger
def configure_logging_for_tests(log_level: str = "DEBUG", log_file: Path | None = None) -> None:
    """Install a compact logging setup for the test environment.

    Removes all existing handlers, then adds a colorized stderr handler and,
    if ``log_file`` is given, a file handler. Both use a short plain format
    that prints ``{message}`` directly, i.e. without the sanitizing format
    callables.

    Args:
        log_level: Log level to use during tests.
        log_file: Optional file path for test logs.
    """
    logger.remove()

    # Options shared by the console and file handlers.
    shared_options = {
        "format": "{time:HH:mm:ss} | {level: <8} | {name}:{function}:{line} | {message}",
        "level": log_level,
    }

    logger.add(sys.stderr, colorize=True, **shared_options)

    if log_file:
        logger.add(log_file, **shared_options)