"""
Configuration management for the Spark Optimizer.
This module provides centralized configuration with environment variable support,
validation, and default values.
"""
import os
import logging
from typing import Optional
from dataclasses import dataclass, field
# Module-level logger, named after this module so its records can be filtered
# by the module's import path.
logger = logging.getLogger(__name__)
@dataclass
class OptimizerConfig:
    """
    Configuration for the Spark Optimizer system.

    All settings can be overridden via environment variables with the
    SPARK_OPT_ prefix (the Gemini API key is read from GEMINI_API_KEY).
    Environment variables are read each time an instance is created, because
    every field uses a ``default_factory``. Values passed explicitly to the
    constructor take precedence over the environment.

    Raises:
        ValueError: If a numeric environment variable cannot be parsed, if a
            numeric setting is out of range, or if ``log_level`` is not the
            name of a standard logging level.
    """

    # Spark History Server settings
    history_server_url: str = field(
        default_factory=lambda: os.getenv("SPARK_OPT_HISTORY_URL", "http://localhost:18080")
    )
    history_server_timeout: int = field(
        default_factory=lambda: int(os.getenv("SPARK_OPT_TIMEOUT", "30"))
    )

    # LLM settings
    # repr=False keeps the secret out of the auto-generated __repr__, so that
    # printing or logging the config object cannot leak the API key.
    gemini_api_key: Optional[str] = field(
        default_factory=lambda: os.getenv("GEMINI_API_KEY"),
        repr=False,
    )
    gemini_model: str = field(
        default_factory=lambda: os.getenv("SPARK_OPT_MODEL", "gemini-2.0-flash-exp")
    )
    llm_max_retries: int = field(
        default_factory=lambda: int(os.getenv("SPARK_OPT_MAX_RETRIES", "5"))
    )
    llm_retry_delay: float = field(
        default_factory=lambda: float(os.getenv("SPARK_OPT_RETRY_DELAY", "5.0"))
    )

    # Analysis settings
    max_stages_for_detail: int = field(
        default_factory=lambda: int(os.getenv("SPARK_OPT_MAX_STAGES", "5"))
    )
    # Only the (case-insensitive) string "true" enables code analysis.
    enable_code_analysis: bool = field(
        default_factory=lambda: os.getenv("SPARK_OPT_CODE_ANALYSIS", "true").lower() == "true"
    )

    # Logging settings
    log_level: str = field(default_factory=lambda: os.getenv("SPARK_OPT_LOG_LEVEL", "INFO"))

    def __post_init__(self):
        """Validate configuration after initialization, then configure logging.

        Raises:
            ValueError: If any setting is out of range or ``log_level`` does
                not name a logging level.
        """
        if self.history_server_timeout <= 0:
            raise ValueError("history_server_timeout must be positive")
        if self.llm_max_retries < 0:
            raise ValueError("llm_max_retries must be non-negative")
        if self.llm_retry_delay < 0:
            raise ValueError("llm_retry_delay must be non-negative")
        if self.max_stages_for_detail <= 0:
            raise ValueError("max_stages_for_detail must be positive")

        # Resolve the level name before touching the logging setup so that a
        # typo surfaces as a ValueError (like every other invalid setting)
        # rather than an AttributeError. The int check rejects attributes of
        # the logging module that are not level constants (e.g. BASIC_FORMAT).
        level = getattr(logging, self.log_level.upper(), None)
        if not isinstance(level, int) or isinstance(level, bool):
            raise ValueError(
                f"log_level must be a standard logging level name, got {self.log_level!r}"
            )

        # Configure logging
        logging.basicConfig(
            level=level,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        logger.info(
            "Initialized OptimizerConfig: history_url=%s, model=%s",
            self.history_server_url,
            self.gemini_model,
        )
# Global config instance, created once when this module is first imported.
# Constructing it reads the environment, runs OptimizerConfig's validation
# (which may raise on invalid settings), and calls logging.basicConfig.
config = OptimizerConfig()