"""Data module - Mock benchmark data and helpers."""
import logging
logger = logging.getLogger(__name__)
# ============================================================================
# MOCK BENCHMARK DATA
# ============================================================================
# Mock benchmark table keyed by NAICS code. Each industry maps to a display
# name and a per-region record of revenue, profit and margin figures; every
# industry also carries a "National" record.
# Source rows are (NAICS code, industry name, ((region, revenue, profit, margin), ...)).
MOCK_BENCHMARKS = {
    code: {
        "name": label,
        "benchmarks": {
            area: {"revenue": revenue, "profit": profit, "margin": margin}
            for area, revenue, profit, margin in rows
        },
    }
    for code, label, rows in (
        (
            "722511",
            "Full-Service Restaurants",
            (
                ("California", 520000, 52000, 10.0),
                ("Texas", 480000, 48000, 10.0),
                ("New York", 600000, 54000, 9.0),
                ("Florida", 450000, 45000, 10.0),
                ("National", 500000, 50000, 10.0),
            ),
        ),
        (
            "441110",
            "Retail - Auto Dealers",
            (
                ("California", 2500000, 125000, 5.0),
                ("Texas", 2200000, 110000, 5.0),
                ("New York", 2800000, 126000, 4.5),
                ("Florida", 2100000, 105000, 5.0),
                ("National", 2400000, 120000, 5.0),
            ),
        ),
        (
            "621111",
            "Healthcare - Physicians",
            (
                ("California", 1200000, 180000, 15.0),
                ("Texas", 1000000, 150000, 15.0),
                ("New York", 1400000, 196000, 14.0),
                ("Florida", 950000, 142500, 15.0),
                ("National", 1100000, 165000, 15.0),
            ),
        ),
        (
            "236220",
            "Commercial Construction",
            (
                ("California", 3500000, 280000, 8.0),
                ("Texas", 3000000, 240000, 8.0),
                ("New York", 4000000, 280000, 7.0),
                ("Florida", 2800000, 224000, 8.0),
                ("National", 3200000, 256000, 8.0),
            ),
        ),
    )
}
# Lower-case industry keyword -> NAICS code. Declared grouped by code and
# flattened here so each keyword becomes its own lookup key.
INDUSTRY_TO_NAICS = {
    keyword: code
    for code, keywords in (
        ("722511", ("restaurant", "restaurants", "food", "dining")),
        ("441110", ("retail", "auto")),
        ("621111", ("healthcare", "medical", "physician")),
        ("236220", ("construction", "building")),
    )
    for keyword in keywords
}
# Two-letter state abbreviation -> full state name, matching the region keys
# used in MOCK_BENCHMARKS.
STATE_ABBREV = dict(
    CA="California",
    TX="Texas",
    NY="New York",
    FL="Florida",
)
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def get_naics_code(industry: str) -> str:
    """Map a free-text industry keyword to its NAICS code.

    The keyword is lower-cased and stripped of surrounding whitespace before
    being looked up in ``INDUSTRY_TO_NAICS``. Keywords that are not in the
    table resolve to ``"722511"``.
    """
    keyword = industry.lower().strip()
    if keyword in INDUSTRY_TO_NAICS:
        return INDUSTRY_TO_NAICS[keyword]
    # Unknown keyword: fall back to the default code.
    return "722511"
def normalize_region(region: str) -> str:
    """Normalize a region or state abbreviation to its full, title-cased name.

    Surrounding whitespace is removed and internal runs of whitespace are
    collapsed to a single space. The cleaned value is then looked up
    (upper-cased) in ``STATE_ABBREV``; if it is not a known abbreviation, the
    cleaned value is returned title-cased.

    Args:
        region: A state abbreviation (e.g. ``"ca"``) or a region name
            (e.g. ``" new york "``), in any letter case.

    Returns:
        The full state name for a known abbreviation, otherwise the cleaned,
        title-cased input.
    """
    # Clean once and reuse for both the lookup and the fallback; previously the
    # fallback title-cased the raw input, so padded names such as
    # " california " kept their padding.
    cleaned = " ".join(region.split())
    return STATE_ABBREV.get(cleaned.upper(), cleaned.title())
def fetch_benchmark_data(
    naics_code: str,
    region: str,
    metric_type: str,
) -> tuple[float, float, dict]:
    """
    Look up the regional and national benchmark for one industry metric.

    Unknown NAICS codes fall back to the ``"722511"`` entry of
    ``MOCK_BENCHMARKS``; regions missing from the industry's table fall back
    to its ``"National"`` record. The metric is chosen by substring match on
    the lower-cased ``metric_type``: "profit" first, then "margin", otherwise
    "revenue".

    Returns:
        tuple: (regional_average, national_average, full_data), where
        ``full_data`` echoes ``naics_code`` and ``region`` exactly as passed
        in (even when a fallback record was used) alongside the resolved
        industry name, metric key and the two benchmark records.
    """
    if naics_code in MOCK_BENCHMARKS:
        industry_entry = MOCK_BENCHMARKS[naics_code]
    else:
        industry_entry = MOCK_BENCHMARKS["722511"]

    per_region = industry_entry["benchmarks"]
    national_record = per_region["National"]
    regional_record = per_region.get(region, national_record)

    # First keyword found in the requested metric wins; revenue is the default.
    requested = metric_type.lower()
    metric_key = "revenue"
    for candidate in ("profit", "margin"):
        if candidate in requested:
            metric_key = candidate
            break

    details = {
        "industry_name": industry_entry["name"],
        "naics_code": naics_code,
        "region": region,
        "metric": metric_key,
        "regional_data": regional_record,
        "national_data": national_record,
    }
    return regional_record[metric_key], national_record[metric_key], details
def _percent_difference(value: float, baseline: float) -> float:
    """Return the signed gap between *value* and *baseline* as a percentage of the baseline's magnitude."""
    if baseline == 0:
        # A relative difference against zero is undefined; report 0.0 rather
        # than raising ZeroDivisionError so the widget payload can still be built.
        return 0.0
    # Dividing by the magnitude keeps the sign meaning "above/below baseline"
    # even when the baseline itself is negative (e.g. an average loss).
    return ((value - baseline) / abs(baseline)) * 100


def prepare_widget_content(
    industry_name: str,
    region: str,
    metric: str,
    user_value: float,
    regional_average: float,
    national_average: float,
    time_period: str,
) -> dict:
    """
    Prepare structured content for the widget.
    Compatible with both Claude and ChatGPT.

    Args:
        industry_name: Display name of the industry.
        region: Region the regional average refers to.
        metric: Name of the metric being compared.
        user_value: The user's own figure for the metric.
        regional_average: Benchmark for the region.
        national_average: National benchmark.
        time_period: Label for the period the figures cover.

    Returns:
        dict: The inputs passed through unchanged, plus
        ``vsRegionalPercent`` / ``vsNationalPercent`` (signed percentage gap
        between ``user_value`` and each average, ``0.0`` when that average is
        zero) and ``isAboveRegional`` / ``isAboveNational`` (whether
        ``user_value`` is strictly greater than each average).
    """
    vs_regional = _percent_difference(user_value, regional_average)
    vs_national = _percent_difference(user_value, national_average)
    return {
        # Core data (used by widget)
        "industry_name": industry_name,
        "region": region,
        "metric": metric,
        "user_value": user_value,
        "regional_average": regional_average,
        "national_average": national_average,
        "time_period": time_period,
        # Computed fields (for ChatGPT compatibility)
        "vsRegionalPercent": vs_regional,
        "vsNationalPercent": vs_national,
        # Compare the raw values directly so the flags stay correct for zero
        # or negative averages, where the percentage alone is not a reliable guide.
        "isAboveRegional": user_value > regional_average,
        "isAboveNational": user_value > national_average,
    }
# Top-level statement: logs an INFO record once the module body above has
# finished executing (i.e. on first import).
logger.info("✓ Data module loaded")