"""
Authoritative Material Database Implementation
Loads material properties from multiple canonical sources:
1. USNavalResearchLaboratory - Galvanic series (MIL-HDBK-1004/6)
2. KittyCAD - Mechanical properties (density, strength)
3. Calculated properties - PREN, CPT estimates
Based on Codex review recommendations to replace hard-coded data.
"""
from typing import Dict, Any, Optional
import pandas as pd
import requests
import requests_cache
import logging
from pathlib import Path
from core.interfaces import MaterialDatabase
logger = logging.getLogger(__name__)
# Install HTTP cache per Codex recommendation
# Reduces repeated external calls on server restart
_CACHE_DIR = Path(__file__).parent.parent / ".cache"
_CACHE_DIR.mkdir(exist_ok=True)
requests_cache.install_cache(
str(_CACHE_DIR / "http_cache"),
backend="sqlite",
expire_after=86400, # 24 hours
)
class AuthoritativeMaterialDatabase(MaterialDatabase):
"""
Material database loading from authoritative GitHub sources.
Sources:
- USNavalResearchLaboratory/corrosion-modeling-applications (MIT)
- KittyCAD/material-properties (Apache-2.0)
- Calculated: PREN, CPT, composition-based properties
"""
# Data source URLs
USNRL_GALVANIC_URL = (
"https://raw.githubusercontent.com/USNavalResearchLaboratory/"
"corrosion-modeling-applications/master/cma/SeawaterPotentialData.xml"
)
KITTYCAD_STAINLESS_URL = (
"https://raw.githubusercontent.com/KittyCAD/material-properties/"
"main/materials/stainlesssteel.json"
)
KITTYCAD_NICKEL_URL = (
"https://raw.githubusercontent.com/KittyCAD/material-properties/"
"main/materials/nickelalloys.json"
)
def __init__(self, use_cache: bool = True):
"""
Initialize material database.
Args:
use_cache: Whether to cache loaded data (default: True)
"""
self._cache = {} if use_cache else None
self._galvanic_data: Optional[pd.DataFrame] = None
self._kittycad_stainless: Optional[Dict] = None
self._kittycad_nickel: Optional[Dict] = None
self._loaded = False
def _lazy_load(self):
"""Lazy load data on first access"""
if self._loaded:
return
logger.info("Loading authoritative material data from GitHub sources...")
try:
# Load USNRL galvanic series
logger.info(f"Loading USNRL galvanic series from {self.USNRL_GALVANIC_URL}")
self._galvanic_data = pd.read_xml(self.USNRL_GALVANIC_URL)
logger.info(f"Loaded {len(self._galvanic_data)} galvanic series entries")
except Exception as e:
logger.warning(f"Failed to load USNRL galvanic data: {e}")
logger.info("Will use fallback hard-coded values")
self._galvanic_data = None
try:
# Load KittyCAD stainless steel data
logger.info(f"Loading KittyCAD stainless data from {self.KITTYCAD_STAINLESS_URL}")
response = requests.get(self.KITTYCAD_STAINLESS_URL)
response.raise_for_status()
self._kittycad_stainless = response.json()
logger.info(f"Loaded KittyCAD stainless steel data")
except Exception as e:
logger.warning(f"Failed to load KittyCAD stainless data: {e}")
self._kittycad_stainless = None
try:
# Load KittyCAD nickel alloys data
logger.info(f"Loading KittyCAD nickel data from {self.KITTYCAD_NICKEL_URL}")
response = requests.get(self.KITTYCAD_NICKEL_URL)
response.raise_for_status()
self._kittycad_nickel = response.json()
logger.info(f"Loaded KittyCAD nickel alloy data")
except Exception as e:
logger.warning(f"Failed to load KittyCAD nickel data: {e}")
self._kittycad_nickel = None
self._loaded = True
logger.info("Material database initialization complete")
def get_material_properties(self, material_id: str) -> Dict[str, Any]:
"""
Get comprehensive material properties from multiple sources.
Args:
material_id: Material identifier (e.g., "316L", "CS", "duplex_2205")
Returns:
Dictionary with merged properties from all sources
"""
self._lazy_load()
# Check cache first
if self._cache is not None and material_id in self._cache:
return self._cache[material_id]
# Build properties from multiple sources
properties = {
"material_id": material_id,
"name": self._get_full_name(material_id),
}
# Add galvanic potential from USNRL
galvanic_potential = self._get_galvanic_potential(material_id)
if galvanic_potential is not None:
properties["galvanic_potential_V_SCE"] = galvanic_potential
properties["galvanic_source"] = "USNRL MIL-HDBK-1004/6"
# Add mechanical properties from KittyCAD
kittycad_props = self._get_kittycad_properties(material_id)
if kittycad_props:
properties.update(kittycad_props)
# Add composition from CSV-backed authoritative database
composition = self._get_composition(material_id)
if composition:
properties["composition"] = composition
# Tag composition source as authoritative CSV-backed data
properties["composition_source"] = "ASTM_standards_CSV"
properties["composition_provenance"] = {
"method": "CSV_loader",
"authoritative": True,
"note": "Loaded from materials_compositions.csv (ASTM A240, B443, B152)",
"quality": "authoritative",
}
# Calculate PREN if composition available
pren = self.calculate_pren(material_id)
if pren is not None:
properties["PREN"] = pren
# Estimate CPT from PREN
cpt = self.estimate_cpt(material_id)
if cpt is not None:
properties["CPT_estimate_C"] = cpt
# Add cost factor (relative to carbon steel)
cost_factor = self._get_cost_factor(material_id)
if cost_factor is not None:
properties["cost_factor"] = cost_factor
# Cache result
if self._cache is not None:
self._cache[material_id] = properties
return properties
def calculate_pren(self, material_id: str) -> Optional[float]:
"""
Calculate PREN = %Cr + 3.3*%Mo + 16*%N
PREN (Pitting Resistance Equivalent Number) is a measure of
pitting corrosion resistance in stainless steels.
"""
composition = self._get_composition(material_id)
if not composition:
return None
try:
Cr = composition.get("Cr", 0)
Mo = composition.get("Mo", 0)
N = composition.get("N", 0)
pren = Cr + 3.3 * Mo + 16 * N
return round(pren, 1)
except Exception as e:
logger.warning(f"Failed to calculate PREN for {material_id}: {e}")
return None
def estimate_cpt(self, material_id: str) -> Optional[float]:
"""
Estimate Critical Pitting Temperature (°C) from PREN.
Empirical correlation: CPT ≈ PREN - 10 (rough approximation)
More accurate: Use lookup tables or experimental data
"""
pren = self.calculate_pren(material_id)
if pren is None:
return None
# Simplified correlation
# TODO: Replace with more accurate model in Phase 3
cpt = pren - 10
return round(cpt, 0)
def _get_galvanic_potential(self, material_id: str) -> Optional[float]:
"""Get galvanic potential from USNRL data"""
if self._galvanic_data is None:
# Fallback to hard-coded values
return self._fallback_galvanic_potential(material_id)
# Map material_id to USNRL material names
# USNRL XML contains alloy names like "Carbon Steel", "Stainless Steel 316", etc.
usnrl_mapping = {
"CS": ["Carbon Steel", "Mild Steel"],
"316L": ["Stainless Steel 316", "SS316", "316L"],
"304": ["Stainless Steel 304", "SS304", "304"],
"duplex_2205": ["Duplex 2205", "2205"],
"C276": ["Hastelloy C-276", "C276", "Alloy C-276"],
"titanium_gr2": ["Titanium", "Titanium Grade 2"],
}
search_names = usnrl_mapping.get(material_id, [])
try:
# Search galvanic data for matching material
for search_name in search_names:
# Case-insensitive search in material column (assuming 'Material' or 'Alloy' column exists)
if 'Material' in self._galvanic_data.columns:
matches = self._galvanic_data[
self._galvanic_data['Material'].str.contains(search_name, case=False, na=False)
]
elif 'Alloy' in self._galvanic_data.columns:
matches = self._galvanic_data[
self._galvanic_data['Alloy'].str.contains(search_name, case=False, na=False)
]
else:
# If column names unknown, use fallback
return self._fallback_galvanic_potential(material_id)
if not matches.empty:
# Extract potential value (assuming 'Potential' or 'E_SCE' column)
if 'Potential' in matches.columns:
return float(matches.iloc[0]['Potential'])
elif 'E_SCE' in matches.columns:
return float(matches.iloc[0]['E_SCE'])
elif 'Voltage' in matches.columns:
return float(matches.iloc[0]['Voltage'])
except Exception as e:
logger.warning(f"Failed to parse USNRL galvanic data for {material_id}: {e}")
# Fallback to hard-coded values if parsing fails
return self._fallback_galvanic_potential(material_id)
def _fallback_galvanic_potential(self, material_id: str) -> Optional[float]:
"""Fallback galvanic potentials (V vs SCE in seawater)"""
fallback = {
"CS": -0.6,
"316L": -0.1,
"304": -0.15,
"duplex_2205": -0.05,
"super_duplex": 0.0,
"alloy_20": -0.05,
"C276": 0.1,
"titanium_gr2": -0.05,
}
return fallback.get(material_id)
def _get_kittycad_properties(self, material_id: str) -> Dict[str, Any]:
"""Get mechanical properties from KittyCAD data"""
props = {}
# Map material_id to KittyCAD material names
kittycad_mapping = {
"316L": "316L",
"304": "304",
"duplex_2205": "2205", # Duplex stainless
"C276": "C276", # Hastelloy (nickel alloys)
}
kittycad_name = kittycad_mapping.get(material_id)
if not kittycad_name:
return props
# Try stainless steel data first
if self._kittycad_stainless:
material_data = self._kittycad_stainless.get(kittycad_name)
if material_data:
props["density_kg_m3"] = material_data.get("density")
props["yield_strength_MPa"] = material_data.get("yield_strength")
props["ultimate_tensile_strength_MPa"] = material_data.get("ultimate_tensile_strength")
props["youngs_modulus_GPa"] = material_data.get("youngs_modulus")
props["poisson_ratio"] = material_data.get("poisson_ratio")
props["kittycad_source"] = "stainlesssteel.json"
# Try nickel alloys data if not found in stainless
if not props and self._kittycad_nickel:
material_data = self._kittycad_nickel.get(kittycad_name)
if material_data:
props["density_kg_m3"] = material_data.get("density")
props["yield_strength_MPa"] = material_data.get("yield_strength")
props["ultimate_tensile_strength_MPa"] = material_data.get("ultimate_tensile_strength")
props["youngs_modulus_GPa"] = material_data.get("youngs_modulus")
props["poisson_ratio"] = material_data.get("poisson_ratio")
props["kittycad_source"] = "nickelalloys.json"
return props
def _get_composition(self, material_id: str) -> Optional[Dict[str, float]]:
"""
Get chemical composition (wt%) from authoritative CSV-backed database.
Uses data.MATERIALS_DATABASE loaded from materials_compositions.csv
(ASTM A240, B443, B152, etc.)
"""
from data import MATERIALS_DATABASE
# Try to get material from CSV-backed database
material_data = MATERIALS_DATABASE.get(material_id)
if material_data:
# Build composition dict from MaterialComposition dataclass
composition = {}
if material_data.Cr_wt_pct > 0:
composition["Cr"] = material_data.Cr_wt_pct
if material_data.Ni_wt_pct > 0:
composition["Ni"] = material_data.Ni_wt_pct
if material_data.Mo_wt_pct > 0:
composition["Mo"] = material_data.Mo_wt_pct
if material_data.N_wt_pct > 0:
composition["N"] = material_data.N_wt_pct
if material_data.Fe_bal:
# Calculate Fe balance
total_alloying = sum(composition.values())
composition["Fe"] = max(0, 100.0 - total_alloying)
return composition if composition else None
return None
def _get_cost_factor(self, material_id: str) -> Optional[float]:
"""
Get cost factor relative to carbon steel.
TODO: Load from industry sources or materials pricing APIs
"""
cost_factors = {
"CS": 1.0,
"304": 2.5,
"316L": 3.5,
"duplex_2205": 5.0,
"super_duplex": 8.0,
"alloy_20": 10.0,
"C276": 20.0,
"titanium_gr2": 15.0,
}
return cost_factors.get(material_id)
def _get_full_name(self, material_id: str) -> str:
"""Get full material name"""
names = {
"CS": "Carbon Steel",
"316L": "316L Stainless Steel",
"304": "304 Stainless Steel",
"duplex_2205": "Duplex 2205",
"super_duplex": "Super Duplex (SAF 2507)",
"alloy_20": "Alloy 20 (Carpenter 20Cb-3)",
"C276": "Hastelloy C-276",
"titanium_gr2": "Titanium Grade 2",
}
return names.get(material_id, material_id)