SF Permits MCP Server

test_prod_gate.py•14.4 KiB

"""Tests for scripts/prod_gate.py — weighted scoring and hotfix ratchet logic. The scoring math lives inside main() in prod_gate.py. Rather than running the full CLI (which would hit subprocess calls, the staging URL, and file-system state), we extract the pure arithmetic and replicate it here under test. A thin helper `_compute_gate` mirrors the exact algorithm from main(): • CATEGORY_WEIGHTS dict • category_mins reduction • weighted score formula (5 - (5 - raw) * weight, floor at 2 when raw >= 2) • round → effective_score • verdict + hotfix-ratchet logic All tests call `_compute_gate()` with hand-crafted result lists and assert on (verdict, effective_score) — no subprocess, no network, no disk. """ import os import sys import tempfile import pytest # --------------------------------------------------------------------------- # Replicate the scoring algorithm from prod_gate.py main() # --------------------------------------------------------------------------- CATEGORY_WEIGHTS = { "Design Tokens": ("design", 0.6), "Test Suite": ("safety", 1.0), "Dependencies": ("safety", 1.0), "Health": ("data", 1.0), "Data Freshness":("data", 1.0), "Smoke Test": ("data", 1.0), "Route Inventory":("ops", 0.8), "Performance": ("ops", 0.8), # Auth Safety and Secret Leak are hard holds, not scored } def _compute_gate(results, hard_holds=None, hotfix_file=None): """Mirror the scoring logic from prod_gate.main(). Args: results: list of (name, raw_score, msg, issues) tuples hard_holds: list of (name, msg, issues) tuples — defaults to [] hotfix_file: path to a HOTFIX_REQUIRED.md to simulate ratchet checks. If None, ratchet is disabled (no file check). 
Returns: dict with keys: effective_score, verdict, hotfix_ratchet_triggered """ if hard_holds is None: hard_holds = [] # Build category minimums category_mins = {} for name, raw_score, msg, issues in results: if name in ("Auth Safety", "Secret Leak"): continue cat_info = CATEGORY_WEIGHTS.get(name) if not cat_info: continue cat_name, weight = cat_info if cat_name not in category_mins or raw_score < category_mins[cat_name][0]: category_mins[cat_name] = (raw_score, weight, name) # Compute weighted category scores weighted_scores = {} for cat_name, (raw, weight, check_name) in category_mins.items(): if raw == 5: weighted = 5.0 else: penalty = (5 - raw) * weight weighted = 5.0 - penalty if raw >= 2: weighted = max(weighted, 2.0) weighted_scores[cat_name] = (round(weighted, 1), raw, weight, check_name) effective_score_float = min(ws[0] for ws in weighted_scores.values()) if weighted_scores else 5.0 effective_score = max(1, min(5, round(effective_score_float))) # Verdict if hard_holds: verdict = "HOLD" elif effective_score <= 2: verdict = "HOLD" elif effective_score <= 3: verdict = "PROMOTE" else: verdict = "PROMOTE" # Hotfix ratchet (only simulated when hotfix_file is provided) hotfix_ratchet_triggered = False if effective_score == 3 and not hard_holds and hotfix_file is not None: if os.path.exists(hotfix_file): hotfix_ratchet_triggered = True verdict = "HOLD" else: # First time at score 3 — write the hotfix marker with open(hotfix_file, "w") as f: f.write("# HOTFIX REQUIRED\n") # Hotfix cleanup if effective_score >= 4 and hotfix_file and os.path.exists(hotfix_file): os.remove(hotfix_file) return { "effective_score": effective_score, "verdict": verdict, "hotfix_ratchet_triggered": hotfix_ratchet_triggered, "weighted_scores": weighted_scores, } def _all_perfect(): """All scored checks returning raw 5.""" return [ ("Design Tokens", 5, "clean", []), ("Test Suite", 5, "passing", []), ("Dependencies", 5, "none", []), ("Health", 5, "ok", []), ("Data Freshness", 5, "ok", []), 
("Smoke Test", 5, "ok", []), ("Route Inventory",5, "ok", []), ("Performance", 5, "fast", []), ] # --------------------------------------------------------------------------- # test_weighted_scoring_perfect # --------------------------------------------------------------------------- def test_weighted_scoring_perfect(): """All categories raw 5 → effective 5, PROMOTE.""" out = _compute_gate(_all_perfect()) assert out["effective_score"] == 5 assert out["verdict"] == "PROMOTE" # --------------------------------------------------------------------------- # test_weighted_scoring_design_dampened # --------------------------------------------------------------------------- def test_weighted_scoring_design_dampened(): """Design raw 2, everything else 5 → effective 3 (dampened, not 2), PROMOTE.""" results = _all_perfect() # Replace Design Tokens with raw score 2 results = [(n, (2 if n == "Design Tokens" else r), m, i) for n, r, m, i in results] out = _compute_gate(results) # Design raw=2, weight=0.6: effective = 5 - (5-2)*0.6 = 5 - 1.8 = 3.2 → rounds to 3 # But floor: raw >= 2 → max(3.2, 2.0) = 3.2 → round → 3 # All other categories = 5. Min of all = 3. 
assert out["effective_score"] == 3, ( f"Expected effective 3 when design raw=2, got {out['effective_score']}" ) assert out["verdict"] == "PROMOTE" # score 3 → promote with mandatory hotfix def test_weighted_scoring_design_raw_1_gives_3(): """Design raw 1, weight 0.6 → effective = 5 - 4*0.6 = 2.6 → rounds to 3 (no floor since raw=1).""" results = _all_perfect() results = [(n, (1 if n == "Design Tokens" else r), m, i) for n, r, m, i in results] out = _compute_gate(results) # raw=1 → floor doesn't apply (raw < 2) # weighted = 5 - (5-1)*0.6 = 5 - 2.4 = 2.6 → round to 3 assert out["effective_score"] == 3 assert out["verdict"] == "PROMOTE" # --------------------------------------------------------------------------- # test_weighted_scoring_safety_not_dampened # --------------------------------------------------------------------------- def test_weighted_scoring_safety_not_dampened(): """Safety (Test Suite) raw 2, everything else 5 → effective 2, HOLD.""" results = _all_perfect() results = [(n, (2 if n == "Test Suite" else r), m, i) for n, r, m, i in results] out = _compute_gate(results) # Safety weight 1.0: effective = 5 - (5-2)*1.0 = 5 - 3 = 2 → stays 2 # (floor = max(2.0, 2.0) = 2.0) assert out["effective_score"] == 2 assert out["verdict"] == "HOLD" def test_weighted_scoring_safety_raw_1_gives_1(): """Safety raw 1 → effective 1 → HOLD.""" results = _all_perfect() results = [(n, (1 if n == "Test Suite" else r), m, i) for n, r, m, i in results] out = _compute_gate(results) # weight=1.0, raw=1: weighted = 5 - 4*1.0 = 1 (no floor: raw < 2) assert out["effective_score"] == 1 assert out["verdict"] == "HOLD" # --------------------------------------------------------------------------- # test_weighted_scoring_floor # --------------------------------------------------------------------------- def test_weighted_scoring_floor_design_raw_2(): """Design raw 2, weight 0.6: floor prevents score from going below 2. 
effective = max(5 - (5-2)*0.6, 2.0) = max(3.2, 2.0) = 3.2 → rounds to 3. """ results = _all_perfect() results = [(n, (2 if n == "Design Tokens" else r), m, i) for n, r, m, i in results] out = _compute_gate(results) # Weighted for design = 3.2, rounded = 3 design_weighted = out["weighted_scores"].get("design") assert design_weighted is not None # (weighted_val, raw, weight, check_name) assert design_weighted[0] == 3.2 def test_weighted_scoring_ops_raw_2_gives_3(): """Ops (Route Inventory) raw 2, weight 0.8 → effective = max(5-2.4, 2) = max(2.6, 2) = 2.6 → rounds to 3.""" results = _all_perfect() results = [(n, (2 if n == "Route Inventory" else r), m, i) for n, r, m, i in results] out = _compute_gate(results) # ops: effective = 5 - (5-2)*0.8 = 5 - 2.4 = 2.6 → round to 3 assert out["effective_score"] == 3 assert out["verdict"] == "PROMOTE" # --------------------------------------------------------------------------- # test_hard_hold_overrides_score # --------------------------------------------------------------------------- def test_hard_hold_overrides_score(): """Auth bypass with all other scores at 5 → still HOLD.""" results = _all_perfect() # Auth Safety is a hard hold, not in the scored results hard_holds = [("Auth Safety", "bypass detected on /brief", ["/brief returned 200 without auth"])] out = _compute_gate(results, hard_holds=hard_holds) assert out["verdict"] == "HOLD" assert out["effective_score"] == 5 # numeric score is still 5, hold is from hard_holds def test_secret_leak_hard_hold(): """Secret leak forces HOLD regardless of all other scores being perfect.""" results = _all_perfect() hard_holds = [("Secret Leak", "sk-abc123 found in diff", ["Potential secret: sk-abc123..."])] out = _compute_gate(results, hard_holds=hard_holds) assert out["verdict"] == "HOLD" # --------------------------------------------------------------------------- # test_hotfix_ratchet_first_time # --------------------------------------------------------------------------- def 
test_hotfix_ratchet_first_time(tmp_path): """Score 3 with no existing HOTFIX_REQUIRED.md → PROMOTE, file created.""" hotfix_file = str(tmp_path / "HOTFIX_REQUIRED.md") # Design raw 2 → effective 3 (dampened by 0.6 weight) results = _all_perfect() results = [(n, (2 if n == "Design Tokens" else r), m, i) for n, r, m, i in results] assert not os.path.exists(hotfix_file) out = _compute_gate(results, hotfix_file=hotfix_file) assert out["effective_score"] == 3 assert out["verdict"] == "PROMOTE" assert not out["hotfix_ratchet_triggered"] assert os.path.exists(hotfix_file), "HOTFIX_REQUIRED.md should be written on first score-3 promotion" # --------------------------------------------------------------------------- # test_hotfix_ratchet_second_time # --------------------------------------------------------------------------- def test_hotfix_ratchet_second_time(tmp_path): """Score 3 when HOTFIX_REQUIRED.md already exists → downgraded to HOLD.""" hotfix_file = str(tmp_path / "HOTFIX_REQUIRED.md") # Pre-create the file (simulates previous promotion at score 3) with open(hotfix_file, "w") as f: f.write("# HOTFIX REQUIRED\n") results = _all_perfect() results = [(n, (2 if n == "Design Tokens" else r), m, i) for n, r, m, i in results] out = _compute_gate(results, hotfix_file=hotfix_file) assert out["effective_score"] == 3 assert out["verdict"] == "HOLD" assert out["hotfix_ratchet_triggered"] # --------------------------------------------------------------------------- # test_hotfix_cleanup_on_improvement # --------------------------------------------------------------------------- def test_hotfix_cleanup_on_improvement(tmp_path): """Score 5 when HOTFIX_REQUIRED.md exists → file deleted, PROMOTE.""" hotfix_file = str(tmp_path / "HOTFIX_REQUIRED.md") with open(hotfix_file, "w") as f: f.write("# HOTFIX REQUIRED\n") results = _all_perfect() # all raw 5 → effective 5 out = _compute_gate(results, hotfix_file=hotfix_file) assert out["effective_score"] == 5 assert out["verdict"] == 
"PROMOTE" assert not os.path.exists(hotfix_file), ( "HOTFIX_REQUIRED.md should be deleted when score improves to 4+" ) def test_hotfix_cleanup_on_score_4(tmp_path): """Score 4 also triggers hotfix cleanup.""" hotfix_file = str(tmp_path / "HOTFIX_REQUIRED.md") with open(hotfix_file, "w") as f: f.write("# HOTFIX REQUIRED\n") # Ops raw 3 → effective = 5 - (5-3)*0.8 = 5 - 1.6 = 3.4 → rounds to 3 # Hmm, that gives 3 not 4. Use a data raw=4 instead: # data weight=1.0, raw=4 → effective = 5 - 1*1.0 = 4 → rounds to 4 results = _all_perfect() results = [(n, (4 if n == "Health" else r), m, i) for n, r, m, i in results] out = _compute_gate(results, hotfix_file=hotfix_file) assert out["effective_score"] == 4 assert not os.path.exists(hotfix_file), ( "HOTFIX_REQUIRED.md should be deleted when score is 4" ) # --------------------------------------------------------------------------- # Edge cases # --------------------------------------------------------------------------- def test_no_scored_checks_returns_5(): """If no check matches CATEGORY_WEIGHTS, effective score defaults to 5.""" # Only hard-hold checks (not in CATEGORY_WEIGHTS) results = [ ("Auth Safety", 5, "ok", []), ("Secret Leak", 5, "ok", []), ] out = _compute_gate(results) assert out["effective_score"] == 5 assert out["verdict"] == "PROMOTE" def test_effective_score_min_across_categories(): """The effective score is the minimum across all weighted category scores. 
safety raw=2, weight=1.0 → effective = 5 - (5-2)*1.0 = 2 (floor max(2,2)=2) design raw=1, weight=0.6 → effective = 5 - (5-1)*0.6 = 5 - 2.4 = 2.6 → 3 min(2, 3, all-others-5) = 2 → HOLD """ results = _all_perfect() # Test Suite (safety) raw=2, Design Tokens (design) raw=1 results = [ (n, (2 if n == "Test Suite" else (1 if n == "Design Tokens" else r)), m, i) for n, r, m, i in results ] out = _compute_gate(results) # safety raw=2, weight=1.0 → effective=2.0 (floor applies: max(2,2)=2) # design raw=1, weight=0.6 → effective=2.6, no floor (raw<2) → rounds to 3 # min(2, 3) = 2 → HOLD assert out["effective_score"] == 2 assert out["verdict"] == "HOLD" def test_data_category_uses_minimum(): """When two data checks differ, the category min is used.""" results = _all_perfect() # Health=5, Data Freshness=2, Smoke Test=5 # data category min = 2 → effective = 5 - 3*1.0 = 2 → HOLD results = [(n, (2 if n == "Data Freshness" else r), m, i) for n, r, m, i in results] out = _compute_gate(results) assert out["effective_score"] == 2 assert out["verdict"] == "HOLD"

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tbrennem-source/sf-permits-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_prod_gate.py•14.4 KiB