SF Permits MCP Server

sf-permits-mcp
scripts

vision_score.py•17.4 KiB

#!/usr/bin/env python3
"""Send dashboard screenshots to Claude Vision for scoring.

Usage:
    # Default: scores existing dashboard-loop screenshots
    python scripts/vision_score.py [round_num]

    # Changed-pages mode: score pages changed since last commit
    python scripts/vision_score.py --changed --url https://staging.example.com --sprint qs10
    python scripts/vision_score.py --changed --url https://... --sprint qs10 --output qa-results/my-scores.json
"""

import argparse
import base64
import json
import os
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path

# The SDK is only needed when a screenshot is actually scored. Guarding the
# import keeps `--help` and the pure helpers usable without it (Playwright is
# likewise imported lazily in run_changed_mode). The module attribute is kept
# so existing code that patches `vision_score.anthropic` continues to work.
try:
    import anthropic
except ImportError:  # pragma: no cover - depends on the environment
    anthropic = None

VISION_PROMPT = """You are a strict web design reviewer. Score this page on an ABSOLUTE scale, not relative to anything else.

RUBRIC:
5/5 EXCELLENT: Content in centered max-width container (~1100px). Glass-morphism cards with rounded corners and subtle borders for each content section. Monospace display font for headings, clean sans-serif for body. Navigation is a clean horizontal bar with no wrapping. Adequate whitespace between sections (24px+). Dark theme with consistent color tokens. Professional, polished, ready for paying customers.
4/5 GOOD: Centered content, cards present, good spacing. Minor issues like slightly inconsistent fonts or one section without a card. Nav works but could be tighter.
3/5 MEDIOCRE: Some centering but inconsistent. Some sections have cards, others are raw. Font usage mixed. Nav functional but crowded. Spacing uneven. Looks like a dev tool, not a product.
2/5 POOR: Content mostly flush-left or full-width. Few or no cards. Nav overflows or wraps. Large unstyled sections. Poor spacing. Looks unfinished.
1/5 BROKEN: No centering, no cards, nav broken, raw HTML, light theme on a dark-theme site, fundamentally unstyled.

CHECK EACH:
1. CENTERING: Is main content in a centered max-width container? Or flush-left/full-width sprawl?
2. NAV: Does nav display on one line without wrapping? Are items reasonably sized?
3. CARDS: Are content sections wrapped in card containers (rounded borders, background, shadow)?
4. TYPOGRAPHY: Monospace headings? Sans-serif body? Consistent sizing hierarchy?
5. SPACING: Adequate gaps between sections? Not cramped?
6. SEARCH BAR: If present, is it styled as a prominent input with rounded corners?
7. RECENT ITEMS: If present, are they styled as cards/chips, not raw text links?
8. ACTION LINKS: If present, are they styled as buttons, not tiny text?

For EACH failing check, describe the SPECIFIC CSS fix needed (property: value).

Return ONLY this JSON:
{"score": N, "checks": {"centering": {"pass": bool, "fix": "css fix or null"}, "nav": {"pass": bool, "fix": "css fix or null"}, "cards": {"pass": bool, "fix": "css fix or null"}, "typography": {"pass": bool, "fix": "css fix or null"}, "spacing": {"pass": bool, "fix": "css fix or null"}, "search_bar": {"pass": bool, "fix": "css fix or null"}, "recent_items": {"pass": bool, "fix": "css fix or null"}, "action_links": {"pass": bool, "fix": "css fix or null"}}, "summary": "one line overall assessment"}"""

# Scores strictly below this value are appended to pending-reviews.json.
FLAG_THRESHOLD = 3.0

# Mapping of template filenames/paths to page slugs.
# Keys: lowercase filename stems or partial paths that git diff --name-only returns.
# Values: page slug from PAGES list in visual_qa.py.
TEMPLATE_TO_PAGE: dict[str, str] = {
    # Public pages
    "landing": "landing",
    "index": "landing",
    "search": "search",
    "login": "login",
    "beta_request": "beta-request",
    "beta-request": "beta-request",
    "property_report": "property-report",
    "property-report": "property-report",
    "report": "property-report",
    # Auth pages
    "account": "account",
    "brief": "brief",
    "portfolio": "portfolio",
    "consultants": "consultants",
    "bottlenecks": "bottlenecks",
    "analyses": "analyses",
    "voice_calibration": "voice-calibration",
    "voice-calibration": "voice-calibration",
    "watch_list": "watch-list",
    "watch-list": "watch-list",
    "watch": "watch-list",
    # Admin pages
    "admin_feedback": "admin-feedback",
    "admin-feedback": "admin-feedback",
    "feedback": "admin-feedback",
    "admin_activity": "admin-activity",
    "admin-activity": "admin-activity",
    "activity": "admin-activity",
    "admin_ops": "admin-ops",
    "admin-ops": "admin-ops",
    "ops": "admin-ops",
    "admin_sources": "admin-sources",
    "admin-sources": "admin-sources",
    "sources": "admin-sources",
    "admin_regulatory": "admin-regulatory",
    "admin-regulatory": "admin-regulatory",
    "regulatory": "admin-regulatory",
    "regulatory_watch": "admin-regulatory",
    "admin_costs": "admin-costs",
    "admin-costs": "admin-costs",
    "costs": "admin-costs",
    "admin_pipeline": "admin-pipeline",
    "admin-pipeline": "admin-pipeline",
    "pipeline": "admin-pipeline",
    "admin_beta": "admin-beta",
    "admin-beta": "admin-beta",
    "beta_requests": "admin-beta",
    "beta-requests": "admin-beta",
}

# All 21 pages from visual_qa.py PAGES list with their paths
PAGES: list[dict] = [
    # --- Public (no auth) ---
    {"slug": "landing", "path": "/", "auth": "public"},
    {"slug": "search", "path": "/search?q=kitchen+remodel&neighborhood=Mission", "auth": "public"},
    {"slug": "login", "path": "/auth/login", "auth": "public"},
    {"slug": "beta-request", "path": "/beta-request", "auth": "public"},
    {"slug": "property-report", "path": "/report/3512/035", "auth": "public"},
    # --- Auth (logged-in user) ---
    {"slug": "account", "path": "/account", "auth": "auth"},
    {"slug": "brief", "path": "/brief", "auth": "auth"},
    {"slug": "portfolio", "path": "/portfolio", "auth": "auth"},
    {"slug": "consultants", "path": "/consultants", "auth": "auth"},
    {"slug": "bottlenecks", "path": "/dashboard/bottlenecks", "auth": "auth"},
    {"slug": "analyses", "path": "/account/analyses", "auth": "auth"},
    {"slug": "voice-calibration", "path": "/account/voice-calibration", "auth": "auth"},
    {"slug": "watch-list", "path": "/watch/list", "auth": "auth"},
    # --- Admin ---
    {"slug": "admin-feedback", "path": "/admin/feedback", "auth": "admin"},
    {"slug": "admin-activity", "path": "/admin/activity", "auth": "admin"},
    {"slug": "admin-ops", "path": "/admin/ops", "auth": "admin"},
    {"slug": "admin-sources", "path": "/admin/sources", "auth": "admin"},
    {"slug": "admin-regulatory", "path": "/admin/regulatory-watch", "auth": "admin"},
    {"slug": "admin-costs", "path": "/admin/costs", "auth": "admin"},
    {"slug": "admin-pipeline", "path": "/admin/pipeline", "auth": "admin"},
    {"slug": "admin-beta", "path": "/admin/beta-requests", "auth": "admin"},
]

# Build slug -> page def for fast lookup
PAGES_BY_SLUG: dict[str, dict] = {p["slug"]: p for p in PAGES}


def _normalize_score(value) -> float:
    """Coerce a model-supplied score into a number, falling back to 0.

    The model is asked for a bare number but may answer with a string such as
    "4" or "4/5", or with null. Anything that cannot be read as a number is
    treated as 0 so that the page is flagged rather than crashing the run.
    """
    if isinstance(value, bool):
        return 0
    if isinstance(value, (int, float)):
        return value
    try:
        # Accept "4/5"-style answers by reading the numerator only.
        return float(str(value).split("/")[0].strip())
    except (TypeError, ValueError):
        return 0


def _count_passing(checks) -> int:
    """Return how many entries of *checks* are dicts with a truthy "pass"."""
    if not isinstance(checks, dict):
        return 0
    return sum(1 for v in checks.values() if isinstance(v, dict) and v.get("pass"))


def score_screenshot(image_path: str, label: str = "") -> dict:
    """Send a screenshot to Claude Vision and return the score.

    Args:
        image_path: Path of the PNG file to upload.
        label: Optional text appended (in parentheses) to the prompt context.

    Returns:
        The JSON object parsed from the model's reply; the prompt asks for the
        keys score, checks and summary, where checks maps a dimension name to
        {pass: bool, fix: str|None}. If no JSON object can be parsed, returns
        {"score": 0, "checks": {}, "raw": <reply text>}.

    Raises:
        ImportError: If the ``anthropic`` package is not installed.
    """
    if anthropic is None:
        raise ImportError("the 'anthropic' package is required to score screenshots")

    client = anthropic.Anthropic()

    with open(image_path, "rb") as f:
        image_data = base64.standard_b64encode(f.read()).decode("utf-8")

    suffix = f" ({label})" if label else ""

    response = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=2000,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": image_data,
                        },
                    },
                    {
                        "type": "text",
                        "text": VISION_PROMPT
                        + f"\n\nThis is a screenshot of the authenticated dashboard{suffix}.",
                    },
                ],
            }
        ],
    )

    # Concatenate every text block instead of indexing content[0], which fails
    # when the reply is empty or its first block carries no text.
    text = "".join(getattr(block, "text", "") or "" for block in (response.content or []))

    # Try to parse JSON from response: take the outermost {...} span so that
    # surrounding prose or code fences are ignored.
    start = text.find("{")
    end = text.rfind("}")
    if start != -1 and end > start:
        try:
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            pass

    print(f"WARNING: Could not parse JSON from Vision response:\n{text}")
    return {"score": 0, "checks": {}, "raw": text}


def _match_changed_files(changed_files) -> list[str]:
    """Map changed file paths to unique page slugs, preserving first-seen order."""
    matched_slugs: list[str] = []
    seen: set[str] = set()

    for filepath in changed_files:
        # Try the filename stem with underscores first, then as written
        # (dashes preserved); the first key found in the mapping wins.
        stem_dash = Path(filepath).stem.lower()
        stem = stem_dash.replace("-", "_")
        for key in (stem, stem_dash):
            slug = TEMPLATE_TO_PAGE.get(key)
            if slug is None:
                continue
            if slug not in seen:
                seen.add(slug)
                matched_slugs.append(slug)
            break

    return matched_slugs


def get_changed_pages() -> list[str]:
    """Find page slugs for templates changed since last commit.

    Runs ``git diff --name-only HEAD~1`` restricted to web/templates/ and
    web/static/ in the current working directory. Returns an empty list (after
    printing a warning to stderr) if git cannot be run or exits non-zero.
    """
    try:
        result = subprocess.run(
            ["git", "diff", "--name-only", "HEAD~1", "--", "web/templates/", "web/static/"],
            capture_output=True,
            text=True,
            cwd=os.getcwd(),
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"WARNING: git diff failed: {e}", file=sys.stderr)
        return []

    if result.returncode != 0:
        # e.g. not a git repository, or HEAD~1 does not exist yet. Without this
        # check the failure would look exactly like "nothing changed".
        detail = (result.stderr or "").strip()
        print(f"WARNING: git diff failed: exit {result.returncode}: {detail}", file=sys.stderr)
        return []

    changed_files = [line for line in result.stdout.strip().split("\n") if line]
    return _match_changed_files(changed_files)


def _login_via_test_secret(page, base_url: str, role: str, secret: str) -> bool:
    """Authenticate using the test-login endpoint. Returns True on success.

    POSTs a JSON body with the test account email and *secret* to
    ``{base_url}/auth/test-login`` through the page's request context; a 200
    or 302 status counts as success. Any exception is reported on stderr and
    treated as a failed login.
    """
    email = f"test-{role}@sfpermits.ai" if role == "admin" else "test-user@sfpermits.ai"
    try:
        resp = page.request.post(
            f"{base_url}/auth/test-login",
            data=json.dumps({"email": email, "secret": secret}),
            headers={"Content-Type": "application/json"},
        )
    except Exception as e:  # best-effort: the caller skips auth pages on failure
        print(f"WARNING: test-login request failed: {e}", file=sys.stderr)
        return False
    return resp.status in (200, 302)


def take_screenshot(page, url: str, screenshot_path: str) -> bool:
    """Navigate to URL and take a full-page screenshot. Returns True on success.

    Makes up to three attempts, waiting 3 seconds before each retry. After the
    third failure a warning is printed to stderr and False is returned.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            if attempt > 0:
                page.wait_for_timeout(3000)
            page.goto(url, wait_until="domcontentloaded", timeout=45000)
            # Short settle delay after DOMContentLoaded before capturing.
            page.wait_for_timeout(1500)
            page.screenshot(path=screenshot_path, full_page=True)
            return True
        except Exception as e:
            if attempt == max_attempts - 1:
                print(
                    f"WARNING: Navigation failed after 3 attempts for {url}: {e}",
                    file=sys.stderr,
                )
    return False


def append_pending_review(result_entry: dict, pending_reviews_path: str) -> None:
    """Append a low-scoring result to the pending-reviews.json file.

    The file holds a JSON list. A missing, unreadable, malformed or non-list
    file is treated as an empty list, so the file is rewritten containing only
    *result_entry* in those cases. Parent directories are created as needed.
    """
    path = Path(pending_reviews_path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Load existing entries or initialize
    entries = []
    if path.exists():
        try:
            with open(path, encoding="utf-8") as f:
                loaded = json.load(f)
        except (json.JSONDecodeError, OSError):
            loaded = None
        # Guard against a file that holds valid JSON which is not a list.
        if isinstance(loaded, list):
            entries = loaded

    entries.append(result_entry)

    with open(path, "w", encoding="utf-8") as f:
        json.dump(entries, f, indent=2)


def run_changed_mode(args) -> int:
    """Score pages changed since last commit using Playwright screenshots.

    Reads ``args.url``, ``args.sprint`` and ``args.output``. Screenshots go to
    qa-results/screenshots/<sprint>/, per-run results to the output path, and
    entries scoring below FLAG_THRESHOLD are appended to
    qa-results/pending-reviews.json. Returns the process exit code (always 0).
    """
    from playwright.sync_api import sync_playwright

    base_url = args.url.rstrip("/")
    sprint = args.sprint or "latest"
    output_path = args.output or "qa-results/vision-scores-latest.json"
    pending_reviews_path = "qa-results/pending-reviews.json"
    test_secret = os.environ.get("TEST_LOGIN_SECRET", "")

    # Find changed page slugs
    changed_slugs = get_changed_pages()
    if not changed_slugs:
        print("No changed templates matched to known pages.")
        return 0

    print(f"Changed pages detected: {', '.join(changed_slugs)}")

    # Prepare screenshot directory
    screenshots_dir = Path("qa-results") / "screenshots" / sprint
    screenshots_dir.mkdir(parents=True, exist_ok=True)

    all_results: list[dict] = []

    # Determine max auth level needed
    pages_to_score = [PAGES_BY_SLUG[s] for s in changed_slugs if s in PAGES_BY_SLUG]
    needs_admin = any(p["auth"] == "admin" for p in pages_to_score)
    needs_auth = needs_admin or any(p["auth"] == "auth" for p in pages_to_score)

    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)
        context = None
        try:
            context = browser.new_context(
                viewport={"width": 1440, "height": 900},
                ignore_https_errors=True,
            )
            page = context.new_page()

            # Login if needed
            logged_in = False
            if needs_auth and test_secret:
                role = "admin" if needs_admin else "user"
                logged_in = _login_via_test_secret(page, base_url, role, test_secret)
                if not logged_in:
                    print(
                        "WARNING: Login failed — auth/admin pages will be skipped",
                        file=sys.stderr,
                    )
            elif needs_auth:
                print(
                    "WARNING: TEST_LOGIN_SECRET is not set — auth/admin pages will be skipped",
                    file=sys.stderr,
                )

            for page_def in pages_to_score:
                slug = page_def["slug"]
                auth_level = page_def["auth"]
                url = f"{base_url}{page_def['path']}"
                screenshot_path = str(screenshots_dir / f"{slug}-desktop.png")

                if auth_level in ("auth", "admin") and not logged_in:
                    print(f"  SKIP {slug}: auth required but not logged in")
                    continue

                print(f"  Scoring {slug} ({url})...")
                nav_ok = take_screenshot(page, url, screenshot_path)
                if not nav_ok:
                    print(f"  FAIL {slug}: could not navigate")
                    continue

                result = score_screenshot(screenshot_path, label=slug)
                # The model may return a string or null score; normalise so the
                # threshold comparison and table formatting cannot raise.
                score_val = _normalize_score(result.get("score", 0))
                checks = result.get("checks", {})
                if not isinstance(checks, dict):
                    checks = {}
                summary = result.get("summary", "")

                # Count passing dimensions
                passing = _count_passing(checks)
                total_dims = len(checks)

                result_entry = {
                    "page": slug,
                    "url": url,
                    "score": score_val,
                    "checks": checks,
                    "summary": summary,
                    "screenshot": screenshot_path,
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                }
                all_results.append(result_entry)

                # Append to pending-reviews.json if score < 3.0
                if score_val < FLAG_THRESHOLD:
                    append_pending_review(result_entry, pending_reviews_path)
                    action = "FLAGGED (score < 3.0)"
                else:
                    action = "OK"

                print(f"  {slug}: {score_val}/5 | {passing}/{total_dims} dimensions passing | {action}")
        finally:
            # Release browser resources even if scoring raised part-way through.
            if context is not None:
                context.close()
            browser.close()

    # Print summary table
    print("\n--- Vision Score Summary ---")
    print(f"{'Page':<25} {'Score':<8} {'Dims Pass':<12} {'Action'}")
    print("-" * 65)
    flagged = []
    for r in all_results:
        checks = r.get("checks", {})
        dims = f"{_count_passing(checks)}/{len(checks)}"
        is_flagged = r["score"] < FLAG_THRESHOLD
        if is_flagged:
            flagged.append(r)
        action = "FLAGGED" if is_flagged else "OK"
        print(f"{r['page']:<25} {r['score']:<8} {dims:<12} {action}")

    # Write output JSON
    output_dir = Path(output_path).parent
    output_dir.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2)

    print(f"\nResults written to {output_path}")
    if flagged:
        print(f"{len(flagged)} page(s) flagged in {pending_reviews_path}")

    return 0


def main() -> int:
    """Parse the command line and run changed-pages or legacy mode.

    Returns the process exit code: 1 when --changed is given without --url,
    otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description="Score screenshots with Claude Vision",
    )
    parser.add_argument("--changed", action="store_true",
                        help="Score git-changed pages (requires --url)")
    parser.add_argument("--url",
                        help="Base URL (e.g. https://sfpermits-ai-staging-production.up.railway.app)")
    parser.add_argument("--sprint",
                        help="Sprint label for screenshot filenames (e.g. qs10)")
    parser.add_argument("--output",
                        help="Path for per-run JSON results (default: qa-results/vision-scores-latest.json)")
    # Allow positional round_num for legacy mode
    parser.add_argument("round_num", nargs="?", type=int, default=None,
                        help="Round number for legacy dashboard-loop mode")

    args = parser.parse_args()

    if args.changed:
        if not args.url:
            print("ERROR: --changed requires --url", file=sys.stderr)
            return 1
        return run_changed_mode(args)

    # Legacy mode: score dashboard-loop screenshots
    screenshot_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "qa-results", "screenshots", "dashboard-loop"
    )

    round_num = args.round_num if args.round_num is not None else 1

    results = {}
    for variant in ["desktop", "mobile"]:
        path = os.path.join(screenshot_dir, f"round-{round_num}-{variant}.png")
        if os.path.exists(path):
            print(f"\n--- Scoring round-{round_num}-{variant}.png ---")
            result = score_screenshot(path, label=f"round {round_num}, {variant}")
            results[variant] = result
            print(json.dumps(result, indent=2))
        else:
            print(f"SKIP: {path} not found")

    # Write results to file. Create the directory first: when no screenshots
    # have been captured yet it may not exist, and open() would fail.
    os.makedirs(screenshot_dir, exist_ok=True)
    results_path = os.path.join(screenshot_dir, f"round-{round_num}-scores.json")
    with open(results_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(f"\nScores saved to {results_path}")

    return 0


if __name__ == "__main__":
    sys.exit(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tbrennem-source/sf-permits-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

vision_score.py•17.4 KiB