SF Permits MCP Server

test_data_quality.py•8.8 KiB

"""Tests for web.data_quality check functions. Mocks _timed_query and _raw_query to test threshold logic without a live DB. """ from __future__ import annotations from datetime import date, timedelta from unittest.mock import patch import pytest # ── Helpers ────────────────────────────────────────────────────── def _mock_raw(return_val): """Create a mock for _raw_query that returns *return_val*.""" return patch("web.data_quality._raw_query", return_value=return_val) def _mock_timed(return_val): """Create a mock for _timed_query that returns *return_val*.""" return patch("web.data_quality._timed_query", return_value=return_val) # ── _check_orphaned_contacts ───────────────────────────────────── class TestOrphanedContacts: """Orphaned contacts check: contacts without resolved entities.""" def test_green_when_below_5pct(self): from web.data_quality import _check_orphaned_contacts # 2% orphaned (20 of 1000) with _mock_timed([(20,)]) as mq: with _mock_timed([(1000,)]): # Need to handle two calls — first for orphans, second for total pass # Simpler: patch once, use side_effect with patch("web.data_quality._timed_query", side_effect=[[(20,)], [(1000,)]]): result = _check_orphaned_contacts() assert result["status"] == "green" assert result["name"] == "Unresolved Contacts" def test_yellow_between_5_and_10pct(self): from web.data_quality import _check_orphaned_contacts # 8% orphaned (80 of 1000) with patch("web.data_quality._timed_query", side_effect=[[(80,)], [(1000,)]]): result = _check_orphaned_contacts() assert result["status"] == "yellow" def test_red_above_10pct(self): from web.data_quality import _check_orphaned_contacts # 15% orphaned (150 of 1000) with patch("web.data_quality._timed_query", side_effect=[[(150,)], [(1000,)]]): result = _check_orphaned_contacts() assert result["status"] == "red" # ── _check_rag_chunk_count ─────────────────────────────────────── class TestRagChunkCount: """RAG chunk count check: dynamic baseline from cache.""" def 
test_green_stable_count(self): from web.data_quality import _check_rag_chunk_count # Count = 1050, no duplicates, no cache (uses self-baseline) with patch("web.data_quality._raw_query", side_effect=[ [(1050,)], # total count [(1050,)], # distinct count [], # no cache ]): result = _check_rag_chunk_count() assert result["status"] == "green" assert "1,050" in result["value"] def test_red_when_duplicates_exceed_50(self): from web.data_quality import _check_rag_chunk_count # 1200 total, 1100 distinct = 100 duplicates with patch("web.data_quality._raw_query", side_effect=[ [(1200,)], # total count [(1100,)], # distinct count [], # no cache ]): result = _check_rag_chunk_count() assert result["status"] == "red" assert "duplicates" in result["detail"] def test_red_when_count_drops_below_70pct(self): """If count drops >30% from cached baseline → red.""" import json from web.data_quality import _check_rag_chunk_count cached = json.dumps([{"name": "RAG Chunks", "value": "1,000"}]) with patch("web.data_quality._raw_query", side_effect=[ [(600,)], # total count (60% of 1000) [(600,)], # distinct count [(cached,)], # cache with previous value ]): result = _check_rag_chunk_count() assert result["status"] == "red" assert "data loss" in result["detail"] def test_zero_chunks_is_red(self): from web.data_quality import _check_rag_chunk_count with patch("web.data_quality._raw_query", side_effect=[ [(0,)], [(0,)], [], ]): result = _check_rag_chunk_count() assert result["status"] == "red" # ── _check_addenda_freshness ──────────────────────────────────── class TestAddendaFreshness: """Addenda freshness: age of most recent finish_date.""" def test_green_when_recent(self): from web.data_quality import _check_addenda_freshness recent = date.today() - timedelta(days=5) with _mock_raw([(str(recent),)]): result = _check_addenda_freshness() assert result["status"] == "green" assert result["name"] == "Addenda Freshness" def test_yellow_30_to_60_days(self): from web.data_quality import 
_check_addenda_freshness old = date.today() - timedelta(days=45) with _mock_raw([(str(old),)]): result = _check_addenda_freshness() assert result["status"] == "yellow" def test_red_over_60_days(self): from web.data_quality import _check_addenda_freshness very_old = date.today() - timedelta(days=90) with _mock_raw([(str(very_old),)]): result = _check_addenda_freshness() assert result["status"] == "red" def test_table_not_exists(self): from web.data_quality import _check_addenda_freshness with patch("web.data_quality._raw_query", side_effect=Exception("relation does not exist")): result = _check_addenda_freshness() assert result["status"] == "yellow" assert "not available" in result["detail"] def test_no_data(self): from web.data_quality import _check_addenda_freshness with _mock_raw([(None,)]): result = _check_addenda_freshness() assert result["status"] == "red" # ── _check_station_velocity_freshness ──────────────────────────── class TestStationVelocityFreshness: """Station velocity freshness: age of computed_at.""" def test_green_when_recent(self): from web.data_quality import _check_station_velocity_freshness recent = date.today() - timedelta(days=2) with _mock_raw([(str(recent),)]): result = _check_station_velocity_freshness() assert result["status"] == "green" assert result["name"] == "Station Velocity" def test_yellow_7_to_14_days(self): from web.data_quality import _check_station_velocity_freshness old = date.today() - timedelta(days=10) with _mock_raw([(str(old),)]): result = _check_station_velocity_freshness() assert result["status"] == "yellow" def test_red_over_14_days(self): from web.data_quality import _check_station_velocity_freshness very_old = date.today() - timedelta(days=20) with _mock_raw([(str(very_old),)]): result = _check_station_velocity_freshness() assert result["status"] == "red" def test_table_not_exists(self): from web.data_quality import _check_station_velocity_freshness with patch("web.data_quality._raw_query", 
side_effect=Exception("relation does not exist")): result = _check_station_velocity_freshness() assert result["status"] == "yellow" assert "not available" in result["detail"] # ── run_all_checks ─────────────────────────────────────────────── class TestRunAllChecks: """Integration test for run_all_checks.""" def test_sorts_red_first(self): from web.data_quality import run_all_checks # Mock BACKEND to skip prod checks and mock all universal checks with patch("src.db.BACKEND", "duckdb"): with patch("web.data_quality._timed_query", return_value=[(0,)]): results = run_all_checks() # Results should be sorted: red first, yellow, green statuses = [r["status"] for r in results] status_values = {"red": 0, "yellow": 1, "green": 2} assert statuses == sorted(statuses, key=lambda s: status_values.get(s, 9)) def test_check_failure_produces_error_entry(self): from web.data_quality import run_all_checks with patch("src.db.BACKEND", "duckdb"): with patch("web.data_quality._timed_query", side_effect=Exception("boom")): results = run_all_checks() # Should have error entries, not crash assert len(results) > 0 error_results = [r for r in results if r["value"] == "Error"] assert len(error_results) > 0

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tbrennem-source/sf-permits-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_data_quality.py•8.8 KiB