SF Permits MCP Server

test_rag.py•24.6 KiB

"""Tests for the RAG knowledge retrieval system.

Tests chunker, retrieval scoring, and deduplication logic.
Embedding and store tests use mocks to avoid API/DB dependencies.
"""

import json
import os
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest


# ---------------------------------------------------------------------------
# Chunker tests
# ---------------------------------------------------------------------------


class TestChunkerTier1:
    """Test tier1 JSON chunking."""

    def test_chunk_dict_json(self, tmp_path):
        """Chunk a dict-style JSON file."""
        from src.rag.chunker import chunk_tier1_json

        data = {
            "source": "test source",
            "description": "Test data",
            "topic_a": {
                "detail": "This is a detailed section about topic A with enough content to be a valid chunk",
                "steps": ["step 1", "step 2", "step 3"],
            },
            "topic_b": "Another section with enough text to exceed the minimum chunk size threshold for inclusion",
        }
        filepath = tmp_path / "test-data.json"
        filepath.write_text(json.dumps(data))

        chunks = chunk_tier1_json(filepath)
        assert len(chunks) >= 1
        assert all(c["source_file"] == "test-data.json" for c in chunks)
        assert all("content" in c for c in chunks)
        assert all("source_section" in c for c in chunks)

    def test_chunk_list_json(self, tmp_path):
        """Chunk a list-style JSON file."""
        from src.rag.chunker import chunk_tier1_json

        data = [
            {"name": "Item A", "description": "A long enough description for a valid chunk to be created from this item"},
            {"name": "Item B", "description": "Another long enough description for a valid chunk to be created from this item"},
        ]
        filepath = tmp_path / "list-data.json"
        filepath.write_text(json.dumps(data))

        chunks = chunk_tier1_json(filepath)
        assert len(chunks) == 2
        assert chunks[0]["source_section"] == "item_0"
        assert chunks[1]["source_section"] == "item_1"

    def test_chunk_skips_metadata_keys(self, tmp_path):
        """Should skip source, source_url, last_verified, description."""
        from src.rag.chunker import chunk_tier1_json

        data = {
            "source": "sf.gov",
            "source_url": "https://sf.gov",
            "last_verified": "2026-01-01",
            "description": "Test",
            "actual_content": {
                "info": "This is the real content that should be chunked into a valid piece of text",
            },
        }
        filepath = tmp_path / "meta-test.json"
        filepath.write_text(json.dumps(data))

        chunks = chunk_tier1_json(filepath)
        # Only actual_content should be chunked
        assert len(chunks) == 1
        assert "actual_content" in chunks[0]["source_section"]

    def test_chunk_invalid_json(self, tmp_path):
        """Should return empty list for invalid JSON."""
        from src.rag.chunker import chunk_tier1_json

        filepath = tmp_path / "bad.json"
        filepath.write_text("not valid json {{{")

        chunks = chunk_tier1_json(filepath)
        assert chunks == []

    def test_real_tier1_files_chunk(self):
        """Verify actual tier1 files produce non-zero chunks."""
        from src.rag.chunker import chunk_tier1_json

        tier1_dir = Path(__file__).resolve().parent.parent / "data" / "knowledge" / "tier1"
        if not tier1_dir.exists():
            pytest.skip("tier1 directory not available")

        # Test a few known files
        checked = 0
        for name in ["otc-criteria.json", "fee-tables.json", "epr-requirements.json"]:
            filepath = tier1_dir / name
            if filepath.exists():
                checked += 1
                chunks = chunk_tier1_json(filepath)
                assert len(chunks) > 0, f"{name} produced no chunks"

        # Report a skip rather than a vacuous pass when nothing was exercised.
        if not checked:
            pytest.skip("none of the known tier1 files are available")


class TestChunkerRawText:
    """Test tier2/3 raw text chunking."""

    def test_basic_paragraph_chunking(self):
        """Ten blank-line-separated paragraphs yield more than one chunk."""
        from src.rag.chunker import chunk_raw_text

        text = "\n\n".join([
            f"Paragraph {i}: " + "This is a test paragraph with enough content. " * 5
            for i in range(10)
        ])
        chunks = chunk_raw_text(text, "test-file.txt")
        assert len(chunks) > 1
        assert all(c["source_file"] == "test-file.txt" for c in chunks)

    def test_empty_text(self):
        """Empty and whitespace-only input both yield no chunks."""
        from src.rag.chunker import chunk_raw_text

        assert chunk_raw_text("", "test.txt") == []
        assert chunk_raw_text(" ", "test.txt") == []

    def test_section_header_detection(self):
        """At least one chunk gets a section label other than "body"."""
        from src.rag.chunker import chunk_raw_text

        text = (
            "# Introduction\n\n"
            "This section introduces the concept with enough text to be a valid chunk.\n\n"
            "# Requirements\n\n"
            "This section covers requirements with enough text to be a valid chunk too.\n\n"
            "More requirements detail that adds to the paragraph length sufficiently."
        )
        chunks = chunk_raw_text(text, "test.txt")
        assert len(chunks) >= 1
        # At least one chunk should pick up a section header
        sections = [c["source_section"] for c in chunks]
        assert any(s != "body" for s in sections)


class TestChunkerCodeSections:
    """Test tier4 code section chunking."""

    def test_split_at_section_headers(self):
        """Three "Section N.N" headers produce three chunks."""
        from src.rag.chunker import chunk_code_sections

        text = (
            "Section 101.1 General Provisions\n"
            "This section establishes the general provisions for building code compliance "
            "with all requirements specified herein.\n\n"
            "Section 102.1 Applicability\n"
            "This section defines applicability criteria for the building code to various "
            "construction types and project scopes.\n\n"
            "Section 103.1 Enforcement\n"
            "This section covers code enforcement procedures and penalties for non-compliance "
            "including stop work orders and citations.\n"
        )
        chunks = chunk_code_sections(text, "building-code.txt")
        assert len(chunks) == 3
        assert "Section 101.1" in chunks[0]["source_section"]

    def test_falls_back_to_paragraph_chunking(self):
        """Text without section headers still produces at least one chunk."""
        from src.rag.chunker import chunk_code_sections

        text = "No section headers here.\n\n" + "Some paragraph content. " * 20
        chunks = chunk_code_sections(text, "test.txt")
        assert len(chunks) >= 1


# ---------------------------------------------------------------------------
# Retrieval scoring tests (no API/DB calls)
# ---------------------------------------------------------------------------


class TestRetrievalScoring:
    """Test the scoring and reranking logic in retrieval.py."""

    def test_tier_boost_values(self):
        """tier1 outranks tier4 and tier2 outranks tier3 in TIER_BOOSTS."""
        from src.rag.retrieval import TIER_BOOSTS

        assert TIER_BOOSTS["tier1"] > TIER_BOOSTS["tier4"]
        assert TIER_BOOSTS["tier2"] > TIER_BOOSTS["tier3"]

    def test_weight_sum(self):
        """Scoring weights should approximately sum to 1."""
        from src.rag.retrieval import VECTOR_WEIGHT, KEYWORD_WEIGHT, TIER_BOOST_WEIGHT

        assert abs((VECTOR_WEIGHT + KEYWORD_WEIGHT + TIER_BOOST_WEIGHT) - 1.0) < 0.01

    def test_deduplicate_identical(self):
        """Two identical-content results collapse, leaving two of three."""
        from src.rag.retrieval import _deduplicate

        results = [
            {"content": "The same content repeated here", "final_score": 0.9},
            {"content": "The same content repeated here", "final_score": 0.8},
            {"content": "Completely different content about something else", "final_score": 0.7},
        ]
        deduped = _deduplicate(results)
        assert len(deduped) == 2

    def test_deduplicate_preserves_diverse(self):
        """Three distinct results all survive deduplication."""
        from src.rag.retrieval import _deduplicate

        results = [
            {"content": "Topic A about permits and fees", "final_score": 0.9},
            {"content": "Topic B about inspections and timelines", "final_score": 0.8},
            {"content": "Topic C about zoning and land use", "final_score": 0.7},
        ]
        deduped = _deduplicate(results)
        assert len(deduped) == 3

    def test_keyword_to_chunk_matching(self):
        """A keyword matching the chunk's source file yields a high score."""
        from src.rag.retrieval import _match_keyword_to_chunk

        chunk = {
            "content": "OTC permits require specific criteria including scope limitations",
            "source_file": "otc-criteria.json",
            "source_section": "residential_interior",
        }
        keyword_scores = {"otc_criteria": 0.95, "inspections": 0.5}
        score = _match_keyword_to_chunk(chunk, keyword_scores)
        # Should get a high score because "otc_criteria" matches source_file
        assert score >= 0.9

    def test_keyword_to_chunk_no_match(self):
        """An unrelated chunk scores exactly 0.0."""
        from src.rag.retrieval import _match_keyword_to_chunk

        chunk = {
            "content": "Some unrelated content about vegetables",
            "source_file": "random.json",
            "source_section": "cooking",
        }
        keyword_scores = {"otc_criteria": 0.95}
        score = _match_keyword_to_chunk(chunk, keyword_scores)
        assert score == 0.0

    def test_keyword_to_chunk_empty_scores(self):
        """An empty keyword-score mapping scores exactly 0.0."""
        from src.rag.retrieval import _match_keyword_to_chunk

        chunk = {"content": "test", "source_file": "test.json", "source_section": "s"}
        assert _match_keyword_to_chunk(chunk, {}) == 0.0

    @patch("src.rag.embeddings.embed_query")
    @patch("src.rag.store.search")
    def test_retrieve_full_pipeline(self, mock_vector_search, mock_embed):
        """Test the full retrieve pipeline with mocked vector search."""
        from src.rag.retrieval import retrieve

        mock_embed.return_value = [0.1] * 1536
        mock_vector_search.return_value = [
            {
                "content": "OTC permits for residential interior work require scope limits",
                "source_file": "otc-criteria.json",
                "source_section": "residential_interior",
                "source_tier": "official",
                "trust_weight": 1.0,
                "similarity": 0.85,
                "metadata": {"tier": "tier1", "type": "structured"},
            },
            {
                "content": "Fee schedules for building permits based on construction cost",
                "source_file": "fee-tables.json",
                "source_section": "building_fees",
                "source_tier": "official",
                "trust_weight": 1.0,
                "similarity": 0.60,
                "metadata": {"tier": "tier1", "type": "structured"},
            },
        ]

        results = retrieve("What are OTC permit requirements?", top_k=5)
        assert len(results) == 2
        assert all("final_score" in r for r in results)
        assert all("scoring_breakdown" in r for r in results)
        # OTC result should score higher (better similarity + keyword match)
        assert results[0]["source_file"] == "otc-criteria.json"

    @patch("src.rag.embeddings.embed_query")
    @patch("src.rag.store.search")
    def test_retrieve_filters_low_similarity(self, mock_vector_search, mock_embed):
        """Results below MIN_SIMILARITY threshold should be filtered."""
        from src.rag.retrieval import retrieve, MIN_SIMILARITY

        mock_embed.return_value = [0.1] * 1536
        mock_vector_search.return_value = [
            {
                "content": "Low quality match",
                "source_file": "test.json",
                "source_section": "s",
                "source_tier": "official",
                "trust_weight": 1.0,
                "similarity": MIN_SIMILARITY - 0.01,
                "metadata": {"tier": "tier1"},
            },
        ]

        results = retrieve("test query")
        assert len(results) == 0

    @patch("src.rag.embeddings.embed_query", side_effect=RuntimeError("No API key"))
    def test_retrieve_falls_back_on_embed_error(self, mock_embed):
        """Should fall back to keyword-only when embedding fails."""
        from src.rag.retrieval import retrieve

        # This should not raise, should return keyword-only results
        results = retrieve("OTC permit requirements")
        # May return results from keyword fallback (depends on knowledge base state)
        assert isinstance(results, list)


class TestRetrieveWithContext:
    """Test the context assembly for LLM augmentation."""

    @patch("src.rag.embeddings.embed_query")
    @patch("src.rag.store.search")
    def test_context_format(self, mock_vector_search, mock_embed):
        """The assembled context names the source file and includes its content."""
        from src.rag.retrieval import retrieve_with_context

        mock_embed.return_value = [0.1] * 1536
        mock_vector_search.return_value = [
            {
                "content": "Test content about permits",
                "source_file": "permit-guide.json",
                "source_section": "overview",
                "source_tier": "official",
                "trust_weight": 1.0,
                "similarity": 0.80,
                "metadata": {"tier": "tier1"},
            },
        ]

        result = retrieve_with_context("How do permits work?")
        assert "results" in result
        assert "context" in result
        assert "query" in result
        assert result["result_count"] == 1
        assert "permit-guide.json" in result["context"]
        assert "Test content about permits" in result["context"]


# ---------------------------------------------------------------------------
# Embeddings tests (mocked)
# ---------------------------------------------------------------------------


class TestEmbeddings:
    """Test embedding client with mocked OpenAI API."""

    def test_embed_query_calls_embed_texts(self):
        """embed_query delegates to embed_texts with a one-item list."""
        from src.rag.embeddings import embed_query

        with patch("src.rag.embeddings.embed_texts") as mock:
            mock.return_value = [[0.1] * 1536]
            result = embed_query("test text")
            assert len(result) == 1536
            mock.assert_called_once_with(["test text"], model="text-embedding-3-small")

    def test_embed_texts_missing_key(self):
        """embed_texts raises RuntimeError when OPENAI_API_KEY is not set."""
        from src.rag.embeddings import embed_texts

        # clear=True empties the environment for the duration of the block,
        # so OPENAI_API_KEY is absent; the original environment is restored
        # on exit.
        with patch.dict(os.environ, {}, clear=True):
            with pytest.raises(RuntimeError, match="OPENAI_API_KEY"):
                embed_texts(["test"])


# ---------------------------------------------------------------------------
# Store tests (mocked DB)
# ---------------------------------------------------------------------------


class TestStore:
    """Test store module functions with mocked database."""

    @patch("src.rag.store._get_conn")
    def test_ensure_table(self, mock_conn):
        """ensure_table runs its DDL statements, commits once, and closes."""
        from src.rag.store import ensure_table

        mock_cursor = MagicMock()
        mock_conn.return_value.cursor.return_value = mock_cursor

        ensure_table()

        # Should execute CREATE EXTENSION, CREATE TABLE, and 2 indexes
        assert mock_cursor.execute.call_count >= 4
        mock_conn.return_value.commit.assert_called_once()
        mock_conn.return_value.close.assert_called_once()

    @patch("src.rag.store._get_conn")
    def test_insert_chunks(self, mock_conn):
        """Two chunks produce two execute calls and a single commit."""
        from src.rag.store import insert_chunks

        mock_cursor = MagicMock()
        mock_conn.return_value.cursor.return_value = mock_cursor

        chunks = [
            {"content": "Test chunk 1", "source_file": "test.json", "source_section": "s1", "metadata": {}},
            {"content": "Test chunk 2", "source_file": "test.json", "source_section": "s2", "metadata": {}},
        ]
        embeddings = [[0.1] * 1536, [0.2] * 1536]

        insert_chunks(chunks, embeddings, source_tier="official", trust_weight=1.0)

        assert mock_cursor.execute.call_count == 2
        mock_conn.return_value.commit.assert_called_once()

    @patch("src.rag.store._get_conn")
    def test_insert_chunks_mismatched_lengths(self, mock_conn):
        """A chunk/embedding count mismatch raises ValueError."""
        from src.rag.store import insert_chunks

        with pytest.raises(ValueError, match="Chunk count"):
            insert_chunks([{"content": "a"}], [[0.1] * 1536, [0.2] * 1536])

    @patch("src.rag.store._get_conn")
    def test_search_returns_results(self, mock_conn):
        """A fetched row is returned as a dict with content and similarity."""
        from src.rag.store import search

        mock_cursor = MagicMock()
        mock_conn.return_value.cursor.return_value = mock_cursor
        mock_cursor.fetchall.return_value = [
            ("content text", "file.json", "section", "official", 1.0, 0.85, "{}"),
        ]

        results = search([0.1] * 1536, top_k=5)

        assert len(results) == 1
        assert results[0]["content"] == "content text"
        assert results[0]["similarity"] == 0.85

    @patch("src.rag.store._get_conn")
    def test_search_with_tier_filter(self, mock_conn):
        """Passing source_tier makes the executed SQL mention source_tier."""
        from src.rag.store import search

        mock_cursor = MagicMock()
        mock_conn.return_value.cursor.return_value = mock_cursor
        mock_cursor.fetchall.return_value = []

        results = search([0.1] * 1536, top_k=5, source_tier="official")

        assert results == []
        # Should have used the tier-filtered query
        call_args = mock_cursor.execute.call_args
        assert "source_tier" in call_args[0][0]

    @patch("src.rag.store._get_conn")
    def test_get_stats(self, mock_conn):
        """get_stats reports the total chunk count and per-tier counts."""
        from src.rag.store import get_stats

        mock_cursor = MagicMock()
        mock_conn.return_value.cursor.return_value = mock_cursor
        mock_cursor.fetchall.side_effect = [
            [("official", 100), ("amy", 20)],  # tier counts
            [("file1.json", 50), ("file2.json", 30)],  # top files
        ]
        mock_cursor.fetchone.return_value = (120,)

        stats = get_stats()

        assert stats["total_chunks"] == 120
        assert stats["by_tier"]["official"] == 100

    @patch("src.rag.store._get_conn")
    def test_clear_tier(self, mock_conn):
        """clear_tier returns the cursor's rowcount and commits once."""
        from src.rag.store import clear_tier

        mock_cursor = MagicMock()
        mock_conn.return_value.cursor.return_value = mock_cursor
        mock_cursor.rowcount = 50

        deleted = clear_tier("official")

        assert deleted == 50
        mock_conn.return_value.commit.assert_called_once()


# ---------------------------------------------------------------------------
# Ingestion script tests
# ---------------------------------------------------------------------------


class TestIngestionScript:
    """Test the ingestion script helper functions."""

    def test_ingest_tier1_dry_run(self):
        """Dry run should count chunks without calling API."""
        from scripts.rag_ingest import ingest_tier1

        count = ingest_tier1(dry_run=True)
        # Should produce chunks from actual tier1 files
        assert count > 0

    def test_tier_trust_weights(self):
        """Trust weights strictly decrease from tier1 (1.0) through tier3."""
        from scripts.rag_ingest import TIER_TRUST

        assert TIER_TRUST["tier1"] == 1.0
        assert TIER_TRUST["tier2"] < TIER_TRUST["tier1"]
        assert TIER_TRUST["tier3"] < TIER_TRUST["tier2"]


# ---------------------------------------------------------------------------
# Clean chunk content tests
# ---------------------------------------------------------------------------


class TestCleanChunkContent:
    """Test the _clean_chunk_content helper in web/app.py.

    Uses the Flask test app to get proper context for imports.
    """

    @pytest.fixture(autouse=True)
    def setup_app(self):
        """Import the app and helper once per test and stash them on self."""
        from web.app import app, _clean_chunk_content

        self._app = app
        self._clean_fn = _clean_chunk_content

    def _clean(self, content, source_file=""):
        """Call the helper under test inside the application context."""
        with self._app.app_context():
            return self._clean_fn(content, source_file)

    def test_strips_filename_prefix(self):
        """A leading "[name]" prefix is removed; the content stays."""
        result = self._clean("[epr-requirements] Some content here")
        assert "[epr-requirements]" not in result
        assert "Some content here" in result

    def test_converts_key_value_to_bold(self):
        """A snake_case key becomes a bold, title-cased label."""
        result = self._clean("permit_type: Building Permit")
        assert "**Permit Type**:" in result
        assert "Building Permit" in result

    def test_converts_quote_to_blockquote(self):
        """A "quote:" line becomes a quoted blockquote."""
        result = self._clean('quote: This is important info')
        assert '> "This is important info"' in result

    def test_preserves_bullet_lists(self):
        """Bullet list items pass through unchanged."""
        result = self._clean("- Item one\n- Item two")
        assert "- Item one" in result
        assert "- Item two" in result

    def test_wraps_quoted_strings(self):
        """A fully quoted line becomes a blockquote."""
        result = self._clean('"This is a quoted statement"')
        assert '> "This is a quoted statement"' in result

    def test_strips_empty_lines(self):
        """Runs of blank lines collapse to a single blank line."""
        result = self._clean("Line one\n\n\n\nLine two")
        assert result == "Line one\n\nLine two"

    def test_empty_input(self):
        """Empty input returns an empty string."""
        result = self._clean("")
        assert result == ""


# ---------------------------------------------------------------------------
# Draft intent classification tests
# ---------------------------------------------------------------------------


class TestDraftResponseIntent:
    """Test the draft_response intent classification."""

    def test_greeting_start(self):
        """A message opening with a greeting classifies as draft_response."""
        from src.tools.intent_router import classify

        result = classify("Hi Amy, a client wants to convert their garage to an ADU, what permits?")
        assert result.intent == "draft_response"

    def test_client_asking(self):
        """"Client is asking ..." classifies as draft_response."""
        from src.tools.intent_router import classify

        result = classify("Client is asking about the timeline for a kitchen remodel permit")
        assert result.intent == "draft_response"

    def test_homeowner_wants(self):
        """"Homeowner wants ..." classifies as draft_response."""
        from src.tools.intent_router import classify

        result = classify("Homeowner wants to know if they need a permit for replacing windows")
        assert result.intent == "draft_response"

    def test_how_should_respond(self):
        """"How should I respond ..." classifies as draft_response."""
        from src.tools.intent_router import classify

        result = classify("How should I respond to a question about seismic retrofit requirements?")
        assert result.intent == "draft_response"

    def test_draft_prefix(self):
        """A "Draft:" prefix classifies as draft_response."""
        from src.tools.intent_router import classify

        result = classify("Draft: response about ADU permit requirements and timeline")
        assert result.intent == "draft_response"

    def test_long_question_with_question_mark(self):
        """A long question ending in "?" classifies as draft_response."""
        from src.tools.intent_router import classify

        # 150+ chars with a ?
        long_q = "I have a client who is interested in purchasing a property in the Sunset district and they want to know what kinds of renovations would require permits and what the general timeline would be for getting approvals?"
        result = classify(long_q)
        assert result.intent == "draft_response"

    def test_short_question_stays_general(self):
        """A short question is not classified as draft_response."""
        from src.tools.intent_router import classify

        result = classify("What is EPR?")
        assert result.intent != "draft_response"

    def test_address_still_matches_address(self):
        """A street address still classifies as search_address."""
        from src.tools.intent_router import classify

        result = classify("123 Main St")
        assert result.intent == "search_address"

    def test_permit_number_still_matches(self):
        """A 12-digit permit number still classifies as lookup_permit."""
        from src.tools.intent_router import classify

        result = classify("202301012345")
        assert result.intent == "lookup_permit"


# ---------------------------------------------------------------------------
# insert_single_note tests (mocked DB + embeddings)
# ---------------------------------------------------------------------------


class TestInsertSingleNote:
    """Test the insert_single_note helper."""

    @patch("src.rag.store.insert_chunks")
    @patch("src.rag.embeddings.embed_texts")
    def test_basic_insert(self, mock_embed, mock_insert):
        """One note is embedded once and inserted as an "amy"-tier chunk."""
        from src.rag.store import insert_single_note

        mock_embed.return_value = [[0.1] * 1536]

        result = insert_single_note("This is an expert correction about OTC criteria", {
            "added_by_user_id": 42,
            "firm_id": None,
            "query_context": "What is OTC?",
        })

        assert result == 1
        mock_embed.assert_called_once()
        mock_insert.assert_called_once()

        # Check the chunk passed to insert_chunks
        call_args = mock_insert.call_args
        chunks = call_args[0][0]
        assert chunks[0]["content"] == "This is an expert correction about OTC criteria"
        assert chunks[0]["source_file"] == "expert-note"
        assert call_args[1]["source_tier"] == "amy"
        assert call_args[1]["trust_weight"] == 0.9

    @patch("src.rag.store.insert_chunks")
    @patch("src.rag.embeddings.embed_texts")
    def test_metadata_includes_timestamp(self, mock_embed, mock_insert):
        """The inserted chunk's metadata carries an "added_at" key."""
        from src.rag.store import insert_single_note

        mock_embed.return_value = [[0.1] * 1536]

        insert_single_note("A note about something", {"added_by_user_id": 1})

        chunks = mock_insert.call_args[0][0]
        assert "added_at" in chunks[0]["metadata"]

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tbrennem-source/sf-permits-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_rag.py•24.6 KiB