DM20 Protocol

test_module_indexer.py•31.4 KiB

""" Tests for module content chunking and ChromaDB indexing. Tests the ModuleIndexer class including text chunking with overlap, metadata attachment, special content detection (boxed text, stat blocks), re-indexing detection, and full module indexing workflow. VectorStoreManager and PDF access are mocked to keep tests fast and independent of external resources. """ import hashlib from pathlib import Path from typing import Any from unittest.mock import MagicMock, patch import pytest from dm20_protocol.claudmaster.module_indexer import ( ChunkConfig, IndexingResult, ModuleIndexer, _is_boxed_text, _is_stat_block, extract_text_from_pdf, ) from dm20_protocol.claudmaster.models.module import ( ContentType, ModuleElement, ModuleStructure, NPCReference, LocationReference, ) from dm20_protocol.claudmaster.vector_store import ( CollectionNotFoundError, VectorStoreManager, ) # --------------------------------------------------------------------------- # Helpers and fixtures # --------------------------------------------------------------------------- def _make_module_structure( module_id: str = "test-module", title: str = "Test Module", chapters: list[ModuleElement] | None = None, npcs: list[NPCReference] | None = None, locations: list[LocationReference] | None = None, ) -> ModuleStructure: """Create a minimal ModuleStructure for testing.""" return ModuleStructure( module_id=module_id, title=title, source_file="test-module.pdf", chapters=chapters or [], npcs=npcs or [], locations=locations or [], ) def _make_mock_store() -> MagicMock: """Create a mocked VectorStoreManager.""" store = MagicMock(spec=VectorStoreManager) store.list_collections.return_value = [] store._collection_name.side_effect = lambda mid: f"mod_{mid}" store._get_embedding_function.return_value = MagicMock() return store def _sample_text(length: int = 600) -> str: """Generate sample adventure text of approximately *length* characters.""" paragraph = ( "The ancient stone corridor stretches into darkness 
ahead. " "Flickering torchlight reveals worn carvings on the walls " "depicting scenes of forgotten battles. A faint smell of " "decay hangs in the air." ) # Repeat to reach desired length parts = [] current = 0 while current < length: parts.append(paragraph) current += len(paragraph) + 2 # +2 for "\n\n" return "\n\n".join(parts) STAT_BLOCK_TEXT = """ Goblin Boss Small humanoid, neutral evil Armor Class 17 (chain shirt, shield) Hit Points 21 (6d6) Speed 30 ft. STR 10 (+0) DEX 14 (+2) CON 10 (+0) INT 10 (+0) WIS 8 (-1) CHA 10 (+0) Senses darkvision 60 ft., passive Perception 9 Languages Common, Goblin Challenge 1 (200 XP) """ BOXED_TEXT = """ Read the following aloud: You step through the heavy oak doors into a vast chamber. The ceiling soars fifty feet overhead, supported by massive stone pillars. At the far end, a figure sits upon a throne of bones. """ @pytest.fixture def mock_store() -> MagicMock: """Mocked VectorStoreManager.""" return _make_mock_store() @pytest.fixture def default_config() -> ChunkConfig: """Default ChunkConfig.""" return ChunkConfig() @pytest.fixture def indexer(mock_store: MagicMock) -> ModuleIndexer: """ModuleIndexer backed by a mocked store.""" return ModuleIndexer(mock_store) @pytest.fixture def small_config() -> ChunkConfig: """Small chunk config for testing splitting behaviour.""" return ChunkConfig(chunk_size=200, chunk_overlap=50, min_chunk_size=50) @pytest.fixture def small_indexer(mock_store: MagicMock, small_config: ChunkConfig) -> ModuleIndexer: """ModuleIndexer with small chunks.""" return ModuleIndexer(mock_store, chunk_config=small_config) # --------------------------------------------------------------------------- # ChunkConfig tests # --------------------------------------------------------------------------- class TestChunkConfig: """Tests for ChunkConfig dataclass defaults and customization.""" def test_default_values(self) -> None: cfg = ChunkConfig() assert cfg.chunk_size == 500 assert cfg.chunk_overlap == 100 assert 
cfg.min_chunk_size == 100 assert cfg.respect_paragraphs is True assert cfg.respect_sections is True def test_custom_values(self) -> None: cfg = ChunkConfig( chunk_size=1000, chunk_overlap=200, min_chunk_size=50, respect_paragraphs=False, respect_sections=False, ) assert cfg.chunk_size == 1000 assert cfg.chunk_overlap == 200 assert cfg.min_chunk_size == 50 assert cfg.respect_paragraphs is False assert cfg.respect_sections is False # --------------------------------------------------------------------------- # IndexingResult tests # --------------------------------------------------------------------------- class TestIndexingResult: """Tests for IndexingResult dataclass.""" def test_default_fields(self) -> None: result = IndexingResult( module_id="test", chunks_created=10, chapters_indexed=3, npcs_indexed=2, locations_indexed=5, indexing_time_seconds=1.5, ) assert result.module_id == "test" assert result.chunks_created == 10 assert result.chapters_indexed == 3 assert result.npcs_indexed == 2 assert result.locations_indexed == 5 assert result.indexing_time_seconds == 1.5 assert result.skipped is False assert result.errors == [] def test_errors_mutable_default(self) -> None: """Ensure errors default is a fresh list per instance.""" r1 = IndexingResult("a", 0, 0, 0, 0, 0.0) r2 = IndexingResult("b", 0, 0, 0, 0, 0.0) r1.errors.append("something") assert r2.errors == [] # --------------------------------------------------------------------------- # Special content detection # --------------------------------------------------------------------------- class TestSpecialContentDetection: """Tests for boxed text and stat block detection.""" def test_is_stat_block_positive(self) -> None: assert _is_stat_block(STAT_BLOCK_TEXT) is True def test_is_stat_block_negative(self) -> None: assert _is_stat_block("The goblin runs away screaming.") is False def test_is_stat_block_partial(self) -> None: """Even a partial stat block header should be detected.""" text = "Armor Class 15\nHit 
Points 30\nSpeed 30 ft." assert _is_stat_block(text) is True def test_is_boxed_text_positive(self) -> None: assert _is_boxed_text(BOXED_TEXT) is True def test_is_boxed_text_with_read_aloud(self) -> None: text = "Read aloud: The room is dark and cold." assert _is_boxed_text(text) is True def test_is_boxed_text_negative(self) -> None: assert _is_boxed_text("The party enters the dungeon.") is False def test_is_boxed_text_quoted(self) -> None: text = '"You hear a distant rumbling as the walls begin to shake."' assert _is_boxed_text(text) is True # --------------------------------------------------------------------------- # chunk_text tests # --------------------------------------------------------------------------- class TestChunkText: """Tests for the chunk_text method.""" def _basic_context(self) -> dict[str, Any]: return { "module_id": "test-mod", "chapter": "Chapter 1", "section": "Section A", "content_type": "narrative", "page_start": 1, "page_end": 3, "npc_names": ["Strahd"], "location_names": ["Castle Ravenloft"], } def test_empty_text_returns_empty(self, indexer: ModuleIndexer) -> None: result = indexer.chunk_text("", self._basic_context()) assert result == [] def test_whitespace_only_returns_empty(self, indexer: ModuleIndexer) -> None: result = indexer.chunk_text(" \n\n ", self._basic_context()) assert result == [] def test_short_text_single_chunk(self, indexer: ModuleIndexer) -> None: text = "A short paragraph about a goblin." result = indexer.chunk_text(text, self._basic_context()) assert len(result) == 1 assert result[0][0] == text def test_metadata_attached_to_chunks(self, indexer: ModuleIndexer) -> None: text = "The dark tower looms in the distance." 
result = indexer.chunk_text(text, self._basic_context()) assert len(result) == 1 _, meta = result[0] assert meta["module_id"] == "test-mod" assert meta["chapter"] == "Chapter 1" assert meta["section"] == "Section A" assert meta["content_type"] == "narrative" assert meta["page_start"] == 1 assert meta["page_end"] == 3 assert meta["chunk_index"] == 0 assert meta["total_chunks_in_section"] == 1 assert meta["is_boxed_text"] is False assert meta["is_stat_block"] is False def test_multiple_chunks_created(self, small_indexer: ModuleIndexer) -> None: """Long text should be split into multiple chunks.""" text = _sample_text(800) result = small_indexer.chunk_text(text, self._basic_context()) assert len(result) > 1 def test_chunk_index_sequential(self, small_indexer: ModuleIndexer) -> None: text = _sample_text(800) result = small_indexer.chunk_text(text, self._basic_context()) for i, (_, meta) in enumerate(result): assert meta["chunk_index"] == i def test_total_chunks_in_section_consistent(self, small_indexer: ModuleIndexer) -> None: text = _sample_text(800) result = small_indexer.chunk_text(text, self._basic_context()) total = len(result) for _, meta in result: assert meta["total_chunks_in_section"] == total def test_overlap_present_in_later_chunks(self) -> None: """Second chunk should start with overlap from the first.""" config = ChunkConfig(chunk_size=100, chunk_overlap=30, min_chunk_size=20) store = _make_mock_store() indexer = ModuleIndexer(store, chunk_config=config) # Create two paragraphs that together exceed chunk_size para1 = "A" * 80 para2 = "B" * 80 text = f"{para1}\n\n{para2}" ctx = { "module_id": "m", "chapter": "c", "section": "s", "content_type": "narrative", "page_start": 1, "page_end": 1, "npc_names": [], "location_names": [], } result = indexer.chunk_text(text, ctx) assert len(result) >= 2 # The second chunk should contain some characters from the first # because of overlap second_text = result[1][0] assert len(second_text) > len(para2) def 
test_npc_cross_reference(self, indexer: ModuleIndexer) -> None: """NPCs mentioned in text should appear in metadata.""" text = "Count Strahd surveyed the courtyard of Castle Ravenloft." ctx = self._basic_context() ctx["npc_names"] = ["Strahd"] ctx["location_names"] = ["Castle Ravenloft"] result = indexer.chunk_text(text, ctx) _, meta = result[0] assert "Strahd" in meta["npcs_referenced"] assert "Castle Ravenloft" in meta["locations_referenced"] def test_no_npc_cross_reference_when_absent(self, indexer: ModuleIndexer) -> None: text = "The empty room holds nothing of interest." ctx = self._basic_context() ctx["npc_names"] = ["Strahd"] ctx["location_names"] = ["Castle Ravenloft"] result = indexer.chunk_text(text, ctx) _, meta = result[0] assert meta["npcs_referenced"] == "" assert meta["locations_referenced"] == "" def test_stat_block_detection_in_chunk(self, indexer: ModuleIndexer) -> None: result = indexer.chunk_text(STAT_BLOCK_TEXT, self._basic_context()) assert len(result) >= 1 _, meta = result[0] assert meta["is_stat_block"] is True # Stat blocks override content_type to "npc" assert meta["content_type"] == "npc" def test_boxed_text_detection_in_chunk(self, indexer: ModuleIndexer) -> None: result = indexer.chunk_text(BOXED_TEXT, self._basic_context()) assert len(result) >= 1 _, meta = result[0] assert meta["is_boxed_text"] is True def test_respect_paragraphs_false(self) -> None: """With respect_paragraphs=False, split on newlines instead.""" config = ChunkConfig( chunk_size=500, chunk_overlap=0, min_chunk_size=10, respect_paragraphs=False, ) store = _make_mock_store() indexer = ModuleIndexer(store, chunk_config=config) text = "Line one.\nLine two.\nLine three." 
ctx = { "module_id": "m", "chapter": "c", "section": "s", "content_type": "narrative", "page_start": 1, "page_end": 1, "npc_names": [], "location_names": [], } result = indexer.chunk_text(text, ctx) # All lines should end up in a single chunk (short text) assert len(result) == 1 def test_min_chunk_merge(self) -> None: """A trailing short chunk should be merged into the previous one.""" config = ChunkConfig(chunk_size=100, chunk_overlap=0, min_chunk_size=50) store = _make_mock_store() indexer = ModuleIndexer(store, chunk_config=config) para1 = "A" * 90 para2 = "B" * 20 # Below min_chunk_size text = f"{para1}\n\n{para2}" ctx = { "module_id": "m", "chapter": "c", "section": "s", "content_type": "narrative", "page_start": 1, "page_end": 1, "npc_names": [], "location_names": [], } result = indexer.chunk_text(text, ctx) # The short trailing chunk should be merged assert len(result) == 1 assert "B" * 20 in result[0][0] def test_force_split_long_paragraph(self) -> None: """A single paragraph exceeding chunk_size is force-split.""" config = ChunkConfig(chunk_size=100, chunk_overlap=0, min_chunk_size=10) store = _make_mock_store() indexer = ModuleIndexer(store, chunk_config=config) long_text = "word " * 50 # ~250 chars ctx = { "module_id": "m", "chapter": "c", "section": "s", "content_type": "narrative", "page_start": 1, "page_end": 1, "npc_names": [], "location_names": [], } result = indexer.chunk_text(long_text.strip(), ctx) assert len(result) >= 2 # No chunk should exceed chunk_size significantly # (overlap may add a bit) for chunk_text, _ in result: assert len(chunk_text) <= 200 # generous bound accounting for overlap # --------------------------------------------------------------------------- # is_indexed / get_index_metadata / delete_index # --------------------------------------------------------------------------- class TestIndexManagement: """Tests for index lifecycle methods.""" def test_is_indexed_false_initially(self, indexer: ModuleIndexer, mock_store: 
MagicMock) -> None: mock_store.list_collections.return_value = [] assert indexer.is_indexed("some-module") is False def test_is_indexed_true(self, indexer: ModuleIndexer, mock_store: MagicMock) -> None: mock_store.list_collections.return_value = ["some-module"] assert indexer.is_indexed("some-module") is True def test_get_index_metadata_returns_none_when_not_indexed( self, indexer: ModuleIndexer, mock_store: MagicMock, ) -> None: mock_store.list_collections.return_value = [] assert indexer.get_index_metadata("absent") is None def test_get_index_metadata_returns_dict( self, indexer: ModuleIndexer, mock_store: MagicMock, ) -> None: mock_store.list_collections.return_value = ["my-mod"] mock_store._collection_name.return_value = "mod_my-mod" col = MagicMock() col.metadata = {"source_hash": "abc123", "indexed_at": "2026-02-05T00:00:00Z"} mock_store._client = MagicMock() mock_store._client.get_collection.return_value = col mock_store._get_embedding_function.return_value = MagicMock() meta = indexer.get_index_metadata("my-mod") assert meta is not None assert meta["source_hash"] == "abc123" def test_delete_index_calls_store(self, indexer: ModuleIndexer, mock_store: MagicMock) -> None: indexer.delete_index("mod-to-delete") mock_store.delete_collection.assert_called_once_with("mod-to-delete") def test_delete_index_propagates_error( self, indexer: ModuleIndexer, mock_store: MagicMock, ) -> None: mock_store.delete_collection.side_effect = CollectionNotFoundError("not found") with pytest.raises(CollectionNotFoundError): indexer.delete_index("ghost") # --------------------------------------------------------------------------- # index_module tests # --------------------------------------------------------------------------- class TestIndexModule: """Tests for the full index_module workflow.""" @pytest.fixture def sample_structure(self) -> ModuleStructure: return _make_module_structure( module_id="curse-of-strahd", title="Curse of Strahd", chapters=[ ModuleElement( name="Chapter 
1: Into the Mists", content_type=ContentType.CHAPTER, page_start=1, page_end=10, children=["Death House"], ), ModuleElement( name="Death House", content_type=ContentType.SECTION, page_start=3, page_end=8, parent="Chapter 1: Into the Mists", ), ], npcs=[ NPCReference(name="Strahd", chapter="Chapter 1", page=5), ], locations=[ LocationReference(name="Castle Ravenloft", chapter="Chapter 1", page=3), ], ) @pytest.fixture def tmp_pdf(self, tmp_path: Path) -> Path: """Create a dummy PDF-like file for hashing.""" pdf = tmp_path / "curse-of-strahd.pdf" pdf.write_bytes(b"fake pdf content for hashing") return pdf def test_index_module_basic( self, mock_store: MagicMock, sample_structure: ModuleStructure, tmp_pdf: Path, ) -> None: """Basic indexing creates collection and adds documents.""" indexer = ModuleIndexer(mock_store) with patch( "dm20_protocol.claudmaster.module_indexer.extract_text_from_pdf", return_value="The mists close in around the party. Strahd watches from Castle Ravenloft.", ): result = indexer.index_module(sample_structure, str(tmp_pdf)) assert result.module_id == "curse-of-strahd" assert result.chunks_created > 0 assert result.chapters_indexed >= 1 assert result.skipped is False assert result.errors == [] # Verify store interactions mock_store.create_collection.assert_called_once() mock_store.add_documents.assert_called_once() def test_index_module_skips_unchanged( self, mock_store: MagicMock, sample_structure: ModuleStructure, tmp_pdf: Path, ) -> None: """Already indexed and unchanged module should be skipped.""" # Simulate module is already indexed with matching hash file_hash = hashlib.sha256(b"fake pdf content for hashing").hexdigest() mock_store.list_collections.return_value = ["curse-of-strahd"] mock_store._collection_name.return_value = "mod_curse-of-strahd" col = MagicMock() col.metadata = {"source_hash": file_hash} mock_store._client = MagicMock() mock_store._client.get_collection.return_value = col mock_store._get_embedding_function.return_value = 
MagicMock() indexer = ModuleIndexer(mock_store) result = indexer.index_module(sample_structure, str(tmp_pdf)) assert result.skipped is True assert result.chunks_created == 0 mock_store.add_documents.assert_not_called() def test_index_module_force_reindex( self, mock_store: MagicMock, sample_structure: ModuleStructure, tmp_pdf: Path, ) -> None: """force_reindex=True should re-index even when unchanged.""" mock_store.list_collections.return_value = ["curse-of-strahd"] indexer = ModuleIndexer(mock_store) with patch( "dm20_protocol.claudmaster.module_indexer.extract_text_from_pdf", return_value="Some adventure text about Strahd.", ): result = indexer.index_module( sample_structure, str(tmp_pdf), force_reindex=True, ) assert result.skipped is False assert result.chunks_created > 0 # Old collection should have been deleted mock_store.delete_collection.assert_called_once_with("curse-of-strahd") mock_store.create_collection.assert_called_once() def test_index_module_handles_extraction_error( self, mock_store: MagicMock, sample_structure: ModuleStructure, tmp_pdf: Path, ) -> None: """Errors during text extraction should be recorded but not fatal.""" indexer = ModuleIndexer(mock_store) with patch( "dm20_protocol.claudmaster.module_indexer.extract_text_from_pdf", side_effect=RuntimeError("PDF read failure"), ): result = indexer.index_module(sample_structure, str(tmp_pdf)) # Should still return a result with errors assert len(result.errors) > 0 assert "PDF read failure" in result.errors[0] def test_index_module_empty_text_skipped( self, mock_store: MagicMock, sample_structure: ModuleStructure, tmp_pdf: Path, ) -> None: """Elements with no extractable text should be skipped gracefully.""" indexer = ModuleIndexer(mock_store) with patch( "dm20_protocol.claudmaster.module_indexer.extract_text_from_pdf", return_value="", ): result = indexer.index_module(sample_structure, str(tmp_pdf)) assert result.chunks_created == 0 assert result.errors == [] def 
test_index_module_npc_and_location_counts( self, mock_store: MagicMock, sample_structure: ModuleStructure, tmp_pdf: Path, ) -> None: """NPC and location cross-reference counts should be populated.""" indexer = ModuleIndexer(mock_store) with patch( "dm20_protocol.claudmaster.module_indexer.extract_text_from_pdf", return_value="Strahd lurks in Castle Ravenloft, watching.", ): result = indexer.index_module(sample_structure, str(tmp_pdf)) assert result.npcs_indexed >= 1 assert result.locations_indexed >= 1 def test_index_module_with_no_chapters( self, mock_store: MagicMock, tmp_pdf: Path, ) -> None: """Module with no chapters should produce zero chunks.""" empty_structure = _make_module_structure(chapters=[]) indexer = ModuleIndexer(mock_store) result = indexer.index_module(empty_structure, str(tmp_pdf)) assert result.chunks_created == 0 assert result.chapters_indexed == 0 # --------------------------------------------------------------------------- # Hash computation # --------------------------------------------------------------------------- class TestFileHash: """Tests for the internal file hash helper.""" def test_hash_deterministic(self, tmp_path: Path) -> None: f = tmp_path / "test.pdf" f.write_bytes(b"hello world") h1 = ModuleIndexer._compute_file_hash(f) h2 = ModuleIndexer._compute_file_hash(f) assert h1 == h2 def test_hash_changes_with_content(self, tmp_path: Path) -> None: f = tmp_path / "test.pdf" f.write_bytes(b"version 1") h1 = ModuleIndexer._compute_file_hash(f) f.write_bytes(b"version 2") h2 = ModuleIndexer._compute_file_hash(f) assert h1 != h2 def test_hash_matches_stdlib(self, tmp_path: Path) -> None: f = tmp_path / "test.pdf" content = b"some content" f.write_bytes(content) expected = hashlib.sha256(content).hexdigest() assert ModuleIndexer._compute_file_hash(f) == expected # --------------------------------------------------------------------------- # Force-split helper # --------------------------------------------------------------------------- 
class TestForceSplit:
    """Exercise the ``ModuleIndexer._force_split`` static helper."""

    def test_short_text_not_split(self) -> None:
        """Input shorter than the limit comes back as a single piece."""
        assert ModuleIndexer._force_split("hello", 100) == ["hello"]

    def test_long_text_split(self) -> None:
        """Space-separated input is cut into pieces no longer than the limit."""
        words = ("word " * 100).strip()  # ~500 chars
        pieces = ModuleIndexer._force_split(words, 100)
        assert len(pieces) >= 4
        # Verify all chunks are within bounds
        assert all(len(piece) <= 100 for piece in pieces)

    def test_no_spaces(self) -> None:
        """Text without spaces is split at exact boundary."""
        pieces = ModuleIndexer._force_split("A" * 250, 100)
        assert pieces == ["A" * 100, "A" * 100, "A" * 50]


# ---------------------------------------------------------------------------
# extract_text_from_pdf (mocked fitz)
# ---------------------------------------------------------------------------


class TestExtractTextFromPdf:
    """Tests for extract_text_from_pdf with mocked PyMuPDF."""

    @patch("fitz.open")
    def test_basic_extraction(self, mock_fitz_open: MagicMock) -> None:
        """Should extract text from the correct page range."""
        first_page, second_page = MagicMock(), MagicMock()
        first_page.get_text.return_value = "Page 1 text"
        second_page.get_text.return_value = "Page 2 text"
        pages = [first_page, second_page, MagicMock(), MagicMock(), MagicMock()]

        document = MagicMock()
        document.page_count = 5
        # Indexing the fake document hands back the matching fake page.
        document.__getitem__ = MagicMock(side_effect=pages.__getitem__)
        mock_fitz_open.return_value = document

        extracted = extract_text_from_pdf("/fake/path.pdf", 1, 2)

        for expected in ("Page 1 text", "Page 2 text"):
            assert expected in extracted

    @patch("fitz.open")
    def test_single_page(self, mock_fitz_open: MagicMock) -> None:
        """A start page with no end page still yields that page's text."""
        only_page = MagicMock()
        only_page.get_text.return_value = "Solo page"

        document = MagicMock()
        document.page_count = 10
        document.__getitem__ = MagicMock(return_value=only_page)
        mock_fitz_open.return_value = document

        extracted = extract_text_from_pdf("/fake/path.pdf", 5, None)

        assert "Solo page" in extracted

    @patch("fitz.open")
    def test_open_failure(self, mock_fitz_open: MagicMock) -> None:
        """When opening the PDF raises, an empty string is returned."""
        mock_fitz_open.side_effect = RuntimeError("Cannot open")
        assert extract_text_from_pdf("/bad/path.pdf", 1, 5) == ""


# ---------------------------------------------------------------------------
# Integration-style test (full pipeline, mocked I/O)
# ---------------------------------------------------------------------------


class TestFullPipeline:
    """End-to-end test of the indexing pipeline with mocked dependencies."""

    def test_round_trip(self, tmp_path: Path) -> None:
        """Index a module, verify chunks are created, check metadata."""
        store = _make_mock_store()

        part_one = ModuleElement(
            name="Part 1: Goblin Arrows",
            content_type=ContentType.CHAPTER,
            page_start=1,
            page_end=15,
        )
        hideout = ModuleElement(
            name="Cragmaw Hideout",
            content_type=ContentType.SECTION,
            page_start=5,
            page_end=10,
            parent="Part 1: Goblin Arrows",
        )
        structure = _make_module_structure(
            module_id="lost-mine",
            title="Lost Mine of Phandelver",
            chapters=[part_one, hideout],
            npcs=[NPCReference(name="Sildar Hallwinter", chapter="Part 1", page=3)],
            locations=[LocationReference(name="Cragmaw Cave", chapter="Part 1", page=5)],
        )

        pdf_file = tmp_path / "lost-mine.pdf"
        pdf_file.write_bytes(b"fake PDF data")

        pipeline_config = ChunkConfig(chunk_size=300, chunk_overlap=50)
        indexer = ModuleIndexer(store, chunk_config=pipeline_config)

        long_text = (
            "The adventurers travel along the Triboar Trail toward Phandalin. "
            "Sildar Hallwinter has been captured by goblins and is held in "
            "the Cragmaw Cave. The party must rescue him before time runs out.\n\n"
            "Deep inside the cave, goblin sentries stand guard at every turn. "
            "The sounds of dripping water echo through the tunnels."
        )

        with patch(
            "dm20_protocol.claudmaster.module_indexer.extract_text_from_pdf",
            return_value=long_text,
        ):
            outcome = indexer.index_module(structure, str(pdf_file))

        assert outcome.module_id == "lost-mine"
        assert outcome.chunks_created > 0
        assert outcome.chapters_indexed >= 1
        assert outcome.skipped is False

        # Verify add_documents was called with correct structure
        positional, keyword = store.add_documents.call_args

        def _argument(name: str, position: int) -> Any:
            # Accept the value whether it was passed by keyword or by position.
            if name in keyword:
                return keyword[name]
            return positional[position]

        docs = _argument("documents", 1)
        metas = _argument("metadatas", 2)
        ids = _argument("ids", 3)

        assert len(docs) == len(metas) == len(ids)
        for doc, meta, doc_id in zip(docs, metas, ids):
            assert isinstance(doc, str)
            assert isinstance(meta, dict)
            assert isinstance(doc_id, str)

        # Verify metadata structure
        required_keys = (
            "module_id",
            "chapter",
            "section",
            "content_type",
            "page_start",
            "page_end",
            "chunk_index",
            "total_chunks_in_section",
            "npcs_referenced",
            "locations_referenced",
            "is_boxed_text",
            "is_stat_block",
        )
        for meta in metas:
            for key in required_keys:
                assert key in meta


# ---------------------------------------------------------------------------
# Exports
# ---------------------------------------------------------------------------


class TestExports:
    """Verify public API exports from the package."""

    def test_module_indexer_importable_from_package(self) -> None:
        """The three indexer names can be imported from the package root."""
        from dm20_protocol.claudmaster import (
            ChunkConfig,
            IndexingResult,
            ModuleIndexer,
        )

        for exported in (ChunkConfig, IndexingResult, ModuleIndexer):
            assert exported is not None

    def test_module_indexer_in_all(self) -> None:
        """The three indexer names are listed in the package ``__all__``."""
        import dm20_protocol.claudmaster as pkg

        for public_name in ("ChunkConfig", "IndexingResult", "ModuleIndexer"):
            assert public_name in pkg.__all__

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Polloinfilzato/dm20-protocol'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_module_indexer.py•31.4 KiB