Skip to main content
Glama
test_navigator.py (18.8 kB)
"""Tests for document navigator and compass classes.""" import sys from pathlib import Path import pytest # Add the project root to Python path project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) from docnav.navigator import DocumentCompass, DocumentNavigator class TestDocumentCompass: """Test cases for DocumentCompass class.""" def test_init_with_markdown(self): """Test initialization with markdown content.""" content = "# Test\n\nHello world" compass = DocumentCompass(content, "markdown") assert compass.source_text == content assert compass.source_format == "markdown" assert compass.root is not None assert compass.root.type == "document" assert compass.root.id == "root" def test_init_with_xml(self): """Test initialization with XML content.""" content = "<root><test>Hello</test></root>" compass = DocumentCompass(content, "xml") assert compass.source_text == content assert compass.source_format == "xml" assert compass.root is not None def test_init_with_unsupported_format(self): """Test initialization with unsupported format raises error.""" with pytest.raises(ValueError, match="Unsupported format"): DocumentCompass("test", "unsupported") def test_parse_markdown_headings(self): """Test parsing of markdown headings.""" content = """# Main Title ## Section 1 Content here. ### Subsection 1.1 More content. ## Section 2 Final content. 
""" compass = DocumentCompass(content, "markdown") # Test that index contains heading nodes heading_nodes = [ node for node in compass.index.values() if node.type == "heading" ] assert len(heading_nodes) >= 3 # Test heading hierarchy h1_nodes = [node for node in heading_nodes if node.level == 1] h2_nodes = [node for node in heading_nodes if node.level == 2] h3_nodes = [node for node in heading_nodes if node.level == 3] assert len(h1_nodes) >= 1 assert len(h2_nodes) >= 2 assert len(h3_nodes) >= 1 def test_parse_markdown_lists(self): """Test parsing of markdown lists.""" content = """# Test - Item 1 - Item 2 - Nested item - Item 3 1. Numbered item 1 2. Numbered item 2 """ compass = DocumentCompass(content, "markdown") # Should have list and list_item nodes list_nodes = [node for node in compass.index.values() if node.type == "list"] list_item_nodes = [ node for node in compass.index.values() if node.type == "list_item" ] assert len(list_nodes) >= 2 # bullet and ordered lists assert len(list_item_nodes) >= 5 # total items def test_parse_markdown_code_blocks(self): """Test parsing of markdown code blocks.""" content = """# Test ```python def hello(): print("Hello World") ``` ```javascript console.log("Hello"); ``` """ compass = DocumentCompass(content, "markdown") code_nodes = [ node for node in compass.index.values() if node.type == "code_block" ] assert len(code_nodes) >= 2 def test_get_outline(self): """Test outline generation.""" content = """# Main Title ## Section 1 ### Subsection 1.1 ## Section 2 ### Subsection 2.1 """ compass = DocumentCompass(content, "markdown") outline = compass.get_outline(max_depth=3) assert "Main Title" in outline assert "Section 1" in outline assert "Section 2" in outline assert "Subsection 1.1" in outline assert "Subsection 2.1" in outline def test_get_section_content(self): """Test section content extraction.""" content = """# Main Title ## Section 1 This is section 1 content. ### Subsection 1.1 This is subsection content. 
## Section 2 This is section 2 content. """ compass = DocumentCompass(content, "markdown") # Find a section node section_nodes = [ node for node in compass.index.values() if node.type == "heading" and "Section 1" in node.title ] if section_nodes: section_content = compass.get_section(section_nodes[0].id) assert "Section 1" in section_content def test_search_functionality(self): """Test search functionality.""" content = """# Main Title ## Section 1 This contains the word elephant. ## Section 2 This contains the word tiger. """ compass = DocumentCompass(content, "markdown") # Search for specific term results = compass.search("elephant") assert len(results) >= 1 assert any("elephant" in result.content.lower() for result in results) # Search for non-existent term results = compass.search("nonexistent") assert len(results) == 0 def test_navigation_context(self): """Test navigation context generation.""" content = """# Main Title ## Section 1 ### Subsection 1.1 ## Section 2 ### Subsection 2.1 """ compass = DocumentCompass(content, "markdown") # Find a section node to test navigation section_nodes = [ node for node in compass.index.values() if node.type == "heading" and node.level == 2 ] if section_nodes: context = compass.get_navigation_context(section_nodes[0].id) assert context.current is not None assert context.current["type"] == "heading" def test_token_counting(self): """Test token counting functionality.""" content = "# Test\n\nThis is a test document with some content." compass = DocumentCompass(content, "markdown") total_tokens = compass.get_total_tokens() assert total_tokens > 0 assert isinstance(total_tokens, int) content_tokens = compass.get_content_tokens() assert content_tokens > 0 assert isinstance(content_tokens, int) def test_build_index(self): """Test index building.""" content = """# Main Title ## Section 1 Content here. 
""" compass = DocumentCompass(content, "markdown") # Index should contain all nodes with IDs assert len(compass.index) > 0 assert "root" in compass.index # All indexed nodes should have IDs for node_id, node in compass.index.items(): assert node.id == node_id class TestDocumentNavigator: """Test cases for DocumentNavigator class.""" def setup_method(self): """Set up test fixtures.""" self.navigator = DocumentNavigator() self.test_md_file = Path(__file__).parent / "test_report_markdown.md" self.test_pdf_file = Path(__file__).parent / "test_report_pdf.pdf" def test_init(self): """Test navigator initialization.""" assert self.navigator.loaded_documents == {} assert self.navigator.document_metadata == {} def test_generate_doc_id(self): """Test document ID generation.""" doc_id = self.navigator._generate_doc_id() assert isinstance(doc_id, str) assert len(doc_id) > 0 # Should be valid UUID import uuid uuid.UUID(doc_id) # This will raise ValueError if invalid def test_normalize_file_path(self): """Test file path normalization.""" test_path = Path(__file__).parent / "test_file.txt" normalized = self.navigator._normalize_file_path(test_path) assert isinstance(normalized, str) assert test_path.name in normalized @pytest.mark.anyio async def test_load_document_from_text(self): """Test loading document from text content.""" content = "# Test Document\n\nThis is a test." doc_id, compass = await self.navigator.load_document_from_text(content) assert isinstance(doc_id, str) assert isinstance(compass, DocumentCompass) assert doc_id in self.navigator.loaded_documents assert doc_id in self.navigator.document_metadata # Check metadata metadata = self.navigator.get_document_metadata(doc_id) assert metadata is not None assert metadata["format"] == "markdown" assert metadata["source_type"] == "text" def test_load_document_from_text_sync(self): """Test synchronous loading of document from text.""" content = "# Test Document\n\nThis is a test." 
doc_id, compass = self.navigator.load_document_from_text_sync(content) assert isinstance(doc_id, str) assert isinstance(compass, DocumentCompass) assert doc_id in self.navigator.loaded_documents @pytest.mark.anyio async def test_load_document_from_file(self): """Test loading document from file.""" if not self.test_md_file.exists(): pytest.skip("Test markdown file not found") doc_id, compass = await self.navigator.load_document_from_file( self.test_md_file ) assert isinstance(doc_id, str) assert isinstance(compass, DocumentCompass) assert doc_id in self.navigator.loaded_documents # Check metadata metadata = self.navigator.get_document_metadata(doc_id) assert metadata is not None assert metadata["format"] == "markdown" assert metadata["source_type"] == "file" assert self.test_md_file.name in metadata["title"] def test_load_document_from_file_sync(self): """Test synchronous loading of document from file.""" if not self.test_md_file.exists(): pytest.skip("Test markdown file not found") doc_id, compass = self.navigator.load_document_from_file_sync(self.test_md_file) assert isinstance(doc_id, str) assert isinstance(compass, DocumentCompass) @pytest.mark.anyio async def test_load_nonexistent_file(self): """Test loading non-existent file raises error.""" with pytest.raises(FileNotFoundError): await self.navigator.load_document_from_file(Path("nonexistent.md")) def test_get_document(self): """Test getting loaded document.""" content = "# Test Document\n\nThis is a test." doc_id, compass = self.navigator.load_document_from_text_sync(content) # Get existing document retrieved = self.navigator.get_document(doc_id) assert retrieved is compass # Get non-existent document non_existent = self.navigator.get_document("invalid-id") assert non_existent is None def test_get_document_metadata(self): """Test getting document metadata.""" content = "# Test Document\n\nThis is a test." 
doc_id, _ = self.navigator.load_document_from_text_sync( content, title="My Test" ) metadata = self.navigator.get_document_metadata(doc_id) assert metadata is not None assert metadata["title"] == "My Test" assert metadata["format"] == "markdown" # Test invalid ID invalid_metadata = self.navigator.get_document_metadata("invalid-id") assert invalid_metadata is None def test_list_documents(self): """Test listing all documents.""" # Initially empty docs = self.navigator.list_documents() assert len(docs) == 0 # Add some documents content1 = "# Doc 1\n\nContent 1" content2 = "# Doc 2\n\nContent 2" doc_id1, _ = self.navigator.load_document_from_text_sync( content1, title="Doc 1" ) doc_id2, _ = self.navigator.load_document_from_text_sync( content2, title="Doc 2" ) docs = self.navigator.list_documents() assert len(docs) == 2 # Check document info doc_ids = [doc["id"] for doc in docs] assert doc_id1 in doc_ids assert doc_id2 in doc_ids def test_remove_document(self): """Test removing documents.""" content = "# Test Document\n\nThis is a test." 
doc_id, _ = self.navigator.load_document_from_text_sync(content) # Document should exist assert doc_id in self.navigator.loaded_documents assert doc_id in self.navigator.document_metadata # Remove document result = self.navigator.remove_document(doc_id) assert result is True # Document should be gone assert doc_id not in self.navigator.loaded_documents assert doc_id not in self.navigator.document_metadata # Try to remove non-existent document result = self.navigator.remove_document("invalid-id") assert result is False def test_get_outline(self): """Test getting document outline.""" content = """# Main Title ## Section 1 ### Subsection 1.1 ## Section 2 """ doc_id, _ = self.navigator.load_document_from_text_sync(content) outline = self.navigator.get_outline(doc_id) assert "Main Title" in outline assert "Section 1" in outline assert "Section 2" in outline # Test invalid document ID invalid_outline = self.navigator.get_outline("invalid-id") assert "not found" in invalid_outline def test_read_section(self): """Test reading section content.""" content = """# Main Title ## Section 1 This is section 1 content. ## Section 2 This is section 2 content. """ doc_id, compass = self.navigator.load_document_from_text_sync(content) # Find a section to read section_nodes = [ node for node in compass.index.values() if node.type == "heading" and "Section 1" in node.title ] if section_nodes: section_content = self.navigator.read_section(doc_id, section_nodes[0].id) assert "Section 1" in section_content # Test invalid document ID invalid_section = self.navigator.read_section("invalid-id", "section-1") assert "not found" in invalid_section def test_search_document(self): """Test searching document content.""" content = """# Main Title ## Section 1 This section contains the word elephant. ## Section 2 This section contains the word tiger. 
""" doc_id, _ = self.navigator.load_document_from_text_sync(content) # Search for existing term results = self.navigator.search_document(doc_id, "elephant") assert "elephant" in results.lower() assert "results" in results.lower() # Search for non-existent term results = self.navigator.search_document(doc_id, "nonexistent") assert "No results found" in results # Test invalid document ID invalid_results = self.navigator.search_document("invalid-id", "test") assert "not found" in invalid_results def test_navigate(self): """Test navigation functionality.""" content = """# Main Title ## Section 1 ### Subsection 1.1 ## Section 2 ### Subsection 2.1 """ doc_id, compass = self.navigator.load_document_from_text_sync(content) # Find a section to navigate to section_nodes = [ node for node in compass.index.values() if node.type == "heading" and node.level == 2 ] if section_nodes: nav_result = self.navigator.navigate(doc_id, section_nodes[0].id) assert "Current:" in nav_result assert section_nodes[0].title in nav_result # Test invalid document ID invalid_nav = self.navigator.navigate("invalid-id", "section-1") assert "not found" in invalid_nav def test_get_document_tokens(self): """Test getting document token statistics.""" content = "# Test Document\n\nThis is a test document with some content." 
doc_id, _ = self.navigator.load_document_from_text_sync(content) tokens = self.navigator.get_document_tokens(doc_id) assert tokens is not None assert "total_tokens" in tokens assert tokens["total_tokens"] > 0 # Test invalid document ID invalid_tokens = self.navigator.get_document_tokens("invalid-id") assert invalid_tokens is None class TestNavigatorIntegration: """Integration tests for navigator with real files.""" def setup_method(self): """Set up test fixtures.""" self.navigator = DocumentNavigator() self.test_md_file = Path(__file__).parent / "test_report_markdown.md" @pytest.mark.anyio async def test_real_file_processing(self): """Test processing real markdown file.""" if not self.test_md_file.exists(): pytest.skip("Test markdown file not found") doc_id, compass = await self.navigator.load_document_from_file( self.test_md_file ) # Test basic functionality assert compass.root is not None assert len(compass.index) > 0 # Test outline generation outline = self.navigator.get_outline(doc_id) assert len(outline) > 0 # Test search functionality search_results = self.navigator.search_document(doc_id, "test") assert isinstance(search_results, str) # Test navigation heading_nodes = [ node for node in compass.index.values() if node.type == "heading" ] if heading_nodes: nav_result = self.navigator.navigate(doc_id, heading_nodes[0].id) assert "Current:" in nav_result def test_multiple_document_management(self): """Test managing multiple documents.""" content1 = "# Document 1\n\nContent for document 1." content2 = "# Document 2\n\nContent for document 2." 
doc_id1, _ = self.navigator.load_document_from_text_sync( content1, title="Doc 1" ) doc_id2, _ = self.navigator.load_document_from_text_sync( content2, title="Doc 2" ) # Both documents should be loaded assert len(self.navigator.loaded_documents) == 2 assert len(self.navigator.document_metadata) == 2 # Test independent operations outline1 = self.navigator.get_outline(doc_id1) outline2 = self.navigator.get_outline(doc_id2) assert "Document 1" in outline1 assert "Document 2" in outline2 assert "Document 1" not in outline2 assert "Document 2" not in outline1 # Test removal self.navigator.remove_document(doc_id1) assert len(self.navigator.loaded_documents) == 1 assert doc_id2 in self.navigator.loaded_documents

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/shenyimings/DocNav-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.