DocNav-MCP

DocNav-MCP
tests

test_processors.py•12.6 KiB

"""Tests for document processors.""" import sys from pathlib import Path import pytest # Add the project root to Python path project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) from docnav.processors import MarkdownProcessor, PDFProcessor class TestMarkdownProcessor: """Test cases for MarkdownProcessor.""" def setup_method(self): """Set up test fixtures.""" self.processor = MarkdownProcessor() self.test_md_file = Path(__file__).parent / "test_report_markdown.md" def test_can_process_markdown_files(self): """Test that processor can identify markdown files.""" assert self.processor.can_process(Path("test.md")) assert self.processor.can_process(Path("test.markdown")) assert self.processor.can_process(Path("test.mdown")) assert self.processor.can_process(Path("test.mkd")) assert not self.processor.can_process(Path("test.txt")) assert not self.processor.can_process(Path("test.pdf")) def test_get_supported_extensions(self): """Test that processor returns correct supported extensions.""" extensions = self.processor.get_supported_extensions() assert ".md" in extensions assert ".markdown" in extensions assert ".mdown" in extensions assert ".mkd" in extensions def test_get_processor_info(self): """Test that processor returns correct metadata.""" info = self.processor.get_processor_info() assert info["name"] == "MarkdownProcessor" assert "parsing" in info["features"] assert "search" in info["features"] assert "navigation" in info["features"] @pytest.mark.anyio async def test_process_markdown_file(self): """Test processing of markdown file.""" if not self.test_md_file.exists(): pytest.skip("Test markdown file not found") document = await self.processor.process(self.test_md_file) # Check document structure assert document.file_path == self.test_md_file assert document.title == "test_report_markdown" assert document.source_format == "markdown" assert document.root is not None assert document.root.type == "document" assert document.root.id == "root" 
@pytest.mark.anyio async def test_process_nonexistent_file(self): """Test processing of non-existent file raises error.""" with pytest.raises(FileNotFoundError): await self.processor.process(Path("nonexistent.md")) @pytest.mark.anyio async def test_extract_node(self): """Test extraction of specific nodes.""" if not self.test_md_file.exists(): pytest.skip("Test markdown file not found") document = await self.processor.process(self.test_md_file) # Test extracting root node root_node = await self.processor.extract_node(document, "root") assert root_node is not None assert root_node.type == "document" # Test extracting non-existent node non_existent = await self.processor.extract_node(document, "nonexistent") assert non_existent is None @pytest.mark.anyio async def test_search_functionality(self): """Test search functionality.""" if not self.test_md_file.exists(): pytest.skip("Test markdown file not found") document = await self.processor.process(self.test_md_file) # Search for common terms that likely exist in any markdown document results = await self.processor.search(document, "test") assert isinstance(results, list) # Each result should have required fields for result in results: assert hasattr(result, "node_id") assert hasattr(result, "section") assert hasattr(result, "content") assert hasattr(result, "type") @pytest.mark.anyio async def test_heading_hierarchy(self): """Test heading hierarchy extraction.""" if not self.test_md_file.exists(): pytest.skip("Test markdown file not found") document = await self.processor.process(self.test_md_file) headings = self.processor.get_heading_hierarchy(document) assert isinstance(headings, list) for heading in headings: assert "id" in heading assert "title" in heading assert "level" in heading assert heading["level"] >= 1 @pytest.mark.anyio async def test_parse_markdown_structure(self): """Test that markdown structure is parsed correctly.""" # Create a simple test markdown content test_content = """# Main Title Some paragraph 
content. ## Section 1 - List item 1 - List item 2 ### Subsection 1.1 ```python print("Hello World") ``` ## Section 2 Another paragraph. """ # Create a temporary file import tempfile with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f: f.write(test_content) temp_path = Path(f.name) try: document = await self.processor.process(temp_path) # Check that document has content assert document.root is not None assert len(document.root.children) > 0 # Check that headings are parsed headings = self.processor.get_heading_hierarchy(document) assert len(headings) >= 3 # Should have at least 3 headings # Check heading levels heading_levels = [h["level"] for h in headings] assert 1 in heading_levels # Main title assert 2 in heading_levels # Section headers assert 3 in heading_levels # Subsection finally: temp_path.unlink() # Clean up class TestPDFProcessor: """Test cases for PDFProcessor.""" def setup_method(self): """Set up test fixtures.""" self.processor = PDFProcessor() self.test_pdf_file = Path(__file__).parent / "test_report_pdf.pdf" def test_can_process_pdf_files(self): """Test that processor can identify PDF files.""" assert self.processor.can_process(Path("test.pdf")) assert self.processor.can_process(Path("test.PDF")) assert not self.processor.can_process(Path("test.txt")) assert not self.processor.can_process(Path("test.md")) def test_get_supported_extensions(self): """Test that processor returns correct supported extensions.""" extensions = self.processor.get_supported_extensions() assert ".pdf" in extensions def test_get_processor_info(self): """Test that processor returns correct metadata.""" info = self.processor.get_processor_info() assert info["name"] == "PDFProcessor" assert "parsing" in info["features"] assert "search" in info["features"] assert "navigation" in info["features"] @pytest.mark.anyio async def test_process_pdf_file(self): """Test processing of PDF file.""" if not self.test_pdf_file.exists(): pytest.skip("Test PDF file not 
found") document = await self.processor.process(self.test_pdf_file) # Check document structure assert document.file_path == self.test_pdf_file assert document.title == "test_report_pdf" assert document.source_format == "pdf" assert document.root is not None assert document.root.type == "document" assert document.root.id == "root" # Check that PDF content was converted to markdown assert isinstance(document.source_text, str) assert len(document.source_text) > 0 @pytest.mark.anyio async def test_process_nonexistent_pdf(self): """Test processing of non-existent PDF file raises error.""" with pytest.raises(FileNotFoundError): await self.processor.process(Path("nonexistent.pdf")) @pytest.mark.anyio async def test_extract_node_pdf(self): """Test extraction of specific nodes from PDF.""" if not self.test_pdf_file.exists(): pytest.skip("Test PDF file not found") document = await self.processor.process(self.test_pdf_file) # Test extracting root node root_node = await self.processor.extract_node(document, "root") assert root_node is not None assert root_node.type == "document" # Test extracting non-existent node non_existent = await self.processor.extract_node(document, "nonexistent") assert non_existent is None @pytest.mark.anyio async def test_search_pdf_content(self): """Test search functionality on PDF content.""" if not self.test_pdf_file.exists(): pytest.skip("Test PDF file not found") document = await self.processor.process(self.test_pdf_file) # Search for common terms that likely exist in any document results = await self.processor.search(document, "test") assert isinstance(results, list) # Each result should have required fields for result in results: assert hasattr(result, "node_id") assert hasattr(result, "section") assert hasattr(result, "content") assert hasattr(result, "type") @pytest.mark.anyio async def test_pdf_heading_hierarchy(self): """Test heading hierarchy extraction from PDF.""" if not self.test_pdf_file.exists(): pytest.skip("Test PDF file not found") 
document = await self.processor.process(self.test_pdf_file) headings = self.processor.get_heading_hierarchy(document) assert isinstance(headings, list) for heading in headings: assert "id" in heading assert "title" in heading assert "level" in heading assert heading["level"] >= 1 class TestProcessorComparison: """Test cases comparing processors.""" def setup_method(self): """Set up test fixtures.""" self.md_processor = MarkdownProcessor() self.pdf_processor = PDFProcessor() self.test_md_file = Path(__file__).parent / "test_report_markdown.md" self.test_pdf_file = Path(__file__).parent / "test_report_pdf.pdf" @pytest.mark.anyio async def test_both_processors_create_valid_documents(self): """Test that both processors create valid document structures.""" md_doc = None pdf_doc = None if self.test_md_file.exists(): md_doc = await self.md_processor.process(self.test_md_file) if self.test_pdf_file.exists(): pdf_doc = await self.pdf_processor.process(self.test_pdf_file) for doc in [md_doc, pdf_doc]: if doc is not None: assert doc.root is not None assert doc.root.type == "document" assert doc.root.id == "root" assert hasattr(doc, "file_path") assert hasattr(doc, "title") assert hasattr(doc, "source_format") @pytest.mark.anyio async def test_search_consistency(self): """Test that search works consistently across processors.""" md_doc = None pdf_doc = None if self.test_md_file.exists(): md_doc = await self.md_processor.process(self.test_md_file) if self.test_pdf_file.exists(): pdf_doc = await self.pdf_processor.process(self.test_pdf_file) search_term = "test" for doc, processor in [ (md_doc, self.md_processor), (pdf_doc, self.pdf_processor), ]: if doc is not None: results = await processor.search(doc, search_term) assert isinstance(results, list) # Each result should have the same structure for result in results: assert hasattr(result, "node_id") assert hasattr(result, "section") assert hasattr(result, "content") assert hasattr(result, "type") assert search_term.lower() in 
result.content.lower() def test_processor_metadata_consistency(self): """Test that processor metadata is consistent.""" md_info = self.md_processor.get_processor_info() pdf_info = self.pdf_processor.get_processor_info() # Both should have the same feature set assert md_info["features"] == pdf_info["features"] # Both should have valid names assert md_info["name"] == "MarkdownProcessor" assert pdf_info["name"] == "PDFProcessor" # Both should have supported extensions assert len(md_info["supported_extensions"]) > 0 assert len(pdf_info["supported_extensions"]) > 0

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/shenyimings/DocNav-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_processors.py•12.6 KiB