RBT Document Editor

Overview Schema Related Servers Score Discussions

test_markdown_chunker.py•10.1 KiB

""" Unit tests for MarkdownChunker. @REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker """ import pytest from rbt_mcp_server.chunking.markdown_chunker import MarkdownChunker from rbt_mcp_server.chunking.models import ChunkMetadata @pytest.fixture def chunker(): """ Create a MarkdownChunker instance for testing. @REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker """ return MarkdownChunker() class TestMarkdownChunker: """ Test suite for MarkdownChunker. @REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker """ def test_chunk_with_multiple_h3_headings(self, chunker): """ Test Case 1: Parse Markdown document with multiple h3 headings. @REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker Given: Markdown document contains 3 ### headings When: Call chunk() Then: Return 3 ChunkMetadata objects, each corresponding to one h3 section """ # Arrange document_content = """# Main Title Some introduction text. ### Introduction This is the introduction section with some content. ### Implementation This is the implementation section with code examples. ### Testing This is the testing section with test cases. 
""" project_id = "knowledge-smith" feature_id = "test-feature" doc_type = "Guide" file_path = "docs/guide.md" # Act chunks = chunker.chunk(document_content, project_id, feature_id, doc_type, file_path) # Assert assert len(chunks) == 3 # Check first chunk assert chunks[0].metadata["section_title"] == "Introduction" assert chunks[0].metadata["section_id"] == "introduction" assert chunks[0].metadata["chunk_id"] == "knowledge-smith+test-feature+introduction" assert chunks[0].metadata["parent_document_id"] == "knowledge-smith+test-feature+Guide" assert "introduction section" in chunks[0].content.lower() # Check second chunk assert chunks[1].metadata["section_title"] == "Implementation" assert chunks[1].metadata["section_id"] == "implementation" assert chunks[1].metadata["chunk_id"] == "knowledge-smith+test-feature+implementation" assert "implementation section" in chunks[1].content.lower() # Check third chunk assert chunks[2].metadata["section_title"] == "Testing" assert chunks[2].metadata["section_id"] == "testing" assert chunks[2].metadata["chunk_id"] == "knowledge-smith+test-feature+testing" assert "testing section" in chunks[2].content.lower() def test_chunk_without_h3_headings(self, chunker): """ Test Case 2: Handle Markdown document without h3 headings. @REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker Given: Markdown document has no h3 headings When: Call chunk() Then: Return 1 ChunkMetadata containing the entire document """ # Arrange document_content = """# Main Title This is a document without any h3 headings. ## Section 1 Some content here. ## Section 2 More content here. 
""" project_id = "knowledge-smith" feature_id = None doc_type = "Guide" file_path = "docs/simple.md" # Act chunks = chunker.chunk(document_content, project_id, feature_id, doc_type, file_path) # Assert assert len(chunks) == 1 assert chunks[0].metadata["chunk_id"] == "knowledge-smith+general+document" assert chunks[0].metadata["parent_document_id"] == "knowledge-smith+general+Guide" assert chunks[0].metadata["section_id"] is None assert chunks[0].metadata["section_title"] is None assert chunks[0].content == document_content def test_heading_slug_generation(self, chunker): """ Test Case 3: Verify heading_slug generation correctness. @REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker Given: h3 heading contains special characters, spaces, and mixed case When: Generate slug Then: Slug is lowercase, hyphen-separated, with special characters removed """ # Test various heading formats test_cases = [ ("Hello World!", "hello-world"), ("Implementation & Testing", "implementation-testing"), ("1. 任務目標與前置", "1-任務目標與前置"), ("API Reference: v2.0", "api-reference-v20"), ("Multiple Spaces", "multiple-spaces"), ("Trailing-Hyphen--", "trailing-hyphen"), ("UPPERCASE TEXT", "uppercase-text"), ("under_score_test", "under-score-test"), ] for heading, expected_slug in test_cases: # Act slug = chunker._generate_slug(heading) # Assert assert slug == expected_slug, f"Failed for heading: {heading}" def test_chunk_id_stability(self, chunker): """ Test Case 4: Verify chunk_id stability across multiple runs. @REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker Given: Same Markdown document chunked multiple times When: Call chunk() multiple times Then: chunk_id for same heading remains unchanged """ # Arrange document_content = """### First Section Content for first section. ### Second Section Content for second section. 
""" project_id = "knowledge-smith" feature_id = "stable-test" doc_type = "Guide" file_path = "docs/stability.md" # Act - chunk the same document 3 times chunks_run1 = chunker.chunk(document_content, project_id, feature_id, doc_type, file_path) chunks_run2 = chunker.chunk(document_content, project_id, feature_id, doc_type, file_path) chunks_run3 = chunker.chunk(document_content, project_id, feature_id, doc_type, file_path) # Assert - all runs should produce identical chunk_ids assert len(chunks_run1) == 2 assert len(chunks_run2) == 2 assert len(chunks_run3) == 2 # Check first chunk assert chunks_run1[0].metadata["chunk_id"] == chunks_run2[0].metadata["chunk_id"] == chunks_run3[0].metadata["chunk_id"] assert chunks_run1[0].metadata["chunk_id"] == "knowledge-smith+stable-test+first-section" # Check second chunk assert chunks_run1[1].metadata["chunk_id"] == chunks_run2[1].metadata["chunk_id"] == chunks_run3[1].metadata["chunk_id"] assert chunks_run1[1].metadata["chunk_id"] == "knowledge-smith+stable-test+second-section" def test_chunk_with_no_feature_id(self, chunker): """ Test chunking general documents without feature_id. @REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker Given: Document has no feature_id (general document) When: Call chunk() Then: chunk_id and parent_document_id use "general" as feature part """ # Arrange document_content = """### Overview This is a general document. 
""" project_id = "knowledge-smith" feature_id = None doc_type = "Architecture" file_path = "docs/architecture/overview.md" # Act chunks = chunker.chunk(document_content, project_id, feature_id, doc_type, file_path) # Assert assert len(chunks) == 1 assert chunks[0].metadata["chunk_id"] == "knowledge-smith+general+overview" assert chunks[0].metadata["parent_document_id"] == "knowledge-smith+general+Architecture" assert chunks[0].metadata["feature_id"] is None def test_chunk_preserves_content_boundaries(self, chunker): """ Test that content boundaries are preserved correctly. @REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker Given: Document with multiple h3 sections with distinct content When: Call chunk() Then: Each chunk contains only its section's content """ # Arrange document_content = """### Section A Content for A. More A content. ### Section B Content for B. More B content. ### Section C Content for C. """ project_id = "test-project" feature_id = "test-feature" doc_type = "Doc" file_path = "test.md" # Act chunks = chunker.chunk(document_content, project_id, feature_id, doc_type, file_path) # Assert assert len(chunks) == 3 # Section A should not contain Section B or C content assert "Content for A" in chunks[0].content assert "More A content" in chunks[0].content assert "Content for B" not in chunks[0].content assert "Content for C" not in chunks[0].content # Section B should not contain Section A or C content assert "Content for B" in chunks[1].content assert "More B content" in chunks[1].content assert "Content for A" not in chunks[1].content assert "Content for C" not in chunks[1].content # Section C should not contain Section A or B content assert "Content for C" in chunks[2].content assert "Content for A" not in chunks[2].content assert "Content for B" not in chunks[2].content def test_empty_document(self, chunker): """ Test handling of empty document. 
@REQ: REQ-graphiti-chunk-mcp @BP: BP-graphiti-chunk-mcp @TASK: TASK-003-MarkdownChunker Given: Document is empty or only whitespace When: Call chunk() Then: Return 1 chunk with empty or whitespace content """ # Arrange document_content = "" project_id = "test-project" feature_id = "test-feature" doc_type = "Doc" file_path = "empty.md" # Act chunks = chunker.chunk(document_content, project_id, feature_id, doc_type, file_path) # Assert assert len(chunks) == 1 assert chunks[0].content == ""

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/leo7nel23/KnowkedgeSmith-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_markdown_chunker.py•10.1 KiB