Qdrant RAG MCP Server

qdrant-rag-mcp
tests
integration

test_documentation_indexer.py•5.29 KiB

#!/usr/bin/env python3
"""
Test script for DocumentationIndexer functionality.

This tests the new markdown/documentation indexing capabilities added in v0.2.3.
It can be collected by a test runner or executed directly as a script.
"""

import json
import sys
import traceback
from pathlib import Path


def _find_project_root(start: Path) -> Path:
    """Return the nearest ancestor of *start* that contains ``src/indexers``.

    Walking upwards keeps the script working regardless of how deeply it is
    nested under the tests directory. If no ancestor matches, fall back to the
    directory two levels above *start*, which is what this script historically
    assumed.
    """
    for candidate in start.resolve().parents:
        if (candidate / "src" / "indexers").is_dir():
            return candidate
    return start.parent.parent


PROJECT_ROOT = _find_project_root(Path(__file__))

# Make the project's source tree importable before importing from it.
sys.path.insert(0, str(PROJECT_ROOT / "src"))

from indexers.documentation_indexer import DocumentationIndexer  # noqa: E402


def test_documentation_indexer():
    """Test the DocumentationIndexer with a real markdown file.

    Indexes the project's CHANGELOG.md, prints a report about the extracted
    chunks and their metadata, and returns True when it runs to completion.
    Raises AssertionError if the fixture is missing or its type is unsupported.
    """
    # Initialize indexer
    indexer = DocumentationIndexer(chunk_size=2000, chunk_overlap=400)

    # Test file - using CHANGELOG.md as it has good structure
    test_file = PROJECT_ROOT / "CHANGELOG.md"

    print(f"Testing DocumentationIndexer with: {test_file}")
    print("=" * 60)

    # Fail loudly on a missing fixture: the edge-case test below expects
    # index_file() to return [] for missing files, which would otherwise let
    # every check in this function pass without examining anything.
    assert test_file.is_file(), f"Test fixture not found: {test_file}"

    # Check if supported
    assert indexer.is_supported(str(test_file)), f"{test_file} should be supported"
    print("✓ File type is supported")

    # Index the file
    chunks = indexer.index_file(str(test_file))
    print(f"\nExtracted {len(chunks)} chunks from the document")
    print("-" * 60)

    # Show first few chunks with metadata
    for number, chunk in enumerate(chunks[:3], start=1):
        metadata = chunk['metadata']
        print(f"\nChunk {number}:")
        print(f"Type: {metadata.get('chunk_type', 'unknown')}")
        print(f"Heading: {metadata.get('heading', 'None')}")
        print(f"Heading Level: {metadata.get('heading_level', 0)}")
        print(f"Heading Hierarchy: {metadata.get('heading_hierarchy', [])}")
        print(f"Has Code Blocks: {metadata.get('has_code_blocks', False)}")
        print(f"Content Preview: {chunk['content'][:200]}...")
        print("-" * 40)

    # Test specific features
    print("\n" + "=" * 60)
    print("FEATURE TESTS:")
    print("=" * 60)

    # 1. Test heading extraction
    headings_found = [
        c['metadata'].get('heading') for c in chunks if c['metadata'].get('heading')
    ]
    print(f"\n1. Heading Extraction: Found {len(headings_found)} headings")
    print(f" Sample headings: {headings_found[:5]}")

    # 2. Test code block detection
    chunks_with_code = [c for c in chunks if c['metadata'].get('has_code_blocks')]
    print(f"\n2. Code Block Detection: {len(chunks_with_code)} chunks contain code blocks")
    if chunks_with_code:
        print(f" Code languages found: {chunks_with_code[0]['metadata'].get('section_code_languages', [])}")

    # 3. Test hierarchy preservation
    hierarchical_chunks = [
        c for c in chunks if len(c['metadata'].get('heading_hierarchy', [])) > 1
    ]
    print(f"\n3. Hierarchy Preservation: {len(hierarchical_chunks)} chunks have hierarchical context")
    if hierarchical_chunks:
        sample = hierarchical_chunks[0]['metadata']['heading_hierarchy']
        print(f" Example hierarchy: {' > '.join(sample)}")

    # 4. Test large section splitting
    partial_chunks = [c for c in chunks if c['metadata'].get('is_partial', False)]
    print(f"\n4. Large Section Splitting: {len(partial_chunks)} chunks are partial (split from larger sections)")

    # 5. Test metadata extraction
    print("\n5. File Metadata:")
    if chunks:
        file_meta = chunks[0]['metadata']
        print(f" Title: {file_meta.get('title', 'Unknown')}")
        print(f" File Type: {file_meta.get('doc_type', 'Unknown')}")
        print(f" Total Code Blocks: {file_meta.get('code_block_count', 0)}")
        print(f" Code Languages: {file_meta.get('code_languages', [])}")

    # 6. Test summary extraction
    print("\n6. Summary Extraction Test:")
    if chunks and chunks[0]['content']:
        summary = indexer.extract_summary(chunks[0]['content'], max_length=150)
        print(f" Summary: {summary}")

    print("\n" + "=" * 60)
    print("✓ All tests completed successfully!")

    return True


def test_edge_cases():
    """Test edge cases and error handling.

    Checks that a non-existent path yields an empty chunk list, that ``.py``
    files are rejected, and that each documentation extension is accepted.
    """
    print("\n" + "=" * 60)
    print("EDGE CASE TESTS:")
    print("=" * 60)

    indexer = DocumentationIndexer()

    # Test 1: Non-existent file path
    print("\n1. Testing with non-existent file:")
    chunks = indexer.index_file("/non/existent/file.md")
    assert chunks == [], "Should return empty list for non-existent file"
    print(" ✓ Handled gracefully")

    # Test 2: Unsupported file
    print("\n2. Testing with unsupported file type:")
    assert not indexer.is_supported("test.py"), "Should not support .py files"
    print(" ✓ Correctly rejected")

    # Test 3: File types
    print("\n3. Testing supported file types:")
    supported = [".md", ".markdown", ".rst", ".txt", ".mdx"]
    for ext in supported:
        assert indexer.is_supported(f"test{ext}"), f"Should support {ext} files"
    print(f" ✓ All supported: {supported}")

    print("\n✓ Edge case tests passed!")


def main():
    """Run both test functions as a script, exiting with status 1 on failure."""
    print("Documentation Indexer Test Suite")
    print("================================\n")

    try:
        # Run main tests
        test_documentation_indexer()

        # Run edge case tests
        test_edge_cases()

        print("\n" + "=" * 60)
        print("ALL TESTS PASSED! 🎉")
        print("Documentation Indexer is working correctly.")
    except Exception as e:
        # Top-level boundary: report the failure with its traceback and signal
        # it through the process exit code.
        print(f"\n❌ Test failed with error: {e}")
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ancoleman/qdrant-rag-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_documentation_indexer.py•5.29 KiB