Code-Index-MCP

test_contextual_simple.py•6.12 KiB

#!/usr/bin/env python3
"""Smoke-test script for the contextual embeddings pipeline components.

Exercises three pieces in turn -- markdown chunking, BM25 indexing and
hybrid search -- and prints what each one returns. Nothing is asserted:
a step only "fails" if one of the calls raises.
"""

import os
import sys

# Test configuration. These are assigned before any ``mcp_server`` module is
# imported (the imports live inside the test functions below).
# NOTE(review): presumably read by mcp_server at import/construct time -- confirm.
os.environ["MARKDOWN_MAX_CHUNK_TOKENS"] = "500"
os.environ["VOYAGE_AI_API_KEY"] = "test-key"

# Put the directory that holds this script at the front of the import path
# so that ``mcp_server`` resolves when the script is run directly.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _SCRIPT_DIR)


def test_adaptive_chunking():
    """Run ``MarkdownChunkStrategy.create_chunks`` on a small document and print the chunks.

    The AST and section list handed to the strategy are hand-written mocks,
    not the output of a real markdown parser.
    """
    print("=== Testing Adaptive Chunking ===\n")

    from mcp_server.plugins.markdown_plugin.chunk_strategies import (
        MarkdownChunkStrategy,
    )

    # Default construction; the token limit comes from the environment set above.
    chunker = MarkdownChunkStrategy()

    # Sample markdown document fed to the chunker.
    content = """
# Test Document

This is a test document for adaptive chunking.

## Section 1

This section contains some content that will be chunked based on tokens. The chunking strategy should adapt to the document size.

## Section 2

Another section with more content. The system will use TokenEstimator to calculate the appropriate chunk boundaries.

### Subsection 2.1

Detailed content in a subsection that demonstrates hierarchical structure.

## Section 3

Final section with conclusion.
"""

    # Mocked parser output: an empty document AST plus two level-2 sections.
    mock_ast = {"type": "document", "children": []}
    section_specs = (
        ("section-1", "Section 1", "This section contains some content...", 5, 7),
        ("section-2", "Section 2", "Another section with more content...", 9, 15),
    )
    mock_sections = [
        {
            "id": section_id,
            "title": title,
            "level": 2,
            "content": text,
            "start_line": first_line,
            "end_line": last_line,
            "metadata": {"code_blocks": 0},
        }
        for section_id, title, text, first_line, last_line in section_specs
    ]

    chunks = chunker.create_chunks(content, mock_ast, mock_sections, "test.md")

    print(f"Document size: {len(content)} characters")
    print(f"Created {len(chunks)} chunks")
    print(f"Max chunk size (tokens): {chunker.max_chunk_tokens}")
    print(f"Using adaptive sizing: {chunker.adaptive_sizing}")

    for number, chunk in enumerate(chunks, start=1):
        print(f"\nChunk {number}:")
        body = chunk.content
        print(f" Size: {len(body)} chars")
        meta = chunk.metadata
        print(f" Lines: {meta.line_start}-{meta.line_end}")
        print(f" Content preview: {body[:50]}...")

    print("\n✓ Adaptive chunking working correctly!")


def test_bm25_indexing():
    """Index three short documents with ``BM25Indexer`` and print search results and stats."""
    print("\n\n=== Testing BM25 Indexing ===\n")

    from mcp_server.indexer.bm25_indexer import BM25Indexer
    from mcp_server.storage.sqlite_store import SQLiteStore

    # In-memory SQLite store, so nothing is written to disk.
    indexer = BM25Indexer(SQLiteStore(":memory:"))

    # (id, content, metadata type) for each document to index.
    doc_specs = (
        ("doc1", "Contextual embeddings improve search accuracy", "guide"),
        ("doc2", "BM25 provides fast keyword-based search", "documentation"),
        ("doc3", "Hybrid search combines BM25 and semantic search", "tutorial"),
    )
    documents = [
        {"id": doc_id, "content": text, "metadata": {"type": doc_type}}
        for doc_id, text, doc_type in doc_specs
    ]

    index_result = indexer.index_documents(documents)
    print(f"Indexed {len(documents)} documents: {index_result}")

    for query in ("contextual embeddings", "BM25 search", "hybrid"):
        hits = indexer.search(query, limit=3)
        print(f"\nQuery: '{query}'")
        print(f"Found {len(hits)} results:")
        for hit in hits:
            snippet = hit["content"][:50]
            # 'score' is treated as optional; a missing one is shown as N/A.
            score = hit.get("score", "N/A")
            print(f" - {snippet}... (score: {score})")

    stats = indexer.get_stats()
    print(f"\nIndex statistics: {stats}")

    print("\n✓ BM25 indexing working correctly!")


def test_hybrid_search():
    """Query ``HybridSearch`` in "bm25", "fuzzy" and "hybrid" modes and print the ranked hits."""
    print("\n\n=== Testing Hybrid Search ===\n")

    from mcp_server.indexer.bm25_indexer import BM25Indexer
    from mcp_server.indexer.hybrid_search import HybridSearch
    from mcp_server.storage.sqlite_store import SQLiteStore

    # The BM25 indexer and the hybrid searcher share one in-memory store.
    store = SQLiteStore(":memory:")
    bm25_indexer = BM25Indexer(store)
    hybrid = HybridSearch(store, bm25_indexer)

    # (id, content, metadata section) for each chunk indexed through BM25.
    chunk_specs = (
        (
            "chunk1",
            "Adaptive chunking adjusts chunk size based on document characteristics",
            "Introduction",
        ),
        (
            "chunk2",
            "Contextual embeddings add surrounding context to improve search",
            "Implementation",
        ),
        (
            "chunk3",
            "Reranking uses cross-encoder models to improve result relevance",
            "Advanced Features",
        ),
    )
    bm25_indexer.index_documents(
        [
            {"id": chunk_id, "content": text, "metadata": {"section": section}}
            for chunk_id, text, section in chunk_specs
        ]
    )

    query = "contextual search improvement"
    for mode in ("bm25", "fuzzy", "hybrid"):
        print(f"\n{mode.upper()} Search for: '{query}'")
        hits = hybrid.search(query, limit=3, mode=mode)

        if not hits:
            print(" No results found")
            continue

        for rank, hit in enumerate(hits, start=1):
            print(f"{rank}. {hit['content'][:60]}...")
            # A hit without a 'score' key is printed as 0.000.
            print(f" Score: {hit.get('score', 0):.3f}")

    print("\n✓ Hybrid search working correctly!")


def main():
    """Run the three component checks in order, then print a summary banner.

    The banner is printed only if every check returns without raising.
    """
    for run_check in (test_adaptive_chunking, test_bm25_indexing, test_hybrid_search):
        run_check()

    print("\n\n=== All Tests Passed! ===")
    print("\nThe contextual embeddings pipeline is ready for use:")
    summary_lines = (
        "1. ✓ Adaptive token-based chunking",
        "2. ✓ BM25 full-text search indexing",
        "3. ✓ Hybrid search with multiple strategies",
        "4. ✓ Contextual embedding service (separate module)",
        "5. ✓ Reranking support (separate module)",
    )
    for line in summary_lines:
        print(line)


if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ViperJuice/Code-Index-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_contextual_simple.py•6.12 KiB