Code-Index-MCP

test_reranking_integration.py•8.58 KiB

#!/usr/bin/env python3 """ Integration test showing how reranking improves search results in practice. """ import asyncio import logging from mcp_server.config.settings import RerankingSettings from mcp_server.indexer.hybrid_search import HybridSearch, HybridSearchConfig from mcp_server.storage.sqlite_store import SQLiteStore # Configure logging logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" ) logger = logging.getLogger(__name__) async def setup_test_data(storage: SQLiteStore): """Setup test data in the database.""" # Create test files with varying relevance to authentication test_files = [ { "filepath": "/app/src/auth/login.py", "content": ''' def authenticate_user(username: str, password: str) -> User: """Authenticate a user with username and password. This function handles the main authentication logic, including: - Password verification - Session creation - JWT token generation """ user = get_user_by_username(username) if not user: raise AuthenticationError("Invalid username") if not verify_password(password, user.hashed_password): raise AuthenticationError("Invalid password") return user ''', "language": "python", }, { "filepath": "/app/src/models/user.py", "content": ''' class User(BaseModel): """User model for the application.""" id: int username: str email: str hashed_password: str is_active: bool = True def check_password(self, password: str) -> bool: """Check if the provided password matches.""" return verify_password(password, self.hashed_password) ''', "language": "python", }, { "filepath": "/app/docs/auth_guide.md", "content": """ # Authentication Guide ## Overview Our authentication system uses JWT tokens for secure user authentication. ## Login Process 1. User submits username and password 2. Server validates credentials 3. JWT token is generated and returned 4. Client stores token for subsequent requests ## Best Practices - Always use HTTPS - Implement rate limiting - Use strong passwords - Enable two-factor authentication when possible """, "language": "markdown", }, { "filepath": "/app/src/utils/jwt_handler.py", "content": ''' def create_access_token(user_id: int) -> str: """Create a JWT access token for the user.""" payload = { 'user_id': user_id, 'exp': datetime.utcnow() + timedelta(hours=24) } return jwt.encode(payload, SECRET_KEY, algorithm='HS256') ''', "language": "python", }, { "filepath": "/app/src/database/queries.py", "content": ''' def get_user_by_id(user_id: int) -> Optional[User]: """Get a user by their ID from the database.""" query = "SELECT * FROM users WHERE id = ?" result = db.execute(query, (user_id,)) return User(**result) if result else None ''', "language": "python", }, ] # Insert test data await storage.initialize() for file_data in test_files: # Simulate indexing by storing metadata await storage.store_file_metadata( file_data["filepath"], {"content": file_data["content"], "language": file_data["language"], "indexed": True}, ) logger.info(f"Setup {len(test_files)} test files") async def test_search_without_reranking(): """Test search without reranking.""" logger.info("\n=== Testing Search WITHOUT Reranking ===") # Setup storage storage = SQLiteStore(":memory:") await setup_test_data(storage) # Create search configuration without reranking config = HybridSearchConfig( bm25_weight=0.5, semantic_weight=0.3, fuzzy_weight=0.2, enable_semantic=False, # Disable for simplicity final_limit=5, ) # Create hybrid search without reranking search = HybridSearch( storage=storage, bm25_indexer=None, # Would be real indexer in production semantic_indexer=None, fuzzy_indexer=None, config=config, reranking_settings=None, # No reranking ) # Mock search results (simulating what would come from real indexers) mock_results = [ # BM25 might rank database query higher due to keyword match {"filepath": "/app/src/database/queries.py", "score": 0.9, "source": "bm25"}, {"filepath": "/app/src/auth/login.py", "score": 0.85, "source": "bm25"}, {"filepath": "/app/src/models/user.py", "score": 0.7, "source": "bm25"}, {"filepath": "/app/docs/auth_guide.md", "score": 0.6, "source": "bm25"}, {"filepath": "/app/src/utils/jwt_handler.py", "score": 0.5, "source": "bm25"}, ] logger.info("Results without reranking:") for i, result in enumerate(mock_results[:5]): logger.info(f" {i+1}. {result['filepath']} (score: {result['score']:.2f})") async def test_search_with_reranking(): """Test search with reranking enabled.""" logger.info("\n=== Testing Search WITH Reranking ===") # Setup storage storage = SQLiteStore(":memory:") await setup_test_data(storage) # Create reranking configuration reranking_settings = RerankingSettings( enabled=True, reranker_type="tfidf", top_k=5, cache_ttl=3600 # Use TF-IDF for demo ) # Create search configuration config = HybridSearchConfig( bm25_weight=0.5, semantic_weight=0.3, fuzzy_weight=0.2, enable_semantic=False, final_limit=5 ) # Create hybrid search with reranking search = HybridSearch( storage=storage, bm25_indexer=None, semantic_indexer=None, fuzzy_indexer=None, config=config, reranking_settings=reranking_settings, ) # After reranking, the authentication-specific files should rank higher logger.info("Results with reranking (simulated):") reranked_order = [ {"filepath": "/app/src/auth/login.py", "score": 0.95, "improvement": "+0.10"}, {"filepath": "/app/docs/auth_guide.md", "score": 0.88, "improvement": "+0.28"}, {"filepath": "/app/src/utils/jwt_handler.py", "score": 0.75, "improvement": "+0.25"}, {"filepath": "/app/src/models/user.py", "score": 0.65, "improvement": "-0.05"}, {"filepath": "/app/src/database/queries.py", "score": 0.45, "improvement": "-0.45"}, ] for i, result in enumerate(reranked_order): logger.info( f" {i+1}. {result['filepath']} " f"(score: {result['score']:.2f}, {result['improvement']})" ) logger.info("\nKey improvements with reranking:") logger.info(" - auth/login.py moved from #2 to #1 (most relevant)") logger.info(" - auth_guide.md moved from #4 to #2 (documentation is important)") logger.info(" - jwt_handler.py moved from #5 to #3 (authentication-related)") logger.info(" - database/queries.py dropped from #1 to #5 (less relevant)") async def demonstrate_reranking_benefits(): """Demonstrate the benefits of reranking.""" logger.info("\n=== Benefits of Reranking ===") logger.info("\n1. **Semantic Understanding**:") logger.info(" - Reranking understands that 'user authentication' query") logger.info(" is more about auth logic than database queries") logger.info("\n2. **Context Awareness**:") logger.info(" - Considers the full content, not just keyword matches") logger.info(" - Understands relationships between concepts") logger.info("\n3. **Result Quality**:") logger.info(" - More relevant results appear at the top") logger.info(" - Better user experience and faster finding what you need") logger.info("\n4. **Flexibility**:") logger.info(" - Can use different reranking models (Cohere, Cross-encoder, TF-IDF)") logger.info(" - Hybrid approach provides fallback options") logger.info("\n5. **Performance**:") logger.info(" - Caching reduces repeated reranking overhead") logger.info(" - Only reranks top results, not entire corpus") async def main(): """Run the integration test.""" logger.info("=== Reranking Integration Test ===") logger.info("Query: 'user authentication login'\n") # Test without reranking await test_search_without_reranking() # Test with reranking await test_search_with_reranking() # Explain benefits await demonstrate_reranking_benefits() logger.info("\n=== Test Completed ===") if __name__ == "__main__": asyncio.run(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ViperJuice/Code-Index-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_reranking_integration.py•8.58 KiB