MCP Codebase Insight

test_server.py•28.4 kB

"""Test server API endpoints.""" import sys import os # Ensure the src directory is in the Python path sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) import pytest import pytest_asyncio from httpx import AsyncClient import uuid import logging import time from pathlib import Path from datetime import datetime, timezone from typing import Dict, List, Any, Optional from src.mcp_codebase_insight.core.config import ServerConfig from src.mcp_codebase_insight.core.vector_store import VectorStore from src.mcp_codebase_insight.core.knowledge import Pattern from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding from src.mcp_codebase_insight.server import CodebaseAnalysisServer from src.mcp_codebase_insight.server_test_isolation import get_isolated_server_state # Setup logger logger = logging.getLogger(__name__) # Environment variables or defaults for vector store testing QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333") TEST_COLLECTION_NAME = os.environ.get("TEST_COLLECTION_NAME", "test_vector_search") EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "all-MiniLM-L6-v2") # Path to test repository TEST_REPO_PATH = Path("tests/fixtures/test_repo") @pytest_asyncio.fixture async def setup_test_vector_store(test_server_client): """Set up a test vector store with sample patterns for the server tests. This fixture initializes the vector store component in the server with test patterns, allowing the vector store search endpoint to be tested properly. """ # Get server state from the test client logger.info("Attempting to get server health status") request = await test_server_client.get("/health") if request.status_code != 200: logger.warning(f"Server health check failed with status code {request.status_code}") yield None return # Get the server state through test isolation utilities logger.info("Getting isolated server state") server_state = get_isolated_server_state() if not server_state: logger.warning("Could not get isolated server state, server_state is None") yield None return logger.info(f"Got server state, instance ID: {server_state.instance_id}") logger.info(f"Server state components: {server_state.list_components()}") # Create and initialize a test vector store try: # Create the embedder first logger.info(f"Creating embedding model with model name: {EMBEDDING_MODEL}") embedder = SentenceTransformerEmbedding(model_name=EMBEDDING_MODEL) await embedder.initialize() # Now create the vector store with the embedder logger.info(f"Creating vector store with URL: {QDRANT_URL}, collection: {TEST_COLLECTION_NAME}") vector_store = VectorStore( url=QDRANT_URL, embedder=embedder, collection_name=TEST_COLLECTION_NAME ) # Delete any existing collection with this name try: logger.info("Cleaning up vector store before use") await vector_store.cleanup() logger.info("Vector store cleaned up") except Exception as e: logger.warning(f"Error during vector store cleanup: {str(e)}") # Initialize the vector store logger.info("Initializing vector store") await vector_store.initialize() logger.info(f"Initialized vector store with collection: {TEST_COLLECTION_NAME}") # Add test patterns logger.info("Adding test patterns to vector store") await add_test_patterns(vector_store, embedder) # Register the vector store in the server state logger.info("Registering vector store component in server state") server_state.register_component("vector_store", vector_store) logger.info("Registered vector store component in server state") yield vector_store # Cleanup try: logger.info("Closing vector store") await vector_store.close() logger.info("Vector store closed") except Exception as e: logger.warning(f"Error during vector store closure: {str(e)}") except Exception as e: logger.error(f"Error setting up test vector store: {str(e)}", exc_info=True) yield None async def add_test_patterns(store: VectorStore, embedder: SentenceTransformerEmbedding): """Add test patterns to the vector store for testing.""" patterns = [] # Add sample patterns for testing patterns.append(Pattern( id=str(uuid.uuid4()), text="""class SearchResult: \"\"\"Represents a search result from the vector store.\"\"\" def __init__(self, id: str, score: float, metadata: Optional[Dict] = None): self.id = id self.score = score self.metadata = metadata or {} def to_dict(self): \"\"\"Convert to dictionary.\"\"\" return { "id": self.id, "score": self.score, "metadata": self.metadata }""", title="SearchResult Class", description="A class for vector store search results", pattern_type="code", tags=["python", "class", "search", "vector-store"], metadata={ "language": "python", "file_path": "src/core/models.py", "line_range": "10-25", "timestamp": datetime.now(timezone.utc).isoformat(), "type": "code" } )) patterns.append(Pattern( id=str(uuid.uuid4()), text="""async def search( self, query: str, limit: int = 5, threshold: float = 0.7, file_type: Optional[str] = None, path_pattern: Optional[str] = None ) -> List[Dict]: \"\"\"Search for patterns matching the query.\"\"\" # Generate embedding for the query embedding = await self.embedding_model.embed(query) # Prepare filter conditions filter_conditions = {} if file_type: filter_conditions["language"] = file_type if path_pattern: filter_conditions["file_path"] = {"$like": path_pattern} # Perform the search results = await self.vector_store.search( embedding=embedding, limit=limit, filter_conditions=filter_conditions ) # Filter by threshold filtered_results = [r for r in results if r.score >= threshold] return filtered_results""", title="Vector Store Search Method", description="Async method to search the vector store with filters", pattern_type="code", tags=["python", "async", "function", "search"], metadata={ "language": "python", "file_path": "src/core/search.py", "line_range": "50-75", "timestamp": datetime.now(timezone.utc).isoformat(), "type": "code" } )) patterns.append(Pattern( id=str(uuid.uuid4()), text="""# Vector Store Configuration ## Search Parameters - **query**: The text to search for similar patterns - **threshold**: Similarity score threshold (0.0 to 1.0) - **limit**: Maximum number of results to return - **file_type**: Filter by programming language/file type - **path_pattern**: Filter by file path pattern ## Recommended Threshold Values - **0.9-1.0**: Very high precision, almost exact matches - **0.8-0.9**: High precision, strongly similar - **0.7-0.8**: Good balance (default) - **0.6-0.7**: Higher recall, more results - **0.5-0.6**: Very high recall, may include less relevant matches""", title="Vector Store Documentation", description="Documentation on vector store search parameters", pattern_type="documentation", tags=["documentation", "markdown", "search", "parameters"], metadata={ "language": "markdown", "file_path": "docs/vector_store.md", "line_range": "50-70", "timestamp": datetime.now(timezone.utc).isoformat(), "type": "documentation" } )) # Store patterns with embeddings for pattern in patterns: # Generate embedding for the pattern text embedding = await embedder.embed(pattern.text) # Store the pattern await store.store_pattern( id=pattern.id, text=pattern.text, title=pattern.title, description=pattern.description, pattern_type=pattern.pattern_type, tags=pattern.tags, metadata=pattern.metadata, embedding=embedding ) logger.info(f"Added pattern: {pattern.title}") logger.info(f"Added {len(patterns)} patterns to the test vector store") return patterns # Use the test_client fixture from conftest.py @pytest_asyncio.fixture(scope="function") async def test_server_client(httpx_test_client): """Get a test client for server API testing. This uses the httpx_test_client from conftest.py to ensure proper event loop and resource management. """ yield httpx_test_client @pytest.fixture def test_code(): """Return a sample code snippet for testing.""" return """ def example_function(x: int) -> int: return x * 2 """ @pytest.fixture def test_issue(): """Return a sample issue description for testing.""" return "Error in function: example_function returns incorrect results for negative values" @pytest.fixture def test_adr(): """Return a sample ADR structure for testing.""" return { "title": "Test ADR", "status": "Proposed", "context": "This is a test ADR for automated testing purposes.", "decision": "We've decided to use this test ADR format.", "consequences": { "positive": ["Test positive consequence"], "negative": ["Test negative consequence"] }, "options": [ { "title": "Test option", "description": "Test description", "pros": ["Test pro"], "cons": ["Test con"] } ] } @pytest.mark.asyncio async def test_health_check(test_server_client: AsyncClient): """Test health check endpoint.""" response = await test_server_client.get("/health") assert response.status_code == 200 data = response.json() assert "status" in data @pytest.mark.asyncio async def test_metrics(test_server_client: AsyncClient): """Test metrics endpoint.""" response = await test_server_client.get("/metrics") # Some test servers may not have metrics enabled if response.status_code == 200: data = response.json() assert "metrics" in data else: logger.info(f"Metrics endpoint not available (status: {response.status_code})") assert response.status_code in [404, 503] # Not found or service unavailable @pytest.mark.asyncio async def test_analyze_code(test_server_client: AsyncClient, test_code: str): """Test code analysis endpoint.""" response = await test_server_client.post( "/tools/analyze-code", json={ "name": "analyze-code", "arguments": { "code": test_code, "context": {} } } ) # Component might not be available in test server if response.status_code == 200: data = response.json() assert "content" in data else: logger.info(f"Code analysis endpoint not available (status: {response.status_code})") assert response.status_code in [404, 503] # Not found or service unavailable @pytest.mark.asyncio async def test_create_adr(test_server_client: AsyncClient, test_adr: dict): """Test ADR creation endpoint.""" response = await test_server_client.post( "/tools/create-adr", json={ "name": "create-adr", "arguments": test_adr } ) # Component might not be available in test server if response.status_code == 200: data = response.json() assert "content" in data else: logger.info(f"ADR creation endpoint not available (status: {response.status_code})") assert response.status_code in [404, 503] # Not found or service unavailable @pytest.mark.asyncio async def test_debug_issue(test_server_client: AsyncClient, test_issue: str): """Test issue debugging endpoint.""" response = await test_server_client.post( "/tools/debug-issue", json={ "name": "debug-issue", "arguments": { "issue": test_issue, "context": {} } } ) # Component might not be available in test server if response.status_code == 200: data = response.json() assert "content" in data else: logger.info(f"Debug issue endpoint not available (status: {response.status_code})") assert response.status_code in [404, 503] # Not found or service unavailable @pytest.mark.asyncio async def test_search_knowledge(test_server_client: AsyncClient): """Test knowledge search endpoint.""" response = await test_server_client.post( "/tools/search-knowledge", json={ "name": "search-knowledge", "arguments": { "query": "test query", "limit": 5 } } ) # Component might not be available in test server if response.status_code == 200: data = response.json() assert "content" in data else: logger.info(f"Knowledge search endpoint not available (status: {response.status_code})") assert response.status_code in [404, 503] # Not found or service unavailable @pytest.mark.asyncio async def test_get_task(test_server_client: AsyncClient): """Test task endpoint.""" # Create a test task ID test_id = f"test_task_{uuid.uuid4().hex}" response = await test_server_client.post( "/task", json={ "task_id": test_id, "status": "pending", "result": None } ) assert response.status_code in [200, 404, 503] # Allow various responses depending on component availability @pytest.mark.asyncio async def test_invalid_request(test_server_client: AsyncClient): """Test invalid request handling.""" response = await test_server_client.post( "/tools/invalid-tool", json={ "name": "invalid-tool", "arguments": {} } ) assert response.status_code in [404, 400] # Either not found or bad request @pytest.mark.asyncio async def test_not_found(test_server_client: AsyncClient): """Test 404 handling.""" response = await test_server_client.get("/nonexistent-endpoint") assert response.status_code == 404 @pytest.mark.asyncio async def test_server_lifecycle(): """Test server lifecycle.""" # This is a safety check to ensure we're not breaking anything # The actual server lifecycle is tested by the conftest fixtures assert True # Replace with real checks if needed @pytest.mark.asyncio async def test_vector_store_search_threshold_validation(test_server_client: AsyncClient, setup_test_vector_store): """Test that the vector store search endpoint validates threshold values.""" # Skip if vector store setup failed if setup_test_vector_store is None: pytest.skip("Vector store setup failed, skipping test") # Test invalid threshold greater than 1.0 response = await test_server_client.get("/api/vector-store/search?query=test&threshold=1.5") assert response.status_code == 422 assert "threshold" in response.text assert "less than or equal to" in response.text # Test invalid threshold less than 0.0 response = await test_server_client.get("/api/vector-store/search?query=test&threshold=-0.5") assert response.status_code == 422 assert "threshold" in response.text assert "greater than or equal to" in response.text # Test boundary value 0.0 (should be valid) response = await test_server_client.get("/api/vector-store/search?query=test&threshold=0.0") assert response.status_code == 200 data = response.json() assert "results" in data assert data["threshold"] == 0.0 # Test boundary value 1.0 (should be valid) response = await test_server_client.get("/api/vector-store/search?query=test&threshold=1.0") assert response.status_code == 200 data = response.json() assert "results" in data assert data["threshold"] == 1.0 # Test with valid filter parameters response = await test_server_client.get("/api/vector-store/search?query=test&threshold=0.7&file_type=python&path_pattern=src/*") assert response.status_code == 200 data = response.json() assert "results" in data assert "query" in data assert "total_results" in data assert "limit" in data assert "threshold" in data assert data["threshold"] == 0.7 # If we have results, check their format if data["results"]: result = data["results"][0] assert "id" in result assert "score" in result assert "text" in result assert "file_path" in result assert "line_range" in result assert "type" in result assert "language" in result assert "timestamp" in result @pytest.mark.asyncio async def test_vector_store_search_functionality(test_server_client: AsyncClient, setup_test_vector_store): """Test comprehensive vector store search functionality. This test validates the full functionality of the vector store search endpoint, including result format, filtering, and metadata handling. The test checks: 1. Basic search returns properly formatted results 2. File type filtering works correctly 3. Path pattern filtering works correctly 4. Limit parameter controls result count 5. Results contain all required metadata fields """ # Skip if vector store setup failed if setup_test_vector_store is None: pytest.skip("Vector store setup failed, skipping test") # Test basic search functionality response = await test_server_client.get( "/api/vector-store/search", params={ "query": "test query", "threshold": 0.7, "limit": 5 } ) # We should have a successful response now that the vector store is initialized assert response.status_code == 200 data = response.json() # Validate response structure assert "query" in data assert data["query"] == "test query" assert "results" in data assert "threshold" in data assert data["threshold"] == 0.7 assert "total_results" in data assert "limit" in data assert data["limit"] == 5 # Test with file type filter response = await test_server_client.get( "/api/vector-store/search", params={ "query": "test query", "threshold": 0.7, "limit": 5, "file_type": "python" } ) assert response.status_code == 200 data = response.json() assert "file_type" in data assert data["file_type"] == "python" # Test with path pattern filter response = await test_server_client.get( "/api/vector-store/search", params={ "query": "test query", "threshold": 0.7, "limit": 5, "path_pattern": "src/**/*.py" } ) assert response.status_code == 200 data = response.json() assert "path_pattern" in data assert data["path_pattern"] == "src/**/*.py" # Test with limit=1 response = await test_server_client.get( "/api/vector-store/search", params={ "query": "test query", "threshold": 0.7, "limit": 1 } ) assert response.status_code == 200 data = response.json() assert data["limit"] == 1 # If we have results, verify the result format if data["results"]: result = data["results"][0] # Check all required fields are present assert "id" in result assert "score" in result assert "text" in result assert "file_path" in result assert "line_range" in result assert "type" in result assert "language" in result assert "timestamp" in result # Validate data types assert isinstance(result["id"], str) assert isinstance(result["score"], (int, float)) assert isinstance(result["text"], str) assert isinstance(result["file_path"], str) assert isinstance(result["line_range"], str) assert isinstance(result["type"], str) assert isinstance(result["language"], str) assert isinstance(result["timestamp"], str) @pytest.mark.asyncio async def test_vector_store_search_error_handling(test_server_client: AsyncClient, setup_test_vector_store): """Test error handling for vector store search endpoint. This test validates the error handling capabilities of the vector store search endpoint when provided with invalid or missing required parameters. The test checks: 1. Missing query parameter returns appropriate error 2. Invalid limit parameter (negative/zero) returns appropriate error """ # Skip if vector store setup failed if setup_test_vector_store is None: pytest.skip("Vector store setup failed, skipping test") # Test missing query parameter response = await test_server_client.get( "/api/vector-store/search", params={ "threshold": 0.7, "limit": 5 } ) # Missing required query parameter should return 422 assert response.status_code == 422 data = response.json() assert "detail" in data assert any("query" in error["loc"] for error in data["detail"]) # Test invalid limit parameter (negative) response = await test_server_client.get( "/api/vector-store/search", params={ "query": "test query", "threshold": 0.7, "limit": -5 } ) assert response.status_code == 422 data = response.json() assert "detail" in data assert any("limit" in error["loc"] for error in data["detail"]) # Test invalid limit parameter (zero) response = await test_server_client.get( "/api/vector-store/search", params={ "query": "test query", "threshold": 0.7, "limit": 0 } ) assert response.status_code == 422 data = response.json() assert "detail" in data assert any("limit" in error["loc"] for error in data["detail"]) @pytest.mark.asyncio async def test_vector_store_search_performance(test_server_client: AsyncClient, setup_test_vector_store): """Test performance of vector store search endpoint. This test measures the response time of the vector store search endpoint to ensure it meets performance requirements. The test checks: 1. Search response time is within acceptable limits (< 1000ms) 2. Multiple consecutive searches maintain performance """ # Skip if vector store setup failed if setup_test_vector_store is None: pytest.skip("Vector store setup failed, skipping test") # Define performance thresholds max_response_time_ms = 1000 # 1 second maximum response time # Perform timed search tests for i in range(3): # Test 3 consecutive searches start_time = time.time() response = await test_server_client.get( "/api/vector-store/search", params={ "query": f"test performance query {i}", "threshold": 0.7, "limit": 5 } ) end_time = time.time() response_time_ms = (end_time - start_time) * 1000 assert response.status_code == 200 logger.info(f"Search {i+1} response time: {response_time_ms:.2f}ms") # Assert performance is within acceptable limits assert response_time_ms < max_response_time_ms, \ f"Search response time ({response_time_ms:.2f}ms) exceeds threshold ({max_response_time_ms}ms)" # Verify we got a valid response data = response.json() assert "results" in data assert "query" in data @pytest.mark.asyncio async def test_vector_store_search_threshold_validation_mock(test_server_client: AsyncClient): """Test that the vector store search endpoint validates threshold values using mock approach. This test isolates FastAPI's parameter validation from the actual server initialization. It doesn't test the vector store implementation but only the parameter validation logic. """ # First, check if server is responding at all by checking health endpoint health_response = await test_server_client.get("/health") # If we can't even reach the server, skip the test if health_response.status_code >= 500: pytest.skip(f"Server is not responding (status: {health_response.status_code})") # Create a list of test cases: (threshold, expected_validation_error) # None for expected_validation_error means we expect validation to pass test_cases = [ # Invalid thresholds (should fail validation) (1.5, "less than or equal to 1.0"), (-0.5, "greater than or equal to 0.0"), # Valid thresholds (should pass validation) (0.0, None), (1.0, None), (0.7, None), ] # Try each test case for threshold, expected_validation_error in test_cases: # Skip testing health check which will never have parameter validation errors # Here we're just testing the static validation in the FastAPI route definition # This will trigger validation errors regardless of server state response = await test_server_client.get(f"/api/vector-store/search?query=test&threshold={threshold}") # Check response based on expected validation if expected_validation_error: # If validation error is expected, check for 422 status # Note: If we got 503, parameter validation didn't even happen # In some test environments this is normal, so we'll skip the assertion if response.status_code == 503: logger.info(f"Server returned 503 for threshold={threshold}, " f"parameter validation couldn't be tested due to server state") continue # If we get here, we should have a 422 validation error assert response.status_code == 422, \ f"Expected 422 for invalid threshold {threshold}, got {response.status_code}: {response.text}" # Check if validation error message contains expected text assert expected_validation_error in response.text, \ f"Expected validation error to contain '{expected_validation_error}', got: {response.text}" logger.info(f"Threshold {threshold} correctly failed validation with message containing '{expected_validation_error}'") else: # For valid thresholds, skip assertion if server returned 503 if response.status_code == 503: logger.info(f"Server returned 503 for valid threshold={threshold}, " f"but parameter validation passed (otherwise would be 422)") continue # If we get a non-503 response for a valid threshold, it should be 200 # (or 404 if the endpoint doesn't exist in test server) assert response.status_code in [200, 404], \ f"Expected 200 for valid threshold {threshold}, got {response.status_code}: {response.text}" logger.info(f"Threshold {threshold} correctly passed validation") logger.info("Completed threshold parameter validation tests")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tosin2013/mcp-codebase-insight'

If you have feedback or need assistance with the MCP directory API, please join our Discord server