"""
Integration test configuration and fixtures.
Provides Docker-based test services and shared fixtures for integration testing.
Uses real services (Qdrant, SearXNG) running in Docker containers.
"""
import os
import subprocess
import sys
import time
from pathlib import Path
from unittest.mock import AsyncMock, patch
import pytest
# Make the project root importable so the `src.` package imports below resolve
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
from src.database.factory import create_and_initialize_database # noqa: E402
from src.database.qdrant_adapter import QdrantAdapter # noqa: E402
from src.database.supabase_adapter import SupabaseAdapter # noqa: E402
@pytest.fixture(scope="session")
def docker_compose_services():
"""Start Docker Compose services for integration tests."""
compose_file = Path(__file__).parent.parent.parent / "docker-compose.yml"
if not compose_file.exists():
pytest.skip("Docker Compose file not found, skipping integration tests")
# Start services
try:
subprocess.run(
[
"docker",
"compose",
"-f",
str(compose_file),
"up",
"-d",
"--wait",
"qdrant",
"searxng",
"valkey",
],
check=True,
capture_output=True,
timeout=120,
)
        # `--wait` already blocks until container healthchecks pass; keep a
        # short buffer for services without healthchecks (see the
        # _wait_for_http sketch below for a polling alternative)
        time.sleep(10)
yield
except subprocess.TimeoutExpired:
pytest.fail("Docker services failed to start within timeout")
    except subprocess.CalledProcessError as e:
        stderr = e.stderr.decode(errors="replace") if e.stderr else ""
        pytest.skip(f"Failed to start Docker services: {e}\n{stderr}")
finally:
# Cleanup: Stop services
try:
subprocess.run(
["docker", "compose", "-f", str(compose_file), "down", "-v"],
check=False,
timeout=60,
)
except subprocess.TimeoutExpired:
pass
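

# A minimal readiness-probe sketch (an addition, not part of the original
# startup flow): poll an HTTP endpoint until it answers instead of relying
# on a fixed sleep. The health endpoint paths are assumptions; e.g. Qdrant
# typically answers on http://localhost:6333/healthz.
def _wait_for_http(url: str, timeout: float = 30.0, interval: float = 0.5) -> bool:
    """Return True once `url` answers an HTTP request, False on timeout."""
    import urllib.error
    import urllib.request

    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5):
                return True
        except (urllib.error.URLError, OSError):
            time.sleep(interval)
    return False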
@pytest.fixture(scope="session")
def qdrant_service_url(docker_compose_services):
"""Get Qdrant service URL for integration tests."""
return "http://localhost:6333"
@pytest.fixture(scope="session")
def searxng_service_url(docker_compose_services):
"""Get SearXNG service URL for integration tests."""
return "http://localhost:8080"
@pytest.fixture
async def qdrant_client(qdrant_service_url):
"""Create Qdrant client connected to test service."""
adapter = QdrantAdapter(
url=qdrant_service_url,
api_key=None, # No auth for test instance
)
    # Initialize, then clear any test-tagged data before and after the test
    await adapter.initialize()

    async def _delete_test_points():
        """Delete points tagged with test metadata; tolerate a missing collection."""
        if not getattr(adapter, "client", None):
            return
        import asyncio

        from qdrant_client.models import FieldCondition, Filter, MatchValue

        test_filter = Filter(
            must=[FieldCondition(key="test", match=MatchValue(value=True))],
        )
        try:
            # The underlying client call is synchronous, so run it off the event loop
            await asyncio.get_running_loop().run_in_executor(
                None,
                adapter.client.delete,
                adapter.CRAWLED_PAGES,
                test_filter,
            )
        except Exception:
            pass  # Ignore cleanup errors (collection might not exist)

    await _delete_test_points()
    yield adapter
    await _delete_test_points()
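

# The cleanup filter above matches only points whose payload contains
# `test: True`. This fixture is a small convenience added here (not part of
# the original suite) that tests can merge into document metadata so the
# cleanup can find their points.
@pytest.fixture
def test_point_payload():
    """Payload marker that the Qdrant test-data cleanup filter matches on."""
    return {"test": True}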
@pytest.fixture
async def supabase_client():
"""Create Supabase client for testing (if configured)."""
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
if not supabase_url or not supabase_key:
pytest.skip("Supabase credentials not configured, skipping Supabase tests")
adapter = SupabaseAdapter(
url=supabase_url,
key=supabase_key,
table_name="test_crawled_pages",
)
await adapter.initialize()
# Clean up test data
await adapter.delete_all_test_data()
yield adapter
# Cleanup
await adapter.delete_all_test_data()
@pytest.fixture
async def database_factory(qdrant_client):
"""Create database factory with test configuration."""
# Mock environment for testing
test_env = {
"VECTOR_DATABASE": "qdrant",
"QDRANT_URL": "http://localhost:6333",
"QDRANT_API_KEY": "",
"QDRANT_COLLECTION_NAME": "test_collection",
}
with patch.dict(os.environ, test_env):
adapter = await create_and_initialize_database()
yield adapter
# Cleanup
if hasattr(adapter, "close"):
await adapter.close()
@pytest.fixture
def integration_test_env(docker_compose_services):
"""Set up environment variables for integration tests.
Depends on docker_compose_services to ensure services are running.
"""
test_env = {
"ENVIRONMENT": "test",
"DATABASE_TYPE": "qdrant",
"QDRANT_URL": "http://localhost:6333",
"QDRANT_API_KEY": "",
"QDRANT_COLLECTION_NAME": "test_collection",
"SEARXNG_URL": "http://localhost:8080",
"CACHE_REDIS_URL": "redis://localhost:6379/1",
"ENHANCED_CONTEXT": "false",
"USE_RERANKING": "false",
"USE_AGENTIC_RAG": "false",
"USE_HYBRID_SEARCH": "false",
"LOG_LEVEL": "INFO",
}
# Backup original environment
original_env = {k: os.environ.get(k) for k in test_env}
# Set test environment
os.environ.update(test_env)
yield test_env
# Restore original environment
for key, value in original_env.items():
if value is None:
os.environ.pop(key, None)
else:
os.environ[key] = value
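

# An equivalent formulation using unittest.mock.patch.dict, which restores
# os.environ automatically even if a test errors mid-way. Offered as a
# hedged alternative; the fixture name is hypothetical and nothing in the
# original suite depends on it.
@pytest.fixture
def integration_test_env_patched(docker_compose_services):
    """Same idea as integration_test_env, but scoped via patch.dict."""
    test_env = {
        "ENVIRONMENT": "test",
        "QDRANT_URL": "http://localhost:6333",
        "SEARXNG_URL": "http://localhost:8080",
    }
    with patch.dict(os.environ, test_env):
        yield test_env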
@pytest.fixture
async def crawl4ai_session():
"""Create a Crawl4AI session for testing."""
try:
from crawl4ai import AsyncWebCrawler
async with AsyncWebCrawler(
headless=True,
verbose=False,
# Use minimal config for testing
browser_type="chromium",
headers={"User-Agent": "test-crawler/1.0"},
) as crawler:
yield crawler
except ImportError:
# Mock crawler if Crawl4AI not available
mock_crawler = AsyncMock()
mock_crawler.arun = AsyncMock()
mock_crawler.arun_many = AsyncMock()
yield mock_crawler
@pytest.fixture
def sample_urls():
"""Sample URLs for testing."""
return [
"https://example.com",
"https://httpbin.org/json",
"https://httpbin.org/html",
]
@pytest.fixture
def sample_crawl_results():
"""Sample crawl results for testing."""
return [
{
"url": "https://example.com",
"html": "<html><body><h1>Example</h1><p>This is an example page.</p></body></html>",
"cleaned_html": "<h1>Example</h1><p>This is an example page.</p>",
"markdown": "# Example\n\nThis is an example page.",
"extracted_content": "Example\nThis is an example page.",
"success": True,
"status_code": 200,
"response_headers": {"content-type": "text/html"},
"links": {"internal": [], "external": []},
"media": {"images": [], "videos": [], "audios": []},
},
{
"url": "https://httpbin.org/json",
"html": '<html><body><pre>{"slideshow": {"title": "Sample"}}</pre></body></html>',
"cleaned_html": '<pre>{"slideshow": {"title": "Sample"}}</pre>',
"markdown": '```\n{"slideshow": {"title": "Sample"}}\n```',
"extracted_content": '{"slideshow": {"title": "Sample"}}',
"success": True,
"status_code": 200,
"response_headers": {"content-type": "application/json"},
"links": {"internal": [], "external": []},
"media": {"images": [], "videos": [], "audios": []},
},
]
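

# A hedged convenience derived from the samples above: (url, markdown,
# metadata) triples carrying the `test: True` marker so stored documents
# match the cleanup filters. The triple shape is an assumption, not a
# contract of the database adapters.
@pytest.fixture
def sample_documents(sample_crawl_results):
    """(url, markdown, metadata) triples built from the sample crawl results."""
    return [
        (
            result["url"],
            result["markdown"],
            {"test": True, "status_code": result["status_code"]},
        )
        for result in sample_crawl_results
    ]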
@pytest.fixture
def performance_thresholds():
"""Performance thresholds for integration tests."""
return {
"crawl_single_url_ms": 5000, # 5 seconds max
"crawl_batch_urls_ms": 15000, # 15 seconds max for batch
"store_document_ms": 1000, # 1 second max
"search_documents_ms": 2000, # 2 seconds max
"e2e_workflow_ms": 20000, # 20 seconds max for full workflow
}
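

# A sketch of how tests might consume these budgets: time an operation and
# assert it stays under the named threshold. The helper name is an
# assumption, not part of the original suite.
def assert_within_threshold(elapsed_ms: float, thresholds: dict, key: str) -> None:
    """Fail if `elapsed_ms` exceeds the millisecond budget stored under `key`."""
    budget = thresholds[key]
    assert elapsed_ms <= budget, f"{key}: {elapsed_ms:.0f}ms exceeds {budget}ms budget"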
@pytest.fixture
async def cleanup_database(qdrant_client):
"""Fixture to clean up database after each test."""
yield
    # Clean up test-tagged points; the client call is synchronous, so run it
    # off the event loop rather than awaiting it directly
    try:
        import asyncio

        from qdrant_client.models import FieldCondition, Filter, MatchValue

        test_filter = Filter(
            must=[FieldCondition(key="test", match=MatchValue(value=True))],
        )
        await asyncio.get_running_loop().run_in_executor(
            None,
            qdrant_client.client.delete,
            qdrant_client.CRAWLED_PAGES,
            test_filter,
        )
    except Exception:
        pass  # Ignore cleanup errors
# Pytest markers for integration tests
def pytest_configure(config):
"""Configure pytest markers."""
config.addinivalue_line(
"markers",
"integration: mark test as integration test requiring Docker services",
)
config.addinivalue_line("markers", "e2e: mark test as end-to-end workflow test")
config.addinivalue_line(
"markers",
"performance: mark test as performance benchmark",
)
config.addinivalue_line("markers", "slow: mark test as slow running")
def pytest_collection_modifyitems(config, items):
"""Add markers to integration tests automatically."""
for item in items:
if "integration" in str(item.fspath):
item.add_marker(pytest.mark.integration)
item.add_marker(pytest.mark.slow)
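

# Typical invocations against these markers, for reference:
#   pytest -m integration       # only the Docker-backed tests
#   pytest -m "not slow"        # everything except the slow suite
#   pytest -m performance       # benchmark tests only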