Crawl4AI+SearXNG MCP Server

test_search_pipeline.py • 5.88 KiB

#!/usr/bin/env python3
"""
Test script to verify the search-scrape-RAG pipeline functionality.
This script tests the complete pipeline: search → scrape → RAG
"""

import sys
from pathlib import Path

import requests

# Add the "src" directory that sits next to this script to the module search
# path, so modules stored there can be imported regardless of the current
# working directory.
sys.path.append(str(Path(__file__).parent / "src"))


def test_searxng_connection():
    """Probe the SearXNG instance on localhost:8080 with a small JSON search.

    Issues a GET against the ``/search`` endpoint, reports how many results
    came back and, when there is at least one, prints the title and URL of
    the first result.

    Returns:
        bool: True when the request, status check and result reporting all
        complete; False when any step raises.
    """
    base_url = "http://localhost:8080"
    query_params = {"q": "python", "format": "json", "limit": 2}

    print("🔍 Testing SearXNG connection...")
    try:
        reply = requests.get(f"{base_url}/search", params=query_params, timeout=10)
        reply.raise_for_status()

        hits = reply.json().get("results", [])

        print("✅ SearXNG connected successfully")
        print(f"   Found {len(hits)} search results")

        if hits:
            print("   Sample result:")
            top = hits[0]
            # Missing fields are shown as 'N/A' rather than failing the probe.
            for label, key in (("Title", "title"), ("URL", "url")):
                print(f"   - {label}: {top.get(key, 'N/A')}")

        return True

    except Exception as e:
        # Deliberately broad: any failure is reported as a failed connection.
        print(f"❌ SearXNG connection failed: {e}")
        return False


def test_qdrant_connection():
    """Probe the Qdrant instance on localhost:6333 by listing its collections.

    Issues a GET against the ``/collections`` endpoint and prints whatever is
    stored under ``result.collections`` in the JSON response.

    Returns:
        bool: True when the request, status check and reporting all complete;
        False when any step raises.
    """
    base_url = "http://localhost:6333"

    print("\n🗄️ Testing Qdrant connection...")
    try:
        reply = requests.get(f"{base_url}/collections", timeout=10)
        reply.raise_for_status()

        payload = reply.json()
        print("✅ Qdrant connected successfully")

        # Absent keys fall back to empty containers so the listing still prints.
        listed = list(payload.get("result", {}).get("collections", []))
        print(f"   Available collections: {listed}")

        return True

    except Exception as e:
        # Deliberately broad: any failure is reported as a failed connection.
        print(f"❌ Qdrant connection failed: {e}")
        return False


def test_search_function_imports():
    """Check that the server module imports and exposes the search function.

    Imports ``crawl4ai_mcp`` (the module this script analyses under ``src/``,
    which the module-level ``sys.path`` setup makes importable) and verifies
    that it has a ``search`` attribute. Previously the ``try`` body imported
    nothing, so this check reported success unconditionally.

    Returns:
        bool: True when the module imports and defines ``search``; False when
        the import raises or the attribute is missing.
    """
    import importlib

    print("\n📦 Testing function imports...")
    try:
        # Test import of main components
        module = importlib.import_module("crawl4ai_mcp")
    except Exception as e:
        # Broad on purpose: importing a module executes arbitrary top-level
        # code, so failures are not limited to ImportError.
        print(f"❌ Import failed: {e}")
        return False

    if not hasattr(module, "search"):
        print("❌ Import failed: module 'crawl4ai_mcp' does not define 'search'")
        return False

    print("✅ Successfully imported search function and dependencies")
    return True


def analyze_search_function():
    """Scan the text of ``src/crawl4ai_mcp.py`` for expected pipeline markers.

    The file is read as UTF-8 text and searched for marker substrings; no
    code is imported or executed. Each expected component is printed with a
    pass/fail mark, and every missing component or suspicious pattern is
    collected and listed as an issue.

    Returns:
        bool: True only when the file could be read, every expected component
        marker was found and no potential issue was flagged; False otherwise.
    """
    print("\n🔬 Analyzing search function implementation...")

    # Anchor the path to this script (as the module-level sys.path setup does)
    # so the analysis does not depend on the current working directory.
    source_path = Path(__file__).parent / "src" / "crawl4ai_mcp.py"

    try:
        # Explicit encoding: the locale default may fail on non-ASCII source.
        content = source_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        print(f"❌ Analysis failed: {e}")
        return False

    # Check for key components
    checks = {
        "Search function defined": "async def search(" in content,
        "SearXNG integration": "searxng_url" in content
        and "search_endpoint" in content,
        "URL extraction": "valid_urls" in content,
        "Scraping integration": "scrape_urls.fn(" in content,
        "RAG integration": "perform_rag_query.fn(" in content,
        "Raw markdown support": "return_raw_markdown" in content,
        "Error handling": "try:" in content and "except" in content,
    }

    print("Search function analysis:")
    for check, passed in checks.items():
        status = "✅" if passed else "❌"
        print(f"   {status} {check}")

    # A failed component check counts as an issue; otherwise the summary
    # would report PASS right after printing a failure mark above.
    issues = [
        f"Missing expected component: {check}"
        for check, passed in checks.items()
        if not passed
    ]

    # Check for FunctionTool usage
    if "scrape_urls.fn(" in content:
        print("   ✅ Uses .fn attribute for FunctionTool calls")
    elif "scrape_urls(" in content:
        issues.append("Potential FunctionTool issue - not using .fn attribute")

    # Check for metadata_filter issues
    if "metadata_filter" in content:
        issues.append(
            "Uses metadata_filter (deprecated) - should use filter_metadata",
        )

    if issues:
        print("\n⚠️ Potential issues found:")
        for issue in issues:
            print(f"   - {issue}")
    else:
        print("\n✅ No obvious issues detected in search function")

    return not issues


def test_pipeline_flow():
    """Print the expected stages of the search-scrape-RAG pipeline.

    This is descriptive only: nothing is executed or validated, so the
    function always reports success.

    Returns:
        bool: Always True.
    """
    print("\n🔄 Testing pipeline flow logic...")

    stages = (
        "1. SearXNG Search: query → URLs",
        "2. URL Validation: filter valid URLs",
        "3. Content Scraping: URLs → markdown content",
        "4. Content Storage: markdown → Qdrant embeddings",
        "5. RAG Query: query + embeddings → results",
    )

    print("Expected pipeline flow:")
    # One indented line per stage, emitted in a single write.
    print("\n".join(f"   {stage}" for stage in stages))

    print("\n✅ Pipeline flow analysis complete")
    return True


def main():
    """Run every check in order and print a PASS/FAIL summary table.

    Returns:
        bool: True when all checks returned a truthy result, False otherwise.
    """
    divider = "=" * 60

    print("🚀 Testing Crawl4AI MCP Search-Scrape-RAG Pipeline")
    print(divider)

    # External dependencies come first, followed by the code-structure checks.
    suite = (
        ("SearXNG Connection", test_searxng_connection),
        ("Qdrant Connection", test_qdrant_connection),
        ("Function Imports", test_search_function_imports),
        ("Search Function Analysis", analyze_search_function),
        ("Pipeline Flow", test_pipeline_flow),
    )
    results = [(label, check()) for label, check in suite]

    # Summary
    print("\n" + divider)
    print("📊 TEST SUMMARY")
    print(divider)

    for label, outcome in results:
        verdict = "PASS" if outcome else "FAIL"
        print(f"{verdict:>4} | {label}")

    total = len(results)
    passed = sum(1 for _, outcome in results if outcome)

    print(f"\nResults: {passed}/{total} tests passed")

    all_passed = passed == total
    if all_passed:
        print(
            "🎉 All tests passed! The search-scrape-RAG pipeline appears to be properly implemented.",
        )
    else:
        print("⚠️ Some tests failed. Check the issues above.")

    return all_passed


if __name__ == "__main__":
    # Exit status 0 only when every check passed; 1 otherwise.
    sys.exit(0 if main() else 1)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/AI-enthusiasts/crawl4ai-rag-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_search_pipeline.py • 5.88 KiB

#!/usr/bin/env python3
"""
Test script to verify the search-scrape-RAG pipeline functionality.
This script tests the complete pipeline: search → scrape → RAG
"""

import sys
from pathlib import Path

import requests

# Add the "src" directory that sits next to this script to the module search
# path, so modules stored there can be imported regardless of the current
# working directory.
sys.path.append(str(Path(__file__).parent / "src"))


def test_searxng_connection():
    """Probe the SearXNG instance on localhost:8080 with a small JSON search.

    Issues a GET against the ``/search`` endpoint, reports how many results
    came back and, when there is at least one, prints the title and URL of
    the first result.

    Returns:
        bool: True when the request, status check and result reporting all
        complete; False when any step raises.
    """
    base_url = "http://localhost:8080"
    query_params = {"q": "python", "format": "json", "limit": 2}

    print("🔍 Testing SearXNG connection...")
    try:
        reply = requests.get(f"{base_url}/search", params=query_params, timeout=10)
        reply.raise_for_status()

        hits = reply.json().get("results", [])

        print("✅ SearXNG connected successfully")
        print(f"   Found {len(hits)} search results")

        if hits:
            print("   Sample result:")
            top = hits[0]
            # Missing fields are shown as 'N/A' rather than failing the probe.
            for label, key in (("Title", "title"), ("URL", "url")):
                print(f"   - {label}: {top.get(key, 'N/A')}")

        return True

    except Exception as e:
        # Deliberately broad: any failure is reported as a failed connection.
        print(f"❌ SearXNG connection failed: {e}")
        return False


def test_qdrant_connection():
    """Probe the Qdrant instance on localhost:6333 by listing its collections.

    Issues a GET against the ``/collections`` endpoint and prints whatever is
    stored under ``result.collections`` in the JSON response.

    Returns:
        bool: True when the request, status check and reporting all complete;
        False when any step raises.
    """
    base_url = "http://localhost:6333"

    print("\n🗄️ Testing Qdrant connection...")
    try:
        reply = requests.get(f"{base_url}/collections", timeout=10)
        reply.raise_for_status()

        payload = reply.json()
        print("✅ Qdrant connected successfully")

        # Absent keys fall back to empty containers so the listing still prints.
        listed = list(payload.get("result", {}).get("collections", []))
        print(f"   Available collections: {listed}")

        return True

    except Exception as e:
        # Deliberately broad: any failure is reported as a failed connection.
        print(f"❌ Qdrant connection failed: {e}")
        return False


def test_search_function_imports():
    """Check that the server module imports and exposes the search function.

    Imports ``crawl4ai_mcp`` (the module this script analyses under ``src/``,
    which the module-level ``sys.path`` setup makes importable) and verifies
    that it has a ``search`` attribute. Previously the ``try`` body imported
    nothing, so this check reported success unconditionally.

    Returns:
        bool: True when the module imports and defines ``search``; False when
        the import raises or the attribute is missing.
    """
    import importlib

    print("\n📦 Testing function imports...")
    try:
        # Test import of main components
        module = importlib.import_module("crawl4ai_mcp")
    except Exception as e:
        # Broad on purpose: importing a module executes arbitrary top-level
        # code, so failures are not limited to ImportError.
        print(f"❌ Import failed: {e}")
        return False

    if not hasattr(module, "search"):
        print("❌ Import failed: module 'crawl4ai_mcp' does not define 'search'")
        return False

    print("✅ Successfully imported search function and dependencies")
    return True


def analyze_search_function():
    """Scan the text of ``src/crawl4ai_mcp.py`` for expected pipeline markers.

    The file is read as UTF-8 text and searched for marker substrings; no
    code is imported or executed. Each expected component is printed with a
    pass/fail mark, and every missing component or suspicious pattern is
    collected and listed as an issue.

    Returns:
        bool: True only when the file could be read, every expected component
        marker was found and no potential issue was flagged; False otherwise.
    """
    print("\n🔬 Analyzing search function implementation...")

    # Anchor the path to this script (as the module-level sys.path setup does)
    # so the analysis does not depend on the current working directory.
    source_path = Path(__file__).parent / "src" / "crawl4ai_mcp.py"

    try:
        # Explicit encoding: the locale default may fail on non-ASCII source.
        content = source_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        print(f"❌ Analysis failed: {e}")
        return False

    # Check for key components
    checks = {
        "Search function defined": "async def search(" in content,
        "SearXNG integration": "searxng_url" in content
        and "search_endpoint" in content,
        "URL extraction": "valid_urls" in content,
        "Scraping integration": "scrape_urls.fn(" in content,
        "RAG integration": "perform_rag_query.fn(" in content,
        "Raw markdown support": "return_raw_markdown" in content,
        "Error handling": "try:" in content and "except" in content,
    }

    print("Search function analysis:")
    for check, passed in checks.items():
        status = "✅" if passed else "❌"
        print(f"   {status} {check}")

    # A failed component check counts as an issue; otherwise the summary
    # would report PASS right after printing a failure mark above.
    issues = [
        f"Missing expected component: {check}"
        for check, passed in checks.items()
        if not passed
    ]

    # Check for FunctionTool usage
    if "scrape_urls.fn(" in content:
        print("   ✅ Uses .fn attribute for FunctionTool calls")
    elif "scrape_urls(" in content:
        issues.append("Potential FunctionTool issue - not using .fn attribute")

    # Check for metadata_filter issues
    if "metadata_filter" in content:
        issues.append(
            "Uses metadata_filter (deprecated) - should use filter_metadata",
        )

    if issues:
        print("\n⚠️ Potential issues found:")
        for issue in issues:
            print(f"   - {issue}")
    else:
        print("\n✅ No obvious issues detected in search function")

    return not issues


def test_pipeline_flow():
    """Print the expected stages of the search-scrape-RAG pipeline.

    This is descriptive only: nothing is executed or validated, so the
    function always reports success.

    Returns:
        bool: Always True.
    """
    print("\n🔄 Testing pipeline flow logic...")

    stages = (
        "1. SearXNG Search: query → URLs",
        "2. URL Validation: filter valid URLs",
        "3. Content Scraping: URLs → markdown content",
        "4. Content Storage: markdown → Qdrant embeddings",
        "5. RAG Query: query + embeddings → results",
    )

    print("Expected pipeline flow:")
    # One indented line per stage, emitted in a single write.
    print("\n".join(f"   {stage}" for stage in stages))

    print("\n✅ Pipeline flow analysis complete")
    return True


def main():
    """Run every check in order and print a PASS/FAIL summary table.

    Returns:
        bool: True when all checks returned a truthy result, False otherwise.
    """
    divider = "=" * 60

    print("🚀 Testing Crawl4AI MCP Search-Scrape-RAG Pipeline")
    print(divider)

    # External dependencies come first, followed by the code-structure checks.
    suite = (
        ("SearXNG Connection", test_searxng_connection),
        ("Qdrant Connection", test_qdrant_connection),
        ("Function Imports", test_search_function_imports),
        ("Search Function Analysis", analyze_search_function),
        ("Pipeline Flow", test_pipeline_flow),
    )
    results = [(label, check()) for label, check in suite]

    # Summary
    print("\n" + divider)
    print("📊 TEST SUMMARY")
    print(divider)

    for label, outcome in results:
        verdict = "PASS" if outcome else "FAIL"
        print(f"{verdict:>4} | {label}")

    total = len(results)
    passed = sum(1 for _, outcome in results if outcome)

    print(f"\nResults: {passed}/{total} tests passed")

    all_passed = passed == total
    if all_passed:
        print(
            "🎉 All tests passed! The search-scrape-RAG pipeline appears to be properly implemented.",
        )
    else:
        print("⚠️ Some tests failed. Check the issues above.")

    return all_passed


if __name__ == "__main__":
    # Exit status 0 only when every check passed; 1 otherwise.
    sys.exit(0 if main() else 1)