# test_mocks.py • 6.15 kB
"""
Tests for the mock infrastructure to ensure it works correctly.
"""
import pytest
from tests.mocks import (
MockAsyncWebCrawler,
MockJsonCssExtractionStrategy,
MockCrawlResult,
create_mock_crawler,
create_mock_extraction_strategy,
generate_test_html,
generate_test_schema,
generate_extracted_data
)
@pytest.mark.unit
def test_mock_crawl_result():
    """Check the attributes of a MockCrawlResult built with no arguments."""
    crawl_result = MockCrawlResult()

    # Status and URL.
    assert crawl_result.success is True
    assert crawl_result.url == "https://example.com"

    # Content fields must be longer than a minimum size.
    html_length = len(crawl_result.cleaned_html)
    assert html_length > 100
    markdown_length = len(crawl_result.markdown)
    assert markdown_length > 50
    screenshot_length = len(crawl_result.screenshot)  # Base64 data
    assert screenshot_length > 50

    # Extracted content is exposed as a list.
    extracted = crawl_result.extracted_content
    assert isinstance(extracted, list)
@pytest.mark.unit
def test_mock_crawl_result_failure():
    """Check that MockCrawlResult keeps the failure state it was built with."""
    failed_result = MockCrawlResult(success=False, error_message="Test error")

    # Both constructor arguments must be readable back unchanged.
    assert failed_result.success is False
    assert failed_result.error_message == "Test error"
@pytest.mark.asyncio
@pytest.mark.unit
async def test_mock_async_web_crawler():
    """Run MockAsyncWebCrawler against example.com and check the result."""
    crawler = MockAsyncWebCrawler()

    async with crawler:
        crawl_result = await crawler.arun("https://example.com")

        # The crawl reports success and echoes the requested URL.
        assert crawl_result.success is True
        assert crawl_result.url == "https://example.com"

        # Some cleaned HTML must be present.
        html_length = len(crawl_result.cleaned_html)
        assert html_length > 0
@pytest.mark.asyncio
@pytest.mark.unit
async def test_mock_async_web_crawler_failure_scenarios():
    """Check the failures MockAsyncWebCrawler reports for three special URLs."""
    crawler = MockAsyncWebCrawler()

    async with crawler:
        # Scenario 1: invalid URL.
        invalid_result = await crawler.arun("https://invalid-url-test.com")
        assert invalid_result.success is False
        assert "Invalid URL" in invalid_result.error_message

        # Scenario 2: timeout URL (message compared case-insensitively).
        timeout_result = await crawler.arun("https://timeout-test.com")
        assert timeout_result.success is False
        timeout_message = timeout_result.error_message.lower()
        assert "timeout" in timeout_message

        # Scenario 3: 500 error URL.
        server_error_result = await crawler.arun("https://status/500")
        assert server_error_result.success is False
        assert "500" in server_error_result.error_message
@pytest.mark.unit
def test_mock_extraction_strategy():
    """Check that MockJsonCssExtractionStrategy.extract returns keyed records."""
    selectors = {"title": "h1", "price": ".price"}
    extractor = MockJsonCssExtractionStrategy(selectors)
    page = "<html><h1>Test</h1><span class='price'>$20</span></html>"

    extracted = extractor.extract(page)

    # The output is a non-empty list ...
    assert isinstance(extracted, list)
    assert len(extracted) > 0

    # ... whose first record has both schema fields.
    first_record = extracted[0]
    assert "title" in first_record
    assert "price" in first_record
@pytest.mark.unit
def test_create_mock_crawler():
    """Check that create_mock_crawler returns a MockAsyncWebCrawler."""
    # Build the successful crawler first, then the failing one.
    # NOTE(review): only the returned type is checked; the effect of the
    # `success` flag on crawl results is not exercised here.
    for success_flag in (True, False):
        crawler = create_mock_crawler(success=success_flag)
        assert isinstance(crawler, MockAsyncWebCrawler)
@pytest.mark.unit
def test_create_mock_extraction_strategy():
    """Check that the factory's strategy returns the data it was given."""
    expected_records = [{"title": "Test", "price": "$10"}]

    extractor = create_mock_extraction_strategy(extracted_data=expected_records)
    assert isinstance(extractor, MockJsonCssExtractionStrategy)

    # The supplied data must come back from extract().
    extracted = extractor.extract("dummy html")
    assert extracted == expected_records
@pytest.mark.unit
def test_generate_test_html():
    """Check that generate_test_html embeds the title and content in HTML."""
    markup = generate_test_html(title="Custom Title", content="Custom content")

    # Custom values first, then the opening and closing document tags.
    expected_fragments = ("Custom Title", "Custom content", "<html>", "</html>")
    for fragment in expected_fragments:
        assert fragment in markup
@pytest.mark.unit
def test_generate_test_schema():
    """Check generate_test_schema with default and custom field lists."""
    default_schema = generate_test_schema()
    assert isinstance(default_schema, dict)

    # Fields expected when no field list is given.
    for field_name in ("title", "price", "description"):
        assert field_name in default_schema

    # Test custom fields: requested ones are present, "price" is not.
    narrowed_schema = generate_test_schema(["title", "link"])
    for field_name in ("title", "link"):
        assert field_name in narrowed_schema
    assert "price" not in narrowed_schema
@pytest.mark.unit
def test_generate_extracted_data():
    """Check the record shape and item count from generate_extracted_data."""
    single_batch = generate_extracted_data()

    # With no arguments: a list holding exactly one dict.
    assert isinstance(single_batch, list)
    assert len(single_batch) == 1
    record = single_batch[0]
    assert isinstance(record, dict)
    assert "title" in record
    assert "price" in record

    # Test multiple items: the count follows num_items.
    triple_batch = generate_extracted_data(num_items=3)
    assert len(triple_batch) == 3
@pytest.mark.unit
def test_fixture_integration(mock_crawler, sample_schema, mock_extraction_result):
    """Check that the three fixtures have the expected basic shapes."""
    # Basic presence and type checks on each fixture.
    assert mock_crawler is not None
    assert isinstance(sample_schema, dict)
    assert isinstance(mock_extraction_result, dict)

    # The schema has the expected fields.
    for field_name in ("title", "price"):
        assert field_name in sample_schema

    # The extraction result carries a string title.
    title_value = mock_extraction_result.get("title")
    assert isinstance(title_value, str)
@pytest.mark.asyncio
@pytest.mark.unit
async def test_mock_crawler_with_extraction_strategy():
    """Check a MockAsyncWebCrawler run that is given an extraction strategy."""
    crawler = MockAsyncWebCrawler()
    extractor = MockJsonCssExtractionStrategy({"title": "h1"})

    async with crawler:
        crawl_result = await crawler.arun(
            "https://example.com",
            extraction_strategy=extractor,
        )

        assert crawl_result.success is True

        # Extracted content is non-empty and its first entry is a dict.
        extracted = crawl_result.extracted_content
        assert len(extracted) > 0
        assert isinstance(extracted[0], dict)
@pytest.mark.unit
def test_all_mock_classes_instantiate():
    """Smoke test: each mock class can be constructed without raising."""
    # Construct everything first so import or constructor problems surface
    # before any assertion runs.
    instances = (
        MockCrawlResult(),
        MockAsyncWebCrawler(),
        MockJsonCssExtractionStrategy({}),
    )

    for instance in instances:
        assert instance is not None