Skip to main content
Glama

Scrapy MCP Server

by ThreeFish-AI
conftest.py (3.82 kB)
"""Pytest configuration and shared fixtures."""

import asyncio
import tempfile
from unittest.mock import Mock, AsyncMock

import pytest

from extractor.config import DataExtractorSettings
from extractor.scraper import WebScraper
from extractor.advanced_features import AntiDetectionScraper, FormHandler


@pytest.fixture(scope="session")
def event_loop():
    """Create an instance of the default event loop for the test session.

    Yields:
        A new event loop obtained from the default event loop policy. The
        loop is closed after the fixture is torn down.
    """
    # NOTE(review): this presumably overrides pytest-asyncio's ``event_loop``
    # fixture; newer pytest-asyncio releases deprecate that override, so
    # confirm against the pinned version before upgrading.
    loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    loop.close()


@pytest.fixture
def test_config():
    """Test configuration with safe defaults.

    Returns:
        A ``DataExtractorSettings`` instance with JavaScript and random
        user agents disabled, a single concurrent request, a browser
        timeout of 10 and at most 2 retries.
    """
    return DataExtractorSettings(
        server_name="Test Data Extractor",
        server_version="1.0.0-test",
        enable_javascript=False,
        use_random_user_agent=False,
        concurrent_requests=1,
        browser_timeout=10,
        max_retries=2,
    )


@pytest.fixture
def mock_web_scraper():
    """Mock WebScraper for testing.

    Returns:
        A ``Mock`` constrained to the ``WebScraper`` spec whose
        ``scrape_url`` and ``scrape_multiple_urls`` are ``AsyncMock``s.
    """
    scraper = Mock(spec=WebScraper)
    scraper.scrape_url = AsyncMock()
    scraper.scrape_multiple_urls = AsyncMock()
    return scraper


@pytest.fixture
def mock_anti_detection_scraper():
    """Mock AntiDetectionScraper for testing.

    Returns:
        A ``Mock`` constrained to the ``AntiDetectionScraper`` spec whose
        ``scrape_with_stealth`` is an ``AsyncMock``.
    """
    scraper = Mock(spec=AntiDetectionScraper)
    scraper.scrape_with_stealth = AsyncMock()
    return scraper


@pytest.fixture
def mock_form_handler():
    """Mock FormHandler for testing.

    Returns:
        A ``Mock`` constrained to the ``FormHandler`` spec whose
        ``fill_and_submit_form`` is an ``AsyncMock``.
    """
    handler = Mock(spec=FormHandler)
    handler.fill_and_submit_form = AsyncMock()
    return handler


@pytest.fixture
def sample_html():
    """Sample HTML content for testing.

    Returns:
        An HTML document string containing a title, a heading, two
        paragraphs inside ``.content``, a three-item list, one link and a
        login-style form.
    """
    return """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Test Page</title>
    </head>
    <body>
        <h1>Test Heading</h1>
        <div class="content">
            <p>Test paragraph 1</p>
            <p>Test paragraph 2</p>
        </div>
        <ul class="list">
            <li>Item 1</li>
            <li>Item 2</li>
            <li>Item 3</li>
        </ul>
        <a href="https://example.com">Test Link</a>
        <form id="test-form">
            <input type="text" name="username" />
            <input type="password" name="password" />
            <button type="submit">Submit</button>
        </form>
    </body>
    </html>
    """


@pytest.fixture
def sample_extraction_config():
    """Sample extraction configuration for testing.

    Returns:
        A dict mapping field names either to a bare selector string or to
        a dict with ``selector``, ``multiple`` and ``attr`` keys.
    """
    return {
        "title": "title",
        "heading": "h1",
        "content": {"selector": ".content p", "multiple": True, "attr": "text"},
        "links": {"selector": "a", "multiple": True, "attr": "href"},
    }


@pytest.fixture
def sample_scrape_result():
    """Sample scrape result for testing.

    Returns:
        A dict with ``url``, ``status_code``, ``title`` and ``content``
        entries plus nested ``extracted_data`` and ``metadata`` dicts.
    """
    return {
        "url": "https://example.com",
        "status_code": 200,
        "title": "Test Page",
        "content": "Test content",
        "extracted_data": {
            "title": "Test Page",
            "heading": "Test Heading",
            "content": ["Test paragraph 1", "Test paragraph 2"],
            "links": ["https://example.com"],
        },
        "metadata": {
            "content_length": 500,
            "response_time": 1.5,
            "final_url": "https://example.com",
            "content_type": "text/html",
        },
    }


@pytest.fixture
def temp_cache_dir():
    """Temporary directory for cache testing.

    Yields:
        The path of a temporary directory, which is removed when the
        ``TemporaryDirectory`` context exits at fixture teardown.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        yield temp_dir


@pytest.fixture
def mock_http_response():
    """Mock HTTP response for testing.

    Returns:
        An unconstrained ``Mock`` with ``status_code``, ``text``,
        ``headers`` and ``url`` attributes set to a small HTML page served
        from ``https://example.com`` with status 200.
    """
    mock_response = Mock()
    mock_response.status_code = 200
    mock_response.text = """
    <html>
        <head><title>Mock Page</title></head>
        <body><h1>Mock Content</h1></body>
    </html>
    """
    mock_response.headers = {"content-type": "text/html"}
    mock_response.url = "https://example.com"
    return mock_response

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ThreeFish-AI/scrapy-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.