Skip to main content
Glama

Scrapy MCP Server

by ThreeFish-AI
test_config.py (13.5 kB)
""" 单元测试:配置管理模块 测试 extractor.config 模块的配置加载、验证和使用 """ import pytest import os from unittest.mock import patch, Mock from pydantic import ValidationError from extractor.config import DataExtractorSettings, settings class TestDataExtractorSettings: """测试配置设置类""" def test_default_settings(self): """测试默认配置值""" config = DataExtractorSettings() # 测试默认值(考虑环境变量覆盖) assert config.server_name == "data-extractor" assert config.server_version == "0.1.5" assert config.enable_javascript is False assert config.concurrent_requests == 16 assert config.request_timeout == 30.0 assert config.rate_limit_requests_per_minute == 60 assert config.max_retries == 3 assert config.retry_delay == 1.0 assert config.cache_ttl_hours == 24 assert config.browser_timeout == 30 assert config.browser_headless is True def test_environment_variable_loading(self): """测试环境变量加载""" env_vars = { "DATA_EXTRACTOR_SERVER_NAME": "Custom Server", "DATA_EXTRACTOR_ENABLE_JAVASCRIPT": "true", "DATA_EXTRACTOR_CONCURRENT_REQUESTS": "32", "DATA_EXTRACTOR_REQUEST_TIMEOUT": "60.0", "DATA_EXTRACTOR_RATE_LIMIT_REQUESTS_PER_MINUTE": "120", "DATA_EXTRACTOR_MAX_RETRIES": "5", "DATA_EXTRACTOR_BROWSER_HEADLESS": "false", } with patch.dict(os.environ, env_vars): config = DataExtractorSettings() assert config.server_name == "Custom Server" assert config.enable_javascript is True assert config.concurrent_requests == 32 assert config.request_timeout == 60.0 assert config.rate_limit_requests_per_minute == 120 assert config.max_retries == 5 assert config.browser_headless is False def test_boolean_environment_variables(self): """测试布尔型环境变量的解析""" # 测试各种布尔值表示 boolean_test_cases = [ ("true", True), ("True", True), ("TRUE", True), ("1", True), ("yes", True), ("false", False), ("False", False), ("FALSE", False), ("0", False), ("no", False), ] for env_value, expected_value in boolean_test_cases: with patch.dict( os.environ, {"DATA_EXTRACTOR_ENABLE_JAVASCRIPT": env_value} ): config = DataExtractorSettings() assert config.enable_javascript is 
expected_value, ( f"Failed for {env_value}" ) def test_numeric_validation(self): """测试数值型配置的验证""" # 测试有效的数值 valid_configs = [ {"concurrent_requests": 1}, {"concurrent_requests": 100}, {"request_timeout": 0.1}, {"request_timeout": 300.0}, {"rate_limit_requests_per_minute": 1}, {"max_retries": 0}, {"max_retries": 10}, ] for config_data in valid_configs: config = DataExtractorSettings(**config_data) assert config is not None # 测试无效的数值 invalid_configs = [ {"concurrent_requests": 0}, # 必须大于0 {"concurrent_requests": -1}, # 不能为负数 {"request_timeout": -1.0}, # 不能为负数 {"rate_limit_requests_per_minute": -1}, # 不能为负数 {"max_retries": -1}, # 不能为负数 ] for config_data in invalid_configs: with pytest.raises(ValidationError): DataExtractorSettings(**config_data) def test_proxy_configuration(self): """测试代理配置""" # 无代理配置 config = DataExtractorSettings(use_proxy=False) assert config.use_proxy is False assert config.proxy_url is None # 有代理配置 proxy_url = "http://proxy.example.com:8080" config = DataExtractorSettings(use_proxy=True, proxy_url=proxy_url) assert config.use_proxy is True assert config.proxy_url == proxy_url def test_user_agent_configuration(self): """测试User-Agent配置""" # 随机User-Agent config = DataExtractorSettings(use_random_user_agent=True) assert config.use_random_user_agent is True assert config.default_user_agent is not None # 固定User-Agent custom_ua = "Custom Bot 1.0" config = DataExtractorSettings( use_random_user_agent=False, default_user_agent=custom_ua ) assert config.use_random_user_agent is False assert config.default_user_agent == custom_ua def test_cache_configuration(self): """测试缓存配置""" config = DataExtractorSettings( enable_caching=True, cache_ttl_hours=48, cache_max_size=200 ) assert config.enable_caching is True assert config.cache_ttl_hours == 48 assert config.cache_max_size == 200 def test_logging_configuration(self): """测试日志配置""" config = DataExtractorSettings( log_level="DEBUG", log_requests=True, log_responses=False ) assert config.log_level == "DEBUG" 
assert config.log_requests is True assert config.log_responses is False def test_browser_configuration(self): """测试浏览器配置""" config = DataExtractorSettings( browser_timeout=60, browser_headless=False, browser_window_size="1920x1080" ) assert config.browser_timeout == 60 assert config.browser_headless is False assert config.browser_window_size == "1920x1080" class TestGlobalSettings: """测试全局设置实例""" def test_global_settings_instance(self): """测试全局settings实例""" assert settings is not None assert isinstance(settings, DataExtractorSettings) assert settings.server_name is not None assert settings.server_version is not None def test_settings_immutability(self): """测试设置的不可变性(一旦创建就固定)""" original_name = settings.server_name # 不应该能直接修改settings with pytest.raises(ValidationError): settings.server_name = "Modified Name" # 验证值没有被修改 assert settings.server_name == original_name @patch.dict(os.environ, {"DATA_EXTRACTOR_SERVER_NAME": "Test Server"}) def test_settings_environment_override(self): """测试环境变量覆盖设置""" # 重新导入以获取新的设置 from importlib import reload import extractor.config reload(extractor.config) assert extractor.config.settings.server_name == "Test Server" class TestConfigurationValidation: """测试配置验证逻辑""" def test_timeout_validation(self): """测试超时配置验证""" # 有效超时配置 valid_timeouts = [0.1, 1.0, 30.0, 300.0] for timeout in valid_timeouts: config = DataExtractorSettings( request_timeout=timeout, browser_timeout=int(timeout) ) assert config.request_timeout == timeout # 无效超时配置 with pytest.raises(ValidationError): DataExtractorSettings(request_timeout=-1.0) with pytest.raises(ValidationError): DataExtractorSettings(browser_timeout=-1) def test_concurrency_validation(self): """测试并发配置验证""" # 有效并发配置 valid_values = [1, 8, 16, 32, 64] for value in valid_values: config = DataExtractorSettings(concurrent_requests=value) assert config.concurrent_requests == value # 无效并发配置 invalid_values = [0, -1, -10] for value in invalid_values: with pytest.raises(ValidationError): 
DataExtractorSettings(concurrent_requests=value) def test_rate_limit_validation(self): """测试速率限制配置验证""" # 有效速率限制 valid_rates = [1, 60, 120, 1000] for rate in valid_rates: config = DataExtractorSettings(rate_limit_requests_per_minute=rate) assert config.rate_limit_requests_per_minute == rate # 无效速率限制 with pytest.raises(ValidationError): DataExtractorSettings(rate_limit_requests_per_minute=-1) def test_retry_configuration_validation(self): """测试重试配置验证""" # 有效重试配置 config = DataExtractorSettings(max_retries=5, retry_delay=2.0) assert config.max_retries == 5 assert config.retry_delay == 2.0 # 边界情况 config = DataExtractorSettings(max_retries=0) # 0次重试应该有效 assert config.max_retries == 0 # 无效重试配置 with pytest.raises(ValidationError): DataExtractorSettings(max_retries=-1) with pytest.raises(ValidationError): DataExtractorSettings(retry_delay=-1.0) class TestConfigurationIntegration: """测试配置集成和实际使用""" def test_scrapy_settings_generation(self): """测试Scrapy设置生成""" config = DataExtractorSettings( concurrent_requests=32, request_timeout=60.0, use_random_user_agent=True, default_user_agent="Custom Bot", ) # 验证配置可以用于Scrapy设置 assert config.concurrent_requests > 0 assert config.request_timeout > 0 assert config.default_user_agent is not None def test_browser_settings_generation(self): """测试浏览器设置生成""" config = DataExtractorSettings( browser_timeout=45, browser_headless=True, browser_window_size="1366x768" ) # 验证浏览器配置的有效性 assert config.browser_timeout > 0 assert isinstance(config.browser_headless, bool) assert "x" in config.browser_window_size # 窗口尺寸格式 def test_proxy_settings_integration(self): """测试代理设置集成""" config = DataExtractorSettings( use_proxy=True, proxy_url="http://proxy.example.com:8080" ) if config.use_proxy: assert config.proxy_url is not None assert config.proxy_url.startswith("http") def test_cache_settings_integration(self): """测试缓存设置集成""" config = DataExtractorSettings( enable_caching=True, cache_ttl_hours=12, cache_max_size=150 ) if config.enable_caching: assert 
config.cache_ttl_hours > 0 assert config.cache_max_size > 0 def test_logging_settings_integration(self): """测试日志设置集成""" valid_log_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] for level in valid_log_levels: config = DataExtractorSettings( log_level=level, log_requests=True, log_responses=True ) assert config.log_level == level assert isinstance(config.log_requests, bool) assert isinstance(config.log_responses, bool) class TestConfigurationEdgeCases: """测试配置边界情况和异常处理""" def test_missing_environment_variables(self): """测试环境变量缺失时的处理""" # 清空相关环境变量 env_vars_to_clear = [ "DATA_EXTRACTOR_SERVER_NAME", "DATA_EXTRACTOR_ENABLE_JAVASCRIPT", "DATA_EXTRACTOR_CONCURRENT_REQUESTS", ] with patch.dict(os.environ, {}, clear=True): config = DataExtractorSettings() # 应该使用默认值 assert config.server_name == "data-extractor" assert config.enable_javascript is False assert config.concurrent_requests == 16 def test_invalid_environment_variable_types(self): """测试无效环境变量类型处理""" invalid_env_vars = { "DATA_EXTRACTOR_CONCURRENT_REQUESTS": "not-a-number", "DATA_EXTRACTOR_REQUEST_TIMEOUT": "invalid-float", "DATA_EXTRACTOR_ENABLE_JAVASCRIPT": "maybe", } with patch.dict(os.environ, invalid_env_vars): # 应该处理无效值并使用默认值或抛出异常 with pytest.raises((ValidationError, ValueError)): DataExtractorSettings() def test_extreme_configuration_values(self): """测试极端配置值""" # 最小值 config = DataExtractorSettings( concurrent_requests=1, request_timeout=0.1, rate_limit_requests_per_minute=1, max_retries=0, retry_delay=0.1, ) assert config is not None # 大值(但合理) config = DataExtractorSettings( concurrent_requests=1000, request_timeout=3600.0, rate_limit_requests_per_minute=10000, max_retries=100, retry_delay=60.0, ) assert config is not None

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ThreeFish-AI/scrapy-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.