Glama

Scrapy MCP Server

by ThreeFish-AI
pyproject.toml (3.8 kB)
[build-system]
requires = ["setuptools>=45", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
where = ["."]
include = ["extractor*"]
exclude = ["tests*", "reports*", "htmlcov*", "scripts*"]

[project]
name = "data-extractor"
version = "0.1.5"
description = "A robust web scraping MCP server based on Scrapy and FastMCP"
readme = "README.md"
requires-python = ">=3.12"
authors = [{ name = "Aurelius", email = "aureliusshu@gmail.com" }]
license = "MIT"
keywords = ["mcp", "scraping", "scrapy", "web-crawler"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
dependencies = [
    "fastmcp>=2.11.0",
    "scrapy>=2.11.0",
    "aiohttp>=3.9.0",
    "beautifulsoup4>=4.12.0",
    "lxml>=5.0.0",
    "requests>=2.32.0",
    "selenium>=4.20.0",
    "playwright>=1.45.0",
    "undetected-chromedriver>=3.5.0",
    "fake-useragent>=1.5.0",
    "python-dotenv>=1.0.0",
    "pydantic>=2.8.0",
    "pydantic-settings>=2.0.0",
    "httpx>=0.27.0",
    "twisted>=24.7.0",
    "ruff>=0.12.12",
    "markitdown[all]>=0.0.1a2",
    "pypdf>=5.0.0",
    "pymupdf>=1.26.4",
]

[project.optional-dependencies]
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.23.0",
    "pytest-cov>=6.0.0",
    "pytest-html>=4.1.0",
    "pytest-json-report>=1.5.0",
    "pytest-mock>=3.14.0",
    "pytest-xdist>=3.6.0",
    "mypy>=1.10.0",
    "pre-commit>=3.8.0",
    "coverage[toml]>=7.6.0",
]

[project.scripts]
data-extractor = "extractor.server:main"

[tool.pytest.ini_options]
minversion = "8.0"
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
testpaths = ["tests"]
addopts = [
    "-ra",
    "--strict-config",
    "--strict-markers",
    "--disable-warnings",
    "--tb=short",
    "-v",
    "--cov=extractor",
    "--cov-report=html:htmlcov",
    "--cov-report=term-missing",
    "--cov-report=xml:coverage.xml",
    "--cov-report=json:coverage.json",
    "--html=tests/reports/test-report.html",
    "--self-contained-html",
    "--json-report",
    "--json-report-file=tests/reports/test-results.json"
]
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "integration: marks tests as integration tests",
    "unit: marks tests as unit tests",
    "requires_network: marks tests that require network access",
    "requires_browser: marks tests that require browser setup"
]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
log_cli = true
log_cli_level = "INFO"
log_cli_format = "%(asctime)s [%(levelname)8s] %(name)s: %(message)s"
log_cli_date_format = "%Y-%m-%d %H:%M:%S"

[tool.coverage.run]
source = ["extractor"]
omit = [
    "extractor/__init__.py",
    "tests/*",
    "venv/*",
    ".venv/*",
    "setup.py"
]
branch = true
parallel = true

[tool.coverage.report]
show_missing = true
skip_covered = false
sort = "Cover"
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "if self.debug:",
    "if settings.DEBUG",
    "raise AssertionError",
    "raise NotImplementedError",
    "if 0:",
    "if __name__ == .__main__.:",
    "class .*\\bProtocol\\):",
    "@(abc\\.)?abstractmethod",
]

[tool.coverage.html]
directory = "htmlcov"
title = "Data Extractor Test Coverage Report"

[tool.coverage.xml]
output = "tests/reports/coverage.xml"

[tool.coverage.json]
output = "tests/reports/coverage.json"

[tool.mypy]
python_version = "3.12"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true

[tool.uv]
dev-dependencies = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.23.0",
    "mypy>=1.10.0",
    "pre-commit>=3.8.0",
    "types-requests>=2.32.4.20250809",
]
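
The [project.scripts] entry point (data-extractor = "extractor.server:main") means installing the package exposes a data-extractor console command that starts the FastMCP server. The actual contents of extractor/server.py are not shown on this page; the following is a minimal sketch, assuming a FastMCP 2.x server with a hypothetical scrape_url tool, of how such an entry point is typically wired up.

# Minimal sketch of an entry point shaped like extractor.server:main.
# The tool name, parameters, and fetch logic are illustrative assumptions;
# the real extractor package is not shown in this listing.
import httpx
from fastmcp import FastMCP

mcp = FastMCP("data-extractor")


@mcp.tool()
async def scrape_url(url: str) -> str:
    """Fetch a page and return its raw HTML (hypothetical example tool)."""
    async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
        response = await client.get(url)
        response.raise_for_status()
        return response.text


def main() -> None:
    # Invoked by the data-extractor console script from [project.scripts];
    # run() serves the registered tools over stdio by default.
    mcp.run()


if __name__ == "__main__":
    main()

Once the project is installed, the data-extractor command calls main(), and FastMCP serves the registered tools over stdio, which is how MCP clients usually launch a local server.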

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ThreeFish-AI/scrapy-mcp'
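
The same endpoint can be queried programmatically. A short sketch in Python using requests, assuming the endpoint returns JSON (the response schema is not documented on this page):

# Fetch this server's directory entry from the Glama MCP API.
# The response is assumed to be JSON; its exact fields are not
# specified in this listing, so it is printed verbatim.
import json

import requests

resp = requests.get(
    "https://glama.ai/api/mcp/v1/servers/ThreeFish-AI/scrapy-mcp",
    timeout=10,
)
resp.raise_for_status()
print(json.dumps(resp.json(), indent=2))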

If you have feedback or need assistance with the MCP directory API, please join our Discord server.