pyproject.toml•3.8 kB
[build-system]
# PEP 517 build configuration.
#
# `[project]` declares `license = "MIT"` as a bare SPDX expression (PEP 639).
# setuptools gained support for that form in the 77 series (the Python
# Packaging User Guide recommends 77.0.3 as the floor); earlier releases reject
# the string during metadata validation, and releases before 61 do not read
# `[project]` metadata at all. The previous `>=45` floor therefore allowed
# build environments that cannot build this project.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
# Package discovery: search from the repository root and ship only the
# `extractor` package tree; test, report, coverage-output and script
# directories are kept out of the distribution.
where = ["."]
include = ["extractor*"]
exclude = [
    "tests*",
    "reports*",
    "htmlcov*",
    "scripts*",
]

[project]
# Core distribution metadata (PEP 621).
name = "data-extractor"
version = "0.1.5"
description = "A robust web scraping MCP server based on Scrapy and FastMCP"
readme = "README.md"
license = "MIT"
requires-python = ">=3.12"
authors = [{ name = "Aurelius", email = "aureliusshu@gmail.com" }]
keywords = ["mcp", "scraping", "scrapy", "web-crawler"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
# Runtime requirements (PEP 508 strings), kept in alphabetical order.
dependencies = [
    "aiohttp>=3.9.0",
    "beautifulsoup4>=4.12.0",
    "fake-useragent>=1.5.0",
    "fastmcp>=2.11.0",
    "httpx>=0.27.0",
    "lxml>=5.0.0",
    "markitdown[all]>=0.0.1a2",
    "playwright>=1.45.0",
    "pydantic>=2.8.0",
    "pydantic-settings>=2.0.0",
    "pymupdf>=1.26.4",
    "pypdf>=5.0.0",
    "python-dotenv>=1.0.0",
    "requests>=2.32.0",
    # NOTE(review): ruff is a linter/formatter and is normally a development
    # tool rather than a runtime requirement - confirm nothing in `extractor`
    # invokes it before moving it to the development dependency lists.
    "ruff>=0.12.12",
    "scrapy>=2.11.0",
    "selenium>=4.20.0",
    "twisted>=24.7.0",
    "undetected-chromedriver>=3.5.0",
]

[project.optional-dependencies]
# `pip install data-extractor[dev]` - test, coverage, typing and hook tooling.
dev = [
    "coverage[toml]>=7.6.0",
    "mypy>=1.10.0",
    "pre-commit>=3.8.0",
    "pytest>=8.0.0",
    # `[tool.pytest.ini_options]` sets `asyncio_default_fixture_loop_scope`,
    # which pytest-asyncio only recognises from 0.24.0. With `--strict-config`
    # in `addopts`, an older plugin makes pytest abort on the unknown option,
    # so the floor must not allow 0.23.x.
    "pytest-asyncio>=0.24.0",
    "pytest-cov>=6.0.0",
    "pytest-html>=4.1.0",
    "pytest-json-report>=1.5.0",
    "pytest-mock>=3.14.0",
    "pytest-xdist>=3.6.0",
    # Type stubs for the `requests` runtime dependency so mypy can check code
    # that uses it; same pin as the `[tool.uv]` development list.
    "types-requests>=2.32.4.20250809",
]

[project.scripts]
# Console entry point: the `data-extractor` command maps to the `main`
# callable in the `extractor.server` module.
data-extractor = "extractor.server:main"

[tool.pytest.ini_options]
# Test discovery.
minversion = "8.0"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# pytest-asyncio settings.
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"

# Default command-line options. The --cov*, --html/--self-contained-html and
# --json-report* flags are provided by the pytest-cov, pytest-html and
# pytest-json-report plugins, so those plugins must be installed for a plain
# `pytest` invocation to start.
#
# NOTE(review): the XML/JSON coverage paths given here (`coverage.xml`,
# `coverage.json`) differ from the `output` values configured under
# `[tool.coverage.xml]` / `[tool.coverage.json]` (`tests/reports/...`).
# Confirm which location is intended and align the two.
addopts = [
    "-ra",
    "--strict-config",
    "--strict-markers",
    "--disable-warnings",
    "--tb=short",
    "-v",
    "--cov=extractor",
    "--cov-report=html:htmlcov",
    "--cov-report=term-missing",
    "--cov-report=xml:coverage.xml",
    "--cov-report=json:coverage.json",
    "--html=tests/reports/test-report.html",
    "--self-contained-html",
    "--json-report",
    "--json-report-file=tests/reports/test-results.json",
]

# Every marker used in the suite has to be registered here because
# `--strict-markers` is enabled above.
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "integration: marks tests as integration tests",
    "unit: marks tests as unit tests",
    "requires_network: marks tests that require network access",
    "requires_browser: marks tests that require browser setup",
]

# Live log output while tests run.
log_cli = true
log_cli_level = "INFO"
log_cli_format = "%(asctime)s [%(levelname)8s] %(name)s: %(message)s"
log_cli_date_format = "%Y-%m-%d %H:%M:%S"

[tool.coverage.run]
# Measure the `extractor` package, recording branch coverage as well as
# line coverage.
source = ["extractor"]
branch = true
# Parallel mode: coverage.py writes a separate, uniquely suffixed data file per
# process, to be combined before reporting.
parallel = true
# Paths left out of measurement.
omit = [
    "extractor/__init__.py",
    "tests/*",
    "venv/*",
    ".venv/*",
    "setup.py",
]

[tool.coverage.report]
# Terminal report layout.
show_missing = true
skip_covered = false
sort = "Cover"
# Regular expressions; a source line matching any of them is excluded from the
# report. Written as TOML literal strings so backslashes need no doubling.
exclude_lines = [
    'pragma: no cover',
    'def __repr__',
    'if self.debug:',
    'if settings.DEBUG',
    'raise AssertionError',
    'raise NotImplementedError',
    'if 0:',
    'if __name__ == .__main__.:',
    'class .*\bProtocol\):',
    '@(abc\.)?abstractmethod',
]

# Output locations used by coverage.py's own `html`, `xml` and `json` reporters.
[tool.coverage.html]
title = "Data Extractor Test Coverage Report"
directory = "htmlcov"

[tool.coverage.xml]
output = "tests/reports/coverage.xml"

[tool.coverage.json]
output = "tests/reports/coverage.json"

[tool.mypy]
# Type-check against the minimum supported interpreter (see `requires-python`).
python_version = "3.12"
# Strictness: every function must be annotated, and returning `Any` from a
# typed function is reported.
disallow_untyped_defs = true
warn_return_any = true
# Report mypy config sections that end up matching nothing.
warn_unused_configs = true

[tool.uv]
# Development requirements installed by uv alongside the project.
#
# This list must contain every pytest plugin that `addopts` in
# `[tool.pytest.ini_options]` relies on (pytest-cov, pytest-html,
# pytest-json-report); without them pytest exits with "unrecognized arguments"
# in an environment built from this list alone. It is kept in step with the
# `dev` extra under `[project.optional-dependencies]`.
#
# NOTE(review): recent uv releases document `tool.uv.dev-dependencies` as a
# legacy field in favour of the standard `[dependency-groups]` table - consider
# migrating once the minimum supported uv version is confirmed.
dev-dependencies = [
    "coverage[toml]>=7.6.0",
    "mypy>=1.10.0",
    "pre-commit>=3.8.0",
    "pytest>=8.0.0",
    # 0.24.0 is the first release that recognises the
    # `asyncio_default_fixture_loop_scope` option set in the pytest config;
    # older versions fail under `--strict-config`.
    "pytest-asyncio>=0.24.0",
    "pytest-cov>=6.0.0",
    "pytest-html>=4.1.0",
    "pytest-json-report>=1.5.0",
    "pytest-mock>=3.14.0",
    "pytest-xdist>=3.6.0",
    "types-requests>=2.32.4.20250809",
]