Skip to main content
Glama

PDF Knowledgebase MCP Server

by juanqui
pyproject.toml (7.61 kB)
# Packaging, dependency, and tooling configuration for pdfkb-mcp.
# Built with hatchling; the package version is read dynamically from
# src/pdfkb/__init__.py (see [tool.hatch.version]).

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "pdfkb-mcp"
dynamic = ["version"]
description = "A Model Context Protocol server for managing PDF documents with vector search capabilities"
readme = "README.md"
license = "MIT"
authors = [
    { name = "Juan Villa", email = "juanqui@villafam.com" },
]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Text Processing :: Indexing",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
keywords = [
    "mcp",
    "pdf",
    "vector-search",
    "embeddings",
    "knowledge-base",
    "ai",
    "openai",
    "chroma",
]
requires-python = ">=3.10"
dependencies = [
    # Core MCP runtime
    "fastmcp>=2.12.3",
    # Vector database
    "chromadb>=1.0.15",
    # OpenAI API
    "openai>=1.99.1",
    # File system monitoring
    "watchdog>=6.0.0",
    # Environment configuration
    "python-dotenv>=0.9.9",
    # Web interface core dependencies
    "fastapi>=0.104.0",
    "hypercorn>=0.17.0",  # ASGI server with excellent websockets 14+ support
    "python-multipart>=0.0.6",
    "websockets>=12.0",
    "pydantic>=2.5.0",
    # Default parser and chunker dependencies for OOTB experience
    "pymupdf4llm>=0.0.27",
    "PyMuPDF>=1.26.3",
    "langchain-text-splitters>=0.3.9",
    # Local embeddings support (default)
    "torch>=2.0.0",
    "transformers>=4.55.0",
    "accelerate>=0.24.0",
    "safetensors>=0.4.0",
    "huggingface-hub>=0.19.0",
    "gguf>=0.17.0",
    # Hybrid search support (included by default)
    "whoosh>=2.7.4",
    # HTTP client for DeepInfra API
    "aiohttp>=3.9.0",
]

# Optional extras. Several of these repeat requirements that are already in
# the core dependency list above (e.g. "hybrid", "local-embeddings",
# "pymupdf4llm", "langchain").
[project.optional-dependencies]
hybrid = [
    "whoosh>=2.7.4",
]
local-embeddings = [
    "torch>=2.0.0",  # For MPS support and torch.compile
    "transformers>=4.55.0",  # For model support
    "accelerate>=0.24.0",  # For device management
    "safetensors>=0.4.0",  # For safe model loading
    "huggingface-hub>=0.19.0",  # For model downloading
]
dev = [
    # Testing
    "pytest>=8.0.0",
    "pytest-asyncio>=1.1.0",
    "pytest-cov>=6.0.0",
    "pytest-mock>=3.14.0",
    # Code quality
    "black>=24.8.0",
    "isort>=6.0.0",
    "flake8>=7.3.0",
    "mypy>=1.17.0",
    # Type stubs
    "types-aiofiles",
    # Development utilities
    "pre-commit>=4.2.0",
    "bump2version>=1.0.0",
]
unstructured = [
    "unstructured[pdf]>=0.18.11",
]
pymupdf4llm = [
    "pymupdf4llm>=0.0.27",
]
langchain = [
    "langchain-text-splitters>=0.3.9",
]
semantic = [
    "langchain-experimental>=0.3.0",
    "langchain-text-splitters>=0.3.9",
]
mineru = [
    "mineru[pipeline]>=2.1.10",
]
marker = [
    "marker-pdf>=1.8.3",
]
docling = [
    "docling>=2.43.0",
]
docling-complete = [
    "docling>=2.43.0",
    "easyocr>=1.7.2",
    "pytesseract>=0.3.13",
    "rapidocr-onnxruntime>=1.2.3",
]
llm = [
    "httpx>=0.28.1",
    "PyMuPDF>=1.26.3",
    "Jinja2>=3.1.6",
    "Pillow>=10.1.0",
]
unstructured_chunker = [
    "unstructured>=0.18.11",
]
web = [
    # Enhanced web server monitoring and performance
    "psutil>=6.0.0",  # System metrics for /api/metrics endpoint
    "starlette>=0.27.0",  # FastAPI dependency (explicit for clarity)
]
all = [
    # Include all optional dependency groups
    # NOTE(review): "semantic" and "local-embeddings" are not listed here;
    # "docling" is covered by "docling-complete". Confirm the omissions are
    # intentional.
    "pdfkb-mcp[dev]",
    "pdfkb-mcp[web]",
    "pdfkb-mcp[hybrid]",
    "pdfkb-mcp[unstructured]",
    "pdfkb-mcp[pymupdf4llm]",
    "pdfkb-mcp[langchain]",
    "pdfkb-mcp[mineru]",
    "pdfkb-mcp[marker]",
    "pdfkb-mcp[docling-complete]",
    "pdfkb-mcp[llm]",
    "pdfkb-mcp[unstructured_chunker]",
]

[project.urls]
Homepage = "https://github.com/juanqui/pdfkb-mcp"
Documentation = "https://github.com/juanqui/pdfkb-mcp#readme"
Repository = "https://github.com/juanqui/pdfkb-mcp"
Issues = "https://github.com/juanqui/pdfkb-mcp/issues"

[project.scripts]
pdfkb-mcp = "pdfkb.main:main"

[tool.hatch.build.targets.wheel]
packages = ["src/pdfkb"]
only-include = ["src/pdfkb", "src/webui"]

[tool.hatch.version]
path = "src/pdfkb/__init__.py"

[tool.hatch.build.targets.sdist]
include = [
    "/src/pdfkb",
    "/src/webui",
    "/tests",
    "/docs",
    "/README.md",
    "/pyproject.toml",
]

# Black configuration
[tool.black]
line-length = 120
target-version = ['py310']
include = '\.pyi?$'
extend-exclude = '''
/(
  # directories
  \.eggs
  | \.git
  | \.hg
  | \.mypy_cache
  | \.tox
  | \.venv
  | build
  | dist
)/
'''

# isort configuration
[tool.isort]
profile = "black"
line_length = 120
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true

# flake8 configuration
# NOTE(review): flake8 does not load pyproject.toml on its own; this table only
# takes effect through a plugin such as Flake8-pyproject. Confirm one is
# installed, otherwise flake8 runs with its built-in defaults.
[tool.flake8]
max-line-length = 120
extend-ignore = ["E203", "W503"]

# mypy configuration
[tool.mypy]
# Check against the oldest interpreter allowed by requires-python so that
# syntax and stdlib APIs newer than 3.10 are reported instead of accepted.
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true
strict_equality = true

[[tool.mypy.overrides]]
module = [
    "chromadb.*",
    "unstructured.*",
    "fastmcp.*",
    "mcp.*",
    "hypercorn.*",
    "websockets.*",
    "psutil.*",
]
ignore_missing_imports = true

# pytest configuration
[tool.pytest.ini_options]
minversion = "7.0"
addopts = "-ra -q --strict-markers --strict-config"
asyncio_mode = "auto"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# --strict-markers is set above, so every marker used in tests must be listed.
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "integration: marks tests as integration tests",
    "unit: marks tests as unit tests",
    "performance: marks tests as performance tests",
    "asyncio: marks tests as asyncio tests",
]

# Coverage configuration
[tool.coverage.run]
source = ["src/pdfkb"]
omit = [
    "*/tests/*",
    "*/test_*",
    "*/__pycache__/*",
]

[tool.coverage.report]
# Entries are regular expressions; the two containing backslashes are written
# as literal strings to avoid double escaping.
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "if self.debug:",
    "if settings.DEBUG",
    "raise AssertionError",
    "raise NotImplementedError",
    "if 0:",
    "if __name__ == .__main__.:",
    'class .*\bProtocol\):',
    '@(abc\.)?abstractmethod',
]

[tool.coverage.html]
directory = "htmlcov"

# Hatch environment configuration
[tool.hatch.envs.default]
dependencies = [
    "pytest",
    "pytest-asyncio",
    "pytest-cov",
    "pytest-mock",
    "black",
    "isort",
    "mypy",
    "flake8",
    # Web interface testing dependencies
    "httpx>=0.25.0",
    "psutil>=6.0.0",
    # Common optional dependencies needed by tests
    "unstructured[pdf]>=0.18.11",
    "pymupdf4llm>=0.0.27",
    "langchain-text-splitters>=0.3.9",
    "docling>=2.43.0",
    "easyocr>=1.7.2",
    "reportlab>=4.4.0",
    "whoosh>=2.7.4",
]

[tool.hatch.envs.default.scripts]
test = "pytest {args:tests}"
test-cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=src/pdfkb {args:tests}"
cov-html = "pytest --cov-report=html --cov-config=pyproject.toml --cov=src/pdfkb {args:tests}"
format = [
    "black src tests",
    "isort src tests",
]
lint = [
    "black --check --diff src tests",
    "isort --check-only --diff src tests",
    "flake8 src tests",
    # "mypy src",  # Temporarily disabled - needs type annotation fixes
]

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/juanqui/pdfkb-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.