Skip to main content
Glama

PDF Knowledgebase MCP Server

by juanqui
Dockerfile (6.4 kB)
# Multi-stage Dockerfile for pdfkb-mcp MCP Server
# Optimized for size, security, and performance
# Base image: python:3.11-slim with CPU-only PyTorch for optimal size/compatibility balance
#
# Build arguments:
#   PYTHON_VERSION  Tag prefix of the python:<version>-slim base image (default 3.11).
#   PDFKB_VERSION   Value written to the image "version" label (default latest).
#   BUILD_DATE      Value written to the image "created" label (no default).
#   VCS_REF         Value written to the image "revision" label (no default).

# Build arguments for customization
ARG PYTHON_VERSION=3.11
ARG PDFKB_VERSION=latest

# ============================================================================
# Stage 1: Builder - Install build dependencies and compile packages
# ============================================================================
FROM python:${PYTHON_VERSION}-slim AS builder

# Build arguments
ARG TARGETPLATFORM
ARG BUILDPLATFORM

# Install build dependencies in a single layer:
#   build-essential, gcc, g++  - build essentials
#   python3-dev                - for Python packages that need compilation
#   git                        - for VCS dependencies
#   ca-certificates            - SSL certificates for downloads
#   pkg-config                 - for some native dependencies
# The apt package lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        gcc \
        g++ \
        python3-dev \
        git \
        ca-certificates \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Set up Python environment
ENV PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Create build directory
WORKDIR /build

# Copy the project metadata, sources and README needed to install the package.
# Any change to these files invalidates the cache for the COPY layers and
# every layer after them.
COPY pyproject.toml .
COPY src/ src/
COPY README.md .

# Install UV (Rust-based Python package installer) for faster builds
RUN pip install --no-cache-dir --upgrade uv

# Install CPU-only PyTorch first to avoid CUDA dependencies
RUN uv pip install --system --no-cache \
        --index-url https://download.pytorch.org/whl/cpu \
        torch torchvision torchaudio

# Install the package with remaining dependencies from pyproject.toml.
# PyTorch is now already installed with CPU-only support.
# Afterwards remove the build tools to save space, and strip bytecode here so
# the files copied into the runtime stage are already clean (deleting them in
# a later runtime layer would not shrink the image).
# NOTE(review): this is an editable install rooted at /build, a path that is
# not copied to the runtime stage; the runtime image sets PYTHONPATH=/app/src
# and copies src/ there - confirm that is what makes the package importable.
RUN uv pip install --system --no-cache -e . \
    && pip uninstall -y pip setuptools wheel uv \
    && { find /usr/local -name "*.pyc" -delete || true; } \
    && { find /usr/local -type d -name "__pycache__" -prune -exec rm -rf {} + || true; }

# ============================================================================
# Stage 2: Runtime - Minimal production image
# ============================================================================
FROM python:${PYTHON_VERSION}-slim AS runtime

# Build arguments
ARG PYTHON_VERSION
ARG PDFKB_VERSION
ARG BUILD_DATE
ARG VCS_REF

# Add labels for metadata
LABEL org.opencontainers.image.title="pdfkb-mcp" \
      org.opencontainers.image.description="PDF Knowledgebase MCP Server - Document search with vector embeddings" \
      org.opencontainers.image.version="${PDFKB_VERSION}" \
      org.opencontainers.image.created="${BUILD_DATE}" \
      org.opencontainers.image.revision="${VCS_REF}" \
      org.opencontainers.image.source="https://github.com/juanqui/pdfkb-mcp" \
      org.opencontainers.image.licenses="MIT" \
      org.opencontainers.image.authors="Juan Villa <juanqui@villafam.com>"

# Install only runtime system dependencies with aggressive cleanup:
#   libc6, libgcc-s1, libstdc++6 - essential runtime libraries
#   curl                         - for HTTP health checks
#   ca-certificates              - SSL/TLS certificates
# "|| true" is scoped to the best-effort find commands only. sh gives && and
# || equal precedence, so a trailing "|| true" on the whole chain would also
# hide a failed "apt-get update" or "apt-get install".
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libc6 \
        libgcc-s1 \
        libstdc++6 \
        curl \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /var/cache/apt/archives/* \
    && rm -rf /var/cache/apt/archives/partial/* \
    && rm -rf /var/log/apt/* \
    && rm -rf /var/log/dpkg.log \
    && rm -rf /root/.cache \
    && rm -rf /tmp/* \
    && { find /usr/local -name "*.pyc" -delete || true; } \
    && { find /usr/local -type d -name "__pycache__" -prune -exec rm -rf {} + || true; }

# Create non-root user for security
RUN groupadd -r -g 1001 pdfkb && \
    useradd -r -g pdfkb -u 1001 -m -s /bin/false pdfkb

# Set up Python environment
ENV PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH=/app/src \
    PATH="/home/pdfkb/.local/bin:$PATH"

# Application directories
ENV PDFKB_APP_DIR=/app \
    PDFKB_KNOWLEDGEBASE_PATH=/app/documents \
    PDFKB_CACHE_DIR=/app/cache \
    PDFKB_LOG_DIR=/app/logs \
    PDFKB_CONFIG_DIR=/app/config

# Create application directories with proper ownership
RUN mkdir -p ${PDFKB_APP_DIR} \
        ${PDFKB_KNOWLEDGEBASE_PATH} \
        ${PDFKB_CACHE_DIR} \
        ${PDFKB_LOG_DIR} \
        ${PDFKB_CONFIG_DIR} \
        /home/pdfkb/.local/bin && \
    chown -R pdfkb:pdfkb ${PDFKB_APP_DIR} /home/pdfkb

# Set working directory
WORKDIR ${PDFKB_APP_DIR}

# Copy Python packages and installed libraries from builder stage.
# NOTE(review): the site-packages path hard-codes "python3.11" and does not
# follow the PYTHON_VERSION build argument; update both paths when building
# with a different Python minor version.
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy application source code
COPY --chown=pdfkb:pdfkb src/ src/
COPY --chown=pdfkb:pdfkb pyproject.toml .
COPY --chown=pdfkb:pdfkb README.md .

# Copy entrypoint script and make it executable. This still runs as root;
# the switch to the unprivileged user happens once, directly below.
COPY --chown=pdfkb:pdfkb docker-entrypoint.sh /app/docker-entrypoint.sh
RUN chmod +x /app/docker-entrypoint.sh

# Switch to non-root user for everything that runs in the container
USER pdfkb

# Default environment variables for container deployment
# Models are downloaded dynamically on first use (not pre-installed)
ENV PDFKB_EMBEDDING_PROVIDER=local \
    PDFKB_LOCAL_EMBEDDING_MODEL="Qwen/Qwen3-Embedding-0.6B" \
    PDFKB_ENABLE_HYBRID_SEARCH=true \
    PDFKB_TRANSPORT=http \
    PDFKB_SERVER_HOST=0.0.0.0 \
    PDFKB_SERVER_PORT=8000 \
    PDFKB_WEB_ENABLE=false \
    PDFKB_LOG_LEVEL=INFO \
    PDFKB_MAX_PARALLEL_PARSING=1 \
    PDFKB_MAX_PARALLEL_EMBEDDING=1 \
    PDFKB_BACKGROUND_QUEUE_WORKERS=2 \
    PDFKB_PDF_PARSER=pymupdf4llm \
    PDFKB_DOCUMENT_CHUNKER=langchain \
    PDFKB_MODEL_CACHE_DIR=/app/cache/models

# Expose default ports
# 8000: Unified web + mcp port
EXPOSE 8000

# Health check for container orchestration.
# Shell form, so ${PDFKB_SERVER_PORT} is expanded each time the check runs.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:${PDFKB_SERVER_PORT}/health || exit 1

# Volume mount points for data persistence
VOLUME ["${PDFKB_KNOWLEDGEBASE_PATH}", "${PDFKB_CACHE_DIR}", "${PDFKB_LOG_DIR}"]

# Set entrypoint
ENTRYPOINT ["/app/docker-entrypoint.sh"]

# Default command - can be overridden
CMD ["pdfkb-mcp", "--transport", "http"]

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/juanqui/pdfkb-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.