# syntax=docker/dockerfile:1.7
###############################################################################
# MCP Data Analysis Server - OCI-compliant container build
#
# This multi-stage Dockerfile produces a minimal UBI10-based runtime
# image that automatically tracks the latest Python 3.12.x patch release
# from the RHEL 10 repositories and is fully patched on each rebuild.
#
# Key design points:
# - Builder stage has full DNF + devel headers for wheel compilation
# - Runtime stage is ubi-minimal: only the Python runtime and app
# - Both builder and runtime receive full OS patches
# - Development headers are dropped from the final image
# - Hadolint DL3041 is suppressed to allow "latest patch" RPM usage
# - Includes data science dependencies for pandas, numpy, scipy, etc.
###############################################################################
###########################
# Build-time arguments
###########################
# Python major.minor series to track
ARG PYTHON_VERSION=3.12
###########################
# Builder stage
###########################
# Full UBI base (dnf + devel packages available); the tag is pinned by
# build-date suffix for reproducible builds.
FROM registry.access.redhat.com/ubi10/ubi:10.1-1770180700 AS builder
# Fail fast in every RUN step: -e (exit on error), -u (unset vars are
# errors), -o pipefail (a failing pipe stage fails the pipeline).
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
# ARGs declared before FROM are only visible in FROM lines; redeclare the
# ARG inside the stage to use it in RUN instructions below.
ARG PYTHON_VERSION
# ----------------------------------------------------------------------------
# 1) Patch the OS
# 2) Install Python + headers for building wheels (needed for data science deps)
# 3) Install build tools for compiling scientific libraries
# 4) Install binutils for strip command
# 5) Register python3 alternative
# 6) Clean caches to reduce layer size
#
# Weak dependencies and docs are skipped to keep the builder layer lean;
# the package list is alphabetized for easy diffing.
# NOTE(review): curl is installed but not used by any visible build step —
# confirm it is actually needed.
# ----------------------------------------------------------------------------
# hadolint ignore=DL3041
RUN set -euo pipefail \
    && dnf upgrade -y \
    && dnf install -y --setopt=install_weak_deps=0 --setopt=tsflags=nodocs \
        binutils \
        curl \
        gcc \
        gcc-c++ \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-devel \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && dnf clean all
# Absolute working directory; created automatically if missing.
WORKDIR /app
# ----------------------------------------------------------------------------
# Copy only the files needed for dependency installation first
# This maximizes Docker layer caching - dependencies change less often
# (README.md is presumably referenced by pyproject.toml's readme metadata,
# so pip would fail without it — confirm against pyproject.toml)
# ----------------------------------------------------------------------------
COPY pyproject.toml /app/
COPY README.md /app/
# ----------------------------------------------------------------------------
# Create and populate virtual environment
# - Upgrade pip, setuptools, wheel
# - Install project dependencies and package
# - Include all data analysis dependencies (pandas, numpy, scipy, etc.)
# - Remove build tools but keep runtime dist-info
# - Remove build caches and build artifacts
#
# NOTE(review): "-e .[dev]" is an *editable* install of the dev extras. The
# editable hook points at /app/src (copied in a later layer) and the runtime
# stage sets PYTHONPATH=/app/src, so imports resolve — but the [dev] extra
# presumably drags test/lint tooling into the production image; confirm
# whether installing only the runtime dependencies would suffice.
#
# pip/setuptools/wheel are uninstalled afterwards so the shipped venv cannot
# install packages; the find prunes any dist-info they left behind (errors
# from an empty match are deliberately swallowed via "|| true").
# ----------------------------------------------------------------------------
RUN set -euo pipefail \
&& python3 -m venv /app/.venv \
&& /app/.venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel \
&& /app/.venv/bin/pip install --no-cache-dir -e ".[dev]" \
&& /app/.venv/bin/pip uninstall --yes pip setuptools wheel \
&& rm -rf /root/.cache /var/cache/dnf \
&& find /app/.venv -name "*.dist-info" -type d \
\( -name "pip-*" -o -name "setuptools-*" -o -name "wheel-*" \) \
-exec rm -rf {} + 2>/dev/null || true \
&& rm -rf /app/.venv/share/python-wheels \
&& rm -rf /app/*.egg-info /app/build /app/dist /app/.eggs
# ----------------------------------------------------------------------------
# Now copy the application files needed for runtime
# This ensures code changes don't invalidate the dependency layer
# src/ is the target of the editable install above; sample_data/ and
# config.yaml are runtime assets shipped alongside the code.
# ----------------------------------------------------------------------------
COPY src/ /app/src/
COPY sample_data/ /app/sample_data/
COPY config.yaml /app/
# ----------------------------------------------------------------------------
# Create runtime script for MCP server
# Written with a BuildKit heredoc (syntax=docker/dockerfile:1.7). The quoted
# 'EOF' delimiter prevents build-time expansion, so the ${VAR:-default}
# substitutions are evaluated by bash at container start, exactly as before.
# --chmod=755 matches the previous chmod +x result; ownership is normalized
# by the chown -R 1001:0 step later in this stage.
# ----------------------------------------------------------------------------
COPY --chmod=755 <<'EOF' /app/run-server.sh
#!/bin/bash
set -euo pipefail

# Set default values for data analysis server
export PYTHONPATH="/app/src:${PYTHONPATH:-}"
export DATA_ANALYSIS_CACHE_DIR="${DATA_ANALYSIS_CACHE_DIR:-/app/data/cache}"
export DATA_ANALYSIS_RESULTS_DIR="${DATA_ANALYSIS_RESULTS_DIR:-/app/data/results}"

# Create directories if they don't exist
mkdir -p "$DATA_ANALYSIS_CACHE_DIR" "$DATA_ANALYSIS_RESULTS_DIR"

# Log startup information
echo "Starting MCP Data Analysis Server..."
echo "Available tools: load_data, analyze_data, transform_data, query_data, visualize_data, statistical_test, time_series_analysis"
echo "Cache directory: $DATA_ANALYSIS_CACHE_DIR"
echo "Results directory: $DATA_ANALYSIS_RESULTS_DIR"

# Run the MCP server
exec python3 -m data_analysis_server.server
EOF
# ----------------------------------------------------------------------------
# Create necessary directories for data analysis cache and results
# chown to UID 1001 / GID 0 and chmod g=u so the non-root runtime user (and
# arbitrary UIDs running with group 0, e.g. under OpenShift) can write to
# everything under /app.
# ----------------------------------------------------------------------------
RUN mkdir -p /app/data/cache /app/data/results \
&& chown -R 1001:0 /app \
&& chmod -R g=u /app
###########################
# Runtime stage
###########################
# ubi-minimal keeps the runtime attack surface small (microdnf only, no dnf
# or devel headers); tag pinned by build-date suffix like the builder base.
FROM registry.access.redhat.com/ubi10/ubi-minimal:10.1-1770180557 AS runtime
# ARGs do not cross stage boundaries; redeclare with a default for this stage.
ARG PYTHON_VERSION=3.12
# ----------------------------------------------------------------------------
# OCI image metadata
# ----------------------------------------------------------------------------
LABEL maintainer="ContextForge" \
org.opencontainers.image.title="mcp/mcp-data-analysis-server" \
org.opencontainers.image.description="MCP Data Analysis Server: Comprehensive data analysis capabilities with pandas, numpy, scipy" \
org.opencontainers.image.licenses="Apache-2.0" \
org.opencontainers.image.version="0.1.0" \
org.opencontainers.image.source="https://github.com/contextforge/mcp-context-forge" \
org.opencontainers.image.documentation="https://github.com/contextforge/mcp-context-forge/mcp-servers/python/data-analysis-server" \
org.opencontainers.image.vendor="ContextForge"
# ----------------------------------------------------------------------------
# Patch the runtime OS as well (the file header promises BOTH stages are
# fully patched on each rebuild, but only the builder ran an upgrade), then
# install the minimal runtime set:
# - python3.x         : interpreter for the venv
# - ca-certificates   : TLS trust store
# - shadow-utils      : provides useradd for the step below
# Weak deps and docs are skipped to keep ubi-minimal small; both microdnf
# and legacy yum cache paths are purged in the same layer.
# ----------------------------------------------------------------------------
# hadolint ignore=DL3041
RUN microdnf upgrade -y --nodocs --setopt=install_weak_deps=0 \
    && microdnf install -y --nodocs --setopt=install_weak_deps=0 \
        ca-certificates \
        python${PYTHON_VERSION} \
        shadow-utils \
    && microdnf clean all \
    && rm -rf /var/cache/yum /var/cache/dnf
# ----------------------------------------------------------------------------
# Make "python3" resolve to the tracked interpreter and create the non-root
# runtime user in a single RUN (one layer instead of two; DL3059).
# UID 1001 / GID 0 matches the builder-stage chown and OpenShift conventions;
# /sbin/nologin and --no-create-home keep the account service-only.
# ----------------------------------------------------------------------------
RUN ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 \
    && useradd --uid 1001 --gid 0 --home-dir /app --shell /sbin/nologin --no-create-home --comment app app
# ----------------------------------------------------------------------------
# Copy the application from the builder stage
# Brings over the venv, src/, sample_data/, config.yaml and run-server.sh in
# one layer; --chown re-asserts the 1001:0 ownership set in the builder.
# ----------------------------------------------------------------------------
COPY --from=builder --chown=1001:0 /app /app
# ----------------------------------------------------------------------------
# Ensure our virtual environment binaries have priority in PATH
# - Don't write bytecode files
# - Unbuffered output for better logging
# - Random hash seed for security
# - Disable pip cache to save space
# - Set data analysis server specific environment variables
# - MPLBACKEND is the environment variable matplotlib itself reads; "Agg"
#   keeps plotting headless. MATPLOTLIB_BACKEND is retained in case the
#   application consumes it directly.
# ----------------------------------------------------------------------------
ENV PATH="/app/.venv/bin:${PATH}" \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONHASHSEED=random \
    PYTHONPATH="/app/src" \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    DATA_ANALYSIS_CACHE_DIR="/app/data/cache" \
    DATA_ANALYSIS_RESULTS_DIR="/app/data/results" \
    MATPLOTLIB_BACKEND="Agg" \
    MPLBACKEND="Agg"
# ----------------------------------------------------------------------------
# Application working directory
# ----------------------------------------------------------------------------
WORKDIR /app
# ----------------------------------------------------------------------------
# Expose MCP server port (stdio by default, but useful for HTTP wrapper)
# EXPOSE is documentation only (does not publish the port); 8080 is
# unprivileged, so the non-root user can bind it without extra capabilities.
# ----------------------------------------------------------------------------
EXPOSE 8080
# ----------------------------------------------------------------------------
# Run as non-root user (1001)
# Numeric UID (not the name "app") so runtimes enforcing runAsNonRoot can
# verify it without reading /etc/passwd.
# ----------------------------------------------------------------------------
USER 1001
# ----------------------------------------------------------------------------
# Health check for MCP server functionality
# Importing the server entry point proves the venv, PYTHONPATH and the heavy
# scientific dependencies all resolve. It verifies importability only — it
# does not count or exercise the tools, so the message makes no claim about
# tool availability.
# ----------------------------------------------------------------------------
HEALTHCHECK --interval=60s --timeout=15s --start-period=120s --retries=3 \
    CMD ["python3", "-c", "from data_analysis_server.server import main; print('MCP Data Analysis Server healthy')"]
# ----------------------------------------------------------------------------
# Entrypoint - Run the MCP Data Analysis Server
# Absolute path so the command still works if WORKDIR is overridden at run
# time; the script ends with "exec python3 ..." so the server runs as PID 1
# and receives SIGTERM directly from "docker stop".
# ----------------------------------------------------------------------------
CMD ["/app/run-server.sh"]