# NornicDB AMD64 CUDA
# Uses pre-built llama-cuda-libs for fast builds (~2 min vs ~15 min)
#
# Prereq (one-time): docker build -f docker/Dockerfile.llama-cuda -t timothyswt/llama-cuda-libs:7285 .
#
# Build: docker build -f docker/Dockerfile.amd64-cuda -t nornicdb-amd64-cuda .
# With BGE model embedded:
#   docker build -f docker/Dockerfile.amd64-cuda --build-arg EMBED_MODEL=true -t nornicdb-amd64-cuda-bge .
# Headless (no UI):
#   docker build -f docker/Dockerfile.amd64-cuda --build-arg HEADLESS=true -t nornicdb-amd64-cuda-headless .
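#
# Run (sketch; assumes the NVIDIA Container Toolkit is installed on the host,
# volume/image names are illustrative):
#   docker run --gpus all -p 7474:7474 -p 7687:7687 -v nornicdb-data:/data nornicdb-amd64-cuda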
ARG LLAMA_CUDA_IMAGE=timothyswt/llama-cuda-libs:7285
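# A different prebuilt lib image can be selected at build time with
# --build-arg LLAMA_CUDA_IMAGE=<image:tag>.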
# =============================================================================
# Stage 1: UI (skipped in headless mode)
# =============================================================================
FROM node:20-alpine AS ui
ARG HEADLESS=false
WORKDIR /ui
COPY ui/package*.json ./
RUN if [ "$HEADLESS" != "true" ]; then \
npm ci 2>/dev/null || npm install --legacy-peer-deps; \
fi
COPY ui/ .
RUN if [ "$HEADLESS" != "true" ]; then \
npm run build; \
else \
mkdir -p dist && echo "Headless mode - UI skipped" > dist/HEADLESS; \
fi
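# The placeholder dist/ keeps the later COPY --from=ui step valid when the UI build is skipped.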
# =============================================================================
# Stage 2: Pre-built CUDA libs
# =============================================================================
FROM ${LLAMA_CUDA_IMAGE} AS llama
# =============================================================================
# Stage 3: Go build
# =============================================================================
FROM nvidia/cuda:12.6.3-devel-ubuntu22.04 AS builder
ARG HEADLESS=false
ENV GO_VERSION=1.25.5
RUN apt-get update && apt-get install -y wget git gcc g++ libgomp1 && \
    wget -q https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz && \
    tar -C /usr/local -xzf go${GO_VERSION}.linux-amd64.tar.gz && rm go*.tar.gz
ENV PATH="/usr/local/go/bin:${PATH}" CUDA_HOME=/usr/local/cuda
WORKDIR /build
# Copy llama artifacts
COPY --from=llama /output/lib/*.a /build/lib/llama/
COPY --from=llama /output/include/*.h /build/lib/llama/
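# Static libs and headers for the cgo build below (CGO_ENABLED=1 with the cuda/localllm tags).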
# Go dependencies
COPY go.mod go.sum ./
RUN go mod download
# Source + UI assets (in headless mode the dist contains only the HEADLESS placeholder)
COPY . .
COPY --from=ui /ui/dist ./ui/dist
# Build with CUDA + localllm (with or without UI based on HEADLESS arg)
RUN COMMIT_HASH=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown") && \
    BUILD_TIME=$(date -u +%Y%m%d-%H%M%S) && \
    if [ "$HEADLESS" = "true" ]; then \
        echo "Building headless (no UI)..." && \
        CGO_ENABLED=1 go build -tags "cuda localllm noui" \
            -ldflags="-s -w -X main.buildTime=${BUILD_TIME} -X main.commit=${COMMIT_HASH}" \
            -o nornicdb ./cmd/nornicdb; \
    else \
        echo "Building with UI..." && \
        CGO_ENABLED=1 go build -tags "cuda localllm" \
            -ldflags="-s -w -X main.buildTime=${BUILD_TIME} -X main.commit=${COMMIT_HASH}" \
            -o nornicdb ./cmd/nornicdb; \
    fi
# Build APOC plugin
RUN echo "Building APOC plugin..." && \
    mkdir -p apoc/built-plugins && \
    cd apoc/plugin-src/apoc && go build -buildmode=plugin -o ../../../apoc/built-plugins/apoc.so apoc_plugin.go && \
    echo "✓ Built plugin:" && ls -lh /build/apoc/built-plugins/*.so
# =============================================================================
# Stage 4: Runtime
# =============================================================================
FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04
WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates tzdata wget libgomp1 && rm -rf /var/lib/apt/lists/* && \
    mkdir -p /data /app/models
COPY --from=builder /build/nornicdb /app/
COPY --from=builder /build/apoc/built-plugins /app/plugins/
COPY docker/entrypoint.sh /app/entrypoint-real.sh
COPY docker/cuda-fallback-wrapper.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh /app/entrypoint-real.sh
# Model embedding: only copy when EMBED_MODEL=true (uses BuildKit mount to avoid layer bloat)
ARG EMBED_MODEL=false
ARG HEADLESS=false
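# Note: RUN --mount requires BuildKit (DOCKER_BUILDKIT=1 or `docker buildx build`);
# the bind mount expects a models/ directory in the build context even in BYOM mode.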
RUN --mount=type=bind,source=models,target=/models,ro \
    if [ "$EMBED_MODEL" = "true" ]; then \
        if [ -f /models/bge-m3.gguf ]; then \
            cp /models/bge-m3.gguf /app/models/ && \
            echo "✓ Embedded bge-m3.gguf model ($(du -h /app/models/bge-m3.gguf | cut -f1))"; \
        else \
            echo "ERROR: EMBED_MODEL=true but models/bge-m3.gguf not found" && exit 1; \
        fi; \
    else \
        echo "→ BYOM mode (no embedded model)"; \
    fi && \
    if [ "$HEADLESS" = "true" ]; then \
        echo "✓ Headless mode enabled (no UI)"; \
    fi
EXPOSE 7474 7687
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
    CMD wget --spider -q http://localhost:7474/health || exit 1
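# Manual check from the host (sketch, assumes -p 7474:7474): curl -f http://localhost:7474/health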
# Runtime defaults; NORNICDB_HEADLESS picks up the HEADLESS build arg declared above
ENV NORNICDB_DATA_DIR=/data \
    NORNICDB_HTTP_PORT=7474 \
    NORNICDB_BOLT_PORT=7687 \
    NORNICDB_EMBEDDING_PROVIDER=local \
    NORNICDB_EMBEDDING_MODEL=bge-m3 \
    NORNICDB_EMBEDDING_DIMENSIONS=1024 \
    NORNICDB_MODELS_DIR=/app/models \
    NORNICDB_EMBEDDING_GPU_LAYERS=-1 \
    NORNICDB_NO_AUTH=true \
    NORNICDB_GPU_ENABLED=true \
    NORNICDB_HEADLESS=${HEADLESS} \
    NORNICDB_PLUGINS_DIR=/app/plugins \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility
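# These defaults can be overridden at runtime with `docker run -e ...`, e.g. (sketch,
# assuming the app honors the variable): -e NORNICDB_HTTP_PORT=8080 with -p 8080:8080.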
ENTRYPOINT ["/app/entrypoint.sh"]