---
# CPU-only configuration for systems without NVIDIA GPU
# Use this if GPU is not available or you want CPU-only execution
services:
  animagine-mcp:
    build:
      context: .
      dockerfile: Dockerfile
      # Enable BuildKit inline layer caching for faster, parallel builds
      args:
        - BUILDKIT_INLINE_CACHE=1
    container_name: animagine-mcp-server
    # Ports — quoted so YAML never misreads host:container as a sexagesimal int
    ports:
      - "8000:8000"
    # NO GPU SUPPORT - This uses regular Python
    # Volumes
    volumes:
      # Persist model checkpoints
      - ./checkpoints:/app/checkpoints:rw
      # Persist LoRA models
      - ./loras:/app/loras:rw
      # Persist generated outputs
      - ./outputs:/app/outputs:rw
      # Cache for Hugging Face models
      - hf_cache:/root/.cache/huggingface
      # Cache for torch models
      - torch_cache:/root/.cache/torch
    # Environment variables - CPU optimized (all values quoted as strings;
    # env vars are strings to the consumer regardless of YAML typing)
    environment:
      # Python settings
      PYTHONUNBUFFERED: "1"
      PYTHONDONTWRITEBYTECODE: "1"
      # Hugging Face settings
      HF_HOME: /root/.cache/huggingface
      TORCH_HOME: /root/.cache/torch
      # CPU Settings — keep these in sync with deploy.resources.limits.cpus
      # below; threads > CPU quota causes oversubscription and throttling.
      OMP_NUM_THREADS: "4"
      OPENBLAS_NUM_THREADS: "4"
      MKL_NUM_THREADS: "4"
      VECLIB_MAXIMUM_THREADS: "4"
      NUMEXPR_NUM_THREADS: "4"
      # PyTorch CPU settings
      # NOTE(review): PyTorch itself reads OMP_NUM_THREADS/MKL_NUM_THREADS,
      # not TORCH_NUM_THREADS — kept in case the app reads it; verify.
      TORCH_NUM_THREADS: "4"
      # Disable telemetry
      HF_HUB_DISABLE_TELEMETRY: "1"
      # Optional: Skip model verification on startup (model is pre-downloaded in image)
      # SKIP_MODEL_DOWNLOAD: "true"
    # Resource limits - CPU focused (honored by `docker compose` v2)
    deploy:
      resources:
        limits:
          cpus: '4'  # Adjust based on available cores
          memory: 8G  # Increase if possible
        reservations:
          cpus: '2'
          memory: 4G
    # Restart policy
    restart: unless-stopped
    # Logging
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    # Healthcheck - simpler for CPU
    # NOTE(review): this always exits 0, so it only proves the container can
    # exec Python — it does not probe the service on port 8000. Consider an
    # HTTP check (e.g. urllib against localhost:8000) if the image supports it.
    healthcheck:
      test: ["CMD", "python", "-c", "import sys; sys.exit(0)"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
# Named volumes backing the model/weights caches mounted above.
volumes:
  hf_cache:
    driver: local
  torch_cache:
    driver: local