deploy-nim-llm.sh
#!/usr/bin/env bash
#
# NVIDIA NIM LLM Deployment
#
# Deploys NVIDIA NIM LLM container (Llama 3.1 8B Instruct) with GPU support.
# Requires GPU drivers and Docker GPU runtime to be configured.
#
# Usage:
#   ./deploy-nim-llm.sh [--remote <host>] [--ssh-key <path>]
#
# Options:
#   --remote <host>       Deploy on remote host via SSH
#   --ssh-key <path>      Path to SSH key for remote deployment
#   --force-recreate      Remove existing container and recreate
#   --skip-verification   Skip health check verification
#   --model <model>       NIM model to deploy (default: meta/llama-3.1-8b-instruct)
#
# Environment Variables:
#   PUBLIC_IP        Remote host IP (alternative to --remote)
#   SSH_KEY_PATH     Path to SSH key (alternative to --ssh-key)
#   NVIDIA_API_KEY   NVIDIA NGC API key (required)

set -euo pipefail

# Colors
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Configuration
REMOTE_HOST="${PUBLIC_IP:-}"
SSH_KEY="${SSH_KEY_PATH:-}"
FORCE_RECREATE=false
SKIP_VERIFICATION=false
NIM_MODEL="${NIM_MODEL:-meta/llama-3.1-8b-instruct}"
CONTAINER_NAME="nim-llm"
NIM_PORT=8001
LOCAL_CONTAINER_PORT=8000

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --remote)
            REMOTE_HOST="$2"
            shift 2
            ;;
        --ssh-key)
            SSH_KEY="$2"
            shift 2
            ;;
        --force-recreate)
            FORCE_RECREATE=true
            shift
            ;;
        --skip-verification)
            SKIP_VERIFICATION=true
            shift
            ;;
        --model)
            NIM_MODEL="$2"
            shift 2
            ;;
        --help)
            grep '^#' "$0" | sed 's/^# \?//'
            exit 0
            ;;
        *)
            echo -e "${RED}Unknown option: $1${NC}" >&2
            exit 1
            ;;
    esac
done

# Logging functions
log_info() {
    echo -e "${BLUE}→${NC} $1"
}

log_success() {
    echo -e "${GREEN}✓${NC} $1"
}

log_warn() {
    echo -e "${YELLOW}!${NC} $1"
}

log_error() {
    echo -e "${RED}✗${NC} $1" >&2
}

# Function to check for NVIDIA API key
check_api_key() {
    if [[ -z "${NVIDIA_API_KEY:-}" ]]; then
        log_error "NVIDIA_API_KEY environment variable is required"
        log_error "Set it in your .env file or export it before running this script"
        return 1
    fi
    log_success "NVIDIA API key found"
}

# Function to deploy NIM LLM locally
deploy_nim_llm_local() {
    log_info "Deploying NVIDIA NIM LLM..."
    log_info "Model: $NIM_MODEL"

    # Check for API key
    check_api_key || return 1

    # Check if container already exists
    if docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
        if [[ "$FORCE_RECREATE" == "true" ]]; then
            log_warn "Removing existing container..."
            docker stop "$CONTAINER_NAME" 2>/dev/null || true
            docker rm "$CONTAINER_NAME" 2>/dev/null || true
        else
            log_warn "Container $CONTAINER_NAME already exists"
            log_info "Use --force-recreate to remove and recreate"

            # Check if it's running
            if docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
                log_success "Container is running"
                return 0
            else
                log_info "Starting existing container..."
                docker start "$CONTAINER_NAME"
                log_success "Container started"
                return 0
            fi
        fi
    fi

    # Verify GPU is available
    log_info "Verifying GPU availability..."
    if ! docker run --rm --gpus all nvidia/cuda:12.2.0-base-ubuntu22.04 nvidia-smi &>/dev/null; then
        log_error "GPU not accessible in Docker containers"
        log_error "Run ./scripts/aws/setup-docker-gpu.sh first"
        return 1
    fi
    log_success "GPU is accessible"
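
    # Note (added; not in the original script): pulling from nvcr.io assumes
    # the Docker daemon is already authenticated against NVIDIA NGC. If the
    # pull below fails with an authorization error, log in first using NGC's
    # documented convention ('$oauthtoken' as the username, the API key as
    # the password):
    #   echo "$NVIDIA_API_KEY" | docker login nvcr.io --username '$oauthtoken' --password-stdin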

    # Pull NIM LLM image
    log_info "Pulling NIM LLM image (this may take several minutes)..."
    local nim_image="nvcr.io/nim/${NIM_MODEL}:latest"
    if ! docker pull "$nim_image"; then
        log_error "Failed to pull NIM image: $nim_image"
        log_error "Verify NVIDIA_API_KEY is valid and has access to NIM"
        return 1
    fi
    log_success "Image pulled"

    # Run NIM LLM container
    log_info "Starting NIM LLM container..."
    log_info "This will download model weights on first run (5-10 minutes)..."

    docker run -d \
        --name "$CONTAINER_NAME" \
        --gpus all \
        --restart unless-stopped \
        -p ${NIM_PORT}:${LOCAL_CONTAINER_PORT} \
        -e NGC_API_KEY="$NVIDIA_API_KEY" \
        -e NIM_MODEL_PROFILE=auto \
        --shm-size=16g \
        "$nim_image"

    log_success "NIM LLM container started"

    # Wait for initialization
    log_info "Waiting for NIM to initialize (checking every 30s)..."
    local max_wait=600  # 10 minutes
    local elapsed=0
    local interval=30

    while [[ $elapsed -lt $max_wait ]]; do
        if docker ps | grep -q "$CONTAINER_NAME"; then
            # Check logs for ready signal
            if docker logs "$CONTAINER_NAME" 2>&1 | grep -q -i "ready\|listening\|started"; then
                log_success "NIM is initializing"
                break
            fi
        else
            log_error "Container stopped unexpectedly"
            docker logs "$CONTAINER_NAME" --tail 50
            return 1
        fi

        sleep $interval
        elapsed=$((elapsed + interval))
        log_info "Still initializing... ($elapsed/${max_wait}s)"
    done

    if [[ $elapsed -ge $max_wait ]]; then
        log_warn "Initialization taking longer than expected"
        log_warn "Check logs: docker logs $CONTAINER_NAME"
    fi

    log_success "NIM LLM deployed"
}
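
# Note (added; not in the original script): the readiness loop above greps
# container logs for "ready|listening|started", which is heuristic. NIM
# images also expose an HTTP readiness probe that can be polled directly;
# verify the path against NVIDIA's NIM documentation for your image version:
#   curl -sf "http://localhost:${NIM_PORT}/v1/health/ready"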

# Function to deploy on remote host
deploy_nim_llm_remote() {
    local host="$1"
    local ssh_key="$2"

    log_info "Deploying NIM LLM on remote host: $host"

    if [[ -z "$ssh_key" ]]; then
        log_error "SSH key required for remote deployment"
        return 1
    fi

    if [[ ! -f "$ssh_key" ]]; then
        log_error "SSH key not found: $ssh_key"
        return 1
    fi

    # Execute deployment on remote host
    ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 -i "$ssh_key" "ubuntu@${host}" << ENDSSH
set -e

# Load environment variables
if [ -f ~/fhir-ai-hackathon/.env ]; then
    source ~/fhir-ai-hackathon/.env
fi

CONTAINER_NAME="$CONTAINER_NAME"
NIM_MODEL="$NIM_MODEL"
FORCE_RECREATE="$FORCE_RECREATE"
NIM_PORT="$NIM_PORT"

echo "→ Deploying NVIDIA NIM LLM..."
echo "  Model: \$NIM_MODEL"

# Check for API key
if [[ -z "\${NVIDIA_API_KEY:-}" ]]; then
    echo "✗ NVIDIA_API_KEY not found in environment"
    exit 1
fi
echo "✓ NVIDIA API key found"

# Check if container exists
if docker ps -a --format '{{.Names}}' | grep -q "^\${CONTAINER_NAME}\$"; then
    if [[ "\$FORCE_RECREATE" == "true" ]]; then
        echo "! Removing existing container..."
        docker stop \$CONTAINER_NAME 2>/dev/null || true
        docker rm \$CONTAINER_NAME 2>/dev/null || true
    else
        echo "! Container already exists"
        if docker ps --format '{{.Names}}' | grep -q "^\${CONTAINER_NAME}\$"; then
            echo "✓ Container is running"
            exit 0
        else
            echo "→ Starting existing container..."
            docker start \$CONTAINER_NAME
            echo "✓ Container started"
            exit 0
        fi
    fi
fi

# Verify GPU
echo "→ Verifying GPU..."
if ! docker run --rm --gpus all nvidia/cuda:12.2.0-base-ubuntu22.04 nvidia-smi &>/dev/null; then
    echo "✗ GPU not accessible in Docker"
    exit 1
fi
echo "✓ GPU accessible"

# Pull image
echo "→ Pulling NIM LLM image..."
nim_image="nvcr.io/nim/\${NIM_MODEL}:latest"
if ! docker pull "\$nim_image"; then
    echo "✗ Failed to pull NIM image"
    exit 1
fi
echo "✓ Image pulled"

# Run container
echo "→ Starting NIM LLM container..."
docker run -d \
    --name \$CONTAINER_NAME \
    --gpus all \
    --restart unless-stopped \
    -p \${NIM_PORT}:8000 \
    -e NGC_API_KEY="\$NVIDIA_API_KEY" \
    -e NIM_MODEL_PROFILE=auto \
    --shm-size=16g \
    "\$nim_image"

echo "✓ NIM LLM container started"
echo ""
echo "→ Waiting for initialization (30 seconds)..."
sleep 30

# Verify container is running
if docker ps | grep -q "\$CONTAINER_NAME"; then
    echo "✓ Container is running"
    echo ""
    docker ps | grep \$CONTAINER_NAME
else
    echo "✗ Container failed to start"
    docker logs \$CONTAINER_NAME --tail 50
    exit 1
fi

echo ""
echo "✓ NIM LLM deployment complete"
ENDSSH

    return $?
}

# Function to verify deployment
verify_deployment() {
    local host="${1:-localhost}"
    local is_remote="${2:-false}"

    if [[ "$SKIP_VERIFICATION" == "true" ]]; then
        log_info "Skipping verification (--skip-verification)"
        return 0
    fi

    log_info "Verifying NIM LLM deployment..."

    if [[ "$is_remote" == "true" ]]; then
        ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "ubuntu@${host}" << 'ENDSSH'
# Check container status
if docker ps | grep -q nim-llm; then
    echo "✓ Container is running"
    echo ""
    docker ps | grep nim-llm
    echo ""
    echo "API Endpoint:"
    echo "  http://$(curl -s ifconfig.me):8001/v1/chat/completions"
else
    echo "✗ Container is not running"
    exit 1
fi

# Check logs for errors
echo ""
echo "Recent logs:"
docker logs nim-llm --tail 10
ENDSSH
    else
        if docker ps | grep -q "$CONTAINER_NAME"; then
            log_success "Container is running"
            echo ""
            docker ps | grep "$CONTAINER_NAME"
            echo ""
            log_info "API Endpoint:"
            log_info "  http://localhost:${NIM_PORT}/v1/chat/completions"

            # Show recent logs
            echo ""
            log_info "Recent logs:"
            docker logs "$CONTAINER_NAME" --tail 10
        else
            log_error "Container is not running"
            return 1
        fi
    fi
}

# Main execution
main() {
    log_info "NVIDIA NIM LLM Deployment"
    echo ""

    if [[ -n "$REMOTE_HOST" ]]; then
        # Remote deployment
        if deploy_nim_llm_remote "$REMOTE_HOST" "$SSH_KEY"; then
            log_success "Remote deployment complete"
            verify_deployment "$REMOTE_HOST" true
        else
            log_error "Remote deployment failed"
            exit 1
        fi
    else
        # Local deployment
        deploy_nim_llm_local
        verify_deployment
    fi

    echo ""
    log_success "=========================================="
    log_success "NVIDIA NIM LLM Deployed"
    log_success "=========================================="
    echo ""
    log_info "Model: $NIM_MODEL"
    log_info "Endpoint: http://${REMOTE_HOST:-localhost}:${NIM_PORT}/v1/chat/completions"
    echo ""
    log_info "Test with curl:"
    echo ""
    cat << 'EOF'
curl -X POST http://localhost:8001/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "meta/llama-3.1-8b-instruct",
    "messages": [{"role": "user", "content": "What is RAG?"}],
    "max_tokens": 100
  }'
EOF
    echo ""
    log_info "Monitor logs:"
    log_info "  docker logs -f $CONTAINER_NAME"
    echo ""
    log_info "Next steps:"
    log_info "  1. Wait for model download to complete (check logs)"
    log_info "  2. Test LLM endpoint with curl"
    log_info "  3. Integrate with RAG pipeline"
    echo ""
}

# Run main function
main
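
For reference, a minimal usage sketch follows. The host address and key path are illustrative placeholders, not values taken from the script; the /v1/models check assumes NIM's OpenAI-compatible API surface, so verify it against the documentation for your image version.

# Local deployment (requires NVIDIA_API_KEY in the environment)
export NVIDIA_API_KEY="nvapi-..."   # placeholder; use your NGC key
./deploy-nim-llm.sh

# Remote deployment against a GPU host, recreating any existing container
./deploy-nim-llm.sh --remote 203.0.113.10 --ssh-key ~/.ssh/gpu-host.pem --force-recreate

# Smoke-test the API once model weights have loaded (complements the
# chat-completion curl printed by main())
curl -s http://localhost:8001/v1/models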
