Skip to main content
Glama
install-gpu-drivers.sh7.5 kB
#!/usr/bin/env bash # # NVIDIA GPU Driver Installation # # Installs NVIDIA driver 535 (LTS) and utilities on Ubuntu 24.04. # Verifies installation and provides reboot guidance. # # Usage: # ./install-gpu-drivers.sh [--remote <host>] [--ssh-key <path>] # # Options: # --remote <host> Install on remote host via SSH # --ssh-key <path> Path to SSH key for remote installation # --skip-verification Skip GPU verification after installation # # Environment Variables: # PUBLIC_IP Remote host IP (alternative to --remote) # SSH_KEY_PATH Path to SSH key (alternative to --ssh-key) set -euo pipefail # Colors readonly RED='\033[0;31m' readonly GREEN='\033[0;32m' readonly YELLOW='\033[1;33m' readonly BLUE='\033[0;34m' readonly NC='\033[0m' # Configuration REMOTE_HOST="${PUBLIC_IP:-}" SSH_KEY="${SSH_KEY_PATH:-}" SKIP_VERIFICATION=false # Parse arguments while [[ $# -gt 0 ]]; do case "$1" in --remote) REMOTE_HOST="$2" shift 2 ;; --ssh-key) SSH_KEY="$2" shift 2 ;; --skip-verification) SKIP_VERIFICATION=true shift ;; --help) grep '^#' "$0" | sed 's/^# \?//' exit 0 ;; *) echo -e "${RED}Unknown option: $1${NC}" >&2 exit 1 ;; esac done # Logging functions log_info() { echo -e "${BLUE}→${NC} $1" } log_success() { echo -e "${GREEN}✓${NC} $1" } log_warn() { echo -e "${YELLOW}!${NC} $1" } log_error() { echo -e "${RED}✗${NC} $1" >&2 } # Function to install drivers locally install_drivers_local() { log_info "Installing NVIDIA drivers (driver-535 LTS)..." # Update package list sudo apt-get update -qq # Install NVIDIA driver and utilities log_info "Installing nvidia-driver-535 and nvidia-utils-535..." sudo apt-get install -y -qq nvidia-driver-535 nvidia-utils-535 log_success "NVIDIA drivers installed" } # Function to verify installation verify_installation() { log_info "Verifying GPU driver installation..." if command -v nvidia-smi &> /dev/null; then log_success "nvidia-smi is available" # Try to run nvidia-smi if nvidia-smi &> /dev/null; then log_success "GPU is accessible" echo "" nvidia-smi return 0 else log_warn "nvidia-smi installed but GPU not accessible" log_warn "Reboot required for driver to load" return 1 fi else log_error "nvidia-smi not found" return 1 fi } # Function to install on remote host install_drivers_remote() { local host="$1" local ssh_key="$2" log_info "Installing NVIDIA drivers on remote host: $host" if [[ -z "$ssh_key" ]]; then log_error "SSH key required for remote installation" log_error "Use --ssh-key or set SSH_KEY_PATH environment variable" return 1 fi if [[ ! -f "$ssh_key" ]]; then log_error "SSH key not found: $ssh_key" return 1 fi # Execute installation on remote host ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 -i "$ssh_key" "ubuntu@${host}" << 'ENDSSH' set -e echo "→ Updating package list..." sudo apt-get update -qq echo "→ Installing NVIDIA driver 535 (LTS)..." sudo apt-get install -y -qq nvidia-driver-535 nvidia-utils-535 echo "✓ NVIDIA drivers installed" # Check if nvidia-smi is available if command -v nvidia-smi &> /dev/null; then echo "✓ nvidia-smi is available" # Try to run nvidia-smi if nvidia-smi &> /dev/null; then echo "✓ GPU is accessible (no reboot needed)" nvidia-smi else echo "! GPU not yet accessible - reboot required" exit 2 # Special exit code for reboot needed fi else echo "✗ nvidia-smi not found" exit 1 fi ENDSSH local exit_code=$? if [[ $exit_code -eq 0 ]]; then log_success "Drivers installed and GPU is accessible" return 0 elif [[ $exit_code -eq 2 ]]; then log_warn "Drivers installed but reboot required" return 2 else log_error "Driver installation failed" return 1 fi } # Function to reboot remote host reboot_remote_host() { local host="$1" local ssh_key="$2" log_info "Rebooting remote host..." ssh -o StrictHostKeyChecking=no -i "$ssh_key" "ubuntu@${host}" 'sudo reboot' || true log_info "Waiting 60 seconds for instance to reboot..." sleep 60 # Wait for SSH to be available again log_info "Waiting for SSH to be available..." local retries=30 local count=0 while [[ $count -lt $retries ]]; do if ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 -i "$ssh_key" "ubuntu@${host}" 'echo ready' &> /dev/null; then log_success "Instance is back online" return 0 fi count=$((count + 1)) sleep 10 done log_error "Instance did not come back online after reboot" return 1 } # Function to verify remote GPU verify_remote_gpu() { local host="$1" local ssh_key="$2" log_info "Verifying GPU on remote host..." ssh -o StrictHostKeyChecking=no -i "$ssh_key" "ubuntu@${host}" << 'ENDSSH' if nvidia-smi &> /dev/null; then echo "✓ GPU is accessible" echo "" nvidia-smi exit 0 else echo "✗ GPU still not accessible" exit 1 fi ENDSSH return $? } # Main execution main() { log_info "NVIDIA GPU Driver Installation" echo "" if [[ -n "$REMOTE_HOST" ]]; then # Remote installation if install_drivers_remote "$REMOTE_HOST" "$SSH_KEY"; then log_success "Installation complete - GPU is accessible" elif [[ $? -eq 2 ]]; then log_warn "Reboot required to load drivers" echo "" read -p "Reboot instance now? (yes/no): " -r response if [[ "$response" == "yes" ]]; then reboot_remote_host "$REMOTE_HOST" "$SSH_KEY" verify_remote_gpu "$REMOTE_HOST" "$SSH_KEY" else log_info "Reboot manually and run again to verify" fi else log_error "Installation failed" exit 1 fi else # Local installation install_drivers_local if [[ "$SKIP_VERIFICATION" != "true" ]]; then if ! verify_installation; then echo "" log_warn "==========================================" log_warn "REBOOT REQUIRED" log_warn "==========================================" log_warn "Drivers are installed but GPU is not accessible" log_warn "The system must be rebooted for drivers to load" echo "" log_info "To reboot: sudo reboot" log_info "After reboot, verify with: nvidia-smi" echo "" exit 2 fi fi fi echo "" log_success "==========================================" log_success "NVIDIA Driver Installation Complete" log_success "==========================================" echo "" log_info "Next steps:" log_info " 1. If not already done, reboot the system" log_info " 2. Verify GPU: nvidia-smi" log_info " 3. Run: ./scripts/aws/setup-docker-gpu.sh" echo "" } # Run main function main

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/isc-tdyar/medical-graphrag-assistant'

If you have feedback or need assistance with the MCP directory API, please join our Discord server