Skip to main content
Glama
validate-deployment.sh12.3 kB
#!/usr/bin/env bash # # AWS GPU NIM RAG System Validation # # Comprehensive validation script for deployed system. # Validates GPU, Docker, IRIS database, and NIM services. # # Usage: # ./validate-deployment.sh [--remote <host>] [--ssh-key <path>] # # Options: # --remote <host> Validate remote host via SSH # --ssh-key <path> Path to SSH key for remote validation # --skip-<component> Skip specific validation checks # # Environment Variables: # PUBLIC_IP Remote host IP (alternative to --remote) # SSH_KEY_PATH Path to SSH key (alternative to --ssh-key) # IRIS_HOST IRIS host (default: localhost) # IRIS_PORT IRIS port (default: 1972) # NIM_HOST NIM LLM host (default: localhost) # NIM_PORT NIM LLM port (default: 8001) set -euo pipefail # Colors readonly RED='\033[0;31m' readonly GREEN='\033[0;32m' readonly YELLOW='\033[1;33m' readonly BLUE='\033[0;34m' readonly CYAN='\033[0;36m' readonly NC='\033[0m' # Configuration REMOTE_HOST="${PUBLIC_IP:-}" SSH_KEY="${SSH_KEY_PATH:-}" SKIP_GPU=false SKIP_DOCKER=false SKIP_IRIS=false SKIP_NIM=false IRIS_HOST="${IRIS_HOST:-localhost}" IRIS_PORT="${IRIS_PORT:-1972}" NIM_HOST="${NIM_HOST:-localhost}" NIM_PORT="${NIM_PORT:-8001}" # Parse arguments while [[ $# -gt 0 ]]; do case "$1" in --remote) REMOTE_HOST="$2" shift 2 ;; --ssh-key) SSH_KEY="$2" shift 2 ;; --skip-gpu) SKIP_GPU=true shift ;; --skip-docker) SKIP_DOCKER=true shift ;; --skip-iris) SKIP_IRIS=true shift ;; --skip-nim) SKIP_NIM=true shift ;; --help) grep '^#' "$0" | sed 's/^# \?//' exit 0 ;; *) echo -e "${RED}Unknown option: $1${NC}" >&2 exit 1 ;; esac done # Logging functions log_header() { echo "" echo -e "${CYAN}╔══════════════════════════════════════════════════════════════╗${NC}" echo -e "${CYAN}║${NC} $1" echo -e "${CYAN}╚══════════════════════════════════════════════════════════════╝${NC}" echo "" } log_check() { echo -e "${BLUE}→${NC} Checking $1..." } log_success() { echo -e "${GREEN}✓${NC} $1" } log_warn() { echo -e "${YELLOW}!${NC} $1" } log_error() { echo -e "${RED}✗${NC} $1" >&2 } log_info() { echo -e " $1" } # Validation functions for local execution validate_gpu_local() { log_check "GPU availability" if ! command -v nvidia-smi &> /dev/null; then log_error "nvidia-smi not found" return 1 fi if ! nvidia-smi &> /dev/null; then log_error "GPU not accessible" return 1 fi # Get GPU info gpu_name=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -n1) gpu_memory=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n1) driver_version=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1) cuda_version=$(nvidia-smi | grep "CUDA Version" | awk '{print $9}' || echo "Unknown") log_success "GPU detected: $gpu_name" log_info "Memory: ${gpu_memory} MB" log_info "Driver: $driver_version" log_info "CUDA: $cuda_version" return 0 } validate_docker_gpu_local() { log_check "Docker GPU runtime" if ! command -v docker &> /dev/null; then log_error "Docker not installed" return 1 fi # Test GPU access in container if docker run --rm --gpus all nvidia/cuda:12.2.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then log_success "Docker can access GPU" return 0 else log_error "Docker cannot access GPU" log_info "Run: ./scripts/aws/setup-docker-gpu.sh" return 1 fi } validate_iris_local() { log_check "IRIS database connectivity" # Check if iris container is running if ! docker ps | grep -q "iris-vector-db\|iris-fhir"; then log_error "IRIS container not running" log_info "Run: ./scripts/aws/deploy-iris.sh" return 1 fi log_success "IRIS container running" # Test Python connection log_check "IRIS database connection (Python)" python3 << 'EOF' import sys try: import iris conn = iris.connect('localhost', 1972, 'DEMO', '_SYSTEM', 'SYS') cursor = conn.cursor() cursor.execute("SELECT 1") result = cursor.fetchone() conn.close() if result and result[0] == 1: print("✓ Python connection successful") sys.exit(0) else: print("✗ Query failed") sys.exit(1) except Exception as e: print(f"✗ Connection failed: {e}") sys.exit(1) EOF if [[ $? -eq 0 ]]; then log_success "IRIS database connection working" else log_error "IRIS database connection failed" return 1 fi # Check for vector tables log_check "Vector tables existence" python3 << 'EOF' import sys try: import iris conn = iris.connect('localhost', 1972, 'DEMO', '_SYSTEM', 'SYS') cursor = conn.cursor() # Check ClinicalNoteVectors table cursor.execute(""" SELECT COUNT(*) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='DEMO' AND TABLE_NAME='ClinicalNoteVectors' """) clinical_exists = cursor.fetchone()[0] > 0 # Check MedicalImageVectors table cursor.execute(""" SELECT COUNT(*) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='DEMO' AND TABLE_NAME='MedicalImageVectors' """) image_exists = cursor.fetchone()[0] > 0 conn.close() if clinical_exists and image_exists: print("✓ Both vector tables exist") sys.exit(0) elif clinical_exists: print("! ClinicalNoteVectors exists, MedicalImageVectors missing") sys.exit(0) elif image_exists: print("! MedicalImageVectors exists, ClinicalNoteVectors missing") sys.exit(0) else: print("✗ No vector tables found") sys.exit(1) except Exception as e: print(f"✗ Table check failed: {e}") sys.exit(1) EOF if [[ $? -eq 0 ]]; then log_success "Vector tables validated" else log_error "Vector tables not found" log_info "Tables may need to be created" return 1 fi return 0 } validate_nim_llm_local() { log_check "NIM LLM service health" # Check if NIM container is running if ! docker ps | grep -q "nim-llm"; then log_error "NIM LLM container not running" log_info "Run: ./scripts/aws/deploy-nim-llm.sh" return 1 fi log_success "NIM LLM container running" # Test health endpoint log_check "NIM LLM health endpoint" if curl -s -f "http://${NIM_HOST}:${NIM_PORT}/health" > /dev/null; then log_success "NIM LLM health endpoint responding" else log_warn "Health endpoint not available (may still be initializing)" fi # Test inference log_check "NIM LLM inference test" response=$(curl -s -X POST "http://${NIM_HOST}:${NIM_PORT}/v1/chat/completions" \ -H "Content-Type: application/json" \ -d '{ "model": "meta/llama-3.1-8b-instruct", "messages": [{"role": "user", "content": "What is 2+2?"}], "max_tokens": 10 }' 2>/dev/null || echo "") if [[ -n "$response" ]] && echo "$response" | grep -q "choices"; then log_success "NIM LLM inference working" # Extract and show response answer=$(echo "$response" | python3 -c "import sys, json; data=json.load(sys.stdin); print(data['choices'][0]['message']['content'] if 'choices' in data else 'No response')" 2>/dev/null || echo "") if [[ -n "$answer" ]]; then log_info "Test response: $answer" fi else log_warn "NIM LLM inference not responding (may still be loading model)" log_info "Check logs: docker logs nim-llm" return 1 fi return 0 } # Remote validation function validate_remote() { local host="$1" local ssh_key="$2" log_info "Validating remote host: $host" if [[ -z "$ssh_key" ]]; then log_error "SSH key required for remote validation" return 1 fi if [[ ! -f "$ssh_key" ]]; then log_error "SSH key not found: $ssh_key" return 1 fi # Execute validation on remote host ssh -o StrictHostKeyChecking=no -i "$ssh_key" "ubuntu@${host}" << 'ENDSSH' set -e # Colors RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' BLUE='\033[0;34m' NC='\033[0m' echo -e "${BLUE}→${NC} Checking GPU..." if nvidia-smi &> /dev/null; then gpu_name=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -n1) echo -e "${GREEN}✓${NC} GPU detected: $gpu_name" else echo -e "${RED}✗${NC} GPU not accessible" exit 1 fi echo "" echo -e "${BLUE}→${NC} Checking Docker GPU runtime..." if docker run --rm --gpus all nvidia/cuda:12.2.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then echo -e "${GREEN}✓${NC} Docker can access GPU" else echo -e "${RED}✗${NC} Docker cannot access GPU" exit 1 fi echo "" echo -e "${BLUE}→${NC} Checking IRIS database..." if docker ps | grep -q "iris-vector-db"; then echo -e "${GREEN}✓${NC} IRIS container running" else echo -e "${RED}✗${NC} IRIS container not running" exit 1 fi echo "" echo -e "${BLUE}→${NC} Checking NIM LLM service..." if docker ps | grep -q "nim-llm"; then echo -e "${GREEN}✓${NC} NIM LLM container running" # Test health endpoint if curl -s -f http://localhost:8001/health > /dev/null 2>&1; then echo -e "${GREEN}✓${NC} NIM LLM health endpoint responding" else echo -e "${YELLOW}!${NC} Health endpoint not available (may be initializing)" fi else echo -e "${RED}✗${NC} NIM LLM container not running" exit 1 fi echo "" echo -e "${GREEN}✓${NC} All checks passed" ENDSSH return $? } # Main execution main() { log_header "AWS GPU NIM RAG System Validation" local all_passed=true if [[ -n "$REMOTE_HOST" ]]; then # Remote validation if ! validate_remote "$REMOTE_HOST" "$SSH_KEY"; then all_passed=false fi else # Local validation if [[ "$SKIP_GPU" != "true" ]]; then if ! validate_gpu_local; then all_passed=false fi echo "" fi if [[ "$SKIP_DOCKER" != "true" ]]; then if ! validate_docker_gpu_local; then all_passed=false fi echo "" fi if [[ "$SKIP_IRIS" != "true" ]]; then if ! validate_iris_local; then all_passed=false fi echo "" fi if [[ "$SKIP_NIM" != "true" ]]; then if ! validate_nim_llm_local; then all_passed=false fi echo "" fi fi # Summary echo "" log_header "Validation Summary" if [[ "$all_passed" == "true" ]]; then echo -e "${GREEN}✓ All validation checks passed${NC}" echo "" echo "System is ready for use!" echo "" echo "Next steps:" echo " 1. Vectorize clinical notes: python src/vectorization/vectorize_documents.py" echo " 2. Test vector search: python src/query/test_vector_search.py" echo " 3. Run RAG query: python src/query/rag_query.py --query 'your question'" echo "" return 0 else echo -e "${RED}✗ Some validation checks failed${NC}" echo "" echo "Please review the errors above and:" echo " 1. Check troubleshooting guide: docs/troubleshooting.md" echo " 2. Review deployment logs" echo " 3. Verify all services are running: docker ps" echo "" return 1 fi } # Run main function main

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/isc-tdyar/medical-graphrag-assistant'

If you have feedback or need assistance with the MCP directory API, please join our Discord server