#!/usr/bin/env python
"""Setup CUDA DLLs for ONNX Runtime GPU acceleration.
Copies cuDNN, cuBLAS, and CUDA runtime DLLs to onnxruntime/capi/
so that ONNX Runtime can find them during CUDA provider initialization.
Run this after installing nvidia-cudnn-cu12 and nvidia-cuda-runtime-cu12.
"""
import os
import shutil
import sys
from pathlib import Path
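
# Added guard (an assumption, not in the original script): the paths and
# *.dll globs below presume a Windows venv, so fail fast on other platforms.
if sys.platform != "win32":
    sys.exit("This script targets Windows virtual environments.")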


def setup_cuda_dlls():
    """Copy NVIDIA DLLs into the onnxruntime/capi/ directory."""
    venv = Path(sys.prefix)
    site_packages = venv / "Lib" / "site-packages"  # Windows venv layout

    # Destination: onnxruntime/capi/
    ort_capi = site_packages / "onnxruntime" / "capi"
    if not ort_capi.exists():
        print(f"ERROR: onnxruntime not found at {ort_capi}")
        print("Install onnxruntime-gpu first: pip install onnxruntime-gpu")
        return False

    # Source: the nvidia-* wheel packages
    nvidia_dir = site_packages / "nvidia"
    if not nvidia_dir.exists():
        print("ERROR: no nvidia packages found.")
        print("Install required packages:")
        print("  pip install nvidia-cudnn-cu12 nvidia-cublas-cu12 nvidia-cuda-runtime-cu12")
        return False

    # Copy every DLL shipped by the nvidia packages into onnxruntime/capi/.
    # Existing files are left untouched, so delete them first to force an update.
    copied = 0
    for pkg in nvidia_dir.iterdir():
        if not pkg.is_dir():
            continue
        bin_dir = pkg / "bin"
        if not bin_dir.exists():
            continue
        for dll in bin_dir.glob("*.dll"):
            dest = ort_capi / dll.name
            if dest.exists():
                print(f"Already exists: {dll.name}")
            else:
                print(f"Copying {dll.name}")
                shutil.copy2(dll, dest)
                copied += 1

    if copied > 0:
        print(f"\nCopied {copied} DLLs to {ort_capi}")
    else:
        print("\nAll DLLs already in place.")
    return True


def verify_cuda():
    """Verify that CUDA works with ONNX Runtime."""
    # Put onnxruntime/capi on PATH so dependent DLLs can be resolved.
    venv = Path(sys.prefix)
    ort_capi = venv / "Lib" / "site-packages" / "onnxruntime" / "capi"
    os.environ["PATH"] = str(ort_capi) + os.pathsep + os.environ.get("PATH", "")
os.environ["ORT_LOGGING_LEVEL"] = "ERROR"
import onnxruntime as ort
print(f"\nONNX Runtime version: {ort.__version__}")
print(f"Available providers: {ort.get_available_providers()}")
if "CUDAExecutionProvider" not in ort.get_available_providers():
print("\nWARNING: CUDAExecutionProvider not available")
return False

    # Smoke-test CUDA with a minimal model.
    try:
        import numpy as np
        from onnx import TensorProto, helper

        # Build a trivial one-node model: Y = Identity(X).
        X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [None, 10])
        Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [None, 10])
        node = helper.make_node("Identity", ["X"], ["Y"])
        graph = helper.make_graph([node], "test", [X], [Y])
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)])
        model.ir_version = 6
        model_bytes = model.SerializeToString()

        # Create a session that prefers CUDA but can fall back to CPU.
        sess = ort.InferenceSession(
            model_bytes,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )

        # Run one inference to make sure the provider actually executes.
        x = np.random.randn(1, 10).astype(np.float32)
        y = sess.run(["Y"], {"X": x})[0]

        active_providers = sess.get_providers()
        print(f"Session providers: {active_providers}")
        if "CUDAExecutionProvider" in active_providers:
            print("\nCUDA is working!")
            return True
        print("\nWARNING: CUDA session created but not using GPU")
        return False
    except Exception as e:
        print(f"\nERROR: CUDA test failed: {e}")
        return False
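

# Hypothetical follow-up sketch (not part of the original script): once the
# DLLs are in place, downstream code can pin CUDA provider options explicitly.
# The (name, options) tuple form is standard ONNX Runtime API; the helper name
# and the device_id value are illustrative assumptions.
def make_cuda_session(model_path):
    import onnxruntime as ort

    providers = [
        ("CUDAExecutionProvider", {"device_id": 0}),  # assumed single-GPU setup
        "CPUExecutionProvider",  # fallback if CUDA initialization fails
    ]
    return ort.InferenceSession(model_path, providers=providers)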
if __name__ == "__main__":
print("Setting up CUDA for ONNX Runtime...")
print("=" * 50)
if setup_cuda_dlls():
print("\n" + "=" * 50)
print("Verifying CUDA...")
verify_cuda()