From 3f2c745b89f9f8cb8ac4c7b4e00596c9b95498b7 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 6 Mar 2025 13:27:17 -0500 Subject: [PATCH] fix(infra): reset nvidia-driver module before enabling -open one Till recently, we used "null" profile. With the switch to 570, we switched to -open. When switching a dnf module, dnf complains as follows: ``` The operation would result in switching of module 'nvidia-driver' stream 'XXX' to stream '570-open' Error: It is not possible to switch enabled streams of a module unless explicitly enabled via configuration option module_stream_switch. It is recommended to rather remove all installed content from the module, and reset the module using 'dnf module reset <module_name>' command. After you reset the module, you can install the other stream. ``` To avoid this issue on upgrade, always reset the module when running the script. Signed-off-by: Ihar Hrachyshka --- scripts/infra/nvidia-setup.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/infra/nvidia-setup.sh b/scripts/infra/nvidia-setup.sh index 4258181418..95e221ce1a 100755 --- a/scripts/infra/nvidia-setup.sh +++ b/scripts/infra/nvidia-setup.sh @@ -119,6 +119,7 @@ dnf install -y "kernel-devel-${KERNEL_VERSION}" \ && if [ "${TARGET_ARCH}" == "aarch64" ]; then CUDA_REPO_ARCH="sbsa"; fi \ && cp -a /etc/dnf/dnf.conf{,.tmp} && mv /etc/dnf/dnf.conf{.tmp,} \ && dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel${OS_VERSION_MAJOR}/${CUDA_REPO_ARCH}/cuda-rhel${OS_VERSION_MAJOR}.repo \ + && dnf module reset -y nvidia-driver \ && dnf -y module enable nvidia-driver:${DRIVER_STREAM}-open/default \ && export NCCL_PACKAGE=$(dnf search libnccl --showduplicates 2>/dev/null | grep ${CUDA_MAJOR_MINOR} | awk '{print $1}' | grep libnccl-2 | tail -1) \ && dnf install -y \ @@ -155,6 +156,7 @@ dnf install -y "kernel-devel-${KERNEL_VERSION}" \ && if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$TARGET_ARCH" != "arm64" ]; then \ versionArray=(${DRIVER_VERSION//./ 
}); \ DRIVER_BRANCH=${versionArray[0]}; \ + dnf module reset -y nvidia-driver && \ dnf module enable -y nvidia-driver:${DRIVER_BRANCH}-open && \ dnf install -y nvidia-fabric-manager-${DRIVER_VERSION} libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION} ; \ fi \