mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: This mitigates a number of issues with the CUDA 11.6 update and updates the Linux driver. New issues discovered: #[75391](https://github.com/pytorch/pytorch/issues/75391) #[75375](https://github.com/pytorch/pytorch/issues/75375). Old issues present since 11.3: #[57482](https://github.com/pytorch/pytorch/issues/57482) #[70111](https://github.com/pytorch/pytorch/issues/70111). These changes were already tested in WIP PR: #[75337](https://github.com/pytorch/pytorch/pull/75337) Pull Request resolved: https://github.com/pytorch/pytorch/pull/75420 Reviewed By: seemethere Differential Revision: D35481973 Pulled By: atalman fbshipit-source-id: 4db00c646e2df4f8650404763963c3b215110f1f (cherry picked from commit 518e19dc361b43273f5bd6bdfff942614e8466f5)
56 lines
1.6 KiB
Bash
Executable File
56 lines
1.6 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Installs the NVIDIA container toolkit (nvidia-docker2) and the NVIDIA driver.
# Only distributions whose identifier matches "amzn*" are handled; any other
# distribution makes the script exit with an error.

# -e: abort on unhandled failures, -u: abort on unset variables,
# -o pipefail: a pipeline fails if any of its stages fails.
set -euo pipefail

# Distribution identifier: ID immediately followed by VERSION_ID, both read
# from /etc/os-release. The file is sourced inside the command substitution so
# none of its variables leak into this script. Both values default to empty so
# a release file lacking one of them does not trip `set -u`; an unrecognised
# identifier is rejected by the distribution checks further down.
# shellcheck disable=SC1091
DISTRIBUTION=$(. /etc/os-release; printf '%s%s' "${ID:-}" "${VERSION_ID:-}")

# File name of the NVIDIA driver installer to download.
DRIVER_FN="NVIDIA-Linux-x86_64-510.60.02.run"

# nvidia-docker yum repository definition for this distribution.
YUM_REPO_URL="https://nvidia.github.io/nvidia-docker/${DISTRIBUTION}/nvidia-docker.repo"

readonly DISTRIBUTION DRIVER_FN YUM_REPO_URL

#######################################
# Install the nvidia-docker2 package from the configured yum repository and
# restart the docker service.
# Globals:   YUM_REPO_URL (read)
# Arguments: none
# Returns:   exit status of the subshell running the commands below
#######################################
# The function body is a subshell, so the `set -x` tracing enabled here ends
# when the function returns.
install_nvidia_docker2_amzn2() (
  set -x

  # yum-utils provides yum-config-manager, which registers the repository.
  sudo yum install -y yum-utils
  sudo yum-config-manager --add-repo "${YUM_REPO_URL}"

  sudo yum install -y nvidia-docker2
  sudo systemctl restart docker
)

#######################################
# Download the NVIDIA driver installer named by DRIVER_FN, run it as root and
# verify the result with nvidia-smi.
# Globals:   DRIVER_FN (read), TMPDIR (read, optional)
# Arguments: none
# Outputs:   command trace on stderr; the installer log if installation fails
# Returns:   0 on success, non-zero if any step fails
#######################################
install_nvidia_driver_amzn2() {
  # Subshell: keeps `set -x`, the EXIT trap and the temp-file variable local
  # to this function.
  (
    set -x

    sudo yum groupinstall -y "Development Tools"
    # Ensure the kernel headers match the running kernel;
    # groupinstall "Development Tools" has a habit of mismatching them.
    sudo yum install -y "kernel-devel-uname-r == $(uname -r)"

    # The installer is executed as root, so download it to an unpredictable,
    # private path instead of a fixed name under /tmp, and always remove it
    # when the subshell exits (success or failure).
    installer=$(mktemp "${TMPDIR:-/tmp}/nvidia_driver.XXXXXX") || exit 1
    trap 'rm -f -- "${installer}"' EXIT

    # -S keeps curl's error message visible despite -s; --retry covers
    # transient network failures. Explicit exits make these steps abort even
    # when the caller invoked the function in a context that disables errexit.
    curl -fsSL --retry 3 -o "${installer}" \
      "https://s3.amazonaws.com/ossci-linux/nvidia_driver/${DRIVER_FN}" || exit 1

    if ! sudo /bin/bash "${installer}" -s --no-drm; then
      # Surface the installer log for debugging, then fail.
      sudo cat /var/log/nvidia-installer.log || true
      exit 1
    fi

    nvidia-smi
  )
}

# Container toolkit step: pick the installer for this distribution.
# Only identifiers starting with "amzn" have an installer; anything else
# aborts the script with exit status 1.
echo "== Installing nvidia container toolkit for ${DISTRIBUTION} =="
if [[ "${DISTRIBUTION}" == amzn* ]]; then
  install_nvidia_docker2_amzn2
else
  echo "ERROR: Unknown distribution ${DISTRIBUTION}"
  exit 1
fi

# Driver step: pick the driver installer for this distribution.
# Only identifiers starting with "amzn" have an installer; anything else
# aborts the script with exit status 1.
echo "== Installing nvidia driver ${DRIVER_FN} =="
if [[ "${DISTRIBUTION}" == amzn* ]]; then
  install_nvidia_driver_amzn2
else
  echo "ERROR: Unknown distribution ${DISTRIBUTION}"
  exit 1
fi