diff --git a/.ci/pytorch/numba-cuda-13.patch b/.ci/pytorch/numba-cuda-13.patch new file mode 100644 index 00000000000..f96ff287ed3 --- /dev/null +++ b/.ci/pytorch/numba-cuda-13.patch @@ -0,0 +1,25 @@ +From 6e08c9d08e9de59c7af28b720289debbbd384764 Mon Sep 17 00:00:00 2001 +From: Michael Wang <13521008+isVoid@users.noreply.github.com> +Date: Tue, 1 Apr 2025 17:28:05 -0700 +Subject: [PATCH] Avoid bumping certain driver API to avoid future breakage + (#185) + +Co-authored-by: isVoid +--- + numba_cuda/numba/cuda/cudadrv/driver.py | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py +index 1641bf77..233e9ed7 100644 +--- a/numba_cuda/numba/cuda/cudadrv/driver.py ++++ b/numba_cuda/numba/cuda/cudadrv/driver.py +@@ -365,6 +365,9 @@ def _find_api(self, fname): + else: + variants = ('_v2', '') + ++ if fname in ("cuCtxGetDevice", "cuCtxSynchronize"): ++ return getattr(self.lib, fname) ++ + for variant in variants: + try: + return getattr(self.lib, f'{fname}{variant}') diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh index 17911338ca1..7290ff6c895 100755 --- a/.ci/pytorch/test.sh +++ b/.ci/pytorch/test.sh @@ -32,6 +32,16 @@ if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /v git config --global --add safe.directory /var/lib/jenkins/workspace fi + +# Patch numba to avoid CUDA-13 crash, see https://github.com/pytorch/pytorch/issues/162878 +NUMBA_CUDA_DIR=$(python -c "import os;import numba.cuda; print(os.path.dirname(numba.cuda.__file__))" 2>/dev/null || true) +if [ -n "$NUMBA_CUDA_DIR" ]; then + NUMBA_PATCH="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/numba-cuda-13.patch" + pushd "$NUMBA_CUDA_DIR" + patch -p4 <"$NUMBA_PATCH" + popd +fi + echo "Environment variables:" env diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml index 66579b573a6..537e94488b3 100644 --- a/.github/workflows/_linux-test.yml +++ b/.github/workflows/_linux-test.yml @@ -169,7 +169,7 @@ jobs: id: install-nvidia-driver uses: pytorch/test-infra/.github/actions/setup-nvidia@main with: - driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '570.133.07' }} + driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '580.82.07' }} if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }} - name: Setup GPU_FLAG for docker run