From fe100c3c5bcb19899b1f92c3281e527ba1eb011e Mon Sep 17 00:00:00 2001 From: Ting Lu Date: Fri, 21 Feb 2025 03:04:04 +0000 Subject: [PATCH] Add libtorch nightly build for CUDA 12.8 (#146265) Try removing sm50 and sm60 to shrink the binary size and resolve the ld --relink error. Per the CUDA 12.8 release notes: "Architecture support for Maxwell, Pascal, and Volta is considered feature-complete and will be frozen in an upcoming release." Also update the runner for the CUDA 12.8 tests to g4dn (T4, sm75) due to the drop in sm50/60 support. https://github.com/pytorch/pytorch/issues/145570 Pull Request resolved: https://github.com/pytorch/pytorch/pull/146265 Approved by: https://github.com/atalman --- .ci/manywheel/build_cuda.sh | 2 +- .../scripts/generate_binary_build_matrix.py | 3 - .../linux_binary_build_workflow.yml.j2 | 5 +- ...inux-binary-libtorch-cxx11-abi-nightly.yml | 65 +++++++++++++++++++ .../generated-linux-binary-manywheel-main.yml | 2 +- ...nerated-linux-binary-manywheel-nightly.yml | 12 ++-- 6 files changed, 77 insertions(+), 12 deletions(-) diff --git a/.ci/manywheel/build_cuda.sh b/.ci/manywheel/build_cuda.sh index bf2622cc55b..f6b2cb61d7b 100644 --- a/.ci/manywheel/build_cuda.sh +++ b/.ci/manywheel/build_cuda.sh @@ -54,7 +54,7 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.') TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6" case ${CUDA_VERSION} in 12.8) - TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0;10.0;12.0+PTX" #Ripping out 5.0 and 6.0 due to ld error + TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0+PTX" #Ripping out 5.0 and 6.0 due to ld error EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") ;; 12.6) diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index 5b50917fc70..67160bda12c 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -246,9 +246,6 @@ def generate_libtorch_matrix( if os == "linux": arches += CUDA_ARCHES arches += 
ROCM_ARCHES - # skip CUDA 12.8 builds for libtorch - if "12.8" in arches: - arches.remove("12.8") elif os == "windows": arches += CUDA_ARCHES if libtorch_variants is None: diff --git a/.github/templates/linux_binary_build_workflow.yml.j2 b/.github/templates/linux_binary_build_workflow.yml.j2 index 5b1acca98fe..e0fda97e324 100644 --- a/.github/templates/linux_binary_build_workflow.yml.j2 +++ b/.github/templates/linux_binary_build_workflow.yml.j2 @@ -111,7 +111,10 @@ jobs: ALPINE_IMAGE: "docker.io/s390x/alpine" {%- elif config["gpu_arch_type"] == "rocm" %} runs_on: linux.rocm.gpu - {%- elif config["gpu_arch_type"] == "cuda" %} + {%- elif config["gpu_arch_type"] == "cuda" and config["gpu_arch_version"] == "12.8" %} + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner + {%- elif config["gpu_arch_type"] == "cuda" and config["gpu_arch_version"] != "12.8"%} runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runs_on: linux.4xlarge.nvidia.gpu {%- else %} diff --git a/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml b/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml index 415e0a0645e..4114ab11f05 100644 --- a/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml +++ b/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml @@ -301,6 +301,71 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} uses: ./.github/workflows/_binary-upload.yml + libtorch-cuda12_8-shared-with-deps-cxx11-abi-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: 12.8 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: 
pytorch/libtorch-cxx11-builder:cuda12.8-main + LIBTORCH_VARIANT: shared-with-deps + DESIRED_DEVTOOLSET: cxx11-abi + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: libtorch-cuda12_8-shared-with-deps-cxx11-abi + build_environment: linux-binary-libtorch-cxx11-abi + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + libtorch-cuda12_8-shared-with-deps-cxx11-abi-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cuda12_8-shared-with-deps-cxx11-abi-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: 12.8 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.8-main + LIBTORCH_VARIANT: shared-with-deps + DESIRED_DEVTOOLSET: cxx11-abi + build_name: libtorch-cuda12_8-shared-with-deps-cxx11-abi + build_environment: linux-binary-libtorch-cxx11-abi + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + libtorch-cuda12_8-shared-with-deps-cxx11-abi-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cuda12_8-shared-with-deps-cxx11-abi-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: 12.8 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.8-main + LIBTORCH_VARIANT: shared-with-deps + DESIRED_DEVTOOLSET: cxx11-abi + build_name: libtorch-cuda12_8-shared-with-deps-cxx11-abi + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: 
./.github/workflows/_binary-upload.yml + libtorch-rocm6_2_4-shared-with-deps-cxx11-abi-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml diff --git a/.github/workflows/generated-linux-binary-manywheel-main.yml b/.github/workflows/generated-linux-binary-manywheel-main.yml index 3a8530cce7a..e619743227e 100644 --- a/.github/workflows/generated-linux-binary-manywheel-main.yml +++ b/.github/workflows/generated-linux-binary-manywheel-main.yml @@ -223,6 +223,6 @@ jobs: build_name: manywheel-py3_9-cuda12_8 build_environment: linux-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - runs_on: linux.4xlarge.nvidia.gpu + runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner secrets: github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index cfc4dbd7b73..72c26af7bda 100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -424,7 +424,7 @@ jobs: build_name: manywheel-py3_9-cuda12_8 build_environment: linux-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - runs_on: linux.4xlarge.nvidia.gpu + runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_9-cuda12_8-upload: # Uploading @@ -1122,7 +1122,7 @@ jobs: build_name: manywheel-py3_10-cuda12_8 build_environment: linux-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - runs_on: linux.4xlarge.nvidia.gpu + runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_10-cuda12_8-upload: # Uploading @@ -1885,7 +1885,7 @@ jobs: build_name: manywheel-py3_11-cuda12_8 build_environment: 
linux-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - runs_on: linux.4xlarge.nvidia.gpu + runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_11-cuda12_8-upload: # Uploading @@ -2583,7 +2583,7 @@ jobs: build_name: manywheel-py3_12-cuda12_8 build_environment: linux-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - runs_on: linux.4xlarge.nvidia.gpu + runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_12-cuda12_8-upload: # Uploading @@ -3281,7 +3281,7 @@ jobs: build_name: manywheel-py3_13-cuda12_8 build_environment: linux-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - runs_on: linux.4xlarge.nvidia.gpu + runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13-cuda12_8-upload: # Uploading @@ -3979,7 +3979,7 @@ jobs: build_name: manywheel-py3_13t-cuda12_8 build_environment: linux-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - runs_on: linux.4xlarge.nvidia.gpu + runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13t-cuda12_8-upload: # Uploading