mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[ROCm][CI] upgrade nightly wheels to ROCm 7.1 (#166730)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/166730 Approved by: https://github.com/jeffdaily Co-authored-by: Jeff Daily <jeff.daily@amd.com>
This commit is contained in:
parent
ffaa6578b7
commit
239e7b541a
|
|
@ -12,8 +12,8 @@ function do_install() {
|
||||||
|
|
||||||
rocm_version_nodot=${rocm_version//./}
|
rocm_version_nodot=${rocm_version//./}
|
||||||
|
|
||||||
# https://github.com/icl-utk-edu/magma/pull/65
|
# post merge of https://github.com/icl-utk-edu/magma/pull/65
|
||||||
MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
|
MAGMA_VERSION=c0792ae825fb36872784892ea643dd6f3456bc5f
|
||||||
magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
|
magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
|
||||||
|
|
||||||
rocm_dir="/opt/rocm"
|
rocm_dir="/opt/rocm"
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ CUDA_ARCHES_CUDNN_VERSION = {
|
||||||
"13.0": "9",
|
"13.0": "9",
|
||||||
}
|
}
|
||||||
|
|
||||||
ROCM_ARCHES = ["6.4", "7.0"]
|
ROCM_ARCHES = ["7.0", "7.1"]
|
||||||
|
|
||||||
XPU_ARCHES = ["xpu"]
|
XPU_ARCHES = ["xpu"]
|
||||||
|
|
||||||
|
|
|
||||||
2
.github/workflows/build-almalinux-images.yml
vendored
2
.github/workflows/build-almalinux-images.yml
vendored
|
|
@ -36,7 +36,7 @@ jobs:
|
||||||
runs-on: linux.9xlarge.ephemeral
|
runs-on: linux.9xlarge.ephemeral
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.4", "rocm7.0", "rocm7.1", "cpu"]
|
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm7.0", "rocm7.1", "cpu"]
|
||||||
steps:
|
steps:
|
||||||
- name: Build docker image
|
- name: Build docker image
|
||||||
uses: pytorch/pytorch/.github/actions/binary-docker-build@main
|
uses: pytorch/pytorch/.github/actions/binary-docker-build@main
|
||||||
|
|
|
||||||
1
.github/workflows/build-libtorch-images.yml
vendored
1
.github/workflows/build-libtorch-images.yml
vendored
|
|
@ -52,7 +52,6 @@ jobs:
|
||||||
{ tag: "cuda12.9" },
|
{ tag: "cuda12.9" },
|
||||||
{ tag: "cuda12.8" },
|
{ tag: "cuda12.8" },
|
||||||
{ tag: "cuda12.6" },
|
{ tag: "cuda12.6" },
|
||||||
{ tag: "rocm6.4" },
|
|
||||||
{ tag: "rocm7.0" },
|
{ tag: "rocm7.0" },
|
||||||
{ tag: "rocm7.1" },
|
{ tag: "rocm7.1" },
|
||||||
{ tag: "cpu" },
|
{ tag: "cpu" },
|
||||||
|
|
|
||||||
2
.github/workflows/build-magma-rocm-linux.yml
vendored
2
.github/workflows/build-magma-rocm-linux.yml
vendored
|
|
@ -34,7 +34,7 @@ jobs:
|
||||||
id-token: write
|
id-token: write
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
rocm_version: ["71", "70", "64"]
|
rocm_version: ["71", "70"]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout PyTorch
|
- name: Checkout PyTorch
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||||
|
|
|
||||||
1
.github/workflows/build-manywheel-images.yml
vendored
1
.github/workflows/build-manywheel-images.yml
vendored
|
|
@ -54,7 +54,6 @@ jobs:
|
||||||
{ name: "manylinuxaarch64-builder", tag: "cuda12.9", runner: "linux.arm64.2xlarge.ephemeral" },
|
{ name: "manylinuxaarch64-builder", tag: "cuda12.9", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||||
{ name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" },
|
{ name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||||
{ name: "manylinuxaarch64-builder", tag: "cuda12.6", runner: "linux.arm64.2xlarge.ephemeral" },
|
{ name: "manylinuxaarch64-builder", tag: "cuda12.6", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||||
{ name: "manylinux2_28-builder", tag: "rocm6.4", runner: "linux.9xlarge.ephemeral" },
|
|
||||||
{ name: "manylinux2_28-builder", tag: "rocm7.0", runner: "linux.9xlarge.ephemeral" },
|
{ name: "manylinux2_28-builder", tag: "rocm7.0", runner: "linux.9xlarge.ephemeral" },
|
||||||
{ name: "manylinux2_28-builder", tag: "rocm7.1", runner: "linux.9xlarge.ephemeral" },
|
{ name: "manylinux2_28-builder", tag: "rocm7.1", runner: "linux.9xlarge.ephemeral" },
|
||||||
{ name: "manylinux2_28-builder", tag: "cpu", runner: "linux.9xlarge.ephemeral" },
|
{ name: "manylinux2_28-builder", tag: "cpu", runner: "linux.9xlarge.ephemeral" },
|
||||||
|
|
|
||||||
2
.github/workflows/build-triton-wheel.yml
vendored
2
.github/workflows/build-triton-wheel.yml
vendored
|
|
@ -55,7 +55,7 @@ jobs:
|
||||||
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
|
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
|
||||||
include:
|
include:
|
||||||
- device: "rocm"
|
- device: "rocm"
|
||||||
rocm_version: "7.0"
|
rocm_version: "7.1"
|
||||||
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
|
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
|
||||||
- device: "cuda"
|
- device: "cuda"
|
||||||
rocm_version: ""
|
rocm_version: ""
|
||||||
|
|
|
||||||
236
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
236
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
|
|
@ -384,124 +384,6 @@ jobs:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
uses: ./.github/workflows/_binary-upload.yml
|
uses: ./.github/workflows/_binary-upload.yml
|
||||||
|
|
||||||
libtorch-rocm6_4-shared-with-deps-release-build:
|
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
|
||||||
uses: ./.github/workflows/_binary-build-linux.yml
|
|
||||||
needs: get-label-type
|
|
||||||
with:
|
|
||||||
PYTORCH_ROOT: /pytorch
|
|
||||||
PACKAGE_TYPE: libtorch
|
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
|
||||||
# favor of GPU_ARCH_VERSION
|
|
||||||
DESIRED_CUDA: rocm6.4
|
|
||||||
GPU_ARCH_VERSION: "6.4"
|
|
||||||
GPU_ARCH_TYPE: rocm
|
|
||||||
DOCKER_IMAGE: libtorch-cxx11-builder
|
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
|
||||||
LIBTORCH_CONFIG: release
|
|
||||||
LIBTORCH_VARIANT: shared-with-deps
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
|
||||||
timeout-minutes: 300
|
|
||||||
build_name: libtorch-rocm6_4-shared-with-deps-release
|
|
||||||
build_environment: linux-binary-libtorch
|
|
||||||
secrets:
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
libtorch-rocm6_4-shared-with-deps-release-test: # Testing
|
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
|
||||||
needs:
|
|
||||||
- libtorch-rocm6_4-shared-with-deps-release-build
|
|
||||||
- get-label-type
|
|
||||||
runs-on: linux.rocm.gpu.mi250
|
|
||||||
timeout-minutes: 240
|
|
||||||
env:
|
|
||||||
PYTORCH_ROOT: /pytorch
|
|
||||||
PACKAGE_TYPE: libtorch
|
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
|
||||||
# favor of GPU_ARCH_VERSION
|
|
||||||
DESIRED_CUDA: rocm6.4
|
|
||||||
GPU_ARCH_VERSION: "6.4"
|
|
||||||
GPU_ARCH_TYPE: rocm
|
|
||||||
SKIP_ALL_TESTS: 1
|
|
||||||
DOCKER_IMAGE: libtorch-cxx11-builder
|
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
|
||||||
LIBTORCH_CONFIG: release
|
|
||||||
LIBTORCH_VARIANT: shared-with-deps
|
|
||||||
permissions:
|
|
||||||
id-token: write
|
|
||||||
contents: read
|
|
||||||
steps:
|
|
||||||
- name: Setup ROCm
|
|
||||||
uses: ./.github/actions/setup-rocm
|
|
||||||
- uses: actions/download-artifact@v4.1.7
|
|
||||||
name: Download Build Artifacts
|
|
||||||
with:
|
|
||||||
name: libtorch-rocm6_4-shared-with-deps-release
|
|
||||||
path: "${{ runner.temp }}/artifacts/"
|
|
||||||
- name: Checkout PyTorch
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
|
||||||
submodules: recursive
|
|
||||||
path: pytorch
|
|
||||||
show-progress: false
|
|
||||||
- name: Clean PyTorch checkout
|
|
||||||
run: |
|
|
||||||
# Remove any artifacts from the previous checkouts
|
|
||||||
git clean -fxd
|
|
||||||
working-directory: pytorch
|
|
||||||
- name: ROCm set GPU_FLAG
|
|
||||||
run: |
|
|
||||||
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
|
||||||
- name: configure aws credentials
|
|
||||||
id: aws_creds
|
|
||||||
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
|
|
||||||
uses: aws-actions/configure-aws-credentials@v4
|
|
||||||
with:
|
|
||||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
|
||||||
aws-region: us-east-1
|
|
||||||
role-duration-seconds: 18000
|
|
||||||
- name: Calculate docker image
|
|
||||||
id: calculate-docker-image
|
|
||||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
|
||||||
with:
|
|
||||||
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
|
|
||||||
docker-image-name: libtorch-cxx11-builder
|
|
||||||
custom-tag-prefix: rocm6.4
|
|
||||||
docker-build-dir: .ci/docker
|
|
||||||
working-directory: pytorch
|
|
||||||
- name: Pull Docker image
|
|
||||||
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
|
||||||
with:
|
|
||||||
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
|
||||||
- name: Test Pytorch binary
|
|
||||||
uses: ./pytorch/.github/actions/test-pytorch-binary
|
|
||||||
env:
|
|
||||||
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
|
||||||
- name: Teardown ROCm
|
|
||||||
uses: ./.github/actions/teardown-rocm
|
|
||||||
libtorch-rocm6_4-shared-with-deps-release-upload: # Uploading
|
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
|
||||||
permissions:
|
|
||||||
id-token: write
|
|
||||||
contents: read
|
|
||||||
needs: libtorch-rocm6_4-shared-with-deps-release-test
|
|
||||||
with:
|
|
||||||
PYTORCH_ROOT: /pytorch
|
|
||||||
PACKAGE_TYPE: libtorch
|
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
|
||||||
# favor of GPU_ARCH_VERSION
|
|
||||||
DESIRED_CUDA: rocm6.4
|
|
||||||
GPU_ARCH_VERSION: "6.4"
|
|
||||||
GPU_ARCH_TYPE: rocm
|
|
||||||
DOCKER_IMAGE: libtorch-cxx11-builder
|
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
|
||||||
LIBTORCH_CONFIG: release
|
|
||||||
LIBTORCH_VARIANT: shared-with-deps
|
|
||||||
build_name: libtorch-rocm6_4-shared-with-deps-release
|
|
||||||
secrets:
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
uses: ./.github/workflows/_binary-upload.yml
|
|
||||||
|
|
||||||
libtorch-rocm7_0-shared-with-deps-release-build:
|
libtorch-rocm7_0-shared-with-deps-release-build:
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
uses: ./.github/workflows/_binary-build-linux.yml
|
uses: ./.github/workflows/_binary-build-linux.yml
|
||||||
|
|
@ -619,3 +501,121 @@ jobs:
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
uses: ./.github/workflows/_binary-upload.yml
|
uses: ./.github/workflows/_binary-upload.yml
|
||||||
|
|
||||||
|
libtorch-rocm7_1-shared-with-deps-release-build:
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
uses: ./.github/workflows/_binary-build-linux.yml
|
||||||
|
needs: get-label-type
|
||||||
|
with:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: libtorch
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm7.1
|
||||||
|
GPU_ARCH_VERSION: "7.1"
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
DOCKER_IMAGE: libtorch-cxx11-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm7.1
|
||||||
|
LIBTORCH_CONFIG: release
|
||||||
|
LIBTORCH_VARIANT: shared-with-deps
|
||||||
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
|
timeout-minutes: 300
|
||||||
|
build_name: libtorch-rocm7_1-shared-with-deps-release
|
||||||
|
build_environment: linux-binary-libtorch
|
||||||
|
secrets:
|
||||||
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
libtorch-rocm7_1-shared-with-deps-release-test: # Testing
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
needs:
|
||||||
|
- libtorch-rocm7_1-shared-with-deps-release-build
|
||||||
|
- get-label-type
|
||||||
|
runs-on: linux.rocm.gpu.mi250
|
||||||
|
timeout-minutes: 240
|
||||||
|
env:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: libtorch
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm7.1
|
||||||
|
GPU_ARCH_VERSION: "7.1"
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
SKIP_ALL_TESTS: 1
|
||||||
|
DOCKER_IMAGE: libtorch-cxx11-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm7.1
|
||||||
|
LIBTORCH_CONFIG: release
|
||||||
|
LIBTORCH_VARIANT: shared-with-deps
|
||||||
|
permissions:
|
||||||
|
id-token: write
|
||||||
|
contents: read
|
||||||
|
steps:
|
||||||
|
- name: Setup ROCm
|
||||||
|
uses: ./.github/actions/setup-rocm
|
||||||
|
- uses: actions/download-artifact@v4.1.7
|
||||||
|
name: Download Build Artifacts
|
||||||
|
with:
|
||||||
|
name: libtorch-rocm7_1-shared-with-deps-release
|
||||||
|
path: "${{ runner.temp }}/artifacts/"
|
||||||
|
- name: Checkout PyTorch
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||||
|
submodules: recursive
|
||||||
|
path: pytorch
|
||||||
|
show-progress: false
|
||||||
|
- name: Clean PyTorch checkout
|
||||||
|
run: |
|
||||||
|
# Remove any artifacts from the previous checkouts
|
||||||
|
git clean -fxd
|
||||||
|
working-directory: pytorch
|
||||||
|
- name: ROCm set GPU_FLAG
|
||||||
|
run: |
|
||||||
|
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
||||||
|
- name: configure aws credentials
|
||||||
|
id: aws_creds
|
||||||
|
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
|
||||||
|
uses: aws-actions/configure-aws-credentials@v4
|
||||||
|
with:
|
||||||
|
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||||
|
aws-region: us-east-1
|
||||||
|
role-duration-seconds: 18000
|
||||||
|
- name: Calculate docker image
|
||||||
|
id: calculate-docker-image
|
||||||
|
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||||
|
with:
|
||||||
|
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
|
||||||
|
docker-image-name: libtorch-cxx11-builder
|
||||||
|
custom-tag-prefix: rocm7.1
|
||||||
|
docker-build-dir: .ci/docker
|
||||||
|
working-directory: pytorch
|
||||||
|
- name: Pull Docker image
|
||||||
|
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||||
|
with:
|
||||||
|
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||||
|
- name: Test Pytorch binary
|
||||||
|
uses: ./pytorch/.github/actions/test-pytorch-binary
|
||||||
|
env:
|
||||||
|
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||||
|
- name: Teardown ROCm
|
||||||
|
uses: ./.github/actions/teardown-rocm
|
||||||
|
libtorch-rocm7_1-shared-with-deps-release-upload: # Uploading
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
permissions:
|
||||||
|
id-token: write
|
||||||
|
contents: read
|
||||||
|
needs: libtorch-rocm7_1-shared-with-deps-release-test
|
||||||
|
with:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: libtorch
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm7.1
|
||||||
|
GPU_ARCH_VERSION: "7.1"
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
DOCKER_IMAGE: libtorch-cxx11-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm7.1
|
||||||
|
LIBTORCH_CONFIG: release
|
||||||
|
LIBTORCH_VARIANT: shared-with-deps
|
||||||
|
build_name: libtorch-rocm7_1-shared-with-deps-release
|
||||||
|
secrets:
|
||||||
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
uses: ./.github/workflows/_binary-upload.yml
|
||||||
|
|
|
||||||
1610
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
1610
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user