mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Enable manywheel build and smoke test on main branch for ROCm (#153287)
Fixes the issue where breakage of ROCm wheel builds is not discovered until the nightly job runs, e.g. https://github.com/pytorch/pytorch/pull/153253
Pull Request resolved: https://github.com/pytorch/pytorch/pull/153287
Approved by: https://github.com/jeffdaily
This commit is contained in:
parent
5285d10243
commit
794ef6c9b8
1
.github/actionlint.yaml
vendored
1
.github/actionlint.yaml
vendored
|
|
@ -49,6 +49,7 @@ self-hosted-runner:
|
||||||
# Organization-wide AMD-hosted runners
|
# Organization-wide AMD-hosted runners
|
||||||
# MI2xx runners
|
# MI2xx runners
|
||||||
- linux.rocm.gpu
|
- linux.rocm.gpu
|
||||||
|
- linux.rocm.gpu.mi250
|
||||||
- linux.rocm.gpu.2
|
- linux.rocm.gpu.2
|
||||||
- linux.rocm.gpu.4
|
- linux.rocm.gpu.4
|
||||||
# MI300 runners
|
# MI300 runners
|
||||||
|
|
|
||||||
2
.github/scripts/generate_ci_workflows.py
vendored
2
.github/scripts/generate_ci_workflows.py
vendored
|
|
@ -152,7 +152,7 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
|
||||||
package_type="manywheel",
|
package_type="manywheel",
|
||||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||||
OperatingSystem.LINUX,
|
OperatingSystem.LINUX,
|
||||||
arches=["12.6", "12.8", "12.9"],
|
arches=["12.6", "12.8", "12.9", "6.4"],
|
||||||
python_versions=["3.9"],
|
python_versions=["3.9"],
|
||||||
),
|
),
|
||||||
branches="main",
|
branches="main",
|
||||||
|
|
|
||||||
|
|
@ -171,7 +171,7 @@ jobs:
|
||||||
- name: Teardown XPU
|
- name: Teardown XPU
|
||||||
uses: ./.github/actions/teardown-xpu
|
uses: ./.github/actions/teardown-xpu
|
||||||
{%- else %}
|
{%- else %}
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: !{{ common.timeout_minutes }}
|
timeout-minutes: !{{ common.timeout_minutes }}
|
||||||
!{{ upload.binary_env(config) }}
|
!{{ upload.binary_env(config) }}
|
||||||
steps:
|
steps:
|
||||||
|
|
|
||||||
4
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
4
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
|
|
@ -274,7 +274,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- libtorch-rocm6_3-shared-with-deps-release-build
|
- libtorch-rocm6_3-shared-with-deps-release-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -388,7 +388,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- libtorch-rocm6_4-shared-with-deps-release-build
|
- libtorch-rocm6_4-shared-with-deps-release-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
|
||||||
92
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
92
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
|
|
@ -182,3 +182,95 @@ jobs:
|
||||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 and 12.9 build need sm_70+ runner
|
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 and 12.9 build need sm_70+ runner
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
manywheel-py3_9-rocm6_4-build:
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
uses: ./.github/workflows/_binary-build-linux.yml
|
||||||
|
needs: get-label-type
|
||||||
|
with:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: manywheel
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm6.4
|
||||||
|
GPU_ARCH_VERSION: 6.4
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
DOCKER_IMAGE: manylinux2_28-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||||
|
use_split_build: False
|
||||||
|
DESIRED_PYTHON: "3.9"
|
||||||
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
|
build_name: manywheel-py3_9-rocm6_4
|
||||||
|
build_environment: linux-binary-manywheel
|
||||||
|
secrets:
|
||||||
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
manywheel-py3_9-rocm6_4-test: # Testing
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
needs:
|
||||||
|
- manywheel-py3_9-rocm6_4-build
|
||||||
|
- get-label-type
|
||||||
|
runs-on: linux.rocm.gpu.mi250
|
||||||
|
timeout-minutes: 240
|
||||||
|
env:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: manywheel
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm6.4
|
||||||
|
GPU_ARCH_VERSION: 6.4
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
SKIP_ALL_TESTS: 1
|
||||||
|
DOCKER_IMAGE: manylinux2_28-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||||
|
use_split_build: False
|
||||||
|
DESIRED_PYTHON: "3.9"
|
||||||
|
steps:
|
||||||
|
- name: Setup ROCm
|
||||||
|
uses: ./.github/actions/setup-rocm
|
||||||
|
- uses: actions/download-artifact@v4.1.7
|
||||||
|
name: Download Build Artifacts
|
||||||
|
with:
|
||||||
|
name: manywheel-py3_9-rocm6_4
|
||||||
|
path: "${{ runner.temp }}/artifacts/"
|
||||||
|
- name: Checkout PyTorch
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||||
|
submodules: recursive
|
||||||
|
path: pytorch
|
||||||
|
show-progress: false
|
||||||
|
- name: Clean PyTorch checkout
|
||||||
|
run: |
|
||||||
|
# Remove any artifacts from the previous checkouts
|
||||||
|
git clean -fxd
|
||||||
|
working-directory: pytorch
|
||||||
|
- name: ROCm set GPU_FLAG
|
||||||
|
run: |
|
||||||
|
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
||||||
|
- name: configure aws credentials
|
||||||
|
id: aws_creds
|
||||||
|
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
|
||||||
|
uses: aws-actions/configure-aws-credentials@v4
|
||||||
|
with:
|
||||||
|
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||||
|
aws-region: us-east-1
|
||||||
|
role-duration-seconds: 18000
|
||||||
|
- name: Calculate docker image
|
||||||
|
id: calculate-docker-image
|
||||||
|
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||||
|
with:
|
||||||
|
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
|
||||||
|
docker-image-name: manylinux2_28-builder
|
||||||
|
custom-tag-prefix: rocm6.4
|
||||||
|
docker-build-dir: .ci/docker
|
||||||
|
working-directory: pytorch
|
||||||
|
- name: Pull Docker image
|
||||||
|
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||||
|
with:
|
||||||
|
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||||
|
- name: Test Pytorch binary
|
||||||
|
uses: ./pytorch/.github/actions/test-pytorch-binary
|
||||||
|
env:
|
||||||
|
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||||
|
- name: Teardown ROCm
|
||||||
|
uses: ./.github/actions/teardown-rocm
|
||||||
|
|
|
||||||
24
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
24
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
|
|
@ -345,7 +345,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_9-rocm6_3-build
|
- manywheel-py3_9-rocm6_3-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -459,7 +459,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_9-rocm6_4-build
|
- manywheel-py3_9-rocm6_4-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -958,7 +958,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_10-rocm6_3-build
|
- manywheel-py3_10-rocm6_3-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -1072,7 +1072,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_10-rocm6_4-build
|
- manywheel-py3_10-rocm6_4-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -1639,7 +1639,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_11-rocm6_3-build
|
- manywheel-py3_11-rocm6_3-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -1753,7 +1753,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_11-rocm6_4-build
|
- manywheel-py3_11-rocm6_4-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -2252,7 +2252,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_12-rocm6_3-build
|
- manywheel-py3_12-rocm6_3-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -2366,7 +2366,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_12-rocm6_4-build
|
- manywheel-py3_12-rocm6_4-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -2865,7 +2865,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_13-rocm6_3-build
|
- manywheel-py3_13-rocm6_3-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -2979,7 +2979,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_13-rocm6_4-build
|
- manywheel-py3_13-rocm6_4-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -3478,7 +3478,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_13t-rocm6_3-build
|
- manywheel-py3_13t-rocm6_3-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
@ -3592,7 +3592,7 @@ jobs:
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_13t-rocm6_4-build
|
- manywheel-py3_13t-rocm6_4-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
env:
|
env:
|
||||||
PYTORCH_ROOT: /pytorch
|
PYTORCH_ROOT: /pytorch
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user