mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[CI] Move main branch rocm binary builds to its own workflow (#158161)
Petition to move the main-branch ROCm binary builds out of ciflow/trunk and into ciflow/rocm, because they are a long pole for TTS (time to signal). <img width="1192" height="312" alt="image" src="https://github.com/user-attachments/assets/b12a097a-3763-4c62-b09f-094ee9ae1c37" /> Pull Request resolved: https://github.com/pytorch/pytorch/pull/158161 Approved by: https://github.com/seemethere
This commit is contained in:
parent
48315181c7
commit
08799217ae
30
.github/scripts/generate_ci_workflows.py
vendored
30
.github/scripts/generate_ci_workflows.py
vendored
|
|
@ -22,6 +22,7 @@ LABEL_CIFLOW_BINARIES = "ciflow/binaries"
|
||||||
LABEL_CIFLOW_PERIODIC = "ciflow/periodic"
|
LABEL_CIFLOW_PERIODIC = "ciflow/periodic"
|
||||||
LABEL_CIFLOW_BINARIES_LIBTORCH = "ciflow/binaries_libtorch"
|
LABEL_CIFLOW_BINARIES_LIBTORCH = "ciflow/binaries_libtorch"
|
||||||
LABEL_CIFLOW_BINARIES_WHEEL = "ciflow/binaries_wheel"
|
LABEL_CIFLOW_BINARIES_WHEEL = "ciflow/binaries_wheel"
|
||||||
|
LABEL_CIFLOW_ROCM = "ciflow/rocm"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -146,13 +147,35 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
ROCM_SMOKE_WORKFLOWS = [
|
||||||
|
BinaryBuildWorkflow(
|
||||||
|
os=OperatingSystem.LINUX,
|
||||||
|
package_type="manywheel",
|
||||||
|
build_variant="rocm",
|
||||||
|
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||||
|
OperatingSystem.LINUX,
|
||||||
|
arches=["6.4"],
|
||||||
|
python_versions=["3.9"],
|
||||||
|
),
|
||||||
|
ciflow_config=CIFlowConfig(
|
||||||
|
labels={
|
||||||
|
LABEL_CIFLOW_BINARIES,
|
||||||
|
LABEL_CIFLOW_BINARIES_WHEEL,
|
||||||
|
LABEL_CIFLOW_ROCM,
|
||||||
|
},
|
||||||
|
isolated_workflow=True,
|
||||||
|
),
|
||||||
|
branches="main",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
LINUX_BINARY_SMOKE_WORKFLOWS = [
|
LINUX_BINARY_SMOKE_WORKFLOWS = [
|
||||||
BinaryBuildWorkflow(
|
BinaryBuildWorkflow(
|
||||||
os=OperatingSystem.LINUX,
|
os=OperatingSystem.LINUX,
|
||||||
package_type="manywheel",
|
package_type="manywheel",
|
||||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||||
OperatingSystem.LINUX,
|
OperatingSystem.LINUX,
|
||||||
arches=["12.6", "12.8", "12.9", "6.4"],
|
arches=["12.6", "12.8", "12.9"],
|
||||||
python_versions=["3.9"],
|
python_versions=["3.9"],
|
||||||
),
|
),
|
||||||
branches="main",
|
branches="main",
|
||||||
|
|
@ -387,6 +410,11 @@ def main() -> None:
|
||||||
jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
|
jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
|
||||||
S390X_BINARY_BUILD_WORKFLOWS,
|
S390X_BINARY_BUILD_WORKFLOWS,
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
# Give rocm its own workflow file
|
||||||
|
jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
|
||||||
|
ROCM_SMOKE_WORKFLOWS,
|
||||||
|
),
|
||||||
(
|
(
|
||||||
jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
|
jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
|
||||||
LINUX_BINARY_SMOKE_WORKFLOWS,
|
LINUX_BINARY_SMOKE_WORKFLOWS,
|
||||||
|
|
|
||||||
92
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
92
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
|
|
@ -182,95 +182,3 @@ jobs:
|
||||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 and 12.9 build need sm_70+ runner
|
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 and 12.9 build need sm_70+ runner
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
manywheel-py3_9-rocm6_4-build:
|
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
|
||||||
uses: ./.github/workflows/_binary-build-linux.yml
|
|
||||||
needs: get-label-type
|
|
||||||
with:
|
|
||||||
PYTORCH_ROOT: /pytorch
|
|
||||||
PACKAGE_TYPE: manywheel
|
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
|
||||||
# favor of GPU_ARCH_VERSION
|
|
||||||
DESIRED_CUDA: rocm6.4
|
|
||||||
GPU_ARCH_VERSION: 6.4
|
|
||||||
GPU_ARCH_TYPE: rocm
|
|
||||||
DOCKER_IMAGE: manylinux2_28-builder
|
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
|
||||||
use_split_build: False
|
|
||||||
DESIRED_PYTHON: "3.9"
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
|
||||||
build_name: manywheel-py3_9-rocm6_4
|
|
||||||
build_environment: linux-binary-manywheel
|
|
||||||
secrets:
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
manywheel-py3_9-rocm6_4-test: # Testing
|
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
|
||||||
needs:
|
|
||||||
- manywheel-py3_9-rocm6_4-build
|
|
||||||
- get-label-type
|
|
||||||
runs-on: linux.rocm.gpu.mi250
|
|
||||||
timeout-minutes: 240
|
|
||||||
env:
|
|
||||||
PYTORCH_ROOT: /pytorch
|
|
||||||
PACKAGE_TYPE: manywheel
|
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
|
||||||
# favor of GPU_ARCH_VERSION
|
|
||||||
DESIRED_CUDA: rocm6.4
|
|
||||||
GPU_ARCH_VERSION: 6.4
|
|
||||||
GPU_ARCH_TYPE: rocm
|
|
||||||
SKIP_ALL_TESTS: 1
|
|
||||||
DOCKER_IMAGE: manylinux2_28-builder
|
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
|
||||||
use_split_build: False
|
|
||||||
DESIRED_PYTHON: "3.9"
|
|
||||||
steps:
|
|
||||||
- name: Setup ROCm
|
|
||||||
uses: ./.github/actions/setup-rocm
|
|
||||||
- uses: actions/download-artifact@v4.1.7
|
|
||||||
name: Download Build Artifacts
|
|
||||||
with:
|
|
||||||
name: manywheel-py3_9-rocm6_4
|
|
||||||
path: "${{ runner.temp }}/artifacts/"
|
|
||||||
- name: Checkout PyTorch
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
|
||||||
submodules: recursive
|
|
||||||
path: pytorch
|
|
||||||
show-progress: false
|
|
||||||
- name: Clean PyTorch checkout
|
|
||||||
run: |
|
|
||||||
# Remove any artifacts from the previous checkouts
|
|
||||||
git clean -fxd
|
|
||||||
working-directory: pytorch
|
|
||||||
- name: ROCm set GPU_FLAG
|
|
||||||
run: |
|
|
||||||
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
|
||||||
- name: configure aws credentials
|
|
||||||
id: aws_creds
|
|
||||||
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
|
|
||||||
uses: aws-actions/configure-aws-credentials@v4
|
|
||||||
with:
|
|
||||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
|
||||||
aws-region: us-east-1
|
|
||||||
role-duration-seconds: 18000
|
|
||||||
- name: Calculate docker image
|
|
||||||
id: calculate-docker-image
|
|
||||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
|
||||||
with:
|
|
||||||
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
|
|
||||||
docker-image-name: manylinux2_28-builder
|
|
||||||
custom-tag-prefix: rocm6.4
|
|
||||||
docker-build-dir: .ci/docker
|
|
||||||
working-directory: pytorch
|
|
||||||
- name: Pull Docker image
|
|
||||||
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
|
||||||
with:
|
|
||||||
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
|
||||||
- name: Test Pytorch binary
|
|
||||||
uses: ./pytorch/.github/actions/test-pytorch-binary
|
|
||||||
env:
|
|
||||||
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
|
||||||
- name: Teardown ROCm
|
|
||||||
uses: ./.github/actions/teardown-rocm
|
|
||||||
|
|
|
||||||
137
.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
generated
vendored
Normal file
137
.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
generated
vendored
Normal file
|
|
@ -0,0 +1,137 @@
|
||||||
|
# @generated DO NOT EDIT MANUALLY
|
||||||
|
|
||||||
|
# Template is at: .github/templates/linux_binary_build_workflow.yml.j2
|
||||||
|
# Generation script: .github/scripts/generate_ci_workflows.py
|
||||||
|
name: linux-binary-manywheel-rocm
|
||||||
|
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
tags:
|
||||||
|
- 'ciflow/binaries/*'
|
||||||
|
- 'ciflow/binaries_wheel/*'
|
||||||
|
- 'ciflow/rocm/*'
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
id-token: write
|
||||||
|
|
||||||
|
env:
|
||||||
|
# Needed for conda builds
|
||||||
|
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
|
||||||
|
AWS_DEFAULT_REGION: us-east-1
|
||||||
|
BINARY_ENV_FILE: /tmp/env
|
||||||
|
BUILD_ENVIRONMENT: linux-binary-manywheel-rocm
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||||
|
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||||
|
SKIP_ALL_TESTS: 0
|
||||||
|
concurrency:
|
||||||
|
group: linux-binary-manywheel-rocm-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
get-label-type:
|
||||||
|
if: github.repository_owner == 'pytorch'
|
||||||
|
name: get-label-type
|
||||||
|
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
|
||||||
|
with:
|
||||||
|
triggering_actor: ${{ github.triggering_actor }}
|
||||||
|
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||||
|
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||||
|
curr_ref_type: ${{ github.ref_type }}
|
||||||
|
manywheel-py3_9-rocm6_4-build:
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
uses: ./.github/workflows/_binary-build-linux.yml
|
||||||
|
needs: get-label-type
|
||||||
|
with:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: manywheel
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm6.4
|
||||||
|
GPU_ARCH_VERSION: 6.4
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
DOCKER_IMAGE: manylinux2_28-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||||
|
use_split_build: False
|
||||||
|
DESIRED_PYTHON: "3.9"
|
||||||
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
|
build_name: manywheel-py3_9-rocm6_4
|
||||||
|
build_environment: linux-binary-manywheel-rocm
|
||||||
|
secrets:
|
||||||
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
manywheel-py3_9-rocm6_4-test: # Testing
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
needs:
|
||||||
|
- manywheel-py3_9-rocm6_4-build
|
||||||
|
- get-label-type
|
||||||
|
runs-on: linux.rocm.gpu.mi250
|
||||||
|
timeout-minutes: 240
|
||||||
|
env:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: manywheel
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm6.4
|
||||||
|
GPU_ARCH_VERSION: 6.4
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
SKIP_ALL_TESTS: 1
|
||||||
|
DOCKER_IMAGE: manylinux2_28-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||||
|
use_split_build: False
|
||||||
|
DESIRED_PYTHON: "3.9"
|
||||||
|
steps:
|
||||||
|
- name: Setup ROCm
|
||||||
|
uses: ./.github/actions/setup-rocm
|
||||||
|
- uses: actions/download-artifact@v4.1.7
|
||||||
|
name: Download Build Artifacts
|
||||||
|
with:
|
||||||
|
name: manywheel-py3_9-rocm6_4
|
||||||
|
path: "${{ runner.temp }}/artifacts/"
|
||||||
|
- name: Checkout PyTorch
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||||
|
submodules: recursive
|
||||||
|
path: pytorch
|
||||||
|
show-progress: false
|
||||||
|
- name: Clean PyTorch checkout
|
||||||
|
run: |
|
||||||
|
# Remove any artifacts from the previous checkouts
|
||||||
|
git clean -fxd
|
||||||
|
working-directory: pytorch
|
||||||
|
- name: ROCm set GPU_FLAG
|
||||||
|
run: |
|
||||||
|
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
||||||
|
- name: configure aws credentials
|
||||||
|
id: aws_creds
|
||||||
|
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
|
||||||
|
uses: aws-actions/configure-aws-credentials@v4
|
||||||
|
with:
|
||||||
|
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||||
|
aws-region: us-east-1
|
||||||
|
role-duration-seconds: 18000
|
||||||
|
- name: Calculate docker image
|
||||||
|
id: calculate-docker-image
|
||||||
|
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||||
|
with:
|
||||||
|
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
|
||||||
|
docker-image-name: manylinux2_28-builder
|
||||||
|
custom-tag-prefix: rocm6.4
|
||||||
|
docker-build-dir: .ci/docker
|
||||||
|
working-directory: pytorch
|
||||||
|
- name: Pull Docker image
|
||||||
|
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||||
|
with:
|
||||||
|
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||||
|
- name: Test Pytorch binary
|
||||||
|
uses: ./pytorch/.github/actions/test-pytorch-binary
|
||||||
|
env:
|
||||||
|
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||||
|
- name: Teardown ROCm
|
||||||
|
uses: ./.github/actions/teardown-rocm
|
||||||
Loading…
Reference in New Issue
Block a user