mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[CI] Move all ROCm jobs to periodic frequency (#131637)
The `inductor` and `rocm` workflows are currently the major contributors to the load on ROCm CI, resulting in huge backlogs: https://github.com/pytorch/pytorch/pull/131489#issue-2425804464

* Move rocm.yml to cron frequency.
* Move the ROCm CI jobs from inductor.yml to inductor-rocm.yml.
* Introduce `ciflow/inductor-rocm` as a PR label to manually invoke the inductor jobs for ROCm (they are not invoked automatically, to limit CI load).
* After this PR, only the `trunk` workflow jobs for ROCm will run on every commit and PR merge. Since they take roughly 45 min × 3 on average, I decided to leave them as-is, because they give us some basic insulation against ROCm breakage.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/131637
Approved by: https://github.com/clee2000, https://github.com/atalman, https://github.com/huydhn
This commit is contained in:
parent
8aff6caf67
commit
05064f2827
1
.github/pytorch-probot.yml
vendored
1
.github/pytorch-probot.yml
vendored
|
|
@ -6,6 +6,7 @@ ciflow_push_tags:
|
|||
- ciflow/binaries_libtorch
|
||||
- ciflow/binaries_wheel
|
||||
- ciflow/inductor
|
||||
- ciflow/inductor-rocm
|
||||
- ciflow/inductor-perf-compare
|
||||
- ciflow/inductor-micro-benchmark
|
||||
- ciflow/inductor-cu124
|
||||
|
|
|
|||
47
.github/workflows/inductor-rocm.yml
vendored
Normal file
47
.github/workflows/inductor-rocm.yml
vendored
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
name: inductor-rocm
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# We have several schedules so jobs can check github.event.schedule to activate only for a fraction of the runs.
|
||||
# Also run less frequently on weekends.
|
||||
- cron: 45 0,4,8,12,16,20 * * 1-5
|
||||
- cron: 45 4,12 * * 0,6
|
||||
- cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests
|
||||
push:
|
||||
branches:
|
||||
# - main
|
||||
- release/*
|
||||
tags:
|
||||
- ciflow/inductor-rocm/*
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ github.event.schedule }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions: read-all
|
||||
|
||||
jobs:
|
||||
linux-focal-rocm6_1-py3_8-inductor-build:
|
||||
name: rocm6.1-py3.8-inductor
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-focal-rocm6.1-py3.8
|
||||
docker-image-name: pytorch-linux-focal-rocm-n-py3
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.2" },
|
||||
{ config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.2" },
|
||||
]}
|
||||
|
||||
linux-focal-rocm6_1-py3_8-inductor-test:
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
name: rocm6.1-py3.8-inductor
|
||||
uses: ./.github/workflows/_rocm-test.yml
|
||||
needs: linux-focal-rocm6_1-py3_8-inductor-build
|
||||
with:
|
||||
build-environment: linux-focal-rocm6.1-py3.8
|
||||
docker-image: ${{ needs.linux-focal-rocm6_1-py3_8-inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_8-inductor-build.outputs.test-matrix }}
|
||||
24
.github/workflows/inductor.yml
vendored
24
.github/workflows/inductor.yml
vendored
|
|
@ -16,30 +16,6 @@ concurrency:
|
|||
permissions: read-all
|
||||
|
||||
jobs:
|
||||
linux-focal-rocm6_1-py3_8-inductor-build:
|
||||
name: rocm6.1-py3.8-inductor
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-focal-rocm6.1-py3.8
|
||||
docker-image-name: pytorch-linux-focal-rocm-n-py3
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.2" },
|
||||
{ config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.2" },
|
||||
]}
|
||||
|
||||
linux-focal-rocm6_1-py3_8-inductor-test:
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
name: rocm6.1-py3.8-inductor
|
||||
uses: ./.github/workflows/_rocm-test.yml
|
||||
needs: linux-focal-rocm6_1-py3_8-inductor-build
|
||||
with:
|
||||
build-environment: linux-focal-rocm6.1-py3.8
|
||||
docker-image: ${{ needs.linux-focal-rocm6_1-py3_8-inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-focal-rocm6_1-py3_8-inductor-build.outputs.test-matrix }}
|
||||
|
||||
linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
|
||||
name: cuda12.1-py3.10-gcc9-sm86
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
|
|
|
|||
8
.github/workflows/rocm.yml
vendored
8
.github/workflows/rocm.yml
vendored
|
|
@ -3,12 +3,18 @@ name: rocm
|
|||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
# - main
|
||||
- release/*
|
||||
tags:
|
||||
- ciflow/rocm/*
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# We have several schedules so jobs can check github.event.schedule to activate only for a fraction of the runs.
|
||||
# Also run less frequently on weekends.
|
||||
- cron: 45 0,8,16 * * 1-5
|
||||
- cron: 45 4 * * 0,6
|
||||
- cron: 45 4,12,20 * * 1-5
|
||||
- cron: 45 12 * * 0,6
|
||||
- cron: 29 8 * * * # about 1:29am PDT
|
||||
|
||||
concurrency:
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user