Upgrade CI to ROCm5.3 (#88297)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/88297
Approved by: https://github.com/malfet
This commit is contained in:
Jithun Nair 2022-12-13 04:50:06 +00:00 committed by PyTorch MergeBot
parent 7498e23bd5
commit 181a82ffd2
7 changed files with 69 additions and 45 deletions

View File

@ -188,15 +188,6 @@ case "$image" in
VISION=yes
CONDA_CMAKE=yes
;;
pytorch-linux-focal-rocm5.1-py3.8)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=5.1.1
CONDA_CMAKE=yes
;;
pytorch-linux-focal-rocm5.2-py3.8)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=9
@ -204,6 +195,17 @@ case "$image" in
DB=yes
VISION=yes
ROCM_VERSION=5.2
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
;;
# Build settings for the pytorch-linux-focal-rocm5.3-py3.8 image
# (ROCm 5.3, Python 3.8, GCC 9).
pytorch-linux-focal-rocm5.3-py3.8)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=5.3
# Same Ninja pin as the rocm5.2 image.
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
;;
pytorch-linux-focal-py3.7-gcc7)

View File

@ -37,8 +37,8 @@ jobs:
- docker-image-name: pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7
- docker-image-name: pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7
- docker-image-name: pytorch-linux-bionic-py3.7-clang9
- docker-image-name: pytorch-linux-focal-rocm5.1-py3.8
- docker-image-name: pytorch-linux-focal-rocm5.2-py3.8
- docker-image-name: pytorch-linux-focal-rocm5.3-py3.8
- docker-image-name: pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12
- docker-image-name: pytorch-linux-jammy-cuda11.7-cudnn8-py3.8-clang12
- docker-image-name: pytorch-linux-focal-py3-clang7-android-ndk-r19c

View File

@ -36,49 +36,49 @@ jobs:
test-matrix: ${{ needs.linux-bionic-cuda11_6-py3-gcc7-slow-gradcheck-build.outputs.test-matrix }}
timeout-minutes: 300
linux-focal-rocm5_2-py3_8-slow-build:
name: linux-focal-rocm5.2-py3.8-slow
linux-focal-rocm5_3-py3_8-slow-build:
name: linux-focal-rocm5.3-py3.8-slow
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.2-py3.8
docker-image-name: pytorch-linux-focal-rocm5.2-py3.8
build-environment: linux-focal-rocm5.3-py3.8
docker-image-name: pytorch-linux-focal-rocm5.3-py3.8
test-matrix: |
{ include: [
{ config: "slow", shard: 1, num_shards: 1, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm5_2-py3_8-slow-test:
name: linux-focal-rocm5.2-py3.8-slow
linux-focal-rocm5_3-py3_8-slow-test:
name: linux-focal-rocm5.3-py3.8-slow
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm5_2-py3_8-slow-build
needs: linux-focal-rocm5_3-py3_8-slow-build
with:
build-environment: linux-focal-rocm5.2-py3.8
docker-image: ${{ needs.linux-focal-rocm5_2-py3_8-slow-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_2-py3_8-slow-build.outputs.test-matrix }}
build-environment: linux-focal-rocm5.3-py3.8
docker-image: ${{ needs.linux-focal-rocm5_3-py3_8-slow-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_3-py3_8-slow-build.outputs.test-matrix }}
secrets:
AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }}
AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY }}
linux-focal-rocm5_2-py3_8-distributed-build:
name: linux-focal-rocm5.2-py3.8-distributed
linux-focal-rocm5_3-py3_8-distributed-build:
name: linux-focal-rocm5.3-py3.8-distributed
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.2-py3.8
docker-image-name: pytorch-linux-focal-rocm5.2-py3.8
build-environment: linux-focal-rocm5.3-py3.8
docker-image-name: pytorch-linux-focal-rocm5.3-py3.8
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 2, runner: "linux.rocm.gpu" },
{ config: "distributed", shard: 2, num_shards: 2, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm5_2-py3_8-distributed-test:
name: linux-focal-rocm5.2-py3.8-distributed
linux-focal-rocm5_3-py3_8-distributed-test:
name: linux-focal-rocm5.3-py3.8-distributed
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm5_2-py3_8-distributed-build
needs: linux-focal-rocm5_3-py3_8-distributed-build
with:
build-environment: linux-focal-rocm5.2-py3.8
docker-image: ${{ needs.linux-focal-rocm5_2-py3_8-distributed-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_2-py3_8-distributed-build.outputs.test-matrix }}
build-environment: linux-focal-rocm5.3-py3.8
docker-image: ${{ needs.linux-focal-rocm5_3-py3_8-distributed-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_3-py3_8-distributed-build.outputs.test-matrix }}
secrets:
AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }}
AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY }}

View File

@ -299,14 +299,14 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.7-gcc7
build-generates-artifacts: false
linux-focal-rocm5_2-py3_8-build:
linux-focal-rocm5_3-py3_8-build:
# don't run build twice on master
if: github.event_name == 'pull_request'
name: linux-focal-rocm5.2-py3.8
name: linux-focal-rocm5.3-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.2-py3.8
docker-image-name: pytorch-linux-focal-rocm5.2-py3.8
build-environment: linux-focal-rocm5.3-py3.8
docker-image-name: pytorch-linux-focal-rocm5.3-py3.8
sync-tag: rocm-build
test-matrix: |
{ include: [

View File

@ -291,12 +291,12 @@ jobs:
cuda-version: "11.6"
test-matrix: ${{ needs.win-vs2019-cuda11_6-py3-build.outputs.test-matrix }}
linux-focal-rocm5_2-py3_8-build:
name: linux-focal-rocm5.2-py3.8
linux-focal-rocm5_3-py3_8-build:
name: linux-focal-rocm5.3-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.2-py3.8
docker-image-name: pytorch-linux-focal-rocm5.2-py3.8
build-environment: linux-focal-rocm5.3-py3.8
docker-image-name: pytorch-linux-focal-rocm5.3-py3.8
sync-tag: rocm-build
test-matrix: |
{ include: [
@ -304,14 +304,14 @@ jobs:
{ config: "default", shard: 2, num_shards: 2, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm5_2-py3_8-test:
name: linux-focal-rocm5.2-py3.8
linux-focal-rocm5_3-py3_8-test:
name: linux-focal-rocm5.3-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm5_2-py3_8-build
needs: linux-focal-rocm5_3-py3_8-build
with:
build-environment: linux-focal-rocm5.2-py3.8
docker-image: ${{ needs.linux-focal-rocm5_2-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_2-py3_8-build.outputs.test-matrix }}
build-environment: linux-focal-rocm5.3-py3.8
docker-image: ${{ needs.linux-focal-rocm5_3-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_3-py3_8-build.outputs.test-matrix }}
secrets:
AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }}
AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY }}

View File

@ -4009,6 +4009,8 @@ class TestLinalg(TestCase):
@precisionOverride({torch.float32: 1e-1, torch.complex64: 1e-1,
torch.float64: 1e-8, torch.complex128: 1e-8})
def test_linalg_solve_triangular(self, device, dtype):
if TEST_WITH_ROCM and dtype is torch.float32:
raise unittest.SkipTest("Skipping for ROCm for Magma backend; unskip when hipSolver backend is enabled")
# This exercises the API + BLAS CPU + batched cuBLAS
ks = (3, 1, 0)
ns = (5, 0)

View File

@ -10236,7 +10236,11 @@ op_db: List[OpInfo] = [
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_masked_select,
error_inputs_func=error_inputs_masked_select),
error_inputs_func=error_inputs_masked_select,
skips=(
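# Compiler issue on ROCm; might need to stay skipped until ROCm 5.5.
# NOTE(review): no active_if/device_type is given here, so this skip also applies to non-ROCm runs - confirm that is intended.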
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values', dtypes=[torch.bool]),
)),
OpInfo('matrix_exp',
dtypes=floating_and_complex_types_and(torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16,
@ -12918,6 +12922,12 @@ op_db: List[OpInfo] = [
# For `chalf`, reference computation in `numpy` is computed in `cfloat`.
# Output of `chalf` saturates to `inf` quicker than reference due to its small range
# which leads to failure of this test.
DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_quick',
dtypes=(torch.complex32,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_comprehensive',
dtypes=(torch.complex32,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_complex_half_reference_testing',
dtypes=(torch.complex32,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_batch_vs_slicing',
dtypes=(torch.complex32,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_non_contig',
@ -14683,6 +14693,8 @@ op_db: List[OpInfo] = [
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
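# Skipped as part of the ROCm 5.3 CI upgrade; might need to stay skipped until ROCm 5.5.
# NOTE(review): no active_if/device_type is given here, so this skip also applies to non-ROCm runs - confirm that is intended.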
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_multiple_devices', dtypes=[torch.float32, torch.int64]),
)),
OpInfo('randint_like',
dtypes=all_types_and(torch.half, torch.bfloat16),
@ -15620,6 +15632,8 @@ op_db: List[OpInfo] = [
# TODO: implement csr.to_sparse(sample_dim) where sampled_dim is 1.
DecorateInfo(unittest.skip("csr.to_sparse(1) not implemented. Skipped!"),
'TestSparseCSR', 'test_sparse_csr_consistency'),
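# Compiler issue on ROCm; might need to stay skipped until ROCm 5.5.
# NOTE(review): no active_if/device_type is given here, so this skip also applies to non-ROCm runs - confirm that is intended.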
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values', dtypes=[torch.bool]),
)
),
OpInfo('logcumsumexp',
@ -15854,6 +15868,8 @@ op_db: List[OpInfo] = [
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# Can't find schemas for this operator for some reason
DecorateInfo(unittest.expectedFailure, 'TestOperatorSignatures', 'test_get_torch_func_signature_exhaustive'),
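# Compiler issue on ROCm; might need to stay skipped until ROCm 5.5.
# NOTE(review): no active_if/device_type is given here, so this skip also applies to non-ROCm runs - confirm that is intended.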
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values', dtypes=[torch.bool]),
)),
# Following tests are for jiterator's python interface
# Jiterator can be used to author elementwise CUDA kernel
@ -16370,6 +16386,10 @@ op_db: List[OpInfo] = [
supports_out=False,
supports_autograd=False,
sample_inputs_func=sample_inputs_argwhere,
skips=(
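# Compiler issue on ROCm; might need to stay skipped until ROCm 5.5.
# NOTE(review): no active_if/device_type is given here, so this skip also applies to non-ROCm runs - confirm that is intended.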
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values', dtypes=[torch.bool]),
),
),
ReductionOpInfo(
'all',