[ROCm] Upgrade ROCm CI to ROCm6.4 (#151368)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/151368
Approved by: https://github.com/jeffdaily

Co-authored-by: Jeff Daily <jeff.daily@amd.com>
This commit is contained in:
Jithun Nair 2025-05-02 17:21:18 +00:00 committed by PyTorch MergeBot
parent f65fb0a23d
commit 844842dfbf
23 changed files with 195 additions and 120 deletions

View File

@ -241,11 +241,11 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-rocm-n-1-py3)
pytorch-linux-jammy-rocm-n-1-py3)
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
VISION=yes
ROCM_VERSION=6.2.4
ROCM_VERSION=6.3
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
@ -254,11 +254,11 @@ case "$image" in
UCC_COMMIT=${_UCC_COMMIT}
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-rocm-n-py3)
pytorch-linux-jammy-rocm-n-py3)
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
VISION=yes
ROCM_VERSION=6.3
ROCM_VERSION=6.4
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes

View File

@ -66,17 +66,25 @@ EOF
done
# ROCm 6.3 had a regression where initializing static code objects had significant overhead
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then
# ROCm 6.4 did not yet fix the regression, also HIP branch names are different
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]] || [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then
HIP_BRANCH=rocm-6.3.x
VER_STR=6.3
elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
HIP_BRANCH=release/rocm-rel-6.4
VER_STR=6.4
fi
# clr build needs CppHeaderParser but can only find it using conda's python
/opt/conda/bin/python -m pip install CppHeaderParser
git clone https://github.com/ROCm/HIP -b rocm-6.3.x
git clone https://github.com/ROCm/HIP -b $HIP_BRANCH
HIP_COMMON_DIR=$(readlink -f HIP)
git clone https://github.com/jeffdaily/clr -b release/rocm-rel-6.3-statco-hotfix
git clone https://github.com/jeffdaily/clr -b release/rocm-rel-${VER_STR}-statco-hotfix
mkdir -p clr/build
pushd clr/build
cmake .. -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR
make -j
cp hipamd/lib/libamdhip64.so.6.3.* /opt/rocm/lib/libamdhip64.so.6.3.*
cp hipamd/lib/libamdhip64.so.${VER_STR}.* /opt/rocm/lib/libamdhip64.so.${VER_STR}.*
popd
rm -rf HIP clr
fi

View File

@ -5,6 +5,12 @@ description: Set up ROCm host for CI
runs:
using: composite
steps:
- name: Runner ROCm version
if: always()
shell: bash
run: |
dpkg -l | grep -E " rocm"
- name: Stop all running docker containers
if: always()
shell: bash

View File

@ -61,8 +61,8 @@ jobs:
pytorch-linux-focal-py3.11-clang10,
pytorch-linux-focal-py3.12-clang10,
pytorch-linux-focal-py3.13-clang10,
pytorch-linux-focal-rocm-n-1-py3,
pytorch-linux-focal-rocm-n-py3,
pytorch-linux-jammy-rocm-n-1-py3,
pytorch-linux-jammy-rocm-n-py3,
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-clang12,
pytorch-linux-jammy-py3.9-gcc11,
pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks,

View File

@ -41,7 +41,7 @@ jobs:
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: pytorch-linux-focal-rocm-n-py3
docker-image-name: pytorch-linux-jammy-rocm-n-py3
push: false
- name: Pull docker image

View File

@ -78,13 +78,13 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-focal-rocm-py3_10-inductor-benchmark-build:
linux-jammy-rocm-py3_10-inductor-benchmark-build:
if: github.repository_owner == 'pytorch'
name: rocm-py3_10-inductor-benchmark-build
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm-py3_10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3_10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi300.2" },
@ -102,18 +102,18 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-inductor-benchmark-test:
linux-jammy-rocm-py3_10-inductor-benchmark-test:
permissions:
id-token: write
contents: read
name: rocm-py3_10-inductor-benchmark-test
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm-py3_10-inductor-benchmark-build
needs: linux-jammy-rocm-py3_10-inductor-benchmark-build
with:
build-environment: linux-focal-rocm-py3_10
build-environment: linux-jammy-rocm-py3_10
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
docker-image: ${{ needs.linux-focal-rocm-py3_10-inductor-benchmark-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-inductor-benchmark-build.outputs.test-matrix }}
docker-image: ${{ needs.linux-jammy-rocm-py3_10-inductor-benchmark-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-inductor-benchmark-build.outputs.test-matrix }}
timeout-minutes: 720
# Disable monitor in perf tests for more investigation
disable-monitor: true

View File

@ -67,13 +67,13 @@ jobs:
test-matrix: ${{ needs.linux-focal-cuda12_6-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
secrets: inherit
linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-build:
linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build:
if: github.repository_owner == 'pytorch'
name: rocm-py3_10-periodic-dynamo-benchmarks
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm-py3_10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3_10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
@ -95,17 +95,17 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-test:
linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-test:
permissions:
id-token: write
contents: read
name: rocm-py3_10-periodic-dynamo-benchmarks
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-build
needs: linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build
with:
build-environment: linux-focal-rocm-py3_10
docker-image: ${{ needs.linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
build-environment: linux-jammy-rocm-py3_10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
secrets: inherit
linux-focal-cuda12_6-py3_10-gcc9-inductor-build-gcp:

View File

@ -36,14 +36,14 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-focal-rocm-py3_10-inductor-build:
linux-jammy-rocm-py3_10-inductor-build:
name: rocm-py3.10-inductor
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3.10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
@ -51,15 +51,15 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-inductor-test:
linux-jammy-rocm-py3_10-inductor-test:
permissions:
id-token: write
contents: read
name: rocm-py3.10-inductor
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm-py3_10-inductor-build
needs: linux-jammy-rocm-py3_10-inductor-build
with:
build-environment: linux-focal-rocm-py3.10
docker-image: ${{ needs.linux-focal-rocm-py3_10-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-inductor-build.outputs.test-matrix }}
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-inductor-build.outputs.test-matrix }}
secrets: inherit

View File

@ -29,14 +29,14 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-focal-rocm-py3_10-inductor-build:
linux-jammy-rocm-py3_10-inductor-build:
name: rocm-py3.10-inductor
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3.10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.2" },
@ -44,15 +44,15 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-inductor-test:
linux-jammy-rocm-py3_10-inductor-test:
permissions:
id-token: write
contents: read
name: rocm-py3.10-inductor
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm-py3_10-inductor-build
needs: linux-jammy-rocm-py3_10-inductor-build
with:
build-environment: linux-focal-rocm-py3.10
docker-image: ${{ needs.linux-focal-rocm-py3_10-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-inductor-build.outputs.test-matrix }}
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-inductor-build.outputs.test-matrix }}
secrets: inherit

View File

@ -49,14 +49,14 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-focal-rocm-py3_10-build:
name: linux-focal-rocm-py3.10
linux-jammy-rocm-py3_10-build:
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3.10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
@ -65,17 +65,17 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-test:
linux-jammy-rocm-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm-py3_10-build
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm-py3.10
docker-image: ${{ needs.linux-focal-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-build.outputs.test-matrix }}
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
secrets: inherit

View File

@ -140,14 +140,14 @@ jobs:
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-debug-build.outputs.test-matrix }}
secrets: inherit
linux-focal-rocm-py3_10-build:
name: linux-focal-rocm-py3.10
linux-jammy-rocm-py3_10-build:
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3.10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.4", owners: ["module:rocm", "oncall:distributed"] },
@ -156,19 +156,19 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-test:
linux-jammy-rocm-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm-py3_10-build
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm-py3.10
docker-image: ${{ needs.linux-focal-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-build.outputs.test-matrix }}
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
secrets: inherit
linux-focal-cuda12_6-py3-gcc11-slow-gradcheck-build:

View File

@ -413,16 +413,16 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-build:
linux-jammy-rocm-py3_10-build:
# don't run build twice on main
if: github.event_name == 'pull_request'
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3.10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [

View File

@ -36,15 +36,15 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-focal-rocm-py3_10-build:
linux-jammy-rocm-py3_10-build:
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3.10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
@ -57,17 +57,17 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-test:
linux-jammy-rocm-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm-py3_10-build
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm-py3.10
docker-image: ${{ needs.linux-focal-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-build.outputs.test-matrix }}
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
secrets: inherit

View File

@ -26,13 +26,13 @@ jobs:
id-token: write
contents: read
linux-focal-rocm-py3_10-build:
linux-jammy-rocm-py3_10-build:
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3.10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
@ -45,17 +45,17 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-test:
linux-jammy-rocm-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm-py3_10-build
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm-py3.10
docker-image: ${{ needs.linux-focal-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-build.outputs.test-matrix }}
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
secrets: inherit

View File

@ -103,14 +103,14 @@ jobs:
test-matrix: ${{ needs.linux-focal-py3_9-clang10-build.outputs.test-matrix }}
secrets: inherit
linux-focal-rocm-py3_10-build:
name: linux-focal-rocm-py3.10
linux-jammy-rocm-py3_10-build:
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3.10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "slow", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.2", owners: ["module:rocm"] },
@ -118,19 +118,19 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-test:
linux-jammy-rocm-py3_10-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm-py3_10-build
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm-py3.10
docker-image: ${{ needs.linux-focal-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-build.outputs.test-matrix }}
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-py3_10-clang15-asan-build:

View File

@ -165,15 +165,15 @@ jobs:
runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
secrets: inherit
linux-focal-rocm-py3_10-build:
linux-jammy-rocm-py3_10-build:
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') }}
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-rocm-py3.10
docker-image-name: pytorch-linux-focal-rocm-n-py3
build-environment: linux-jammy-rocm-py3.10
docker-image-name: pytorch-linux-jammy-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
@ -183,20 +183,20 @@ jobs:
]}
secrets: inherit
linux-focal-rocm-py3_10-test:
linux-jammy-rocm-py3_10-test:
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') }}
permissions:
id-token: write
contents: read
name: linux-focal-rocm-py3.10
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-focal-rocm-py3_10-build
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-focal-rocm-py3.10
docker-image: ${{ needs.linux-focal-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm-py3_10-build.outputs.test-matrix }}
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor distributed/test_c10d_common distributed/test_c10d_nccl"
secrets: inherit

View File

@ -378,6 +378,15 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
template <typename Dtype, typename C_Dtype = Dtype>
static inline bool bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
#if defined(USE_ROCM) && ROCM_VERSION == 60400
// regression in ROCm 6.4, planned to be fixed in 6.4.1, hipblaslt TT fp32 calculation errors
// best to disallow hipblaslt for this specific case
if constexpr (std::is_same_v<Dtype, float>) {
if (_cublasOpFromChar(transa) == CUBLAS_OP_T && _cublasOpFromChar(transb) == CUBLAS_OP_T) {
return false;
}
}
#endif
cudaDataType_t abType = CUDA_R_32F;
cudaDataType_t cType = CUDA_R_32F;
cublasComputeType_t computeType = CUBLAS_COMPUTE_32F;

View File

@ -613,6 +613,14 @@ auto GetHipBlasLtTypeStringAndOps() {
auto b_datatype = HipDataTypeFor<BT>();
auto in_out_datatype = HipDataTypeFor<CT>();
std::vector<hipblasLtMatmulHeuristicResult_t> heuristic_result;
#if ROCM_VERSION == 60400
// hipblaslt TT fp32 regression on ROCm 6.4, cannot use
if ((a_datatype == HIP_R_32F || b_datatype == HIP_R_32F || in_out_datatype == HIP_R_32F)
&& (transa_outer == HIPBLAS_OP_T && transb_outer == HIPBLAS_OP_T)) {
std::vector<std::pair<std::string, std::unique_ptr<Callable<ParamsT>>>> ignore;
return ignore;
}
#endif
hipblasComputeType_t computeType = HIPBLAS_COMPUTE_32F;
if (at::globalContext().allowTF32CuBLAS()) {

View File

@ -346,7 +346,15 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
static bool disable_addmm_cuda_lt = getDisableAddmmCudaLt();
#endif
// if lt path fails, we recurse back into this function here and force the lt path to off
disable_addmm_cuda_lt |= disable_addmm_cuda_lt_override;
// we cannot update variable disable_addmm_cuda_lt from above since it is static and would be permanent
bool disable_addmm_cuda_lt_final = disable_addmm_cuda_lt || disable_addmm_cuda_lt_override;
#if defined(USE_ROCM) && ROCM_VERSION == 60400
// hipblaslt TT fp32 regression on ROCm 6.4, cannot use
cublasCommonArgs _args(mat1, mat2, result);
if (_args.transa == 't' && _args.transb == 't') {
disable_addmm_cuda_lt_final = true;
}
#endif
at::ScalarType scalar_type = mat1.scalar_type();
bool is_float_output_with_half_input = (scalar_type == at::ScalarType::Half || scalar_type == at::ScalarType::BFloat16) && result.scalar_type() == at::ScalarType::Float;
c10::MaybeOwned<Tensor> self_;
@ -360,7 +368,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
// the last two conditions is to skip 16b transA and non-trans-B having
// leading dim >> rows when they are sliced from a large tensor
// see fbcode/caffe2/test/test_linalg.py:test_corner_cases_of_cublasltmatmul
if (!disable_addmm_cuda_lt) {
if (!disable_addmm_cuda_lt_final) {
useLtInterface = beta.toComplexDouble() == 1.0 && self.dim() == 1 &&
result.dim() == 2 && self.sizes()[0] == mat2_sizes[1] &&
self.is_contiguous() && result.is_contiguous() &&

View File

@ -26,7 +26,7 @@ from torch.testing._internal.common_fsdp import (
patch_reduce_scatter,
reduce_scatter_with_assert,
)
from torch.testing._internal.common_utils import run_tests
from torch.testing._internal.common_utils import run_tests, skipIfRocm
class TestFullyShardMixedPrecisionTraining(FSDPTest):
@ -81,6 +81,7 @@ class TestFullyShardMixedPrecisionTraining(FSDPTest):
use_shard_placement_fn_vals.append(True)
return use_shard_placement_fn_vals
@skipIfRocm # regressed in ROCm 6.4, but ROCm 6.5 fixes it
@skip_if_lt_x_gpu(2)
@requires_nccl_version((2, 10), "Need NCCL 2.10+ for bf16 collectives")
def test_compute_dtype(self):
@ -160,6 +161,7 @@ class TestFullyShardMixedPrecisionTraining(FSDPTest):
self.assertEqual(fsdp_loss, ref_loss)
check_sharded_parity(self, ref_model, model)
@skipIfRocm # regressed in ROCm 6.4, but ROCm 6.5 fixes it
@skip_if_lt_x_gpu(2)
@requires_nccl_version((2, 10), "Need NCCL 2.10+ for bf16 collectives")
def test_reduce_dtype(self):

View File

@ -367,7 +367,9 @@ class TestInductorDynamic(TestCase):
@torch._dynamo.config.patch(capture_scalar_outputs=True)
@torch._inductor.config.patch(implicit_fallbacks=True)
def test_item_to_inputs_kernel_nobreak(self, device):
@torch.library.custom_op("test::foo", mutates_args=())
@torch.library.custom_op(
"test_item_to_inputs_kernel_nobreak::foo", mutates_args=()
)
def foo(x: torch.Tensor, y: int) -> torch.Tensor:
return x.clone()
@ -378,7 +380,7 @@ class TestInductorDynamic(TestCase):
@torch.compile(fullgraph=True)
def f(x, r):
y = x.item()
return torch.ops.test.foo(r, y)
return torch.ops.test_item_to_inputs_kernel_nobreak.foo(r, y)
f(torch.tensor([3], device=device), torch.randn(10, device=device))
@ -591,7 +593,9 @@ class TestInductorDynamic(TestCase):
)
@torch._inductor.config.patch(implicit_fallbacks=True)
def test_multi_output_unbacked_custom_op(self, device):
@torch.library.custom_op("test::foo", mutates_args=())
@torch.library.custom_op(
"test_multi_output_unbacked_custom_op::foo", mutates_args=()
)
def foo(x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
return torch.empty(2, device=x.device), torch.empty(3, device=x.device)
@ -603,7 +607,7 @@ class TestInductorDynamic(TestCase):
@torch.compile(fullgraph=True)
def f(x):
a, b = torch.ops.test.foo(x)
a, b = torch.ops.test_multi_output_unbacked_custom_op.foo(x)
return a.sum() + b.sum()
f(torch.tensor([3], device=device))

View File

@ -14123,6 +14123,9 @@ op_db: list[OpInfo] = [
check_batched_forward_grad=False,
supports_out=False,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
OpInfo('masked_select',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
@ -15567,6 +15570,9 @@ op_db: list[OpInfo] = [
# NOTE: this failure may not reproduce consistently on different systems
# false INTERNAL ASSERT FAILED at "...torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185
DecorateInfo(unittest.skip("Internal assert failed!"), 'TestJit', 'test_variant_consistency_jit'),
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
OpInfo('nn.functional.interpolate',
aten_name="interpolate",
@ -18633,7 +18639,12 @@ op_db: list[OpInfo] = [
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_scatter,
error_inputs_func=error_inputs_scatter_and_scatter_add),
error_inputs_func=error_inputs_scatter_and_scatter_add,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
UnaryUfuncInfo(
'bfloat16',
op=lambda x, *args, **kwargs: x.bfloat16(*args, **kwargs),
@ -19422,7 +19433,11 @@ op_db: list[OpInfo] = [
error_inputs_func=error_inputs_scatter_and_scatter_add,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
),
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
OpInfo('stack',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_stack,
@ -19433,8 +19448,7 @@ op_db: list[OpInfo] = [
# https://github.com/pytorch/pytorch/issues/77046
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
),
),
)),
OpInfo('_chunk_cat',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_chunk_cat,
@ -20014,13 +20028,23 @@ op_db: list[OpInfo] = [
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
error_inputs_func=error_inputs_tril_triu,
sample_inputs_func=sample_inputs_tril_triu),
sample_inputs_func=sample_inputs_tril_triu,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
OpInfo('triu',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
error_inputs_func=error_inputs_tril_triu,
sample_inputs_func=sample_inputs_tril_triu),
sample_inputs_func=sample_inputs_tril_triu,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
OpInfo('triu_indices',
dtypes=_dispatch_dtypes((torch.int32, torch.int64)),
sample_inputs_func=sample_inputs_trilu_indices,
@ -21598,6 +21622,11 @@ op_db: list[OpInfo] = [
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_scatter_reduce,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
),
),
OpInfo(
'scatter_reduce',

View File

@ -1442,6 +1442,7 @@ op_db: list[OpInfo] = [
device_type="cpu",
dtypes=(torch.complex128,),
),
skipCUDAIfRocm, # regression in ROCm 6.4
],
),
OpInfo(