mirror of
https://github.com/zebrajr/tensorflow.git
synced 2025-12-06 12:20:11 +01:00
PR #32960: [ROCm] Refactor testing scripts
Imported from GitHub PR https://github.com/openxla/xla/pull/32960
📝 Summary of Changes
Partially upstreams changes from https://github.com/ROCm/xla/pull/323, 9d358b9b26, and https://github.com/ROCm/xla/pull/385. Some asan/tsan changes are skipped for now.
🎯 Justification
These changes are ROCm-specific and help with ROCm internal CI validation pipelines.
🚀 Kind of Contribution
🐛 Bug Fix, ♻️ Cleanup, 🧪 Tests
📊 Benchmark (for Performance Improvements)
/
🧪 Unit Tests:
/
🧪 Execution Tests:
/
Copybara import of the project:
--
804ff1b6a6fbba86a3e0a09d739179a4eb4f197d by Milica Makevic <Milica.Makevic@amd.com>:
Add missing cuda-only tag to cuda test
--
44ce7a2d56c9f0c80405447f431ae1e5a33f42e1 by Milica Makevic <Milica.Makevic@amd.com>:
Refactor test scripts
--
fb783c968e9d2ff5d92357908d99e4952235c2bc by Milica Makevic <Milica.Makevic@amd.com>:
Cover more mgpu tests
--
1f53712274f76202241bd3631dbf065826c0b960 by Milica Makevic <Milica.Makevic@amd.com>:
Switch from rocm_gcc to rocm_ci for sgpu tests
--
00e0c8ee2a763680f5a3665dab62202ab230731d by Milica Makevic <Milica.Makevic@amd.com>:
Changing file permissions
--
003c062a8900c12b73c0972e8d406f2661a27aba by Milica Makevic <Milica.Makevic@amd.com>:
Remove unnecessary import
--
214599355f40f1b65e0540daf0b9829d2c950115 by Harsha HS <Harsha.HavanurShamsundara@amd.com>:
Add license header
Merging this change closes #32960
PiperOrigin-RevId: 822245565
This commit is contained in:
parent
7a107e3571
commit
47cd01d4a5
39
third_party/xla/build_tools/rocm/rocm_tag_filters.sh
vendored
Executable file
39
third_party/xla/build_tools/rocm/rocm_tag_filters.sh
vendored
Executable file
|
|
@ -0,0 +1,39 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
TAG_FILTERS=(
|
||||||
|
-no_gpu
|
||||||
|
-requires-gpu-intel
|
||||||
|
-requires-gpu-nvidia
|
||||||
|
-cuda-only
|
||||||
|
-oneapi-only
|
||||||
|
-requires-gpu-sm60
|
||||||
|
-requires-gpu-sm60-only
|
||||||
|
-requires-gpu-sm70
|
||||||
|
-requires-gpu-sm70-only
|
||||||
|
-requires-gpu-sm80
|
||||||
|
-requires-gpu-sm80-only
|
||||||
|
-requires-gpu-sm86
|
||||||
|
-requires-gpu-sm86-only
|
||||||
|
-requires-gpu-sm89
|
||||||
|
-requires-gpu-sm89-only
|
||||||
|
-requires-gpu-sm90
|
||||||
|
-requires-gpu-sm90-only
|
||||||
|
)
|
||||||
|
|
||||||
|
# Print the TAG_FILTERS entries as one comma-separated line. IFS is changed
# only inside the subshell, so the caller's IFS is left untouched; "${arr[*]}"
# joins the elements with the first character of IFS.
(IFS=,; echo "${TAG_FILTERS[*]}")
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
# Test-related settings.
|
# Test-related settings.
|
||||||
try-import /usertools/rocm.bazelrc
|
|
||||||
|
|
||||||
build:rocm_dev --remote_upload_local_results=false
|
build:rocm_dev --remote_upload_local_results=false
|
||||||
build:rocm_dev --remote_cache="https://wardite.cluster.engflow.com"
|
build:rocm_dev --remote_cache="https://wardite.cluster.engflow.com"
|
||||||
|
|
@ -30,3 +29,48 @@ build:tsan --//build_tools/rocm:sanitizer=tsan
|
||||||
build:asan --test_env=ASAN_OPTIONS=suppressions=build_tools/rocm/asan_ignore_list.txt:use_sigaltstack=0
|
build:asan --test_env=ASAN_OPTIONS=suppressions=build_tools/rocm/asan_ignore_list.txt:use_sigaltstack=0
|
||||||
build:asan --test_env=LSAN_OPTIONS=suppressions=build_tools/rocm/lsan_ignore_list.txt:use_sigaltstack=0
|
build:asan --test_env=LSAN_OPTIONS=suppressions=build_tools/rocm/lsan_ignore_list.txt:use_sigaltstack=0
|
||||||
build:asan --//build_tools/rocm:sanitizer=asan
|
build:asan --//build_tools/rocm:sanitizer=asan
|
||||||
|
|
||||||
|
test:xla_sgpu -- \
|
||||||
|
//xla/... \
|
||||||
|
-//xla/backends/gpu/collectives:gpu_clique_key_test \
|
||||||
|
-//xla/backends/gpu/collectives:nccl_communicator_test \
|
||||||
|
-//xla/service:collective_ops_utils_test \
|
||||||
|
-//xla/service:collective_pipeliner_test \
|
||||||
|
-//xla/service:collective_permute_cycle_test \
|
||||||
|
-//xla/service:batched_gather_scatter_normalizer_test \
|
||||||
|
-//xla/service:all_reduce_simplifier_test \
|
||||||
|
-//xla/service:all_gather_simplifier_test \
|
||||||
|
-//xla/service:reduce_scatter_decomposer_test \
|
||||||
|
-//xla/service:reduce_scatter_reassociate_test \
|
||||||
|
-//xla/service:reduce_scatter_combiner_test \
|
||||||
|
-//xla/service:scatter_simplifier_test \
|
||||||
|
-//xla/service:sharding_propagation_test \
|
||||||
|
-//xla/service:sharding_remover_test \
|
||||||
|
-//xla/service:p2p_schedule_preparation_test \
|
||||||
|
-//xla/pjrt/distributed:topology_util_test \
|
||||||
|
-//xla/pjrt/distributed:client_server_test
|
||||||
|
|
||||||
|
test:xla_mgpu -- \
|
||||||
|
//xla/tests:collective_ops_e2e_test \
|
||||||
|
//xla/tests:collective_ops_test \
|
||||||
|
//xla/tests:collective_pipeline_parallelism_test \
|
||||||
|
//xla/tests:replicated_io_feed_test \
|
||||||
|
//xla/backends/gpu/collectives:gpu_clique_key_test \
|
||||||
|
//xla/backends/gpu/collectives:nccl_communicator_test \
|
||||||
|
//xla/backends/gpu/runtime:all_reduce_test \
|
||||||
|
//xla/service:collective_ops_utils_test \
|
||||||
|
//xla/service:collective_pipeliner_test \
|
||||||
|
//xla/service:collective_permute_cycle_test \
|
||||||
|
//xla/service:batched_gather_scatter_normalizer_test \
|
||||||
|
//xla/service:all_reduce_simplifier_test \
|
||||||
|
//xla/service:all_gather_simplifier_test \
|
||||||
|
//xla/service:reduce_scatter_decomposer_test \
|
||||||
|
//xla/service:reduce_scatter_reassociate_test \
|
||||||
|
//xla/service:reduce_scatter_combiner_test \
|
||||||
|
//xla/service:scatter_simplifier_test \
|
||||||
|
//xla/service:sharding_propagation_test \
|
||||||
|
//xla/service:sharding_remover_test \
|
||||||
|
//xla/service:p2p_schedule_preparation_test \
|
||||||
|
//xla/tools/multihost_hlo_runner:functional_hlo_runner_test \
|
||||||
|
//xla/pjrt/distributed:topology_util_test \
|
||||||
|
//xla/pjrt/distributed:client_server_test
|
||||||
41
third_party/xla/build_tools/rocm/run_xla.sh
vendored
41
third_party/xla/build_tools/rocm/run_xla.sh
vendored
|
|
@ -37,30 +37,23 @@ echo ""
|
||||||
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s) for gpu ${AMD_GPU_GFX_ID}."
|
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s) for gpu ${AMD_GPU_GFX_ID}."
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# First positional argument (if any) specifies the ROCM_INSTALL_DIR
|
|
||||||
if [[ -n $1 ]]; then
|
|
||||||
ROCM_INSTALL_DIR=$1
|
|
||||||
else
|
|
||||||
if [[ -z "${ROCM_PATH}" ]]; then
|
|
||||||
ROCM_INSTALL_DIR=/opt/rocm/
|
|
||||||
else
|
|
||||||
ROCM_INSTALL_DIR=$ROCM_PATH
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
export PYTHON_BIN_PATH=`which python3`
|
export PYTHON_BIN_PATH=`which python3`
|
||||||
export TF_NEED_ROCM=1
|
export TF_NEED_ROCM=1
|
||||||
export ROCM_PATH=$ROCM_INSTALL_DIR
|
export ROCM_PATH=/opt/rocm
|
||||||
TAGS_FILTER="gpu,requires-gpu-amd,-multi_gpu,-requires-gpu-nvidia,-requires-gpu-intel,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-cuda-only,-oneapi-only"
|
|
||||||
UNSUPPORTED_GPU_TAGS="$(echo -requires-gpu-sm{60,70,80,86,89,90}{,-only})"
|
|
||||||
TAGS_FILTER="${TAGS_FILTER},${UNSUPPORTED_GPU_TAGS// /,}"
|
|
||||||
|
|
||||||
bazel \
|
SCRIPT_DIR=$(realpath $(dirname $0))
|
||||||
test \
|
# Combine the shared exclusion list printed by rocm_tag_filters.sh with the
# tags specific to this run: require gpu / requires-gpu-amd targets and exclude
# multi-GPU, rocprofiler-sdk-skipped and OSS-excluded ones. The script path is
# quoted so a checkout directory containing spaces does not break the call.
TAG_FILTERS="$("$SCRIPT_DIR/rocm_tag_filters.sh"),gpu,-multi_gpu,-multi_gpu_h100,requires-gpu-amd,-skip_rocprofiler_sdk,-no_oss,-oss_excluded,-oss_serial"
|
||||||
--define xnn_enable_avxvnniint8=false --define xnn_enable_avx512fp16=false \
|
|
||||||
--config=rocm_gcc \
|
if [ ! -d /tf/pkg ]; then
|
||||||
--build_tag_filters=${TAGS_FILTER} \
|
mkdir -p /tf/pkg
|
||||||
--test_tag_filters=${TAGS_FILTER} \
|
fi
|
||||||
|
|
||||||
|
bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
|
||||||
|
--config=rocm_ci \
|
||||||
|
--config=xla_sgpu \
|
||||||
|
--build_tag_filters=$TAG_FILTERS \
|
||||||
|
--test_tag_filters=$TAG_FILTERS \
|
||||||
|
--profile=/tf/pkg/profile.json.gz \
|
||||||
--test_timeout=920,2400,7200,9600 \
|
--test_timeout=920,2400,7200,9600 \
|
||||||
--test_sharding_strategy=disabled \
|
--test_sharding_strategy=disabled \
|
||||||
--test_output=errors \
|
--test_output=errors \
|
||||||
|
|
@ -70,8 +63,6 @@ bazel \
|
||||||
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
|
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
|
||||||
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
|
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
|
||||||
--action_env=TF_ROCM_AMDGPU_TARGETS=${AMD_GPU_GFX_ID} \
|
--action_env=TF_ROCM_AMDGPU_TARGETS=${AMD_GPU_GFX_ID} \
|
||||||
--action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
|
--action_env=XLA_FLAGS="--xla_gpu_enable_llvm_module_compilation_parallelism=true --xla_gpu_force_compilation_parallelism=16" \
|
||||||
--action_env=XLA_FLAGS=--xla_gpu_enable_llvm_module_compilation_parallelism=true \
|
|
||||||
--repo_env="ROCM_PATH=$ROCM_PATH" \
|
--repo_env="ROCM_PATH=$ROCM_PATH" \
|
||||||
--run_under=//build_tools/ci:parallel_gpu_execute \
|
--run_under=//build_tools/ci:parallel_gpu_execute
|
||||||
-- //xla/...
|
|
||||||
|
|
|
||||||
|
|
@ -18,16 +18,22 @@
|
||||||
set -e
|
set -e
|
||||||
set -x
|
set -x
|
||||||
|
|
||||||
|
SCRIPT_DIR=$(realpath $(dirname $0))
|
||||||
|
# Combine the shared exclusion list printed by rocm_tag_filters.sh with the
# tags specific to this run: require gpu / requires-gpu-amd targets and exclude
# multi-GPU, rocprofiler-sdk-skipped and OSS-excluded ones. The script path is
# quoted so a checkout directory containing spaces does not break the call.
TAG_FILTERS="$("$SCRIPT_DIR/rocm_tag_filters.sh"),gpu,-multi_gpu,-multi_gpu_h100,requires-gpu-amd,-skip_rocprofiler_sdk,-no_oss,-oss_excluded,-oss_serial"
|
||||||
|
|
||||||
|
if [ ! -d /tf/pkg ]; then
|
||||||
|
mkdir -p /tf/pkg
|
||||||
|
fi
|
||||||
|
|
||||||
SCRIPT_DIR=$(dirname $0)
|
SCRIPT_DIR=$(dirname $0)
|
||||||
bazel --bazelrc="$SCRIPT_DIR/rocm_xla.bazelrc" test \
|
bazel --bazelrc="$SCRIPT_DIR/rocm_xla.bazelrc" test \
|
||||||
"$@" \
|
"$@" \
|
||||||
--test_tag_filters=gpu,requires-gpu-amd,-requires-gpu-nvidia,-requires-gpu-intel,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-no_rocm,-requires-gpu-sm60,-requires-gpu-sm60-only,-requires-gpu-sm70,-requires-gpu-sm70-only,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm86,-requires-gpu-sm86-only,-requires-gpu-sm89,-requires-gpu-sm89-only,-requires-gpu-sm90,-requires-gpu-sm90-only \
|
--build_tag_filters=$TAG_FILTERS \
|
||||||
--build_tag_filters=gpu,requires-gpu-amd,-requires-gpu-nvidia,-requires-gpu-intel,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-no_rocm,-requires-gpu-sm60,-requires-gpu-sm60-only,-requires-gpu-sm70,-requires-gpu-sm70-only,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm86,-requires-gpu-sm86-only,-requires-gpu-sm89,-requires-gpu-sm89-only,-requires-gpu-sm90,-requires-gpu-sm90-only \
|
--test_tag_filters=$TAG_FILTERS \
|
||||||
--profile=/tf/pkg/profile.json.gz \
|
--profile=/tf/pkg/profile.json.gz \
|
||||||
--keep_going \
|
--keep_going \
|
||||||
--test_env=TF_TESTS_PER_GPU=1 \
|
--test_env=TF_TESTS_PER_GPU=1 \
|
||||||
--action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
|
--action_env=XLA_FLAGS="--xla_gpu_enable_llvm_module_compilation_parallelism=true --xla_gpu_force_compilation_parallelism=16" \
|
||||||
--action_env=XLA_FLAGS=--xla_gpu_enable_llvm_module_compilation_parallelism=true \
|
|
||||||
--test_output=errors \
|
--test_output=errors \
|
||||||
--local_test_jobs=2 \
|
--local_test_jobs=2 \
|
||||||
--run_under=//build_tools/rocm:parallel_gpu_execute
|
--run_under=//build_tools/rocm:parallel_gpu_execute
|
||||||
|
|
|
||||||
|
|
@ -53,31 +53,23 @@ echo ""
|
||||||
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s) for gpu ${AMD_GPU_GFX_ID}."
|
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s) for gpu ${AMD_GPU_GFX_ID}."
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# First positional argument (if any) specifies the ROCM_INSTALL_DIR
|
|
||||||
if [[ -n $1 ]]; then
|
|
||||||
ROCM_INSTALL_DIR=$1
|
|
||||||
else
|
|
||||||
if [[ -z "${ROCM_PATH}" ]]; then
|
|
||||||
ROCM_INSTALL_DIR=/opt/rocm/
|
|
||||||
else
|
|
||||||
ROCM_INSTALL_DIR=$ROCM_PATH
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
export PYTHON_BIN_PATH=`which python3`
|
export PYTHON_BIN_PATH=`which python3`
|
||||||
export TF_NEED_ROCM=1
|
export TF_NEED_ROCM=1
|
||||||
export ROCM_PATH=$ROCM_INSTALL_DIR
|
export ROCM_PATH=/opt/rocm/
|
||||||
TAGS_FILTER="-requires-gpu-nvidia,-oss_excluded,-oss_serial"
|
|
||||||
UNSUPPORTED_GPU_TAGS="$(echo -requires-gpu-sm{60,70,80,86,89,90}{,-only})"
|
|
||||||
TAGS_FILTER="${TAGS_FILTER},${UNSUPPORTED_GPU_TAGS// /,}"
|
|
||||||
|
|
||||||
bazel \
|
SCRIPT_DIR=$(realpath $(dirname $0))
|
||||||
test \
|
TAG_FILTERS="$($SCRIPT_DIR/rocm_tag_filters.sh)"
|
||||||
--define xnn_enable_avxvnniint8=false \
|
|
||||||
--define xnn_enable_avx512fp16=false \
|
if [ ! -d /tf/pkg ]; then
|
||||||
--config=rocm_gcc \
|
mkdir -p /tf/pkg
|
||||||
--build_tag_filters=${TAGS_FILTER} \
|
fi
|
||||||
--test_tag_filters=${TAGS_FILTER} \
|
|
||||||
|
bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
|
||||||
|
--config=rocm_ci \
|
||||||
|
--config=xla_mgpu \
|
||||||
|
--build_tag_filters=${TAG_FILTERS} \
|
||||||
|
--test_tag_filters=${TAG_FILTERS} \
|
||||||
|
--profile=/tf/pkg/profile.json.gz \
|
||||||
--test_timeout=920,2400,7200,9600 \
|
--test_timeout=920,2400,7200,9600 \
|
||||||
--test_sharding_strategy=disabled \
|
--test_sharding_strategy=disabled \
|
||||||
--test_output=errors \
|
--test_output=errors \
|
||||||
|
|
@ -90,12 +82,4 @@ bazel \
|
||||||
--action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
|
--action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
|
||||||
--action_env=XLA_FLAGS=--xla_gpu_enable_llvm_module_compilation_parallelism=true \
|
--action_env=XLA_FLAGS=--xla_gpu_enable_llvm_module_compilation_parallelism=true \
|
||||||
--action_env=NCCL_MAX_NCHANNELS=1 \
|
--action_env=NCCL_MAX_NCHANNELS=1 \
|
||||||
--repo_env="ROCM_PATH=$ROCM_PATH" \
|
--repo_env="ROCM_PATH=$ROCM_PATH"
|
||||||
-- //xla/tests:collective_ops_e2e_test \
|
|
||||||
//xla/tests:collective_ops_test \
|
|
||||||
//xla/tests:collective_pipeline_parallelism_test \
|
|
||||||
//xla/tests:replicated_io_feed_test \
|
|
||||||
//xla/tools/multihost_hlo_runner:functional_hlo_runner_test \
|
|
||||||
//xla/pjrt/distributed:topology_util_test \
|
|
||||||
//xla/pjrt/distributed:client_server_test \
|
|
||||||
//xla/backends/gpu/runtime:all_reduce_test
|
|
||||||
|
|
|
||||||
|
|
@ -166,6 +166,7 @@ cuda_library(
|
||||||
testonly = 1,
|
testonly = 1,
|
||||||
srcs = ["cuda_test.cu.cc"],
|
srcs = ["cuda_test.cu.cc"],
|
||||||
hdrs = ["cuda_test.h"],
|
hdrs = ["cuda_test.h"],
|
||||||
|
tags = ["cuda-only"],
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
deps = [
|
deps = [
|
||||||
"@com_google_googletest//:gtest_for_library",
|
"@com_google_googletest//:gtest_for_library",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user