[ROCM][CI] Introduce tests-to-include as rocm-test workflow input (#110511)

Fixes https://github.com/pytorch/pytorch/issues/110181

Pull Request resolved: https://github.com/pytorch/pytorch/pull/110511
Approved by: https://github.com/huydhn
This commit is contained in:
Jithun Nair 2023-11-13 21:25:49 +00:00 committed by PyTorch MergeBot
parent 2ea3d64f47
commit b01e89587e
3 changed files with 46 additions and 3 deletions

View File

@ -80,6 +80,11 @@ if [[ "$BUILD_ENVIRONMENT" != *bazel* ]]; then
CUSTOM_TEST_ARTIFACT_BUILD_DIR=$(realpath "${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}") CUSTOM_TEST_ARTIFACT_BUILD_DIR=$(realpath "${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}")
fi fi
# Reduce set of tests to include when running run_test.py
if [[ -n $TESTS_TO_INCLUDE ]]; then
echo "Setting INCLUDE_CLAUSE"
INCLUDE_CLAUSE="--include $TESTS_TO_INCLUDE"
fi
# shellcheck source=./common.sh # shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh" source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
@ -228,13 +233,16 @@ test_python_shard() {
exit 1 exit 1
fi fi
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --shard "$1" "$NUM_TEST_SHARDS" --verbose # A bare --include flag is not supported, and quoting $INCLUDE_CLAUSE to satisfy lint would pass the flag and its test names as a single argument, so the expansion is intentionally left unquoted
# shellcheck disable=SC2086
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose
assert_git_not_dirty assert_git_not_dirty
} }
test_python() { test_python() {
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --verbose # shellcheck disable=SC2086
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --verbose
assert_git_not_dirty assert_git_not_dirty
} }
@ -681,7 +689,8 @@ test_vulkan() {
test_distributed() { test_distributed() {
echo "Testing distributed python tests" echo "Testing distributed python tests"
time python test/run_test.py --distributed-tests --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose # shellcheck disable=SC2086
time python test/run_test.py --distributed-tests --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" $INCLUDE_CLAUSE --verbose
assert_git_not_dirty assert_git_not_dirty
if [[ ("$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm*) && "$SHARD_NUMBER" == 1 ]]; then if [[ ("$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm*) && "$SHARD_NUMBER" == 1 ]]; then
@ -1092,6 +1101,10 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-mobile-lightweight-dispatch* ]]; then
test_libtorch test_libtorch
elif [[ "${TEST_CONFIG}" = docs_test ]]; then elif [[ "${TEST_CONFIG}" = docs_test ]]; then
test_docs_test test_docs_test
elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
install_torchvision
test_python
test_aten
else else
install_torchvision install_torchvision
install_monkeytype install_monkeytype

View File

@ -32,6 +32,12 @@ on:
default: 300 default: 300
description: | description: |
Set the maximum (in minutes) how long the workflow should take to finish Set the maximum (in minutes) how long the workflow should take to finish
tests-to-include:
required: false
type: string
default: ""
description: |
List of tests to include (empty string implies default list)
env: env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
@ -136,6 +142,7 @@ jobs:
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }} PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }} PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }}
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }} timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
run: | run: |
set -x set -x
@ -180,6 +187,7 @@ jobs:
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \ -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \ -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \ -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
-e TESTS_TO_INCLUDE \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \ --ulimit stack=10485760:83886080 \
--ulimit core=0 \ --ulimit core=0 \

View File

@ -174,3 +174,25 @@ jobs:
{ config: "default", shard: 6, num_shards: 6, runner: "windows.g5.4xlarge.nvidia.gpu" }, { config: "default", shard: 6, num_shards: 6, runner: "windows.g5.4xlarge.nvidia.gpu" },
{ config: "force_on_cpu", shard: 1, num_shards: 1, runner: "windows.4xlarge.nonephemeral" }, { config: "force_on_cpu", shard: 1, num_shards: 1, runner: "windows.4xlarge.nonephemeral" },
]} ]}
linux-focal-rocm5_7-py3_8-build:
name: linux-focal-rocm5.7-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.7-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm5_7-py3_8-test:
name: linux-focal-rocm5.7-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm5_7-py3_8-build
with:
build-environment: linux-focal-rocm5.7-py3.8
docker-image: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.test-matrix }}
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd"