[Experimentation] Add TSAN build and test (#85313)

Some parts of this PR are adapted from the previously abandoned https://github.com/pytorch/pytorch/pull/36694. This PR is the first step in setting up TSAN jobs in CI. The data race warnings reported by TSAN will need to be reviewed later in a separate PR.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/85313
Approved by: https://github.com/osalpekar
This commit is contained in:
Huy Do 2022-10-11 19:34:44 +00:00 committed by PyTorch MergeBot
parent 92562046e9
commit 7f02f2ac0c
7 changed files with 78 additions and 4 deletions

View File

@ -27,6 +27,7 @@ VALID_TEST_CONFIG_LABELS = {f"{PREFIX}{label}" for label in {
"nogpu_AVX512",
"nogpu_NO_AVX2",
"slow",
"tsan",
"xla",
}}

View File

@ -120,6 +120,26 @@ jobs:
docker-image: ${{ needs.linux-bionic-py3_7-clang9-slow-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-py3_7-clang9-slow-build.outputs.test-matrix }}
linux-focal-py3_7-clang7-tsan-build:
# Build job for the ThreadSanitizer (TSAN) configuration; delegates to the
# reusable _linux-build.yml workflow.
name: linux-focal-py3.7-clang7-tsan
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-py3.7-clang7-tsan
# NOTE(review): this is the image named for the clang7 ASAN job; presumably
# reused here because it already ships clang7 -- confirm.
docker-image-name: pytorch-linux-focal-py3-clang7-asan
# Single "tsan" test config, one shard, on a linux.2xlarge runner.
test-matrix: |
{ include: [
{ config: "tsan", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
]}
linux-focal-py3_7-clang7-tsan-test:
# Test job for the TSAN configuration; delegates to the reusable
# _linux-test.yml workflow and waits for the TSAN build job.
name: linux-focal-py3.7-clang7-tsan
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-py3_7-clang7-tsan-build
with:
build-environment: linux-focal-py3.7-clang7-tsan
# Docker image and test matrix are taken from the build job's outputs.
docker-image: ${{ needs.linux-focal-py3_7-clang7-tsan-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-py3_7-clang7-tsan-build.outputs.test-matrix }}
ios-12-5-1-x86-64:
name: ios-12-5-1-x86-64
uses: ./.github/workflows/_ios-build-test.yml

29
.jenkins/pytorch/build-tsan.sh Executable file
View File

@ -0,0 +1,29 @@
#!/bin/bash
#
# Build a wheel with ThreadSanitizer (TSAN) instrumentation using clang and
# install it into the current Python environment.
#
# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.)

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

echo "Clang version:"
clang --version

python tools/stats/export_test_times.py

# Point CMake at the conda prefix when conda is available. `command -v` is a
# POSIX shell builtin, so the probe does not depend on the external `which`
# utility being installed or on what it prints when the lookup fails.
if command -v conda >/dev/null 2>&1; then
  export CMAKE_PREFIX_PATH=/opt/conda
fi

# Compile with clang and -fsanitize=thread; CUDA and MKLDNN are switched off
# for this configuration.
CC="clang" CXX="clang++" LDSHARED="clang --shared" \
  CFLAGS="-fsanitize=thread" \
  USE_TSAN=1 USE_CUDA=0 USE_MKLDNN=0 \
  python setup.py bdist_wheel
python -mpip install dist/*.whl

# NOTE(review): these helpers are not defined in this script; presumably they
# come from the common scripts sourced above -- confirm.
print_sccache_stats
assert_git_not_dirty

View File

@ -15,6 +15,10 @@ if [[ "$BUILD_ENVIRONMENT" == *-clang7-asan* ]]; then
exec "$(dirname "${BASH_SOURCE[0]}")/build-asan.sh" "$@"
fi
# clang7 TSAN builds are handed off to build-tsan.sh; exec replaces this
# process, so nothing below runs for those environments.
case "$BUILD_ENVIRONMENT" in
  *-clang7-tsan*)
    exec "$(dirname "${BASH_SOURCE[0]}")/build-tsan.sh" "$@"
    ;;
esac
if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
fi

View File

@ -186,6 +186,10 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
(cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_aten_asan(3)")
fi
# Export PYTORCH_TEST_WITH_TSAN=1 whenever the build environment name
# contains "-tsan".
case "$BUILD_ENVIRONMENT" in
  *-tsan*) export PYTORCH_TEST_WITH_TSAN=1 ;;
esac
if [[ $TEST_CONFIG == 'nogpu_NO_AVX2' ]]; then
export ATEN_CPU_CAPABILITY=default
elif [[ $TEST_CONFIG == 'nogpu_AVX512' ]]; then
@ -333,8 +337,11 @@ test_libtorch() {
TEST_REPORTS_DIR=test/test-reports/cpp-unittest/test_libtorch
mkdir -p $TEST_REPORTS_DIR
if [[ "$BUILD_ENVIRONMENT" != *-tsan* ]]; then
# Run JIT cpp tests
python test/cpp/jit/tests_setup.py setup
fi
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
"$TORCH_BIN_DIR"/test_jit --gtest_output=xml:$TEST_REPORTS_DIR/test_jit.xml
else
@ -348,7 +355,10 @@ test_libtorch() {
"$TORCH_BIN_DIR"/test_lazy --gtest_output=xml:$TEST_REPORTS_DIR/test_lazy.xml
fi
if [[ "$BUILD_ENVIRONMENT" != *-tsan* ]]; then
python test/cpp/jit/tests_setup.py shutdown
fi
# Wait for background download to finish
wait
# Exclude IMethodTest that relies on torch::deploy, which will instead be run in test_deploy.
@ -659,7 +669,7 @@ test_docs_test() {
.jenkins/pytorch/docs-test.sh
}
if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* || "${BUILD_ENVIRONMENT}" == *-tsan* ]]; then
(cd test && python -c "import torch; print(torch.__config__.show())")
(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
fi
@ -720,6 +730,10 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
test_bazel
elif [[ "${BUILD_ENVIRONMENT}" == *-mobile-lightweight-dispatch* ]]; then
test_libtorch
elif [[ "${BUILD_ENVIRONMENT}" == *-tsan* ]]; then
# TODO: The TSAN check is currently failing with 415 data race warnings. These
# will be addressed later; this first PR can be merged to set up the CI jobs.
test_libtorch || true
elif [[ "${TEST_CONFIG}" = docs_test ]]; then
test_docs_test
elif [[ "${TEST_CONFIG}" == *functorch* ]]; then

View File

@ -937,6 +937,11 @@ if(USE_ASAN)
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fsanitize=address")
endif()
# ThreadSanitizer: when USE_TSAN is enabled, add -fsanitize=thread to the
# Debug-configuration C++ compile flags and to the Debug link flags.
if(USE_TSAN)
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fsanitize=thread")
  # CMake does not consume a variable named CMAKE_LINKER_FLAGS_DEBUG; the
  # append is kept only in case project code reads it.
  string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fsanitize=thread")
  # These are the per-config linker flag variables CMake actually applies to
  # executables, shared libraries and modules, so the sanitizer flag reaches
  # the link line as well as the compile line.
  string(APPEND CMAKE_EXE_LINKER_FLAGS_DEBUG " -fsanitize=thread")
  string(APPEND CMAKE_SHARED_LINKER_FLAGS_DEBUG " -fsanitize=thread")
  string(APPEND CMAKE_MODULE_LINKER_FLAGS_DEBUG " -fsanitize=thread")
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
include(CheckCSourceCompiles)
check_c_source_compiles("#include <arm_neon.h>

View File

@ -70,6 +70,7 @@ function(caffe2_print_configuration_summary)
message(STATUS " LAPACK : ${LAPACK_INFO}")
endif()
message(STATUS " USE_ASAN : ${USE_ASAN}")
message(STATUS " USE_TSAN : ${USE_TSAN}")
message(STATUS " USE_CPP_CODE_COVERAGE : ${USE_CPP_CODE_COVERAGE}")
message(STATUS " USE_CUDA : ${USE_CUDA}")
if(${USE_CUDA})