Introduce TORCH_DISABLE_GPU_ASSERTS (#84190)

- Kernel asserts for CUDA are enabled by default
- Kernel asserts are disabled by default for ROCm by setting `TORCH_DISABLE_GPU_ASSERTS` to `ON`
- They can be enabled for ROCm by setting the above variable to `OFF` during the build, or forcefully enabled by setting `ROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON`

This is a follow-up change per a comment on PR #81790 ([comment link](https://github.com/pytorch/pytorch/pull/81790#issuecomment-1215929021)).
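
For reference, a minimal sketch of how a ROCm from-source build could opt back into kernel asserts. This assumes a plain CMake configure of the checkout and is not part of this change; the path and option spelling below are illustrative:

```sh
# Hypothetical configure step (source path is a placeholder).
# ROCM_FORCE_ENABLE_GPU_ASSERTS overrides the ROCm default, which otherwise
# turns TORCH_DISABLE_GPU_ASSERTS ON and compiles kernel asserts out.
cmake -DUSE_ROCM=ON -DROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON /path/to/pytorch
```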

Pull Request resolved: https://github.com/pytorch/pytorch/pull/84190
Approved by: https://github.com/jeffdaily, https://github.com/malfet
Authored by Pruthvi Madugundu on 2022-11-04 04:43:05 +00:00; committed by PyTorch MergeBot
parent 70b00b1383
commit fbd08fb358
7 changed files with 31 additions and 21 deletions

@@ -285,6 +285,7 @@ if(NOT USE_XNNPACK AND CMAKE_VERSION VERSION_LESS ${XNNPACK_MIN_CMAKE_VER})
endif()
option(USE_ZMQ "Use ZMQ" OFF)
option(USE_ZSTD "Use ZSTD" OFF)
option(TORCH_DISABLE_GPU_ASSERTS "Disable GPU asserts by default" OFF)
# Ensure that an ITT build is the default for x86 CPUs
cmake_dependent_option(
USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON

@@ -326,9 +326,8 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
// CUDA_KERNEL_ASSERT checks the assertion
// even when NDEBUG is defined. This is useful for important assertions in CUDA
// code that would otherwise be suppressed when building Release.
#if defined(__ANDROID__) || defined(__APPLE__) || \
(defined(USE_ROCM) && ROCM_VERSION < 40100) || \
(defined(USE_ROCM) && defined(ROCM_DISABLE_GPU_ASSERTS))
#if defined(__ANDROID__) || defined(__APPLE__) || \
(defined(USE_ROCM) && ROCM_VERSION < 40100)
// Those platforms do not support assert()
#define CUDA_KERNEL_ASSERT(cond)
#define SYCL_KERNEL_ASSERT(cond)
@@ -368,7 +367,9 @@ extern SYCL_EXTERNAL void __assert_fail(
unsigned int line,
const char* func);
#else // __SYCL_DEVICE_ONLY__
#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)))
#if ( \
defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)) && \
!defined(TORCH_DISABLE_GPU_ASSERTS))
// CUDA supports __assert_fail function which are common for both device
// and host side code.
__host__ __device__
@@ -386,7 +387,7 @@ __host__ __device__
const char* function) throw() __attribute__((__noreturn__));
#if (defined(__HIP_ARCH__) || defined(__HIP__)) && \
!defined(ROCM_DISABLE_GPU_ASSERTS)
!defined(TORCH_DISABLE_GPU_ASSERTS)
// ROCm supports __assert_fail only as a device side function.
__device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail(
const char* assertion,
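
For context, a minimal sketch (not part of this diff) of the kind of device code that relies on `CUDA_KERNEL_ASSERT`; the kernel name and bounds check are illustrative only:

```cpp
#include <cstdint>
#include <c10/macros/Macros.h>

// Illustrative kernel: CUDA_KERNEL_ASSERT checks its condition even in
// Release (NDEBUG) builds; the preprocessor guards edited above decide
// whether a device-side __assert_fail is available to report a failure.
__global__ void gather_example(
    const float* src, const int64_t* idx, float* dst, int64_t n, int64_t src_n) {
  const int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    CUDA_KERNEL_ASSERT(idx[i] >= 0 && idx[i] < src_n && "index out of bounds");
    dst[i] = src[idx[i]];
  }
}
```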

@@ -44,6 +44,7 @@ static_assert(
#cmakedefine CAFFE2_USE_NVTX
#cmakedefine CAFFE2_USE_ITT
#cmakedefine CAFFE2_USE_TRT
#cmakedefine TORCH_DISABLE_GPU_ASSERTS
#ifndef EIGEN_MPL2_ONLY
#cmakedefine EIGEN_MPL2_ONLY
@@ -85,4 +86,5 @@ static_assert(
{"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
{"USE_ITT", "${CAFFE2_USE_ITT}"}, \
{"USE_TRT", "${CAFFE2_USE_TRT}"}, \
{"TORCH_DISABLE_GPU_ASSERTS", "${TORCH_DISABLE_GPU_ASSERTS}"}, \
}

@@ -1248,6 +1248,16 @@ if(ANDROID)
list(APPEND Caffe2_DEPENDENCY_LIBS log)
endif()
# ---[ Kernel asserts
# Kernel asserts are enabled by default for CUDA and disabled for ROCm.
# For ROCm, it can be enabled by setting ROCM_FORCE_ENABLE_GPU_ASSERTS
if(USE_ROCM AND ROCM_FORCE_ENABLE_GPU_ASSERTS)
message(STATUS "Forcefully enabling kernel asserts on ROCM")
elseif(USE_ROCM AND NOT ROCM_FORCE_ENABLE_GPU_ASSERTS)
message(STATUS "Disabling kernel asserts for ROCm")
caffe2_update_option(TORCH_DISABLE_GPU_ASSERTS ON)
endif()
# ---[ LLVM
if(USE_LLVM)
message(STATUS "Looking for LLVM in ${USE_LLVM}")

@@ -199,4 +199,5 @@ function(caffe2_print_configuration_summary)
# coreml
message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}")
message(STATUS " BUILD_LAZY_TS_BACKEND : ${BUILD_LAZY_TS_BACKEND}")
message(STATUS " TORCH_DISABLE_GPU_ASSERTS : ${TORCH_DISABLE_GPU_ASSERTS}")
endfunction()

@@ -143,9 +143,6 @@ message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}")
# Add HIP to the CMAKE Module Path
set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH})
#Disable kernel assert due to performance regression
set(ROCM_ENABLE_KERNEL_ASSERTS FALSE CACHE BOOL "Kernel asserts are disabled by default for ROCm")
macro(find_package_and_print_version PACKAGE_NAME)
find_package("${PACKAGE_NAME}" ${ARGN})
message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}")
@@ -286,19 +283,6 @@ if(HIP_FOUND)
find_package_and_print_version(hipcub REQUIRED)
find_package_and_print_version(rocthrust REQUIRED)
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0")
if(ROCM_ENABLE_KERNEL_ASSERTS)
message("ROCm version >= 4.1; enabling asserts")
else()
add_definitions(-DROCM_DISABLE_GPU_ASSERTS)
message("ROCm version >= 4.1; kernel asserts are disabled")
endif()
else()
# Disable Asserts In Code (Can't use asserts on HIP stack.)
add_definitions(-DNDEBUG)
message("ROCm version < 4.1; disablng asserts")
endif()
if(HIP_COMPILER STREQUAL clang)
set(hip_library_name amdhip64)
else()

@@ -144,3 +144,14 @@ Refer to CUDA Semantics doc
---------------------------
For any sections not listed here, please refer to the CUDA semantics doc: :ref:`cuda-semantics`
Enabling kernel asserts
-----------------------
Kernel asserts are supported on ROCm but are disabled by default due to performance overhead. They can be
enabled by recompiling PyTorch from source.
Please add the line below as an argument to the cmake command parameters::

    -DROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON