From fbd08fb358b643386edd4dd28b9c747aab4ba8c1 Mon Sep 17 00:00:00 2001 From: Pruthvi Madugundu Date: Fri, 4 Nov 2022 04:43:05 +0000 Subject: [PATCH] Introduce TORCH_DISABLE_GPU_ASSERTS (#84190) - Asserts for CUDA are enabled by default - Disabled for ROCm by default by setting `TORCH_DISABLE_GPU_ASSERTS` to `ON` - Can be enabled for ROCm by setting above variable to`OFF` during build or can be forcefully enabled by setting `ROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON` This is follow up changes as per comment in PR #81790, comment [link](https://github.com/pytorch/pytorch/pull/81790#issuecomment-1215929021) Pull Request resolved: https://github.com/pytorch/pytorch/pull/84190 Approved by: https://github.com/jeffdaily, https://github.com/malfet --- CMakeLists.txt | 1 + c10/macros/Macros.h | 11 ++++++----- caffe2/core/macros.h.in | 2 ++ cmake/Dependencies.cmake | 10 ++++++++++ cmake/Summary.cmake | 1 + cmake/public/LoadHIP.cmake | 16 ---------------- docs/source/notes/hip.rst | 11 +++++++++++ 7 files changed, 31 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b1e0f517faf..6efd3f2df93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -285,6 +285,7 @@ if(NOT USE_XNNPACK AND CMAKE_VERSION VERSION_LESS ${XNNPACK_MIN_CMAKE_VER}) endif() option(USE_ZMQ "Use ZMQ" OFF) option(USE_ZSTD "Use ZSTD" OFF) +option(TORCH_DISABLE_GPU_ASSERTS "Disable GPU asserts by default" OFF) # Ensure that an ITT build is the default for x86 CPUs cmake_dependent_option( USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h index e77fa0fde2e..31cd2219d10 100644 --- a/c10/macros/Macros.h +++ b/c10/macros/Macros.h @@ -326,9 +326,8 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256; // CUDA_KERNEL_ASSERT checks the assertion // even when NDEBUG is defined. This is useful for important assertions in CUDA // code that would otherwise be suppressed when building Release. -#if defined(__ANDROID__) || defined(__APPLE__) || \ - (defined(USE_ROCM) && ROCM_VERSION < 40100) || \ - (defined(USE_ROCM) && defined(ROCM_DISABLE_GPU_ASSERTS)) +#if defined(__ANDROID__) || defined(__APPLE__) || \ + (defined(USE_ROCM) && ROCM_VERSION < 40100) // Those platforms do not support assert() #define CUDA_KERNEL_ASSERT(cond) #define SYCL_KERNEL_ASSERT(cond) @@ -368,7 +367,9 @@ extern SYCL_EXTERNAL void __assert_fail( unsigned int line, const char* func); #else // __SYCL_DEVICE_ONLY__ -#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__))) +#if ( \ + defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)) && \ + !defined(TORCH_DISABLE_GPU_ASSERTS)) // CUDA supports __assert_fail function which are common for both device // and host side code. __host__ __device__ @@ -386,7 +387,7 @@ __host__ __device__ const char* function) throw() __attribute__((__noreturn__)); #if (defined(__HIP_ARCH__) || defined(__HIP__)) && \ - !defined(ROCM_DISABLE_GPU_ASSERTS) + !defined(TORCH_DISABLE_GPU_ASSERTS) // ROCm supports __assert_fail only as a device side function. __device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail( const char* assertion, diff --git a/caffe2/core/macros.h.in b/caffe2/core/macros.h.in index 9c9f7345756..2d9f03e94c0 100644 --- a/caffe2/core/macros.h.in +++ b/caffe2/core/macros.h.in @@ -44,6 +44,7 @@ static_assert( #cmakedefine CAFFE2_USE_NVTX #cmakedefine CAFFE2_USE_ITT #cmakedefine CAFFE2_USE_TRT +#cmakedefine TORCH_DISABLE_GPU_ASSERTS #ifndef EIGEN_MPL2_ONLY #cmakedefine EIGEN_MPL2_ONLY @@ -85,4 +86,5 @@ static_assert( {"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \ {"USE_ITT", "${CAFFE2_USE_ITT}"}, \ {"USE_TRT", "${CAFFE2_USE_TRT}"}, \ + {"TORCH_DISABLE_GPU_ASSERTS", "${TORCH_DISABLE_GPU_ASSERTS}"}, \ } diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index e232fcb624c..cf3c2c2caaf 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1248,6 +1248,16 @@ if(ANDROID) list(APPEND Caffe2_DEPENDENCY_LIBS log) endif() +# ---[ Kernel asserts +# Kernel asserts are enabled by default for CUDA and disabled for ROCm. +# For ROCm, it can be enabled by setting ROCM_FORCE_ENABLE_GPU_ASSERTS +if(USE_ROCM AND ROCM_FORCE_ENABLE_GPU_ASSERTS) + message(STATUS "Forcefully enabling kernel asserts on ROCM") +elseif(USE_ROCM AND NOT ROCM_FORCE_ENABLE_GPU_ASSERTS) + message(STATUS "Disabling kernel asserts for ROCm") + caffe2_update_option(TORCH_DISABLE_GPU_ASSERTS ON) +endif() + # ---[ LLVM if(USE_LLVM) message(STATUS "Looking for LLVM in ${USE_LLVM}") diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index fd6444680e2..279d72a41e6 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -199,4 +199,5 @@ function(caffe2_print_configuration_summary) # coreml message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}") message(STATUS " BUILD_LAZY_TS_BACKEND : ${BUILD_LAZY_TS_BACKEND}") + message(STATUS " TORCH_DISABLE_GPU_ASSERTS : ${TORCH_DISABLE_GPU_ASSERTS}") endfunction() diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake index 89a61b62428..b51284115f1 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -143,9 +143,6 @@ message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}") # Add HIP to the CMAKE Module Path set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH}) -#Disable kernel assert due to performance regression -set(ROCM_ENABLE_KERNEL_ASSERTS FALSE CACHE BOOL "Kernel asserts are disabled by default for ROCm") - macro(find_package_and_print_version PACKAGE_NAME) find_package("${PACKAGE_NAME}" ${ARGN}) message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}") @@ -286,19 +283,6 @@ if(HIP_FOUND) find_package_and_print_version(hipcub REQUIRED) find_package_and_print_version(rocthrust REQUIRED) - if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0") - if(ROCM_ENABLE_KERNEL_ASSERTS) - message("ROCm version >= 4.1; enabling asserts") - else() - add_definitions(-DROCM_DISABLE_GPU_ASSERTS) - message("ROCm version >= 4.1; kernel asserts are disabled") - endif() - else() - # Disable Asserts In Code (Can't use asserts on HIP stack.) - add_definitions(-DNDEBUG) - message("ROCm version < 4.1; disablng asserts") - endif() - if(HIP_COMPILER STREQUAL clang) set(hip_library_name amdhip64) else() diff --git a/docs/source/notes/hip.rst b/docs/source/notes/hip.rst index a9c94e2a4fe..c54e2014897 100644 --- a/docs/source/notes/hip.rst +++ b/docs/source/notes/hip.rst @@ -144,3 +144,14 @@ Refer to CUDA Semantics doc --------------------------- For any sections not listed here, please refer to the CUDA semantics doc: :ref:`cuda-semantics` + + +Enabling kernel asserts +----------------------- + +Kernel asserts are supported on ROCm, but they are disabled due to performance overhead. It can be enabled +by recompiling the PyTorch from source. + +Please add below line as an argument to cmake command parameters:: + + -DROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON