mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Introduce TORCH_DISABLE_GPU_ASSERTS (#84190)
- Asserts for CUDA are enabled by default - Disabled for ROCm by default by setting `TORCH_DISABLE_GPU_ASSERTS` to `ON` - Can be enabled for ROCm by setting above variable to`OFF` during build or can be forcefully enabled by setting `ROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON` This is follow up changes as per comment in PR #81790, comment [link](https://github.com/pytorch/pytorch/pull/81790#issuecomment-1215929021) Pull Request resolved: https://github.com/pytorch/pytorch/pull/84190 Approved by: https://github.com/jeffdaily, https://github.com/malfet
This commit is contained in:
parent
70b00b1383
commit
fbd08fb358
|
|
@ -285,6 +285,7 @@ if(NOT USE_XNNPACK AND CMAKE_VERSION VERSION_LESS ${XNNPACK_MIN_CMAKE_VER})
|
||||||
endif()
|
endif()
|
||||||
option(USE_ZMQ "Use ZMQ" OFF)
|
option(USE_ZMQ "Use ZMQ" OFF)
|
||||||
option(USE_ZSTD "Use ZSTD" OFF)
|
option(USE_ZSTD "Use ZSTD" OFF)
|
||||||
|
option(TORCH_DISABLE_GPU_ASSERTS "Disable GPU asserts by default" OFF)
|
||||||
# Ensure that an ITT build is the default for x86 CPUs
|
# Ensure that an ITT build is the default for x86 CPUs
|
||||||
cmake_dependent_option(
|
cmake_dependent_option(
|
||||||
USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON
|
USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON
|
||||||
|
|
|
||||||
|
|
@ -326,9 +326,8 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
|
||||||
// CUDA_KERNEL_ASSERT checks the assertion
|
// CUDA_KERNEL_ASSERT checks the assertion
|
||||||
// even when NDEBUG is defined. This is useful for important assertions in CUDA
|
// even when NDEBUG is defined. This is useful for important assertions in CUDA
|
||||||
// code that would otherwise be suppressed when building Release.
|
// code that would otherwise be suppressed when building Release.
|
||||||
#if defined(__ANDROID__) || defined(__APPLE__) || \
|
#if defined(__ANDROID__) || defined(__APPLE__) || \
|
||||||
(defined(USE_ROCM) && ROCM_VERSION < 40100) || \
|
(defined(USE_ROCM) && ROCM_VERSION < 40100)
|
||||||
(defined(USE_ROCM) && defined(ROCM_DISABLE_GPU_ASSERTS))
|
|
||||||
// Those platforms do not support assert()
|
// Those platforms do not support assert()
|
||||||
#define CUDA_KERNEL_ASSERT(cond)
|
#define CUDA_KERNEL_ASSERT(cond)
|
||||||
#define SYCL_KERNEL_ASSERT(cond)
|
#define SYCL_KERNEL_ASSERT(cond)
|
||||||
|
|
@ -368,7 +367,9 @@ extern SYCL_EXTERNAL void __assert_fail(
|
||||||
unsigned int line,
|
unsigned int line,
|
||||||
const char* func);
|
const char* func);
|
||||||
#else // __SYCL_DEVICE_ONLY__
|
#else // __SYCL_DEVICE_ONLY__
|
||||||
#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)))
|
#if ( \
|
||||||
|
defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)) && \
|
||||||
|
!defined(TORCH_DISABLE_GPU_ASSERTS))
|
||||||
// CUDA supports __assert_fail function which are common for both device
|
// CUDA supports __assert_fail function which are common for both device
|
||||||
// and host side code.
|
// and host side code.
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
|
|
@ -386,7 +387,7 @@ __host__ __device__
|
||||||
const char* function) throw() __attribute__((__noreturn__));
|
const char* function) throw() __attribute__((__noreturn__));
|
||||||
|
|
||||||
#if (defined(__HIP_ARCH__) || defined(__HIP__)) && \
|
#if (defined(__HIP_ARCH__) || defined(__HIP__)) && \
|
||||||
!defined(ROCM_DISABLE_GPU_ASSERTS)
|
!defined(TORCH_DISABLE_GPU_ASSERTS)
|
||||||
// ROCm supports __assert_fail only as a device side function.
|
// ROCm supports __assert_fail only as a device side function.
|
||||||
__device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail(
|
__device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail(
|
||||||
const char* assertion,
|
const char* assertion,
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,7 @@ static_assert(
|
||||||
#cmakedefine CAFFE2_USE_NVTX
|
#cmakedefine CAFFE2_USE_NVTX
|
||||||
#cmakedefine CAFFE2_USE_ITT
|
#cmakedefine CAFFE2_USE_ITT
|
||||||
#cmakedefine CAFFE2_USE_TRT
|
#cmakedefine CAFFE2_USE_TRT
|
||||||
|
#cmakedefine TORCH_DISABLE_GPU_ASSERTS
|
||||||
|
|
||||||
#ifndef EIGEN_MPL2_ONLY
|
#ifndef EIGEN_MPL2_ONLY
|
||||||
#cmakedefine EIGEN_MPL2_ONLY
|
#cmakedefine EIGEN_MPL2_ONLY
|
||||||
|
|
@ -85,4 +86,5 @@ static_assert(
|
||||||
{"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
|
{"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
|
||||||
{"USE_ITT", "${CAFFE2_USE_ITT}"}, \
|
{"USE_ITT", "${CAFFE2_USE_ITT}"}, \
|
||||||
{"USE_TRT", "${CAFFE2_USE_TRT}"}, \
|
{"USE_TRT", "${CAFFE2_USE_TRT}"}, \
|
||||||
|
{"TORCH_DISABLE_GPU_ASSERTS", "${TORCH_DISABLE_GPU_ASSERTS}"}, \
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1248,6 +1248,16 @@ if(ANDROID)
|
||||||
list(APPEND Caffe2_DEPENDENCY_LIBS log)
|
list(APPEND Caffe2_DEPENDENCY_LIBS log)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# ---[ Kernel asserts
|
||||||
|
# Kernel asserts are enabled by default for CUDA and disabled for ROCm.
|
||||||
|
# For ROCm, it can be enabled by setting ROCM_FORCE_ENABLE_GPU_ASSERTS
|
||||||
|
if(USE_ROCM AND ROCM_FORCE_ENABLE_GPU_ASSERTS)
|
||||||
|
message(STATUS "Forcefully enabling kernel asserts on ROCM")
|
||||||
|
elseif(USE_ROCM AND NOT ROCM_FORCE_ENABLE_GPU_ASSERTS)
|
||||||
|
message(STATUS "Disabling kernel asserts for ROCm")
|
||||||
|
caffe2_update_option(TORCH_DISABLE_GPU_ASSERTS ON)
|
||||||
|
endif()
|
||||||
|
|
||||||
# ---[ LLVM
|
# ---[ LLVM
|
||||||
if(USE_LLVM)
|
if(USE_LLVM)
|
||||||
message(STATUS "Looking for LLVM in ${USE_LLVM}")
|
message(STATUS "Looking for LLVM in ${USE_LLVM}")
|
||||||
|
|
|
||||||
|
|
@ -199,4 +199,5 @@ function(caffe2_print_configuration_summary)
|
||||||
# coreml
|
# coreml
|
||||||
message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}")
|
message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}")
|
||||||
message(STATUS " BUILD_LAZY_TS_BACKEND : ${BUILD_LAZY_TS_BACKEND}")
|
message(STATUS " BUILD_LAZY_TS_BACKEND : ${BUILD_LAZY_TS_BACKEND}")
|
||||||
|
message(STATUS " TORCH_DISABLE_GPU_ASSERTS : ${TORCH_DISABLE_GPU_ASSERTS}")
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
|
||||||
|
|
@ -143,9 +143,6 @@ message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}")
|
||||||
# Add HIP to the CMAKE Module Path
|
# Add HIP to the CMAKE Module Path
|
||||||
set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH})
|
set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH})
|
||||||
|
|
||||||
#Disable kernel assert due to performance regression
|
|
||||||
set(ROCM_ENABLE_KERNEL_ASSERTS FALSE CACHE BOOL "Kernel asserts are disabled by default for ROCm")
|
|
||||||
|
|
||||||
macro(find_package_and_print_version PACKAGE_NAME)
|
macro(find_package_and_print_version PACKAGE_NAME)
|
||||||
find_package("${PACKAGE_NAME}" ${ARGN})
|
find_package("${PACKAGE_NAME}" ${ARGN})
|
||||||
message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}")
|
message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}")
|
||||||
|
|
@ -286,19 +283,6 @@ if(HIP_FOUND)
|
||||||
find_package_and_print_version(hipcub REQUIRED)
|
find_package_and_print_version(hipcub REQUIRED)
|
||||||
find_package_and_print_version(rocthrust REQUIRED)
|
find_package_and_print_version(rocthrust REQUIRED)
|
||||||
|
|
||||||
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0")
|
|
||||||
if(ROCM_ENABLE_KERNEL_ASSERTS)
|
|
||||||
message("ROCm version >= 4.1; enabling asserts")
|
|
||||||
else()
|
|
||||||
add_definitions(-DROCM_DISABLE_GPU_ASSERTS)
|
|
||||||
message("ROCm version >= 4.1; kernel asserts are disabled")
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
# Disable Asserts In Code (Can't use asserts on HIP stack.)
|
|
||||||
add_definitions(-DNDEBUG)
|
|
||||||
message("ROCm version < 4.1; disablng asserts")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(HIP_COMPILER STREQUAL clang)
|
if(HIP_COMPILER STREQUAL clang)
|
||||||
set(hip_library_name amdhip64)
|
set(hip_library_name amdhip64)
|
||||||
else()
|
else()
|
||||||
|
|
|
||||||
|
|
@ -144,3 +144,14 @@ Refer to CUDA Semantics doc
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
For any sections not listed here, please refer to the CUDA semantics doc: :ref:`cuda-semantics`
|
For any sections not listed here, please refer to the CUDA semantics doc: :ref:`cuda-semantics`
|
||||||
|
|
||||||
|
|
||||||
|
Enabling kernel asserts
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
Kernel asserts are supported on ROCm, but they are disabled due to performance overhead. It can be enabled
|
||||||
|
by recompiling the PyTorch from source.
|
||||||
|
|
||||||
|
Please add below line as an argument to cmake command parameters::
|
||||||
|
|
||||||
|
-DROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user