pytorch/c10/cuda/CUDAException.h
Richard Barnes 1249d490de Add additional CUDA error handling macros (#74865)
Summary:
Introduces additional ways of handling CUDA errors that allow automated linters to detect if errors are being handled.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/74865

Test Plan: Sandcastle

Reviewed By: ngimel

Differential Revision: D35194530

fbshipit-source-id: f4fe61594edbfd81e97a4b605935961b893df167
(cherry picked from commit 919ddf677c5b9b46c5e493ed64346a5f2527bf08)
2022-03-29 18:03:03 +00:00

90 lines
4.1 KiB
C++

#pragma once
#include <c10/cuda/CUDAMacros.h>
#include <c10/cuda/CUDAMiscFunctions.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <cuda.h>
// Note [CHECK macro]
// ~~~~~~~~~~~~~~~~~~
// This is a macro so that AT_ERROR can get accurate __LINE__
// and __FILE__ information. We could split this into a short
// macro and a function implementation if we pass along __LINE__
// and __FILE__, but no one has found this worth doing.
// Used to denote errors from CUDA framework.
// This needs to be declared here instead util/Exception.h for proper conversion
// during hipify.
namespace c10 {
class C10_CUDA_API CUDAError : public c10::Error {
using Error::Error;
};
} // namespace c10
// For CUDA Runtime API
#ifdef STRIP_ERROR_MESSAGES
#define C10_CUDA_CHECK(EXPR) \
do { \
cudaError_t __err = EXPR; \
if (__err != cudaSuccess) { \
throw c10::CUDAError( \
{__func__, __FILE__, static_cast<uint32_t>(__LINE__)}, \
TORCH_CHECK_MSG(false, "")); \
} \
} while (0)
#else
#define C10_CUDA_CHECK(EXPR) \
do { \
cudaError_t __err = EXPR; \
if (__err != cudaSuccess) { \
auto error_unused C10_UNUSED = cudaGetLastError(); \
(void)error_unused; \
auto _cuda_check_suffix = c10::cuda::get_cuda_check_suffix(); \
throw c10::CUDAError( \
{__func__, __FILE__, static_cast<uint32_t>(__LINE__)}, \
TORCH_CHECK_MSG( \
false, \
"", \
"CUDA error: ", \
cudaGetErrorString(__err), \
_cuda_check_suffix)); \
} \
} while (0)
#endif
#define C10_CUDA_CHECK_WARN(EXPR) \
do { \
cudaError_t __err = EXPR; \
if (__err != cudaSuccess) { \
auto error_unused C10_UNUSED = cudaGetLastError(); \
(void)error_unused; \
TORCH_WARN("CUDA warning: ", cudaGetErrorString(__err)); \
} \
} while (0)
// Indicates that a CUDA error is handled in a non-standard way
#define C10_CUDA_ERROR_HANDLED(EXPR) EXPR
// Intentionally ignore a CUDA error
#define C10_CUDA_IGNORE_ERROR(EXPR) \
do { \
cudaError_t __err = EXPR; \
if (__err != cudaSuccess) { \
cudaError_t error_unused C10_UNUSED = cudaGetLastError(); \
(void)error_unused; \
} \
} while (0)
// Clear the last CUDA error
#define C10_CUDA_CLEAR_ERROR() \
do { \
cudaError_t error_unused C10_UNUSED = cudaGetLastError(); \
(void)error_unused; \
} while (0)
// This should be used directly after every kernel launch to ensure
// the launch happened correctly and provide an early, close-to-source
// diagnostic if it didn't.
#define C10_CUDA_KERNEL_LAUNCH_CHECK() C10_CUDA_CHECK(cudaGetLastError())