mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/33574 Sprinkle the Clang identification macro over places that would otherwise cause build errors when Clang is used to drive the CUDA compilation. Note: `__clang__` is defined both when Clang is used as the host compiler by NVCC and when Clang drives the compilation. `__CUDA__` is defined only in the latter case. Test Plan: ```lang=bash buck build mode/opt -c fbcode.cuda_use_clang=true //fblearner/flow/projects/dper:workflow buck build mode/opt //fblearner/flow/projects/dper:workflow ``` Reviewed By: BIT-silence Differential Revision: D20007440 fbshipit-source-id: 53caa70695b99461a3910d41dc71a9f6d0728a75
47 lines
928 B
C++
47 lines
928 B
C++
#pragma once
|
|
|
|
#include <caffe2/core/types.h>
|
|
|
|
#ifdef __CUDA_ARCH__
|
|
// Proxy for including cuda_fp16.h, because common_gpu.h
|
|
// has necessary diagnostic guards.
|
|
#include <caffe2/core/common_gpu.h>
|
|
#endif
|
|
#ifdef __HIP_DEVICE_COMPILE__
|
|
#include <caffe2/core/hip/common_gpu.h>
|
|
#endif
|
|
|
|
// See Note [hip-clang differences to hcc]
|
|
|
|
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || \
|
|
defined(__HIP__) || (defined(__clang__) && defined(__CUDA__))
|
|
#define CONVERSIONS_DECL __host__ __device__ inline
|
|
#else
|
|
#define CONVERSIONS_DECL inline
|
|
#endif
|
|
|
|
#ifdef _MSC_VER
|
|
#undef IN
|
|
#undef OUT
|
|
#endif
|
|
|
|
namespace caffe2 {
|
|
|
|
namespace convert {
|
|
|
|
template <typename IN, typename OUT>
|
|
CONVERSIONS_DECL OUT To(const IN in) {
|
|
return static_cast<OUT>(in);
|
|
}
|
|
|
|
template <typename OUT, typename IN>
|
|
CONVERSIONS_DECL OUT Get(IN x) {
|
|
return static_cast<OUT>(x);
|
|
}
|
|
|
|
}; // namespace convert
|
|
|
|
}; // namespace caffe2
|
|
|
|
#undef CONVERSIONS_DECL
|