mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 00:20:18 +01:00
[PyTorch] Half: don't disable direct conversion to/from float on mobile (#130465)
As far as I can tell, `FCVT` (https://developer.arm.com/documentation/ddi0602/2024-06/SIMD-FP-Instructions/FCVT--Floating-point-convert-precision--scalar--?lang=en) is part of the base aarch64 instruction set, so it should work fine on mobile.

Differential Revision: [D59589733](https://our.internmc.facebook.com/intern/diff/D59589733/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/130465
Approved by: https://github.com/ezyang, https://github.com/malfet
This commit is contained in:
parent
d727e2f2d1
commit
af4da0799c
|
|
@ -32,7 +32,7 @@ C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
|
|||
|
||||
namespace c10 {
|
||||
|
||||
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
|
||||
#if defined(__aarch64__) && !defined(__CUDACC__)
|
||||
/// Constructors
|
||||
/// Builds a Half from a native float16_t by storing the value's bit pattern,
/// obtained through detail::fp16_to_bits, in the member `x`.
inline Half::Half(float16_t value) : x(detail::fp16_to_bits(value)) {}
|
||||
inline Half::operator float16_t() const {
|
||||
|
|
@ -65,14 +65,14 @@ inline C10_HOST_DEVICE Half::operator float() const {
|
|||
#elif (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
|
||||
!defined(__APPLE__)
|
||||
return at::vec::half2float_scalar(x);
|
||||
#elif defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
|
||||
#elif defined(__aarch64__) && !defined(__CUDACC__)
|
||||
return detail::native_fp16_to_fp32_value(x);
|
||||
#else
|
||||
return detail::fp16_ieee_to_fp32_value(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* !defined(__aarch64__) || defined(C10_MOBILE) || defined(__CUDACC__) \
|
||||
#endif /* !defined(__aarch64__) || defined(__CUDACC__) \
|
||||
*/
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@
|
|||
#include <sycl/sycl.hpp> // for SYCL 2020
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
|
||||
#if defined(__aarch64__) && !defined(__CUDACC__)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
|
|
@ -330,7 +330,7 @@ inline uint16_t fp16_ieee_from_fp32_value(float f) {
|
|||
(shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign));
|
||||
}
|
||||
|
||||
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
|
||||
#if defined(__aarch64__) && !defined(__CUDACC__)
|
||||
// Reinterprets the raw 16-bit pattern `h` as a native float16_t using
// c10::bit_cast; the bits are carried over as-is, with no numeric conversion.
inline float16_t fp16_from_bits(uint16_t h) {
|
||||
return c10::bit_cast<float16_t>(h);
|
||||
}
|
||||
|
|
@ -339,7 +339,7 @@ inline uint16_t fp16_to_bits(float16_t f) {
|
|||
return c10::bit_cast<uint16_t>(f);
|
||||
}
|
||||
|
||||
// According to https://godbolt.org/z/8s14GvEjo it would translate to single
|
||||
// According to https://godbolt.org/z/frExdbsWG it would translate to single
|
||||
// fcvt s0, h0
|
||||
inline float native_fp16_to_fp32_value(uint16_t h) {
|
||||
return static_cast<float>(fp16_from_bits(h));
|
||||
|
|
@ -368,7 +368,7 @@ struct alignas(2) Half {
|
|||
#endif
|
||||
|
||||
// Constructs a Half directly from its raw 16-bit representation: `bits` is
// stored unchanged in `x`. The unnamed from_bits_t parameter is unused and
// only serves to select this overload.
constexpr C10_HOST_DEVICE Half(unsigned short bits, from_bits_t) : x(bits) {}
|
||||
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
|
||||
#if defined(__aarch64__) && !defined(__CUDACC__)
|
||||
inline Half(float16_t value);
|
||||
inline operator float16_t() const;
|
||||
#else
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user