[PyTorch] Half: don't disable direct conversion to/from float on mobile (#130465)

As far as I can tell, `FCVT` (https://developer.arm.com/documentation/ddi0602/2024-06/SIMD-FP-Instructions/FCVT--Floating-point-convert-precision--scalar--?lang=en)
is part of the base aarch64 instruction set, so it should work fine on mobile.

Differential Revision: [D59589733](https://our.internmc.facebook.com/intern/diff/D59589733/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/130465
Approved by: https://github.com/ezyang, https://github.com/malfet
This commit is contained in:
Scott Wolchok 2024-07-10 10:58:11 -07:00 committed by PyTorch MergeBot
parent d727e2f2d1
commit af4da0799c
2 changed files with 7 additions and 7 deletions

View File

@ -32,7 +32,7 @@ C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
namespace c10 {
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
#if defined(__aarch64__) && !defined(__CUDACC__)
/// Constructors
/// Builds a Half from a native float16_t: x is initialized with the 16-bit
/// value returned by detail::fp16_to_bits(value).
inline Half::Half(float16_t value) : x(detail::fp16_to_bits(value)) {}
inline Half::operator float16_t() const {
@ -65,14 +65,14 @@ inline C10_HOST_DEVICE Half::operator float() const {
#elif (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
!defined(__APPLE__)
return at::vec::half2float_scalar(x);
#elif defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
#elif defined(__aarch64__) && !defined(__CUDACC__)
return detail::native_fp16_to_fp32_value(x);
#else
return detail::fp16_ieee_to_fp32_value(x);
#endif
}
#endif /* !defined(__aarch64__) || defined(C10_MOBILE) || defined(__CUDACC__) \
#endif /* !defined(__aarch64__) || defined(__CUDACC__) \
*/
#if defined(__CUDACC__) || defined(__HIPCC__)

View File

@ -47,7 +47,7 @@
#include <sycl/sycl.hpp> // for SYCL 2020
#endif
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
#if defined(__aarch64__) && !defined(__CUDACC__)
#include <arm_neon.h>
#endif
@ -330,7 +330,7 @@ inline uint16_t fp16_ieee_from_fp32_value(float f) {
(shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign));
}
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
#if defined(__aarch64__) && !defined(__CUDACC__)
// Returns the float16_t that c10::bit_cast produces from the raw 16-bit
// value h.
inline float16_t fp16_from_bits(uint16_t h) {
return c10::bit_cast<float16_t>(h);
}
@ -339,7 +339,7 @@ inline uint16_t fp16_to_bits(float16_t f) {
return c10::bit_cast<uint16_t>(f);
}
// According to https://godbolt.org/z/8s14GvEjo it would translate to single
// According to https://godbolt.org/z/frExdbsWG it would translate to single
// fcvt s0, h0
inline float native_fp16_to_fp32_value(uint16_t h) {
return static_cast<float>(fp16_from_bits(h));
@ -368,7 +368,7 @@ struct alignas(2) Half {
#endif
constexpr C10_HOST_DEVICE Half(unsigned short bits, from_bits_t) : x(bits) {}
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
#if defined(__aarch64__) && !defined(__CUDACC__)
inline Half(float16_t value);
inline operator float16_t() const;
#else