[PyTorch] Half: don't disable direct conversion to/from float on mobile (#130465)

As far as I can tell, `FCVT` (https://developer.arm.com/documentation/ddi0602/2024-06/SIMD-FP-Instructions/FCVT--Floating-point-convert-precision--scalar--?lang=en) is part of the base aarch64 instruction set, so it should work fine on mobile.

Differential Revision: [D59589733](https://our.internmc.facebook.com/intern/diff/D59589733/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/130465
Approved by: https://github.com/ezyang, https://github.com/malfet
2025-12-06 12:20:52 +01:00 · 2024-07-10 10:58:11 -07:00 · 2024-07-10 10:58:11 -07:00 · af4da0799c
commit af4da0799c
parent d727e2f2d1
2 changed files with 7 additions and 7 deletions
--- a/c10/util/Half-inl.h
+++ b/c10/util/Half-inl.h
@@ -32,7 +32,7 @@ C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")

 namespace c10 {

-#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
+#if defined(__aarch64__) && !defined(__CUDACC__)
 /// Constructors
 inline Half::Half(float16_t value) : x(detail::fp16_to_bits(value)) {}
 inline Half::operator float16_t() const {
@@ -65,14 +65,14 @@ inline C10_HOST_DEVICE Half::operator float() const {
 #elif (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
    !defined(__APPLE__)
  return at::vec::half2float_scalar(x);
-#elif defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
+#elif defined(__aarch64__) && !defined(__CUDACC__)
  return detail::native_fp16_to_fp32_value(x);
 #else
  return detail::fp16_ieee_to_fp32_value(x);
 #endif
 }

-#endif /* !defined(__aarch64__) || defined(C10_MOBILE) || defined(__CUDACC__) \
+#endif /* !defined(__aarch64__) || defined(__CUDACC__) \
        */

 #if defined(__CUDACC__) || defined(__HIPCC__)
--- a/c10/util/Half.h
+++ b/c10/util/Half.h
@@ -47,7 +47,7 @@
 #include <sycl/sycl.hpp> // for SYCL 2020
 #endif

-#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
+#if defined(__aarch64__) && !defined(__CUDACC__)
 #include <arm_neon.h>
 #endif

@@ -330,7 +330,7 @@ inline uint16_t fp16_ieee_from_fp32_value(float f) {
      (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign));
 }

-#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
+#if defined(__aarch64__) && !defined(__CUDACC__)
 inline float16_t fp16_from_bits(uint16_t h) {
  return c10::bit_cast<float16_t>(h);
 }
@@ -339,7 +339,7 @@ inline uint16_t fp16_to_bits(float16_t f) {
  return c10::bit_cast<uint16_t>(f);
 }

-// According to https://godbolt.org/z/8s14GvEjo it would translate to single
+// According to https://godbolt.org/z/frExdbsWG it would translate to single
 // fcvt s0, h0
 inline float native_fp16_to_fp32_value(uint16_t h) {
  return static_cast<float>(fp16_from_bits(h));
@@ -368,7 +368,7 @@ struct alignas(2) Half {
 #endif

  constexpr C10_HOST_DEVICE Half(unsigned short bits, from_bits_t) : x(bits) {}
-#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
+#if defined(__aarch64__) && !defined(__CUDACC__)
  inline Half(float16_t value);
  inline operator float16_t() const;
 #else