Proposal of two float8 variants - e5m2 and e4m3 - based on https://arxiv.org/pdf/2209.05433.pdf

Hide all Float8 operator implementations behind `#if !defined(C10_MOBILE)` guard to keep Android build size almost unchanged.

TODO:
- Refactor duplicated code
- Clean up unbalanced pragma pop in dtype utils
- Add native implementation on the CUDA side

Co-authored-by: Nikita Shulga <nshulga@meta.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/104242
Approved by: https://github.com/albanD
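For context on the two variants: e5m2 keeps 1 sign, 5 exponent, and 2 mantissa bits (bias 15), while e4m3 keeps 1 sign, 4 exponent, and 3 mantissa bits (bias 7); per the paper, the e4m3 encoding gives up infinities to extend its dynamic range. The sketch below only illustrates the field split and is not the PR's implementation; Float8Layout, kE5M2, kE4M3, and describe_float8 are hypothetical names. The header that follows supplies the fp32 bit-cast helpers that conversions of this sort build on.

#include <cstdint>
#include <cstdio>

// Hypothetical description of a float8 bit layout; illustrative only,
// not the c10 implementation from this PR.
struct Float8Layout {
  unsigned exponent_bits;
  unsigned mantissa_bits;
  int bias;  // 2^(exponent_bits - 1) - 1
};

constexpr Float8Layout kE5M2{5, 2, 15};  // 1 sign + 5 exponent + 2 mantissa
constexpr Float8Layout kE4M3{4, 3, 7};   // 1 sign + 4 exponent + 3 mantissa

// Split a raw 8-bit value into its sign/exponent/mantissa fields.
void describe_float8(uint8_t bits, const Float8Layout& layout) {
  const unsigned sign = bits >> 7;
  const unsigned exponent =
      (bits >> layout.mantissa_bits) & ((1u << layout.exponent_bits) - 1);
  const unsigned mantissa = bits & ((1u << layout.mantissa_bits) - 1);
  std::printf("sign=%u exponent=%u (bias %d) mantissa=%u\n",
              sign, exponent, layout.bias, mantissa);
}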
#pragma once

#include <c10/macros/Macros.h>
#include <cstdint>

namespace c10::detail {
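// Reinterpret the bits of a 32-bit unsigned integer as an IEEE-754 float.
// Prefers a compiler/device intrinsic where one is available and falls back
// to a union-based type pun otherwise.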
C10_HOST_DEVICE inline float fp32_from_bits(uint32_t w) {
#if defined(__OPENCL_VERSION__)
  return as_float(w);
#elif defined(__CUDA_ARCH__)
  return __uint_as_float((unsigned int)w);
#elif defined(__INTEL_COMPILER)
  return _castu32_f32(w);
#else
  union {
    uint32_t as_bits;
    float as_value;
  } fp32 = {w};
  return fp32.as_value;
#endif
}

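// Reinterpret the bits of an IEEE-754 float as a 32-bit unsigned integer,
// mirroring fp32_from_bits above.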
C10_HOST_DEVICE inline uint32_t fp32_to_bits(float f) {
#if defined(__OPENCL_VERSION__)
  return as_uint(f);
#elif defined(__CUDA_ARCH__)
  return (uint32_t)__float_as_uint(f);
#elif defined(__INTEL_COMPILER)
  return _castf32_u32(f);
#else
  union {
    float as_value;
    uint32_t as_bits;
  } fp32 = {f};
  return fp32.as_bits;
#endif
}

} // namespace c10::detail
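A usage note (not part of the file above): these helpers exist so callers can inspect or edit a float's bit pattern without the undefined behavior of a pointer cast. A minimal host-side sketch, assuming the header is on the include path (in the PyTorch tree it lives at c10/util/floating_point_utils.h):

#include <cstdint>
#include <cstdio>

#include <c10/util/floating_point_utils.h>

int main() {
  const float x = 1.5f;

  // Round-trip through the raw bit pattern.
  const uint32_t bits = c10::detail::fp32_to_bits(x);
  std::printf("0x%08x -> %f\n", (unsigned)bits,
              c10::detail::fp32_from_bits(bits));

  // Flipping bit 31 (the sign bit) negates the value without arithmetic.
  std::printf("negated: %f\n",
              c10::detail::fp32_from_bits(bits ^ 0x80000000u));
  return 0;
}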