mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
This PR relates to the feature in [this feature submission](https://docs.google.com/document/d/1pF2T1xz54IPg1jG7FhykbrpbcJZVelQw0v8vBaoLkfs/edit). It has been based on #104242 which adds similar float8 types. These new types added in this PR are described in the paper at https://arxiv.org/abs/2206.02915. A brief description and comparison of the types with other float8 types can be also found in the [OpenXLA RFC](https://github.com/openxla/stablehlo/blob/main/rfcs/20230321-fp8_fnuz.md). Pull Request resolved: https://github.com/pytorch/pytorch/pull/107586 Approved by: https://github.com/seemethere, https://github.com/malfet
228 lines
5.7 KiB
C++
228 lines
5.7 KiB
C++
#pragma once
|
|
|
|
#include <c10/util/BFloat16.h>
|
|
#include <c10/util/Float8_e4m3fn.h>
|
|
#include <c10/util/Float8_e4m3fnuz.h>
|
|
#include <c10/util/Float8_e5m2.h>
|
|
#include <c10/util/Float8_e5m2fnuz.h>
|
|
#include <c10/util/Half.h>
|
|
#include <torch/csrc/Export.h>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
|
|
#ifdef __FreeBSD__
|
|
#include <sys/endian.h>
|
|
#include <sys/types.h>
|
|
#define thp_bswap16(x) bswap16(x)
|
|
#define thp_bswap32(x) bswap32(x)
|
|
#define thp_bswap64(x) bswap64(x)
|
|
#elif defined(__APPLE__)
|
|
#include <libkern/OSByteOrder.h>
|
|
#define thp_bswap16(x) OSSwapInt16(x)
|
|
#define thp_bswap32(x) OSSwapInt32(x)
|
|
#define thp_bswap64(x) OSSwapInt64(x)
|
|
#elif defined(__GNUC__) && !defined(__MINGW32__)
|
|
#include <byteswap.h>
|
|
#define thp_bswap16(x) bswap_16(x)
|
|
#define thp_bswap32(x) bswap_32(x)
|
|
#define thp_bswap64(x) bswap_64(x)
|
|
#elif defined _WIN32 || defined _WIN64
|
|
#define thp_bswap16(x) _byteswap_ushort(x)
|
|
#define thp_bswap32(x) _byteswap_ulong(x)
|
|
#define thp_bswap64(x) _byteswap_uint64(x)
|
|
#endif
|
|
|
|
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
|
#define to_be16(x) thp_bswap16(x)
|
|
#define from_be16(x) thp_bswap16(x)
|
|
#define to_be32(x) thp_bswap32(x)
|
|
#define from_be32(x) thp_bswap32(x)
|
|
#define to_be64(x) thp_bswap64(x)
|
|
#define from_be64(x) thp_bswap64(x)
|
|
#define to_le16(x) (x)
|
|
#define from_le16(x) (x)
|
|
#define to_le32(x) (x)
|
|
#define from_le32(x) (x)
|
|
#define to_le64(x) (x)
|
|
#define from_le64(x) (x)
|
|
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
#define to_be16(x) (x)
|
|
#define from_be16(x) (x)
|
|
#define to_be32(x) (x)
|
|
#define from_be32(x) (x)
|
|
#define to_be64(x) (x)
|
|
#define from_be64(x) (x)
|
|
#define to_le16(x) thp_bswap16(x)
|
|
#define from_le16(x) thp_bswap16(x)
|
|
#define to_le32(x) thp_bswap32(x)
|
|
#define from_le32(x) thp_bswap32(x)
|
|
#define to_le64(x) thp_bswap64(x)
|
|
#define from_le64(x) thp_bswap64(x)
|
|
#else
|
|
#error Unexpected or undefined __BYTE_ORDER__
|
|
#endif
|
|
|
|
namespace torch {
|
|
namespace utils {
|
|
|
|
enum THPByteOrder { THP_LITTLE_ENDIAN = 0, THP_BIG_ENDIAN = 1 };
|
|
|
|
TORCH_API THPByteOrder THP_nativeByteOrder();
|
|
|
|
TORCH_API void THP_decodeInt16Buffer(
|
|
int16_t* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
TORCH_API void THP_decodeInt32Buffer(
|
|
int32_t* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
TORCH_API void THP_decodeInt64Buffer(
|
|
int64_t* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
TORCH_API void THP_decodeHalfBuffer(
|
|
c10::Half* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
TORCH_API void THP_decodeFloatBuffer(
|
|
float* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
TORCH_API void THP_decodeDoubleBuffer(
|
|
double* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
TORCH_API void THP_decodeBoolBuffer(
|
|
bool* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
TORCH_API void THP_decodeBFloat16Buffer(
|
|
at::BFloat16* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
TORCH_API void THP_decodeComplexFloatBuffer(
|
|
c10::complex<float>* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
TORCH_API void THP_decodeComplexDoubleBuffer(
|
|
c10::complex<double>* dst,
|
|
const uint8_t* src,
|
|
bool do_byte_swap,
|
|
size_t len);
|
|
|
|
TORCH_API void THP_decodeInt16Buffer(
|
|
int16_t* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_decodeInt32Buffer(
|
|
int32_t* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_decodeInt64Buffer(
|
|
int64_t* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_decodeHalfBuffer(
|
|
c10::Half* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_decodeFloatBuffer(
|
|
float* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_decodeDoubleBuffer(
|
|
double* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_decodeBoolBuffer(
|
|
bool* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_decodeBFloat16Buffer(
|
|
at::BFloat16* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_decodeFloat8_e5m2Buffer(
|
|
at::Float8_e5m2* dst,
|
|
const uint8_t* src,
|
|
size_t len);
|
|
TORCH_API void THP_decodeFloat8_e4m3fnBuffer(
|
|
at::Float8_e4m3fn* dst,
|
|
const uint8_t* src,
|
|
size_t len);
|
|
TORCH_API void THP_decodeFloat8_e5m2fnuzBuffer(
|
|
at::Float8_e5m2fnuz* dst,
|
|
const uint8_t* src,
|
|
size_t len);
|
|
TORCH_API void THP_decodeFloat8_e4m3fnuzBuffer(
|
|
at::Float8_e4m3fnuz* dst,
|
|
const uint8_t* src,
|
|
size_t len);
|
|
TORCH_API void THP_decodeComplexFloatBuffer(
|
|
c10::complex<float>* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_decodeComplexDoubleBuffer(
|
|
c10::complex<double>* dst,
|
|
const uint8_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
|
|
TORCH_API void THP_encodeInt16Buffer(
|
|
uint8_t* dst,
|
|
const int16_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_encodeInt32Buffer(
|
|
uint8_t* dst,
|
|
const int32_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_encodeInt64Buffer(
|
|
uint8_t* dst,
|
|
const int64_t* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_encodeFloatBuffer(
|
|
uint8_t* dst,
|
|
const float* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_encodeDoubleBuffer(
|
|
uint8_t* dst,
|
|
const double* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_encodeComplexFloatBuffer(
|
|
uint8_t* dst,
|
|
const c10::complex<float>* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
TORCH_API void THP_encodeComplexDoubleBuffer(
|
|
uint8_t* dst,
|
|
const c10::complex<double>* src,
|
|
THPByteOrder order,
|
|
size_t len);
|
|
|
|
} // namespace utils
|
|
} // namespace torch
|