Cleans up type conversions, adds CPU test comparing with NumPy (#35374)
Summary: Per title. Follow-up to https://github.com/pytorch/pytorch/pull/35086.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/35374
Differential Revision: D20712443
Pulled By: mruberry
fbshipit-source-id: 987089c14bff644fd6a636da5530dc260e1d1a68
parent 1cc4e5c338
commit 21c94606b8
@@ -19,14 +19,14 @@ void add_kernel(TensorIterator& iter, Scalar alpha_scalar) {
     using scalar_t = bool;
     auto alpha = alpha_scalar.to<scalar_t>();
     cpu_kernel(iter,
-      [=](scalar_t a, scalar_t b) -> scalar_t { return a + alpha * b; });
+      [=](scalar_t a, scalar_t b) __ubsan_ignore_undefined__ -> scalar_t { return a + alpha * b; });
   } else {
     AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(kBFloat16, iter.dtype(), "add_cpu/sub_cpu", [&]() {
       auto alpha = alpha_scalar.to<scalar_t>();
       auto alpha_vec = Vec256<scalar_t>(alpha);
       cpu_kernel_vec(iter,
-        [=](scalar_t a, scalar_t b) -> scalar_t { return a + alpha * b; },
-        [=](Vec256<scalar_t> a, Vec256<scalar_t> b) {
+        [=](scalar_t a, scalar_t b) __ubsan_ignore_undefined__ -> scalar_t { return a + alpha * b; },
+        [=](Vec256<scalar_t> a, Vec256<scalar_t> b) __ubsan_ignore_undefined__ {
           return vec256::fmadd(b, alpha_vec, a);
         });
     });
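
For context, these arithmetic lambdas are also instantiated for signed integral types, where overflow in `a + alpha * b` is undefined behavior in C++ and is exactly what UBSan reports; the `__ubsan_ignore_undefined__` annotation added above (defined in the next hunk) suppresses that report. A minimal standalone sketch of the flagged pattern, with illustrative names rather than PyTorch code; compile with clang and -fsanitize=undefined to see the diagnostic:

#include <climits>
#include <cstdio>

// Illustrative only: the kind of expression UBSan flags inside the integral
// instantiations of add_kernel's lambdas.
int add_with_alpha(int a, int b, int alpha) {
  return a + alpha * b;  // undefined behavior when the sum overflows int
}

int main() {
  // With -fsanitize=undefined, clang emits a "signed integer overflow"
  // runtime report for this call; the printed value is unspecified.
  std::printf("%d\n", add_with_alpha(INT_MAX, 1, 1));
  return 0;
}
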
@@ -25,10 +25,10 @@

 #if defined(__clang__)
 #define __ubsan_ignore_float_divide_by_zero__ __attribute__((no_sanitize("float-divide-by-zero")))
-#define __ubsan_ignore_float_cast_overflow__ __attribute__((no_sanitize("float-cast-overflow")))
+#define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined")))
 #else
 #define __ubsan_ignore_float_divide_by_zero__
-#define __ubsan_ignore_float_cast_overflow__
+#define __ubsan_ignore_undefined__
 #endif

 // Disable the copy and assignment operator for a class. Note that this will
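
A minimal sketch of how such a macro gets applied, using a non-reserved stand-in name so the snippet is self-contained (illustrative only, not the c10 header). Since "undefined" is UBSan's umbrella group, annotating with it also covers checks such as float-cast-overflow, which the removed macro targeted individually:

#include <cstdio>

// Stand-in for __ubsan_ignore_undefined__; expands to nothing off clang.
#if defined(__clang__)
#define ubsan_ignore_undefined_ __attribute__((no_sanitize("undefined")))
#else
#define ubsan_ignore_undefined_
#endif

// Out-of-range float -> int conversion is undefined behavior; the attribute
// keeps UBSan from instrumenting this function for it.
ubsan_ignore_undefined_ int float_to_int(float f) {
  return static_cast<int>(f);
}

int main() {
  // No UBSan report even under -fsanitize=undefined; the value is
  // platform-dependent (typically INT_MIN on x86-64).
  std::printf("%d\n", float_to_int(3.4e38f));
  return 0;
}
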
@@ -7,44 +7,6 @@

 namespace c10 {

-// Note [Implicit conversion between signed and unsigned]
-// C and C++ have a lovely set of implicit conversion rules, where casting
-// signed integral values to unsigned integral values is always valid
-// (it basically treats the value as if using modulo arithmetic), however
-// converting negative floating point values to unsigned integral types
-// is UB! This means that: (double)-1 -> (int64_t)-1 -> (uint8_t)255 is
-// guaranteed to look like this, but we have (double)-1 -> (uint8_t)<ANYTHING>
-// because it's UB. This also makes UBSan really angry.
-//
-// I think those rules are stupid and we really shouldn't conform to them.
-// The structs below ensure that for all unsigned types we use (currently
-// only uint8_t), we will do an intermediate conversion via int64_t,
-// to ensure that any negative values are wrapped around correctly.
-//
-// Note that conversions from doubles to signed integral types that can't
-// represent a particular value after truncating the fractional part are UB as well,
-// but fixing them is not as simple as adding an int64_t intermediate, because the
-// int64_t -> <smaller signed type> conversion is UB for those large values anyway.
-// I guess in that case we just have to live with that, but it's definitely less
-// surprising than the thing above.
-//
-// For the curious:
-// https://en.cppreference.com/w/cpp/language/implicit_conversion
-// The relevant paragraph is "Floating-integral conversions".
-
-template <typename T>
-struct inter_copy_type {
-  using type = T;
-};
-
-template <>
-struct inter_copy_type<uint8_t> {
-  using type = int64_t;
-};
-
-template <typename T>
-using inter_copy_type_t = typename inter_copy_type<T>::type;
-
 template<typename dest_t, typename src_t>
 struct needs_real {
   constexpr static bool value = (is_complex_t<src_t>::value && !is_complex_t<dest_t>::value);
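
The deleted note describes a real distinction, and a short worked example may help: the int64_t detour that `inter_copy_type` implemented has a guaranteed result, while the direct conversion does not (standalone sketch, not PyTorch code):

#include <cstdint>
#include <cstdio>

int main() {
  // double -> int64_t is well-defined for -1.0, and int64_t -> uint8_t is
  // defined to wrap modulo 2^8, so this is guaranteed to print 255.
  unsigned via_int64 = static_cast<uint8_t>(static_cast<int64_t>(-1.0));
  std::printf("%u\n", via_int64);

  // The direct conversion is undefined behavior: after truncation, -1 cannot
  // be represented in uint8_t, so the result is whatever the platform gives
  // (and UBSan reports it). This is the behavior the commit now accepts,
  // matching NumPy.
  unsigned direct = static_cast<uint8_t>(-1.0);
  std::printf("%u\n", direct);
  return 0;
}
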
@@ -64,13 +26,14 @@ struct maybe_real<true, src_t> {
   }
 };

+// Note: deliberately ignores undefined behavior, consistent with NumPy.
+// PyTorch's type conversions can cause a variety of undefined behavior,
+// including float to integral overflow and signed to unsigned integer overflow.
 template <typename dest_t, typename src_t>
 struct static_cast_with_inter_type {
-  C10_HOST_DEVICE __ubsan_ignore_float_cast_overflow__ static inline dest_t apply(src_t src) {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline dest_t apply(src_t src) {
     constexpr bool real = needs_real<dest_t, src_t>::value;
-    return static_cast<dest_t>(
-        static_cast<inter_copy_type_t<dest_t>>(maybe_real<real, src_t>::apply(src)));
+    return static_cast<dest_t>(maybe_real<real, src_t>::apply(src));
   }
 };

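
Judging from the `needs_real` condition above, the remaining `maybe_real` machinery appears to do nothing more than take the real part when a complex source is cast to a non-complex destination. An approximate standalone sketch of the simplified path (illustrative, C++17, not c10's actual helper):

#include <complex>
#include <cstdint>
#include <type_traits>

// Approximation of the simplified cast: take the real part when the source is
// complex and the destination is not, then static_cast directly. There is no
// int64_t intermediate any more, so out-of-range float -> integer results are
// whatever the hardware produces, which matches NumPy on the same machine.
template <typename T>
struct is_cplx : std::false_type {};
template <typename T>
struct is_cplx<std::complex<T>> : std::true_type {};

template <typename dest_t, typename src_t>
dest_t cast_like_numpy(src_t src) {
  if constexpr (is_cplx<src_t>::value && !is_cplx<dest_t>::value) {
    return static_cast<dest_t>(src.real());
  } else {
    return static_cast<dest_t>(src);
  }
}

int main() {
  auto a = cast_like_numpy<int64_t>(-1.0);                       // well-defined: -1
  auto b = cast_like_numpy<double>(std::complex<double>(2, 3));  // real part: 2.0
  auto c = cast_like_numpy<uint8_t>(-1.0);                       // UB: platform-dependent
  (void)a; (void)b; (void)c;
  return 0;
}
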
@@ -15358,9 +15358,34 @@ scipy_lobpcg | {:10.2e} | {:10.2e} | {:6} | N/A
     # NumPy has the same behavior.
     @dtypes(torch.bool, torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64)
     def test_float_to_int_undefined_conversion(self, device, dtype):
-        t = torch.tensor((-3.40282e+38, 3.40282e+38), device=device, dtype=torch.float)
+        min = torch.finfo(torch.float).min
+        max = torch.finfo(torch.float).max
+        t = torch.tensor((min, max), device=device, dtype=torch.float)
         self.assertEqual(t.to(dtype).dtype, dtype)

+    # Note: CUDA will fail this test on most dtypes, often dramatically.
+    @unittest.skipIf(not TEST_NUMPY, "NumPy not found")
+    @onlyCPU
+    @dtypes(torch.bool, torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64)
+    def test_float_to_int_conversion_precision(self, device, dtype):
+        min = np.finfo(np.float32).min
+        max = np.finfo(np.float32).max
+        t = torch.tensor((float('-inf'), min, max, float('inf'), float('nan')), device=device, dtype=torch.float)
+        a = np.array((float('-inf'), min, max, float('inf'), float('nan')), dtype=np.float32)
+
+        torch_to_np = {
+            torch.bool : np.bool,
+            torch.uint8 : np.uint8,
+            torch.int8 : np.int8,
+            torch.int16 : np.int16,
+            torch.int32 : np.int32,
+            torch.int64 : np.int64
+        }
+
+        torch_result = t.to(dtype)
+        numpy_result = torch.from_numpy(a.astype(torch_to_np[dtype]))
+        self.assertEqual(torch_result, numpy_result)
+
     @onlyOnCPUAndCUDA
     def test_complex_type_conversions(self, device):