diff --git a/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp b/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
index 7cd19bd0a85..a21fbbd0605 100644
--- a/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
+++ b/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
@@ -19,14 +19,14 @@ void add_kernel(TensorIterator& iter, Scalar alpha_scalar) {
       using scalar_t = bool;
       auto alpha = alpha_scalar.to<scalar_t>();
       cpu_kernel(iter,
-        [=](scalar_t a, scalar_t b) -> scalar_t { return a + alpha * b; });
+        [=](scalar_t a, scalar_t b) __ubsan_ignore_undefined__ -> scalar_t { return a + alpha * b; });
   } else {
     AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(kBFloat16, iter.dtype(), "add_cpu/sub_cpu", [&]() {
       auto alpha = alpha_scalar.to<scalar_t>();
       auto alpha_vec = Vec256<scalar_t>(alpha);
       cpu_kernel_vec(iter,
-        [=](scalar_t a, scalar_t b) -> scalar_t { return a + alpha * b; },
-        [=](Vec256<scalar_t> a, Vec256<scalar_t> b) {
+        [=](scalar_t a, scalar_t b) __ubsan_ignore_undefined__ -> scalar_t { return a + alpha * b; },
+        [=](Vec256<scalar_t> a, Vec256<scalar_t> b) __ubsan_ignore_undefined__ {
           return vec256::fmadd(b, alpha_vec, a);
         });
       });
diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h
index 0258d100b6d..afd619221ec 100644
--- a/c10/macros/Macros.h
+++ b/c10/macros/Macros.h
@@ -25,10 +25,10 @@
 
 #if defined(__clang__)
 #define __ubsan_ignore_float_divide_by_zero__ __attribute__((no_sanitize("float-divide-by-zero")))
-#define __ubsan_ignore_float_cast_overflow__ __attribute__((no_sanitize("float-cast-overflow")))
+#define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined")))
 #else
 #define __ubsan_ignore_float_divide_by_zero__
-#define __ubsan_ignore_float_cast_overflow__
+#define __ubsan_ignore_undefined__
 #endif
 
 // Disable the copy and assignment operator for a class. Note that this will
diff --git a/c10/util/TypeCast.h b/c10/util/TypeCast.h
index 44d6562e1d3..f31fe5a6620 100644
--- a/c10/util/TypeCast.h
+++ b/c10/util/TypeCast.h
@@ -7,44 +7,6 @@
 
 namespace c10 {
 
-// Note [Implicit conversion between signed and unsigned]
-// C and C++ have a lovely set of implicit conversion rules, where casting
-// signed integral values to unsigned integral values is always valid
-// (it basically treats the value as if using modulo arithmetic), however
-// converting negative floating point values to unsigned integral types
-// is UB! This means that: (double)-1 -> (int64_t)-1 -> (uint8_t)255 is
-// guaranteed to look like this, but we have (double)-1 -> (uint8_t)
-// because it's UB. This also makes UBSan really angry.
-//
-// I think those rules are stupid and we really shouldn't conform to them.
-// The structs below ensure that for all unsigned types we use (currently
-// only uint8_t), we will do an intermediate convertion via int64_t,
-// to ensure that any negative values are wrapped around correctly.
-//
-// Note that conversions from doubles to signed integral types that can't
-// represent a particular value after truncating the fracitonal part are UB as well,
-// but fixing them is not as simple as adding an int64_t intermediate, beacuse the
-// int64_t -> conversion is UB for those large values anyway.
-// I guess in that case we just have to live with that, but it's definitely less
-// surprising than the thing above.
-//
-// For the curious:
-// https://en.cppreference.com/w/cpp/language/implicit_conversion
-// The relevant paragraph is "Floating-integral conversions".
-
-template <typename T>
-struct inter_copy_type {
-  using type = T;
-};
-
-template <>
-struct inter_copy_type<uint8_t> {
-  using type = int64_t;
-};
-
-template <typename T>
-using inter_copy_type_t = typename inter_copy_type<T>::type;
-
 template <typename dest_t, typename src_t>
 struct needs_real {
   constexpr static bool value = (is_complex_t<src_t>::value && !is_complex_t<dest_t>::value);
@@ -64,13 +26,14 @@ struct maybe_real {
   }
 };
 
-
+// Note: deliberately ignores undefined behavior, consistent with NumPy.
+// PyTorch's type conversions can cause a variety of undefined behavior,
+// including float to integral overflow and signed to unsigned integer overflow.
 template <typename dest_t, typename src_t>
 struct static_cast_with_inter_type {
-  C10_HOST_DEVICE __ubsan_ignore_float_cast_overflow__ static inline dest_t apply(src_t src) {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline dest_t apply(src_t src) {
     constexpr bool real = needs_real<dest_t, src_t>::value;
-    return static_cast<dest_t>(
-        static_cast<inter_copy_type_t<dest_t>>(maybe_real<real, src_t>::apply(src)));
+    return static_cast<dest_t>(maybe_real<real, src_t>::apply(src));
   }
 };
 
diff --git a/test/test_torch.py b/test/test_torch.py
index 1e55e620d5e..fcf287d55b5 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -15358,9 +15358,34 @@ scipy_lobpcg | {:10.2e} | {:10.2e} | {:6} | N/A
     # NumPy has the same behavior.
     @dtypes(torch.bool, torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64)
     def test_float_to_int_undefined_conversion(self, device, dtype):
-        t = torch.tensor((-3.40282e+38, 3.40282e+38), device=device, dtype=torch.float)
+        min = torch.finfo(torch.float).min
+        max = torch.finfo(torch.float).max
+        t = torch.tensor((min, max), device=device, dtype=torch.float)
         self.assertEqual(t.to(dtype).dtype, dtype)
 
+    # Note: CUDA will fail this test on most dtypes, often dramatically.
+    @unittest.skipIf(not TEST_NUMPY, "NumPy not found")
+    @onlyCPU
+    @dtypes(torch.bool, torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64)
+    def test_float_to_int_conversion_precision(self, device, dtype):
+        min = np.finfo(np.float32).min
+        max = np.finfo(np.float32).max
+        t = torch.tensor((float('-inf'), min, max, float('inf'), float('nan')), device=device, dtype=torch.float)
+        a = np.array((float('-inf'), min, max, float('inf'), float('nan')), dtype=np.float32)
+
+        torch_to_np = {
+            torch.bool : np.bool,
+            torch.uint8 : np.uint8,
+            torch.int8 : np.int8,
+            torch.int16 : np.int16,
+            torch.int32 : np.int32,
+            torch.int64 : np.int64
+        }
+
+        torch_result = t.to(dtype)
+        numpy_result = torch.from_numpy(a.astype(torch_to_np[dtype]))
+        self.assertEqual(torch_result, numpy_result)
+
     @onlyOnCPUAndCUDA
     def test_complex_type_conversions(self, device):
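For illustration only, not part of the diff: a minimal standalone sketch of the behavior the new __ubsan_ignore_undefined__ macro is meant to cover. With Clang, __attribute__((no_sanitize("undefined"))) suppresses UBSan reports for the annotated function; the direct float-to-uint8_t cast below is undefined behavior in C++ when the value is negative or out of range, even though common hardware produces a wrapped result. The file and function names (sketch.cpp, cast_like_pytorch) are made up for the example; only the macro definition mirrors c10/macros/Macros.h above.

// sketch.cpp -- illustrative only
#include <cstdint>
#include <cstdio>

#if defined(__clang__)
#define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined")))
#else
#define __ubsan_ignore_undefined__
#endif

// Direct cast from double to uint8_t: undefined behavior when v is negative
// or larger than 255, which is what -fsanitize=undefined would otherwise flag.
__ubsan_ignore_undefined__ uint8_t cast_like_pytorch(double v) {
  return static_cast<uint8_t>(v);
}

int main() {
  // Typically prints 255 on x86, matching the modulo-style wrap-around
  // described in the removed Note [Implicit conversion between signed and unsigned].
  std::printf("%d\n", static_cast<int>(cast_like_pytorch(-1.0)));
  return 0;
}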