diff --git a/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp b/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
index 7cd19bd0a85..a21fbbd0605 100644
--- a/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
+++ b/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
@@ -19,14 +19,14 @@ void add_kernel(TensorIterator& iter, Scalar alpha_scalar) {
       using scalar_t = bool;
       auto alpha = alpha_scalar.to<scalar_t>();
       cpu_kernel(iter,
-        [=](scalar_t a, scalar_t b) -> scalar_t { return a + alpha * b; });
+        [=](scalar_t a, scalar_t b) __ubsan_ignore_undefined__ -> scalar_t { return a + alpha * b; });
   } else {
     AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(kBFloat16, iter.dtype(), "add_cpu/sub_cpu", [&]() {
       auto alpha = alpha_scalar.to<scalar_t>();
       auto alpha_vec = Vec256<scalar_t>(alpha);
       cpu_kernel_vec(iter,
-        [=](scalar_t a, scalar_t b) -> scalar_t { return a + alpha * b; },
-        [=](Vec256<scalar_t> a, Vec256<scalar_t> b) {
+        [=](scalar_t a, scalar_t b) __ubsan_ignore_undefined__ -> scalar_t { return a + alpha * b; },
+        [=](Vec256<scalar_t> a, Vec256<scalar_t> b) __ubsan_ignore_undefined__ {
           return vec256::fmadd(b, alpha_vec, a);
         });
       });
diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h
index 0258d100b6d..afd619221ec 100644
--- a/c10/macros/Macros.h
+++ b/c10/macros/Macros.h
@@ -25,10 +25,10 @@
 
 #if defined(__clang__)
 #define __ubsan_ignore_float_divide_by_zero__ __attribute__((no_sanitize("float-divide-by-zero")))
-#define __ubsan_ignore_float_cast_overflow__ __attribute__((no_sanitize("float-cast-overflow")))
+#define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined")))
 #else
 #define __ubsan_ignore_float_divide_by_zero__
-#define __ubsan_ignore_float_cast_overflow__
+#define __ubsan_ignore_undefined__
 #endif
 
 // Disable the copy and assignment operator for a class. Note that this will
diff --git a/c10/util/TypeCast.h b/c10/util/TypeCast.h
index 44d6562e1d3..f31fe5a6620 100644
--- a/c10/util/TypeCast.h
+++ b/c10/util/TypeCast.h
@@ -7,44 +7,6 @@
 
 namespace c10 {
 
-// Note [Implicit conversion between signed and unsigned]
-// C and C++ have a lovely set of implicit conversion rules, where casting
-// signed integral values to unsigned integral values is always valid
-// (it basically treats the value as if using modulo arithmetic), however
-// converting negative floating point values to unsigned integral types
-// is UB! This means that: (double)-1 -> (int64_t)-1 -> (uint8_t)255 is
-// guaranteed to look like this, but we have (double)-1 -> (uint8_t)
-// because it's UB. This also makes UBSan really angry.
-//
-// I think those rules are stupid and we really shouldn't conform to them.
-// The structs below ensure that for all unsigned types we use (currently
-// only uint8_t), we will do an intermediate convertion via int64_t,
-// to ensure that any negative values are wrapped around correctly.
-//
-// Note that conversions from doubles to signed integral types that can't
-// represent a particular value after truncating the fracitonal part are UB as well,
-// but fixing them is not as simple as adding an int64_t intermediate, beacuse the
-// int64_t -> conversion is UB for those large values anyway.
-// I guess in that case we just have to live with that, but it's definitely less
-// surprising than the thing above.
-//
-// For the curious:
-// https://en.cppreference.com/w/cpp/language/implicit_conversion
-// The relevant paragraph is "Floating-integral conversions".
-
-template <typename T>
-struct inter_copy_type {
-  using type = T;
-};
-
-template <>
-struct inter_copy_type<uint8_t> {
-  using type = int64_t;
-};
-
-template <typename T>
-using inter_copy_type_t = typename inter_copy_type<T>::type;
-
 template <typename dest_t, typename src_t>
 struct needs_real {
   constexpr static bool value = (is_complex_t<src_t>::value && !is_complex_t<dest_t>::value);
@@ -64,13 +26,14 @@ struct maybe_real {
   }
 };
 
-
+// Note: deliberately ignores undefined behavior, consistent with NumPy.
+// PyTorch's type conversions can cause a variety of undefined behavior,
+// including float to integral overflow and signed to unsigned integer overflow.
 template <typename dest_t, typename src_t>
 struct static_cast_with_inter_type {
-  C10_HOST_DEVICE __ubsan_ignore_float_cast_overflow__ static inline dest_t apply(src_t src) {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline dest_t apply(src_t src) {
     constexpr bool real = needs_real<dest_t, src_t>::value;
-    return static_cast<dest_t>(
-        static_cast<inter_copy_type_t<dest_t>>(maybe_real<real, src_t>::apply(src)));
+    return static_cast<dest_t>(maybe_real<real, src_t>::apply(src));
   }
 };
 
diff --git a/test/test_torch.py b/test/test_torch.py
index 1e55e620d5e..fcf287d55b5 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -15358,9 +15358,34 @@ scipy_lobpcg | {:10.2e} | {:10.2e} | {:6} | N/A
     # NumPy has the same behavior.
     @dtypes(torch.bool, torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64)
     def test_float_to_int_undefined_conversion(self, device, dtype):
-        t = torch.tensor((-3.40282e+38, 3.40282e+38), device=device, dtype=torch.float)
+        min = torch.finfo(torch.float).min
+        max = torch.finfo(torch.float).max
+        t = torch.tensor((min, max), device=device, dtype=torch.float)
         self.assertEqual(t.to(dtype).dtype, dtype)
 
+    # Note: CUDA will fail this test on most dtypes, often dramatically.
+    @unittest.skipIf(not TEST_NUMPY, "NumPy not found")
+    @onlyCPU
+    @dtypes(torch.bool, torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64)
+    def test_float_to_int_conversion_precision(self, device, dtype):
+        min = np.finfo(np.float32).min
+        max = np.finfo(np.float32).max
+        t = torch.tensor((float('-inf'), min, max, float('inf'), float('nan')), device=device, dtype=torch.float)
+        a = np.array((float('-inf'), min, max, float('inf'), float('nan')), dtype=np.float32)
+
+        torch_to_np = {
+            torch.bool : np.bool,
+            torch.uint8 : np.uint8,
+            torch.int8 : np.int8,
+            torch.int16 : np.int16,
+            torch.int32 : np.int32,
+            torch.int64 : np.int64
+        }
+
+        torch_result = t.to(dtype)
+        numpy_result = torch.from_numpy(a.astype(torch_to_np[dtype]))
+        self.assertEqual(torch_result, numpy_result)
+
     @onlyOnCPUAndCUDA
     def test_complex_type_conversions(self, device):
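For illustration only, not part of the diff: a minimal standalone sketch of the behavior the new __ubsan_ignore_undefined__ macro is meant to cover. With Clang, __attribute__((no_sanitize("undefined"))) suppresses UBSan reports for the annotated function; the direct float-to-uint8_t cast below is undefined behavior in C++ when the value is negative or out of range, even though common hardware produces a wrapped result. The file and function names (sketch.cpp, cast_like_pytorch) are made up for the example; only the macro definition mirrors c10/macros/Macros.h above.

// sketch.cpp -- illustrative only
#include <cstdint>
#include <cstdio>

#if defined(__clang__)
#define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined")))
#else
#define __ubsan_ignore_undefined__
#endif

// Direct cast from double to uint8_t: undefined behavior when v is negative
// or larger than 255, which is what -fsanitize=undefined would otherwise flag.
__ubsan_ignore_undefined__ uint8_t cast_like_pytorch(double v) {
  return static_cast<uint8_t>(v);
}

int main() {
  // Typically prints 255 on x86, matching the modulo-style wrap-around
  // described in the removed Note [Implicit conversion between signed and unsigned].
  std::printf("%d\n", static_cast<int>(cast_like_pytorch(-1.0)));
  return 0;
}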