Revert "[Inductor] support masked vectorization for the tail_loop for float64 datatype (#163316)"
This reverts commit e9d8973427. Reverted https://github.com/pytorch/pytorch/pull/163316 on behalf of https://github.com/clee2000 because it seems to have broken some no_gpu tests: test/inductor/test_cpu_repro.py::CPUReproTests::test_double_reduction_vec. [GH job link](https://github.com/pytorch/pytorch/actions/runs/18689033019/job/53290772740) [HUD commit link](e9d8973427) ([comment](https://github.com/pytorch/pytorch/pull/163316#issuecomment-3428210509))
This commit is contained in:
parent 78bf6186f2
commit 6c4412f72b
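For reference, the failing check can be reproduced outside the test harness with the same utilities the reverted test used. This is a minimal sketch, assuming a CPU build of PyTorch with the Inductor backend; the `FileCheck` assertion only holds while the reverted change is applied:

```python
# Minimal repro sketch of test_double_reduction_vec's tail-loop check.
# Assumes a CPU build of PyTorch with Inductor; the FileCheck count below
# only passes with the reverted PR applied (masked float64 tail loops).
import torch
from torch.testing import FileCheck
from torch._inductor.utils import run_and_get_cpp_code

def fn(x):
    return x.sum(dim=1)

# 22 is not a multiple of the double vector width, forcing a tail loop.
x = torch.randn((22, 22), dtype=torch.double)
with torch.no_grad():
    expected = fn(x)
    compiled_fn = torch.compile(fn)
    actual, code = run_and_get_cpp_code(compiled_fn, x)
    torch.testing.assert_close(expected, actual)
    # Two loadu calls => both the main loop and the tail loop vectorized.
    FileCheck().check_count(
        "at::vec::VectorizedN<double,2>::loadu", 2, exactly=True
    ).run(code)
```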
test/inductor/test_cpu_repro.py:

```diff
@@ -4810,22 +4810,6 @@ class CPUReproTests(TestCase):
         self.common(fn, (x,))
         check_metrics_vec_kernel_count(1)
-
-        # Tail vectorization case
-        x = torch.randn((22, 22), dtype=torch.double)
-        torch._dynamo.reset()
-        metrics.reset()
-        with torch.no_grad():
-            expected = fn(x)
-            compiled_fn = torch.compile(fn)
-            actual, code = run_and_get_cpp_code(compiled_fn, x)
-            self.assertEqual(expected, actual)
-            # 1 generated vec kernel
-            self.assertEqual(metrics.generated_cpp_vec_kernel_count, 1)
-            # Check that both main and tail loops are vectorized
-            FileCheck().check_count(
-                "at::vec::VectorizedN<double,2>::loadu", 2, exactly=True
-            ).run(code)
 
     def test_double_reduction_vec(self):
         def fn(x):
             return x.sum(dim=1)
```
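Each removed block follows the same pattern: compile on a shape whose rows are not a multiple of the vector width, then check the generated C++ for a vectorized main loop plus a vectorized (masked) tail loop. A conceptual sketch of that loop structure, in plain Python for illustration (Inductor emits the real version in C++; the width of 16 doubles assumes VectorizedN<double,2> over 512-bit registers):

```python
# Conceptual sketch of main-loop + masked-tail vectorization (illustrative
# only; a width of 16 doubles corresponds to VectorizedN<double,2> on
# 512-bit registers).
VEC = 16

def row_sum(row: list) -> float:
    n = len(row)
    total = 0.0
    i = 0
    while i + VEC <= n:
        # Main loop: full-width vector loads (VectorizedN<double,2>::loadu).
        total += sum(row[i : i + VEC])
        i += VEC
    if i < n:
        # Tail: with masked vectorization, the remaining n - i lanes are
        # loaded with a masked loadu instead of a scalar loop.
        total += sum(row[i:n])
    return total

assert row_sum(list(range(22))) == sum(range(22))
```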
```diff
@@ -4835,22 +4819,6 @@ class CPUReproTests(TestCase):
         self.common(fn, (x,))
         check_metrics_vec_kernel_count(1)
-
-        # Tail vectorization case
-        x = torch.randn((22, 22), dtype=torch.double)
-        torch._dynamo.reset()
-        metrics.reset()
-        with torch.no_grad():
-            expected = fn(x)
-            compiled_fn = torch.compile(fn)
-            actual, code = run_and_get_cpp_code(compiled_fn, x)
-            self.assertEqual(expected, actual)
-            # 1 generated vec kernel
-            self.assertEqual(metrics.generated_cpp_vec_kernel_count, 1)
-            # Check that both main and tail loops are vectorized
-            FileCheck().check_count(
-                "at::vec::VectorizedN<double,2>::loadu", 2, exactly=True
-            ).run(code)
 
     def test_convert_fp32_to_double_vec(self):
         def fn(x):
             return x.to(torch.double)
```
```diff
@@ -4860,22 +4828,6 @@ class CPUReproTests(TestCase):
         self.common(fn, (x,))
         check_metrics_vec_kernel_count(1)
-
-        # Tail vectorization case
-        x = torch.randn(22, 22)
-        torch._dynamo.reset()
-        metrics.reset()
-        with torch.no_grad():
-            expected = fn(x)
-            compiled_fn = torch.compile(fn)
-            actual, code = run_and_get_cpp_code(compiled_fn, x)
-            self.assertEqual(expected, actual)
-            # 1 generated vec kernel
-            self.assertEqual(metrics.generated_cpp_vec_kernel_count, 1)
-            # Check that both main and tail loops are vectorized
-            FileCheck().check_count(
-                "at::vec::convert<double,2,float,1>", 2, exactly=True
-            ).run(code)
 
     def test_convert_double_to_fp32_vec(self):
         def fn(x):
             return x.to(torch.float32)
```
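Why the convert tests look for `convert<double,2,float,1>` and `convert<float,1,double,2>`: `VectorizedN<T,N>` bundles N vector registers of element type T, and a double is twice the width of a float, so one register of floats widens into two registers of doubles (and two registers of doubles narrow into one of floats). The lane counts below are illustrative arithmetic, assuming 512-bit registers; the 2:1 ratio holds for any register width:

```python
# Illustrative lane arithmetic behind VectorizedN<float,1> vs.
# VectorizedN<double,2> (assumes 512-bit registers).
REGISTER_BITS = 512
float_lanes = REGISTER_BITS // 32   # 16 floats per register
double_lanes = REGISTER_BITS // 64  # 8 doubles per register

# One VectorizedN<float,1> (16 lanes) converts into one VectorizedN<double,2>
# (2 registers x 8 lanes), so the lane counts match up.
assert 1 * float_lanes == 2 * double_lanes
```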
```diff
@@ -4885,22 +4837,6 @@ class CPUReproTests(TestCase):
         self.common(fn, (x,))
         check_metrics_vec_kernel_count(1)
-
-        # Tail vectorization case
-        x = torch.randn((22, 22), dtype=torch.double)
-        torch._dynamo.reset()
-        metrics.reset()
-        with torch.no_grad():
-            expected = fn(x)
-            compiled_fn = torch.compile(fn)
-            actual, code = run_and_get_cpp_code(compiled_fn, x)
-            self.assertEqual(expected, actual)
-            # 1 generated vec kernel
-            self.assertEqual(metrics.generated_cpp_vec_kernel_count, 1)
-            # Check that both main and tail loops are vectorized
-            FileCheck().check_count(
-                "at::vec::convert<float,1,double,2>", 2, exactly=True
-            ).run(code)
 
     def test_no_redundant_to_dtypes_between_fused_scheduler_node(self):
         # https://github.com/pytorch/pytorch/issues/115260
         p0 = torch.tensor([1.0879], dtype=torch.float16)
```
And in Inductor's C++ codegen dtype lists (the file is not named in this extract), torch.float64 is dropped from the masked-vectorizable dtypes:

```diff
@@ -159,7 +159,6 @@ VECTORIZABLE_DTYPES: list[torch.dtype] = [
 ]
 
 MASKED_VECTORIZABLE_DTYPES: list[torch.dtype] = [
-    torch.float64,
     torch.float,
     torch.bfloat16,
     torch.float16,
```
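Dropping torch.float64 from MASKED_VECTORIZABLE_DTYPES means the CPU backend stops emitting masked-vector tail loops for double; the tail falls back to scalar code. A quick way to observe this on a local build is sketched below (output depends on ISA and PyTorch version):

```python
# Sketch: count the double vector loads in the generated C++ before and
# after the revert. Exact counts depend on ISA and PyTorch version.
import torch
from torch._inductor.utils import run_and_get_cpp_code

x = torch.randn((22, 22), dtype=torch.double)
compiled = torch.compile(lambda t: t.sum(dim=1))
_, code = run_and_get_cpp_code(compiled, x)
print(code.count("at::vec::VectorizedN<double,2>::loadu"))
# With the reverted change applied: 2 (vectorized main loop + masked tail).
# After this revert: fewer such loads, since only the main loop vectorizes.
```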