# pytorch/test/inductor/test_deterministic.py
# (111 lines, 3.5 KiB, Python)

# Owner(s): ["module: inductor"]
import contextlib
import unittest
import torch
import torch._inductor.config as inductor_config
from torch._dynamo.utils import counters
from torch._inductor.test_case import run_tests, TestCase
from torch._inductor.utils import fresh_cache
from torch.testing._internal.common_utils import (
instantiate_parametrized_tests,
parametrize,
)
from torch.testing._internal.inductor_utils import (
GPU_TYPE,
HAS_CUDA_AND_TRITON,
IS_BIG_GPU,
)
@instantiate_parametrized_tests
class DeterministicTest(TestCase):
    """Verify that ``inductor_config.deterministic`` disables the
    benchmarking/autotuning passes (mm padding, max-autotune template
    selection, coordinate-descent tuning) that can make compiled-kernel
    choices nondeterministic across runs.

    Each test compiles a small function, checks numerical correctness
    against eager, and then inspects the dynamo ``counters`` to confirm
    whether the relevant benchmarking pass ran.
    """

    def setUp(self) -> None:
        super().setUp()
        self._exit_stack = contextlib.ExitStack()
        # fresh_cache() forces recompilation so each test actually
        # exercises the inductor passes under the patched config.
        self._exit_stack.enter_context(fresh_cache())
        # ``counters`` is process-global; clear it so the ``== 0``
        # assertions below are not polluted by earlier tests that ran
        # in the same process.
        counters.clear()

    def tearDown(self) -> None:
        self._exit_stack.close()
        super().tearDown()

    def test_use_deterministic_algorithms(self):
        """torch.use_deterministic_algorithms should be mirrored into
        inductor_config.deterministic."""
        old_val = torch.are_deterministic_algorithms_enabled()
        try:
            # Toggle through True/False/True to check both transitions.
            for new_val in [True, False, True]:
                torch.use_deterministic_algorithms(new_val, warn_only=True)
                self.assertEqual(inductor_config.deterministic, new_val)
        finally:
            # Restore the global flag so other tests are unaffected.
            torch.use_deterministic_algorithms(old_val, warn_only=True)

    @parametrize("deterministic", [False, True])
    def test_mm_padding(self, deterministic):
        """pad_mm benchmarking must be skipped in deterministic mode.

        2049 is deliberately non-multiple-of-8 so the pad_mm pass would
        normally consider padding (and benchmark to decide).
        """
        with inductor_config.patch(deterministic=deterministic):

            @torch.compile()
            def foo(x, y):
                return x @ y

            inps = [torch.rand([2049, 2049], device=GPU_TYPE) for _ in range(2)]
            out = foo(*inps)
            self.assertEqual(out, inps[0] @ inps[1])

        if deterministic:
            self.assertTrue(counters["inductor"]["pad_mm_bench"] == 0)
        else:
            self.assertTrue(counters["inductor"]["pad_mm_bench"] > 0)

    @parametrize("deterministic", [False, True])
    @inductor_config.patch(max_autotune=True)
    @unittest.skipIf(not IS_BIG_GPU, "templates require big gpu")
    def test_max_autotune(self, deterministic):
        """Template autotuning (select_algorithm) must be skipped in
        deterministic mode even when max_autotune is requested."""
        with inductor_config.patch(deterministic=deterministic):

            @torch.compile()
            def foo(x, y):
                return x @ y

            inps = [torch.rand([2048, 2048], device=GPU_TYPE) for _ in range(2)]
            out = foo(*inps)
            self.assertEqual(out, inps[0] @ inps[1])

        if deterministic:
            self.assertTrue(counters["inductor"]["select_algorithm_autotune"] == 0)
        else:
            self.assertTrue(counters["inductor"]["select_algorithm_autotune"] > 0)

    def test_pointwise_coordesc_tuning(self):
        """Sanity check: with max-autotune (and no deterministic flag),
        coordinate-descent tuning runs for a pointwise kernel."""

        @torch.compile(mode="max-autotune")
        def f(x):
            return x + 1

        x = torch.randn(2048, device=GPU_TYPE)
        self.assertEqual(f(x), x + 1)
        self.assertTrue(counters["inductor"]["coordesc_tuning_bench"] > 0)

    @parametrize("deterministic", [False, True])
    def test_reduction_coordesc_tuning(self, deterministic):
        """Coordinate-descent tuning for reductions must be skipped in
        deterministic mode."""
        with inductor_config.patch(
            deterministic=deterministic, coordinate_descent_tuning=True
        ):

            @torch.compile()
            def foo(x):
                return x.sum(dim=-1)

            inp = torch.rand([2048, 2048], device=GPU_TYPE)
            out = foo(inp)
            self.assertEqual(out, inp.sum(dim=-1))

        if deterministic:
            self.assertTrue(counters["inductor"]["coordesc_tuning_bench"] == 0)
        else:
            self.assertTrue(counters["inductor"]["coordesc_tuning_bench"] > 0)
if __name__ == "__main__":
    # These tests compile GPU kernels; only run the suite when a CUDA +
    # Triton toolchain is actually available.
    if HAS_CUDA_AND_TRITON:
        run_tests()