[CUDA] xFail max-autotune grouped gemm tests on devices with insufficient SM count (#165921)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165921
Approved by: https://github.com/ngimel
This commit is contained in:
Eddie Yan 2025-10-30 20:05:03 +00:00 committed by PyTorch MergeBot
parent 52db60170d
commit f911d64750

View File

@@ -49,6 +49,8 @@ from torch.testing._internal.common_utils import (
     decorateIf,
 )
+from torch.testing._internal.inductor_utils import IS_BIG_GPU
 from torch._inductor.test_case import TestCase as InductorTestCase

 _IS_SM8X = False
@@ -619,8 +621,12 @@ class TestMatmulCuda(InductorTestCase):
             raise AssertionError(f"Invalid op: {op}")
         C_ref = f_ref(A, B.transpose(-2, -1), offs=offs)
-        C = f(A, B.transpose(-2, -1), offs=offs)
-        torch.testing.assert_close(C, C_ref)
+        if not IS_BIG_GPU and max_autotune:
+            with self.assertRaisesRegex(torch._inductor.exc.InductorError, "NoValidChoicesError"):
+                C = f(A, B.transpose(-2, -1), offs=offs)
+        else:
+            C = f(A, B.transpose(-2, -1), offs=offs)
+            self.assertEqual(C, C_ref)

     @onlyCUDA