[CUDA] xFail max-autotune grouped gemm tests on devices with insufficient SM count (#165921)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165921
Approved by: https://github.com/ngimel
2025-12-06 00:20:18 +01:00 · 2025-10-30 20:05:03 +00:00 · 2025-10-30 20:05:03 +00:00 · f911d64750
commit f911d64750
parent 52db60170d
1 changed files with 8 additions and 2 deletions
--- a/test/test_matmul_cuda.py
+++ b/test/test_matmul_cuda.py
@@ -49,6 +49,8 @@ from torch.testing._internal.common_utils import (
    decorateIf,
 )

+from torch.testing._internal.inductor_utils import IS_BIG_GPU
+
 from torch._inductor.test_case import TestCase as InductorTestCase

 _IS_SM8X = False
@@ -619,8 +621,12 @@ class TestMatmulCuda(InductorTestCase):
            raise AssertionError(f"Invalid op: {op}")

        C_ref = f_ref(A, B.transpose(-2, -1), offs=offs)
-        C = f(A, B.transpose(-2, -1), offs=offs)
-        torch.testing.assert_close(C, C_ref)
+        if not IS_BIG_GPU and max_autotune:
+            with self.assertRaisesRegex(torch._inductor.exc.InductorError, "NoValidChoicesError"):
+                C = f(A, B.transpose(-2, -1), offs=offs)
+        else:
+            C = f(A, B.transpose(-2, -1), offs=offs)
+            self.assertEqual(C, C_ref)


    @onlyCUDA