[ROCm][CI] remove relaxed tolerance for tf32 tests (#166478)
Instead of relaxing tolerances for certain unit tests that exercise TF32 on MI300, skip the tests until hipblaslt accuracy is improved.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/166478
Approved by: https://github.com/jeffdaily
Co-authored-by: Jeff Daily <jeff.daily@amd.com>
Co-authored-by: Jagadish Krishnamoorthy <jagadish.krishnamoorthy@amd.com>
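As an illustration of the pattern this change applies across the test suite, here is a minimal sketch; the class name and test body below are hypothetical, while skipIfRocmArch, MI300_ARCH, tf32_on_and_off, and onlyCUDA are the helpers the diff actually touches. Rather than carrying a ROCm-only tolerance such as 0.05 if TEST_WITH_ROCM else 0.005, the test keeps the single CUDA tolerance and is skipped outright on MI300-class GPUs.

# Minimal sketch (illustrative, not code from this PR): skip TF32 on MI300
# instead of widening its tolerance.
import torch
from torch.testing._internal.common_cuda import tf32_on_and_off
from torch.testing._internal.common_device_type import instantiate_device_type_tests, onlyCUDA
from torch.testing._internal.common_utils import MI300_ARCH, run_tests, skipIfRocmArch, TestCase


class TF32SkipExample(TestCase):
    @skipIfRocmArch(MI300_ARCH)   # skip on MI300 until hipblaslt TF32 accuracy improves
    @onlyCUDA
    @tf32_on_and_off(0.005)       # one tolerance, no "0.05 if TEST_WITH_ROCM else 0.005" branch
    def test_conv2d_1x1_kernel(self, device):
        # Mirror of the Conv2d size-1-kernel check: CPU reference vs. device result.
        x_cpu = torch.randn(2, 3, 5, 5)
        conv_cpu = torch.nn.Conv2d(3, 3, kernel_size=1)
        conv_dev = torch.nn.Conv2d(3, 3, kernel_size=1).to(device)
        conv_dev.load_state_dict(conv_cpu.state_dict())
        self.assertEqual(conv_cpu(x_cpu), conv_dev(x_cpu.to(device)), exact_device=False)


instantiate_device_type_tests(TF32SkipExample, globals(), only_for="cuda")

if __name__ == "__main__":
    run_tests()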
This commit is contained in: parent 1e3600b528, commit c3b71d5499
@@ -47,9 +47,11 @@ from torch.testing._internal.common_utils import (
     gradgradcheck,
     instantiate_parametrized_tests,
     MACOS_VERSION,
+    MI300_ARCH,
     parametrize as parametrize_test,
     run_tests,
     set_default_dtype,
+    skipIfRocmArch,
     subtest,
     TEST_SCIPY,
     TEST_WITH_ROCM,
@@ -3393,8 +3395,9 @@ class TestConvolutionNNDeviceType(NNTestCase):
         F.conv_transpose2d(x, torch.randn(16, 1, 1, 1, device=device))
         F.conv2d(x, torch.randn(1, 16, 1, 1, device=device))

+    @skipIfRocmArch(MI300_ARCH)
     @onlyCUDA
-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @tf32_on_and_off(0.005)
     def test_Conv2d_size_1_kernel(self, device):
         x_cpu = torch.randn(2, 3, 5, 5)
         conv_cpu = torch.nn.Conv2d(3, 3, kernel_size=1)
@@ -3425,8 +3428,9 @@ class TestConvolutionNNDeviceType(NNTestCase):
             exact_device=False,
         )

+    @skipIfRocmArch(MI300_ARCH)
     @onlyCUDA
-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @tf32_on_and_off(0.005)
     def test_ConvTranspose2d_size_1_kernel(self, device):
         x_cpu = torch.randn(2, 3, 5, 5)
         conv_cpu = torch.nn.ConvTranspose2d(3, 3, kernel_size=1)
@@ -6967,7 +6967,8 @@ class TestCompileKernel(TestCase):
         with self.assertRaises(RuntimeError):
             kernel.set_shared_memory_config(excessive_shared_mem)

-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @skipIfRocmArch(MI300_ARCH)
+    @tf32_on_and_off(0.005)
     @unittest.skipIf(not TEST_CUDA, "No CUDA")
     def test_compile_kernel_advanced(self):
         # Test matrix multiplication
@@ -755,10 +755,11 @@ class TestLinalg(TestCase):
             cholesky_test_helper(3, batchsize, upper)

     @precisionOverride({torch.float32: 1e-4, torch.complex64: 1e-4})
+    @skipIfRocmArch(MI300_ARCH)
     @skipCUDAIfNoMagma
     @skipCPUIfNoLapack
     @dtypes(*floating_and_complex_types())
-    @tf32_on_and_off(0.1 if TEST_WITH_ROCM else 0.01)
+    @tf32_on_and_off(0.01)
     @reduced_f32_on_and_off(0.01)
     def test_old_cholesky(self, device, dtype):
         from torch.testing._internal.common_utils import random_hermitian_pd_matrix
@@ -7410,9 +7411,10 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2:
     def test_addmm_gelu(self, device, dtype):
         self._test_addmm_impl(torch._addmm_activation, "gelu", device, dtype)

+    @skipIfRocmArch(MI300_ARCH)
     @dtypes(torch.float, torch.double)
     @dtypesIfCUDA(*floating_and_complex_types())
-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @tf32_on_and_off(0.005)
     @reduced_f32_on_and_off(0.005)
     def test_addmm_sizes(self, device, dtype):
         for m in [0, 1, 25]:
@@ -9369,8 +9371,8 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2:
             r1 = fntorch(t0_full, t1, t2)
             self.assertEqual(r0, r1)

-    # ROCm 6.4 passes with tf32=on, but 6.4.1 needed tolerance reduced slightly
-    @tf32_on_and_off(0.002 if torch.version.hip else 0.001)
+    @skipIfRocmArch(MI300_ARCH)
+    @tf32_on_and_off(0.001)
     @reduced_f32_on_and_off(0.001)
     def test_broadcast_batched_matmul(self, device):
         n_dim = random.randint(1, 8)
@@ -9707,7 +9709,8 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2:
         self.assertEqual((torch.tensor(1., device=device), torch.tensor(0., device=device)),
                          fn(torch.slogdet, (0, 0)))

-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @skipIfRocmArch(MI300_ARCH)
+    @tf32_on_and_off(0.005)
     @reduced_f32_on_and_off(0.07, 0.005)
     def test_tensordot(self, device):
         a = torch.arange(60., device=device).reshape(3, 4, 5)
@@ -32,7 +32,7 @@ from torch.nn import Buffer, Parameter
 from torch.nn.parallel._functions import Broadcast
 from torch.testing._internal.common_dtype import integral_types, get_all_math_dtypes, floating_types
 from torch.testing._internal.common_utils import dtype_name, freeze_rng_state, run_tests, TestCase, \
-    skipIfNoLapack, skipIfRocm, \
+    skipIfNoLapack, skipIfRocm, MI300_ARCH, skipIfRocmArch, \
     TEST_NUMPY, TEST_SCIPY, TEST_WITH_CROSSREF, TEST_WITH_ROCM, \
     download_file, get_function_arglist, load_tests, skipIfMPS, \
     IS_PPC, \
@@ -8378,8 +8378,9 @@ class TestNNDeviceType(NNTestCase):

     @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
                      "Scipy v1.0 and/or numpy not found")
+    @skipIfRocmArch(MI300_ARCH)
     @expectedFailureMPS  # Unsupported Border padding mode https://github.com/pytorch/pytorch/issues/125098
-    @tf32_on_and_off(0.01 if TEST_WITH_ROCM else 0.001)
+    @tf32_on_and_off(0.001)
     @reduced_f32_on_and_off(0.001)
     def test_affine_2d_rotate90(self, device):
         # scipy before 1.0.0 do not support homogeneous coordinate
@@ -8526,8 +8527,9 @@ class TestNNDeviceType(NNTestCase):

     @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
                      "Scipy v1.0 and/or numpy not found")
+    @skipIfRocmArch(MI300_ARCH)
     @expectedFailureMPS  # Unsupported Border padding mode https://github.com/pytorch/pytorch/issues/125098
-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @tf32_on_and_off(0.005)
     @reduced_f32_on_and_off(0.005)
     def test_affine_2d_rotateRandom(self, device):
         # scipy before 1.0.0 do not support homogeneous coordinate
@@ -8579,7 +8581,8 @@ class TestNNDeviceType(NNTestCase):

     @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
                      "Scipy v1.0 and/or numpy not found")
-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @skipIfRocmArch(MI300_ARCH)
+    @tf32_on_and_off(0.005)
     @reduced_f32_on_and_off(0.005)
     def test_affine_3d_rotateRandom(self, device):
         # scipy before 1.0.0 do not support homogeneous coordinate
@@ -9456,8 +9459,9 @@ class TestNNDeviceType(NNTestCase):
             unfold(inp)

     @onlyCUDA
+    @skipIfRocmArch(MI300_ARCH)
     @dtypes(torch.float, torch.double)
-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @tf32_on_and_off(0.005)
     def test_rnn_fused(self, device, dtype):

         def copy_rnn(rnn1, rnn2):
@@ -11936,10 +11940,11 @@ class TestNNDeviceType(NNTestCase):
         with self.assertRaisesRegex(RuntimeError, "log_probs tensor must not be empty"):
             F.ctc_loss(log_probs, targets, input_lengths, target_lengths, reduction='none')

+    @skipIfRocmArch(MI300_ARCH)
     @expectedFailureMPS  # RuntimeError: LSTM with projections is not currently supported with MPS.
     @dtypesIfCUDA(torch.half, torch.float, torch.double)
     @dtypes(torch.float)
-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @tf32_on_and_off(0.005)
     @skipIfTorchDynamo("TorchDynamo fails here for unknown reasons")
     def test_variable_sequence(self, device, dtype):
         def pad(var, length):
@@ -2479,7 +2479,8 @@ class TestTorchDeviceType(TestCase):
         self.assertEqual(x1.grad, x2.grad, rtol=0, atol=0.001)
         self.assertEqual(y1.grad, y2.grad, rtol=0, atol=0.001)

-    @tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
+    @skipIfRocmArch(MI300_ARCH)
+    @tf32_on_and_off(0.005)
     @reduced_f32_on_and_off(0.08)
     def test_cdist_large(self, device):
         for cm in ['use_mm_for_euclid_dist_if_necessary', 'use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
@@ -24,6 +24,8 @@ from torch.testing._internal.common_nn import NNTestCase
 from torch.testing._internal.common_utils import (
     TEST_WITH_ROCM,
     skipIfRocm,
+    skipIfRocmArch,
+    MI300_ARCH,
     skipIfTorchDynamo,
     TEST_FAIRSEQ,
     run_tests,
@@ -427,7 +429,8 @@ class TestTransformers(NNTestCase):
         # remove hook
         handle.remove()

-    @tf32_on_and_off(0.0021 if TEST_WITH_ROCM else 0.001)
+    @skipIfRocmArch(MI300_ARCH)
+    @tf32_on_and_off(0.001)
     @parametrize("use_torchscript", [False])
     @parametrize("enable_nested_tensor", [True, False])
     @parametrize("use_autocast", [True, False])
@@ -120,7 +120,9 @@ module_tests = [
         desc='no_bias',
         reference_fn=lambda i, p, _: torch.mm(i, p[0].t()),
         with_tf32=True,
-        tf32_precision=0.05 if TEST_WITH_ROCM else 0.005,
+        tf32_precision=0.005,
+        # ROCM: skipping tf32 test on gfx94 archs due to tolerance issue.
+        test_cuda=not (TEST_WITH_ROCM and "gfx94" in torch.cuda.get_device_properties(0).gcnArchName),
         default_dtype=torch.double,
     ),
     dict(
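The module_tests entry above uses a different mechanism than the decorator: the CUDA leg of the generated test is gated by a boolean. Below is a small, hypothetical helper (the function name is invented for illustration) showing how that gate evaluates; the short-circuit on TEST_WITH_ROCM matters because gcnArchName is typically only exposed by ROCm builds.

# Hypothetical helper mirroring the test_cuda gate used for the module_tests entry above.
import torch
from torch.testing._internal.common_utils import TEST_WITH_ROCM


def tf32_module_test_enabled() -> bool:
    # No CUDA/ROCm device visible: nothing to gate.
    if not torch.cuda.is_available():
        return False
    # ROCm: skip the TF32 module test on gfx94 (MI300-class) archs due to the tolerance issue.
    if TEST_WITH_ROCM and "gfx94" in torch.cuda.get_device_properties(0).gcnArchName:
        return False
    return True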