From c4400fc4319eecc8637d39cfd43d4080e538be7c Mon Sep 17 00:00:00 2001
From: Jane Xu
Date: Mon, 10 Jan 2022 09:12:18 -0800
Subject: [PATCH] Retire repeat_test_for_types (#71033)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/69865

Removes the repeat_test_for_types helper from torch.testing._internal.common_utils and ports the nn.DataParallel tests that used it to the device-type test framework (instantiate_device_type_tests with the dtypes, onlyCUDA, and skipMeta decorators).

cc pietern mrshenli pritamdamania87 zhaojuanmao satgera rohan-varma gqchen aazzolini osalpekar jiayisuse SciPioneer H-Huang

Pull Request resolved: https://github.com/pytorch/pytorch/pull/71033

Reviewed By: mruberry

Differential Revision: D33486370

Pulled By: janeyx99

fbshipit-source-id: 71f9383dbc1e00b572f26eb4f04d0a94c6759e35
---
 test/distributed/test_data_parallel.py  | 203 +++++++++++++-----------
 torch/testing/_internal/common_utils.py |  14 --
 2 files changed, 107 insertions(+), 110 deletions(-)

diff --git a/test/distributed/test_data_parallel.py b/test/distributed/test_data_parallel.py
index b0c10938ac2..c41088703ea 100644
--- a/test/distributed/test_data_parallel.py
+++ b/test/distributed/test_data_parallel.py
@@ -12,7 +12,8 @@ from torch import nn
 from torch.cuda.amp import autocast
 import torch.nn.parallel as dp
 from torch.testing._internal.common_cuda import TEST_MULTIGPU, TEST_CUDA
-from torch.testing._internal.common_utils import run_tests, TestCase, repeat_test_for_types, ALL_TENSORTYPES
+from torch.testing._internal.common_device_type import instantiate_device_type_tests, dtypes, onlyCUDA, skipMeta
+from torch.testing._internal.common_utils import run_tests, TestCase
 from torch.testing._internal.common_utils import _assertGradAndGradgradChecks, gradcheck
 from torch.testing._internal.common_utils import dtype2prec_DONTUSE
 from torch.testing._internal.common_utils import sandcastle_skip_if
@@ -434,93 +435,6 @@ class TestDataParallel(TestCase):
         output = dp.data_parallel(Net(), input, gpus)
         self.assertEqual(output, fn(input))
 
-    @sandcastle_skip_if(not TEST_CUDA, "CUDA unavailable")
-    @repeat_test_for_types(ALL_TENSORTYPES)
-    def test_data_parallel_module(self, dtype=torch.float):
-        l = nn.Linear(10, 5).to("cuda", dtype)
-        i = torch.randn(20, 10, device="cuda", dtype=dtype)
-        expected_out = l(i)
-        net = nn.DataParallel(l)
-        out = net(i)
-        self.assertEqual(out.get_device(), 0)
-        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
-
-    @sandcastle_skip_if(not TEST_CUDA, "CUDA unavailable")
-    @repeat_test_for_types(ALL_TENSORTYPES)
-    def test_data_parallel_module_kwargs_only(self, dtype=torch.float):
-        class Net(nn.Module):
-            def __init__(self):
-                super(Net, self).__init__()
-                self.l = l
-
-            def forward(self, input):
-                return self.l(input)
-
-        l = nn.Linear(10, 5).to("cuda", dtype)
-        i = torch.randn(20, 10, device="cuda", dtype=dtype)
-        expected_out = l(i)
-        n = nn.DataParallel(Net())
-        out = n(input=i)
-        self.assertEqual(out.get_device(), 0)
-        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
-
-    @sandcastle_skip_if(not TEST_CUDA, "CUDA unavailable")
-    @repeat_test_for_types(ALL_TENSORTYPES)
-    def test_data_parallel_module_kwargs_only_empty_list(self, dtype=torch.float):
-        class Net(nn.Module):
-            def __init__(self):
-                super(Net, self).__init__()
-                self.l = l
-
-            def forward(self, input):
-                return self.l(input['data'])
-
-        l = nn.Linear(10, 5).to("cuda", dtype)
-        i = torch.randn(20, 10, device="cuda", dtype=dtype)
-        expected_out = l(i)
-        n = nn.DataParallel(Net())
-        out = n(input={'data': i, 'unused': []})
-        self.assertEqual(out.get_device(), 0)
-        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
-
-    @sandcastle_skip_if(not TEST_CUDA, "CUDA unavailable")
-    @repeat_test_for_types(ALL_TENSORTYPES)
-    def test_data_parallel_module_kwargs_only_empty_dict(self, dtype=torch.float):
-        class Net(nn.Module):
-            def __init__(self):
-                super(Net, self).__init__()
-                self.l = l
-
-            def forward(self, input):
-                return self.l(input['data'])
-
-        l = nn.Linear(10, 5).to("cuda", dtype)
-        i = torch.randn(20, 10, device="cuda", dtype=dtype)
-        expected_out = l(i)
-        n = nn.DataParallel(Net())
-        out = n(input={'data': i, 'unused': {}})
-        self.assertEqual(out.get_device(), 0)
-        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
-
-    @sandcastle_skip_if(not TEST_CUDA, "CUDA unavailable")
-    @repeat_test_for_types(ALL_TENSORTYPES)
-    def test_data_parallel_module_kwargs_only_empty_tuple(self, dtype=torch.float):
-        class Net(nn.Module):
-            def __init__(self):
-                super(Net, self).__init__()
-                self.l = l
-
-            def forward(self, input):
-                return self.l(input['data'])
-
-        l = nn.Linear(10, 5).to("cuda", dtype)
-        i = torch.randn(20, 10, device="cuda", dtype=dtype)
-        expected_out = l(i)
-        n = nn.DataParallel(Net())
-        out = n(input={'data': i, 'unused': ()})
-        self.assertEqual(out.get_device(), 0)
-        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
-
     @sandcastle_skip_if(not TEST_MULTIGPU, "multi-GPU not supported")
     def test_data_parallel_module_zero_inputs(self):
         class TestModule(nn.Module):
@@ -757,13 +671,13 @@ class TestDataParallel(TestCase):
     @sandcastle_skip_if(not TEST_MULTIGPU, "multi-GPU not supported")
     def test_strided_grad_layout(self):
         class ConvNet(nn.Module):
-            def __init__(self, layouts, dtypes):
+            def __init__(self, layouts, dtype_list):
                 super(ConvNet, self).__init__()
-                self.dtypes = dtypes
-                self.conv0 = torch.nn.Conv2d(8, 16, (2, 2)).to(memory_format=layouts[0], dtype=dtypes[0])
-                self.conv1 = torch.nn.Conv2d(16, 32, (2, 2)).to(memory_format=layouts[1], dtype=dtypes[1])
-                self.conv2 = torch.nn.Conv2d(32, 16, (2, 2)).to(memory_format=layouts[2], dtype=dtypes[2])
-                self.conv3 = torch.nn.Conv2d(16, 8, (2, 2)).to(memory_format=layouts[3], dtype=dtypes[3])
+                self.dtypes = dtype_list
+                self.conv0 = torch.nn.Conv2d(8, 16, (2, 2)).to(memory_format=layouts[0], dtype=dtype_list[0])
+                self.conv1 = torch.nn.Conv2d(16, 32, (2, 2)).to(memory_format=layouts[1], dtype=dtype_list[1])
+                self.conv2 = torch.nn.Conv2d(32, 16, (2, 2)).to(memory_format=layouts[2], dtype=dtype_list[2])
+                self.conv3 = torch.nn.Conv2d(16, 8, (2, 2)).to(memory_format=layouts[3], dtype=dtype_list[3])
 
             def forward(self, x):
                 x = x.to(self.dtypes[0])
@@ -786,10 +700,10 @@ class TestDataParallel(TestCase):
         device_ids = list(range(ndevs))
 
         with torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False):
-            for formats, dtypes in product(layer_formats, layer_dtypes):
+            for formats, dtype_list in product(layer_formats, layer_dtypes):
                 model_msg = "formats = {} dtypes = {}".format(formats, dtypes)
                 try:
-                    m = ConvNet(formats, dtypes).cuda(device="cuda:0")
+                    m = ConvNet(formats, dtype_list).cuda(device="cuda:0")
                     m_dp = dp.DataParallel(deepcopy(m), device_ids=device_ids)
                     opt = torch.optim.SGD(m.parameters(), lr=0.1)
                     opt_dp = torch.optim.SGD(m_dp.parameters(), lr=0.1)
@@ -855,5 +769,102 @@ class TestDataParallel(TestCase):
             model(input)
 
 
+class TestDataParallelDeviceType(TestCase):
+
+    @onlyCUDA
+    @skipMeta
+    @dtypes(torch.float, torch.double, torch.half)
+    def test_data_parallel_module(self, device, dtype):
+        l = nn.Linear(10, 5).to(device, dtype)
+        i = torch.randn(20, 10, device=device, dtype=dtype)
+        expected_out = l(i)
+        net = nn.DataParallel(l)
+        out = net(i)
+        self.assertEqual(out.get_device(), 0)
+        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
+
+    @onlyCUDA
+    @skipMeta
+    @dtypes(torch.float, torch.double, torch.half)
+    def test_data_parallel_module_kwargs_only(self, device, dtype):
+        class Net(nn.Module):
+            def __init__(self):
+                super(Net, self).__init__()
+                self.l = l
+
+            def forward(self, input):
+                return self.l(input)
+
+        l = nn.Linear(10, 5).to(device, dtype)
+        i = torch.randn(20, 10, device=device, dtype=dtype)
+        expected_out = l(i)
+        n = nn.DataParallel(Net())
+        out = n(input=i)
+        self.assertEqual(out.get_device(), 0)
+        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
+
+    @onlyCUDA
+    @skipMeta
+    @dtypes(torch.float, torch.double, torch.half)
+    def test_data_parallel_module_kwargs_only_empty_list(self, device, dtype):
+        class Net(nn.Module):
+            def __init__(self):
+                super(Net, self).__init__()
+                self.l = l
+
+            def forward(self, input):
+                return self.l(input['data'])
+
+        l = nn.Linear(10, 5).to(device, dtype)
+        i = torch.randn(20, 10, device=device, dtype=dtype)
+        expected_out = l(i)
+        n = nn.DataParallel(Net())
+        out = n(input={'data': i, 'unused': []})
+        self.assertEqual(out.get_device(), 0)
+        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
+
+    @onlyCUDA
+    @skipMeta
+    @dtypes(torch.float, torch.double, torch.half)
+    def test_data_parallel_module_kwargs_only_empty_dict(self, device, dtype):
+        class Net(nn.Module):
+            def __init__(self):
+                super(Net, self).__init__()
+                self.l = l
+
+            def forward(self, input):
+                return self.l(input['data'])
+
+        l = nn.Linear(10, 5).to(device, dtype)
+        i = torch.randn(20, 10, device=device, dtype=dtype)
+        expected_out = l(i)
+        n = nn.DataParallel(Net())
+        out = n(input={'data': i, 'unused': {}})
+        self.assertEqual(out.get_device(), 0)
+        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
+
+    @onlyCUDA
+    @skipMeta
+    @dtypes(torch.float, torch.double, torch.half)
+    def test_data_parallel_module_kwargs_only_empty_tuple(self, device, dtype):
+        class Net(nn.Module):
+            def __init__(self):
+                super(Net, self).__init__()
+                self.l = l
+
+            def forward(self, input):
+                return self.l(input['data'])
+
+        l = nn.Linear(10, 5).to(device, dtype)
+        i = torch.randn(20, 10, device=device, dtype=dtype)
+        expected_out = l(i)
+        n = nn.DataParallel(Net())
+        out = n(input={'data': i, 'unused': ()})
+        self.assertEqual(out.get_device(), 0)
+        self.assertEqual(out, expected_out, atol=dtype2prec_DONTUSE[dtype], rtol=0)
+
+
+instantiate_device_type_tests(TestDataParallelDeviceType, globals())
+
 if __name__ == '__main__':
     run_tests()
diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py
index 46a5ad26bd7..9a253fbdc03 100644
--- a/torch/testing/_internal/common_utils.py
+++ b/torch/testing/_internal/common_utils.py
@@ -523,20 +523,6 @@ def shell(command, cwd=None, env=None):
     return wait_for_process(p)
 
 
-
-# Used to run the same test with different tensor types
-def repeat_test_for_types(dtypes):
-    def repeat_helper(f):
-        @wraps(f)
-        def call_helper(self, *args):
-            for dtype in dtypes:
-                with TestCase.subTest(self, dtype=dtype):
-                    f(self, *args, dtype=dtype)
-
-        return call_helper
-    return repeat_helper
-
-
 def discover_test_cases_recursively(suite_or_case):
     if isinstance(suite_or_case, unittest.TestCase):
         return [suite_or_case]
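
For readers unfamiliar with the device-type test framework that replaces repeat_test_for_types above, the following minimal, self-contained sketch shows the same pattern in isolation. It is not part of the patch: TestExample and test_add are made-up names; the imports, decorators, and instantiate_device_type_tests call mirror the real usage in test/distributed/test_data_parallel.py.

# Illustrative sketch only -- not part of the patch above.
# TestExample / test_add are hypothetical names; the decorators and the
# instantiate_device_type_tests call are the same ones the patch uses.
import torch
from torch.testing._internal.common_device_type import (
    instantiate_device_type_tests,
    dtypes,
    onlyCUDA,
)
from torch.testing._internal.common_utils import TestCase, run_tests


class TestExample(TestCase):
    @onlyCUDA
    @dtypes(torch.float, torch.double, torch.half)
    def test_add(self, device, dtype):
        # Each generated variant receives a device string and a single dtype,
        # replacing the old pattern of looping over ALL_TENSORTYPES inside one test.
        a = torch.ones(4, device=device, dtype=dtype)
        self.assertEqual(a + a, torch.full((4,), 2, device=device, dtype=dtype))


# Replaces the generic class with device-specific classes (e.g. TestExampleCUDA),
# each containing one test per dtype, such as test_add_cuda_float32.
instantiate_device_type_tests(TestExample, globals())

if __name__ == "__main__":
    run_tests()

Run on a CUDA machine this produces per-dtype tests (test_add_cuda_float32, test_add_cuda_float64, test_add_cuda_float16); on a machine without CUDA the @onlyCUDA variants are skipped, which is the behavior the migrated DataParallel tests rely on in place of the old sandcastle_skip_if(not TEST_CUDA, ...) guard.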