mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Jetson Update for CI Redo (#94549)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/94549 Approved by: https://github.com/ezyang, https://github.com/malfet
This commit is contained in:
parent
c6d8d10b3e
commit
8aa34602f7
|
|
@ -7,7 +7,7 @@ import torch
|
|||
import torch._dynamo
|
||||
import torch._inductor.utils
|
||||
from torch._dynamo.test_minifier_common import MinifierTestBase
|
||||
from torch.testing._internal.common_utils import IS_MACOS
|
||||
from torch.testing._internal.common_utils import IS_JETSON, IS_MACOS
|
||||
|
||||
_HAS_TRITON = torch._inductor.utils.has_triton()
|
||||
requires_cuda = functools.partial(unittest.skipIf, not _HAS_TRITON, "requires cuda")
|
||||
|
|
@ -99,11 +99,13 @@ torch._dynamo.config.debug_dir_root = "{self.DEBUG_DIR}"
|
|||
(test_proc.returncode, repro_proc.returncode),
|
||||
)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_after_aot_cpu_compile_error(self):
|
||||
(tb1, tb2), _ = self._test_after_aot("cpu", CPP_COMPILE_ERROR, 2)
|
||||
self.assertIn("CppCompileError", tb1)
|
||||
self.assertIn("CppCompileError", tb2)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_after_aot_cpu_accuracy_error(self):
|
||||
(tb1, tb2), _ = self._test_after_aot("cpu", CPP_ACCURACY_ERROR, 4)
|
||||
self.assertIn("AccuracyError", tb1)
|
||||
|
|
@ -149,6 +151,7 @@ torch._dynamo.config.debug_dir_root = "{self.DEBUG_DIR}"
|
|||
self.assertEqual(test_proc.returncode, repro_proc.returncode)
|
||||
self.assertNotEqual(test_proc.returncode, 0)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_after_aot_cpu_runtime_error(self):
|
||||
self._test_after_aot_runtime_error("cpu", CPP_RUNTIME_ERROR)
|
||||
|
||||
|
|
@ -181,12 +184,15 @@ torch._dynamo.config.debug_dir_root = "{self.DEBUG_DIR}"
|
|||
self.assertEqual(proc.returncode, 0)
|
||||
self.assertIsNone(repro_dir)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_after_aot_cpu_compile_backend_passes(self):
|
||||
self._test_after_aot_backend_passes("cpu", 2, CPP_COMPILE_ERROR)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_after_aot_cpu_runtime_backend_passes(self):
|
||||
self._test_after_aot_backend_passes("cpu", 2, CPP_RUNTIME_ERROR)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_after_aot_cpu_accuracy_backend_passes(self):
|
||||
self._test_after_aot_backend_passes("cpu", 4, CPP_ACCURACY_ERROR)
|
||||
|
||||
|
|
@ -206,6 +212,7 @@ torch._dynamo.config.debug_dir_root = "{self.DEBUG_DIR}"
|
|||
|
||||
# Test that inductor config can be saved and restored, especially class
|
||||
# variables.
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_inductor_config_serialization(self):
|
||||
run_code = textwrap.dedent(
|
||||
"""\
|
||||
|
|
@ -248,11 +255,13 @@ inner(torch.randn(20, 20).to("cpu"))
|
|||
)
|
||||
return (test_proc.stderr.decode("utf-8"), repro_proc.stderr.decode("utf-8"))
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_after_aot_with_modified_config_compile_error(self):
|
||||
tb1, tb2 = self._test_after_aot_with_modified_config(CPP_COMPILE_ERROR, 2)
|
||||
self.assertIn("CppCompileError", tb1)
|
||||
self.assertIn("CppCompileError", tb2)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_after_aot_with_modified_config_accuracy_error(self):
|
||||
tb1, tb2 = self._test_after_aot_with_modified_config(CPP_ACCURACY_ERROR, 4)
|
||||
self.assertIn("AccuracyError", tb1)
|
||||
|
|
@ -287,21 +296,25 @@ inner(torch.randn(20, 20).to("cpu"))
|
|||
(test_proc.returncode, repro_proc.returncode),
|
||||
)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_torch_compile_after_dynamo_compile_error(self):
|
||||
(tb1, tb2), _ = self._test_torch_compile("dynamo", 2, CPP_COMPILE_ERROR)
|
||||
self.assertIn("CppCompileError", tb1)
|
||||
self.assertIn("CppCompileError", tb2)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_torch_compile_after_dynamo_accuracy_error(self):
|
||||
(tb1, tb2), _ = self._test_torch_compile("dynamo", 4, CPP_ACCURACY_ERROR)
|
||||
self.assertIn("AccuracyError", tb1)
|
||||
self.assertIn("AccuracyError", tb2)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_torch_compile_after_aot_compile_error(self):
|
||||
(tb1, tb2), _ = self._test_torch_compile("aot", 2, CPP_COMPILE_ERROR)
|
||||
self.assertIn("CppCompileError", tb1)
|
||||
self.assertIn("CppCompileError", tb2)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
|
||||
def test_torch_compile_after_aot_accuracy_error(self):
|
||||
(tb1, tb2), _ = self._test_torch_compile("aot", 4, CPP_ACCURACY_ERROR)
|
||||
self.assertIn("AccuracyError", tb1)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
# Owner(s): ["module: inductor"]
|
||||
import logging
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
import torch._dynamo as torchdynamo
|
||||
import torch._inductor.config as torchinductor_config
|
||||
from torch.testing._internal.common_utils import IS_LINUX, TestCase
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
class MLP(torch.nn.Module):
|
||||
|
|
@ -24,6 +26,7 @@ def _test_f(x):
|
|||
|
||||
|
||||
class SmokeTest(TestCase):
|
||||
@unittest.skipIf(not HAS_CUDA, "Triton is not available")
|
||||
def test_mlp(self):
|
||||
torchdynamo.config.log_level = logging.INFO
|
||||
torchdynamo.config.verbose = True
|
||||
|
|
@ -36,6 +39,7 @@ class SmokeTest(TestCase):
|
|||
torchdynamo.config.verbose = False
|
||||
torchinductor_config.debug = False
|
||||
|
||||
@unittest.skipIf(not HAS_CUDA, "Triton is not available")
|
||||
def test_compile_decorator(self):
|
||||
@torch.compile
|
||||
def foo(x):
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from itertools import product
|
|||
|
||||
import torch
|
||||
from torch.testing._internal.common_utils import run_tests, set_default_dtype, \
|
||||
instantiate_parametrized_tests, parametrize as parametrize_test, _assertGradAndGradgradChecks
|
||||
instantiate_parametrized_tests, parametrize as parametrize_test, _assertGradAndGradgradChecks, IS_JETSON
|
||||
from torch.testing._internal.common_cuda import TEST_CUDA
|
||||
from torch.testing._internal.common_nn import NNTestCase
|
||||
from torch.testing._internal.common_device_type import onlyNativeDeviceTypes, dtypes, \
|
||||
|
|
@ -1172,6 +1172,8 @@ class TestEmbeddingNNDeviceType(NNTestCase):
|
|||
@dtypesIfCUDA(*itertools.product((torch.int, torch.long), (torch.int, torch.long),
|
||||
(torch.float, torch.double, torch.half)))
|
||||
def test_embedding_bag_device(self, device, dtypes):
|
||||
if IS_JETSON and torch.bfloat16 in dtypes and device == "cpu":
|
||||
self.skipTest("bfloat16 not supported with Jetson cpu")
|
||||
with set_default_dtype(torch.double):
|
||||
self._test_EmbeddingBag(device, 'sum', False, wdtype=dtypes[2], dtype=dtypes[0], odtype=dtypes[1])
|
||||
self._test_EmbeddingBag(device, 'mean', False, wdtype=dtypes[2], dtype=dtypes[0], odtype=dtypes[1])
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from torch import inf, nan
|
|||
import torch
|
||||
from torch.testing import make_tensor
|
||||
from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_UBSAN, set_default_dtype, \
|
||||
instantiate_parametrized_tests, slowTest, parametrize as parametrize_test, subtest, skipIfMps
|
||||
instantiate_parametrized_tests, slowTest, parametrize as parametrize_test, subtest, skipIfMps, gcIfJetson
|
||||
from torch.testing._internal.common_cuda import TEST_CUDA
|
||||
from torch.testing._internal.common_nn import NNTestCase, _test_bfloat16_ops, _test_module_empty_input
|
||||
from torch.testing._internal.common_device_type import largeTensorTest, onlyNativeDeviceTypes, dtypes, \
|
||||
|
|
@ -711,6 +711,7 @@ torch.cuda.synchronize()
|
|||
output = module(input)
|
||||
|
||||
@onlyNativeDeviceTypes
|
||||
@gcIfJetson
|
||||
@dtypes(torch.float, torch.double)
|
||||
@dtypesIfCUDA(torch.half, torch.float, torch.double)
|
||||
def test_avg_pool2d_nhwc(self, device, dtype):
|
||||
|
|
@ -798,6 +799,7 @@ torch.cuda.synchronize()
|
|||
check(tensor.transpose(1, 2), 3, 2, 1, 2, ceil_mode=True)
|
||||
|
||||
@onlyCUDA
|
||||
@gcIfJetson
|
||||
def test_max_pool2d(self, device):
|
||||
def helper(n, c, h, w, ks):
|
||||
x = torch.randn(n, c, h, w, device='cuda', dtype=torch.float, requires_grad=True)
|
||||
|
|
@ -821,6 +823,7 @@ torch.cuda.synchronize()
|
|||
@onlyNativeDeviceTypes
|
||||
@dtypes(torch.float, torch.double)
|
||||
@dtypesIfCUDA(torch.half, torch.float, torch.double)
|
||||
@gcIfJetson
|
||||
def test_max_pool2d_nhwc(self, device, dtype):
|
||||
def helper(n, c, h, w, kernel_size, stride=None):
|
||||
if stride is None:
|
||||
|
|
@ -857,6 +860,7 @@ torch.cuda.synchronize()
|
|||
@onlyNativeDeviceTypes
|
||||
@dtypes(torch.half, torch.float, torch.double)
|
||||
@onlyCUDA
|
||||
@gcIfJetson
|
||||
def test_max_pool3d_ndhwc(self, device, dtype):
|
||||
def helper(n, c, h, w, d, kernel_size, stride=None):
|
||||
batch = n
|
||||
|
|
@ -946,6 +950,7 @@ torch.cuda.synchronize()
|
|||
helper(1, 19, 20, 10, 8, 2, torch.channels_last)
|
||||
|
||||
@onlyCUDA
|
||||
@gcIfJetson
|
||||
def test_max_pool2d_indices(self, device):
|
||||
def helper(n, c, h, w, ks):
|
||||
if n is None:
|
||||
|
|
@ -1259,6 +1264,7 @@ torch.cuda.synchronize()
|
|||
@dtypesIfCUDA(torch.half, torch.float, torch.double)
|
||||
@dtypes(torch.float)
|
||||
@onlyNativeDeviceTypes # TODO: Fails on XLA
|
||||
@gcIfJetson
|
||||
def test_max_pool_nan_inf(self, device, dtype):
|
||||
for adaptive in ['', 'adaptive_']:
|
||||
for num_dim in [1, 2, 3]:
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ from torch.profiler._pattern_matcher import (
|
|||
from torch.testing._internal.common_cuda import TEST_MULTIGPU
|
||||
from torch.testing._internal.common_device_type import skipCUDAVersionIn
|
||||
from torch.testing._internal.common_utils import (
|
||||
IS_JETSON,
|
||||
IS_WINDOWS,
|
||||
instantiate_parametrized_tests,
|
||||
parametrize,
|
||||
|
|
@ -924,6 +925,7 @@ class TestProfiler(TestCase):
|
|||
]
|
||||
)
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Jetson has a guard against OOM since host and gpu memory are shared")
|
||||
def test_oom_tracing(self):
|
||||
def run_profiler(tensor_creation_fn):
|
||||
with _profile(profile_memory=True, record_shapes=True) as prof:
|
||||
|
|
@ -2685,6 +2687,7 @@ class TestExperimentalUtils(TestCase):
|
|||
0 [CPU (After GPU)]
|
||||
100000 [CPU (After GPU)]""")
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "JSON not behaving as expected on Jetson")
|
||||
def test_utils_get_optimizable_events(self):
|
||||
basic_evaluation = _utils.BasicEvaluation(self.load_mock_profile())
|
||||
optimizable_events = basic_evaluation.get_optimizable_events(
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ from torch.utils.checkpoint import checkpoint_sequential
|
|||
from torch.testing._internal.common_utils import TestCase, freeze_rng_state, run_tests, \
|
||||
NO_MULTIPROCESSING_SPAWN, skipIfRocm, load_tests, IS_REMOTE_GPU, IS_SANDCASTLE, IS_WINDOWS, \
|
||||
slowTest, skipCUDANonDefaultStreamIf, skipCUDAMemoryLeakCheckIf, TEST_WITH_ROCM, TEST_NUMPY, \
|
||||
get_cycles_per_ms, parametrize, instantiate_parametrized_tests, subtest
|
||||
get_cycles_per_ms, parametrize, instantiate_parametrized_tests, subtest, IS_JETSON, gcIfJetson
|
||||
from torch.testing._internal.autocast_test_lists import AutocastTestLists
|
||||
|
||||
# load_tests from common_utils is used to automatically filter tests for
|
||||
|
|
@ -390,7 +390,7 @@ class TestCuda(TestCase):
|
|||
self.assertTrue((tensor == 1).all())
|
||||
|
||||
|
||||
@unittest.skipIf(TEST_CUDAMALLOCASYNC, "Segmentation fault (core dumped)")
|
||||
@unittest.skipIf(TEST_CUDAMALLOCASYNC or IS_JETSON, "Segmentation fault (core dumped)")
|
||||
def test_out_of_memory_retry(self):
|
||||
torch.cuda.empty_cache()
|
||||
total_memory = torch.cuda.get_device_properties(0).total_memory
|
||||
|
|
@ -1746,6 +1746,10 @@ except RuntimeError as e:
|
|||
before_free_bytes, before_available_bytes = torch.cuda.mem_get_info(idx)
|
||||
# increasing to 8MB to force acquiring a new block and overcome blocksize differences across platforms
|
||||
t = torch.randn(1024 * 1024 * 8, device='cuda:' + str(idx))
|
||||
if IS_JETSON:
|
||||
# w/o syncing, mem_get_info will run before memory allocated has actually increased.
|
||||
# This race condition causes consistent failure
|
||||
torch.cuda.synchronize()
|
||||
after_free_bytes, after_available_bytes = torch.cuda.mem_get_info(idx)
|
||||
|
||||
self.assertTrue(after_free_bytes < before_free_bytes)
|
||||
|
|
@ -1769,9 +1773,18 @@ except RuntimeError as e:
|
|||
l.append(torch.randn(1024 * 1024 * 8, device=torch.device("cuda:0")))
|
||||
|
||||
no_leak()
|
||||
|
||||
with self.assertRaisesRegex(RuntimeError, r"CUDA driver API confirmed .+ on device 0.+"):
|
||||
leak_gpu0()
|
||||
regex = r"CUDA driver API confirmed .+ on device 0.+"
|
||||
if IS_JETSON:
|
||||
try:
|
||||
leak_gpu0()
|
||||
except RuntimeError as e:
|
||||
import re
|
||||
assert re.match(regex, str(e)), str(e) + "\n does not match: \n" + regex
|
||||
else:
|
||||
# assertRaisesRegex does not pass with Python for Jetson,
|
||||
# even though the RuntimeError matches regex using re.match
|
||||
with self.assertRaisesRegex(RuntimeError, regex):
|
||||
leak_gpu0()
|
||||
|
||||
if TEST_MULTIGPU:
|
||||
@self.wrap_with_cuda_memory_check
|
||||
|
|
@ -1800,6 +1813,7 @@ except RuntimeError as e:
|
|||
self.assertEqual(y[0, 0, 0, 2**30], expected)
|
||||
|
||||
@unittest.skipIf(not TEST_LARGE_TENSOR, "not enough memory")
|
||||
@gcIfJetson
|
||||
def test_cuda_kernel_loop_overflow_large(self):
|
||||
# Make sure input.numel() > INT_MAX is handled:
|
||||
x = torch.randn(1, 1, 1, 2**31, dtype=torch.float16, device="cuda")
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ with patch.dict(os.environ, {"PYTORCH_NVML_BASED_CUDA_CHECK": "1"}):
|
|||
# Before executing the desired tests, we need to disable CUDA initialization and fork_handler additions that would
|
||||
# otherwise be triggered by the `torch.testing._internal.common_utils` module import
|
||||
from torch.testing._internal.common_utils import (parametrize, instantiate_parametrized_tests, run_tests, TestCase,
|
||||
IS_WINDOWS)
|
||||
IS_WINDOWS, IS_JETSON)
|
||||
# NOTE: Because `remove_device_and_dtype_suffixes` initializes CUDA context (triggered via the import of
|
||||
# `torch.testing._internal.common_device_type` which imports `torch.testing._internal.common_cuda`) we need
|
||||
# to bypass that method here which should be irrelevant to the parameterized tests in this module.
|
||||
|
|
@ -48,6 +48,8 @@ class TestExtendedCUDAIsAvail(TestCase):
|
|||
@parametrize("nvml_avail", [True, False])
|
||||
@parametrize("avoid_init", ['1', '0', None])
|
||||
def test_cuda_is_available(self, avoid_init, nvml_avail):
|
||||
if IS_JETSON and nvml_avail and avoid_init == '1':
|
||||
self.skipTest('Not working for Jetson')
|
||||
patch_env = {"PYTORCH_NVML_BASED_CUDA_CHECK": avoid_init} if avoid_init else {}
|
||||
with patch.dict(os.environ, **patch_env):
|
||||
if nvml_avail:
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ from torch.utils.data._utils import MP_STATUS_CHECK_INTERVAL
|
|||
from torch.utils.data.dataset import random_split
|
||||
from torch.utils.data.datapipes.iter import IterableWrapper
|
||||
from torch._utils import ExceptionWrapper
|
||||
from torch.testing._internal.common_utils import (TestCase, run_tests, TEST_NUMPY, IS_WINDOWS,
|
||||
from torch.testing._internal.common_utils import (TestCase, run_tests, TEST_NUMPY, IS_WINDOWS, IS_JETSON,
|
||||
IS_CI, NO_MULTIPROCESSING_SPAWN, skipIfRocm, slowTest,
|
||||
load_tests, TEST_WITH_ASAN, TEST_WITH_TSAN, IS_SANDCASTLE,
|
||||
IS_MACOS)
|
||||
|
|
@ -78,11 +78,6 @@ load_tests = load_tests
|
|||
# as well during the execution of this test suite, and it will cause
|
||||
# CUDA OOM error on Windows.
|
||||
TEST_CUDA = torch.cuda.is_available()
|
||||
if TEST_CUDA:
|
||||
dev_name = torch.cuda.get_device_name(torch.cuda.current_device()).lower()
|
||||
IS_JETSON = 'xavier' in dev_name or 'nano' in dev_name or 'jetson' in dev_name or 'tegra' in dev_name
|
||||
else:
|
||||
IS_JETSON = False
|
||||
|
||||
if not NO_MULTIPROCESSING_SPAWN:
|
||||
# We want to use `spawn` if able because some of our tests check that the
|
||||
|
|
@ -1111,6 +1106,7 @@ except RuntimeError as e:
|
|||
self.assertTrue(input.is_pinned())
|
||||
self.assertTrue(target.is_pinned())
|
||||
|
||||
@unittest.skipIf(IS_JETSON, "Not working on Jetson")
|
||||
def test_multiple_dataloaders(self):
|
||||
for multiprocessing_context in supported_multiprocessing_contexts:
|
||||
loader1_it = iter(self._get_data_loader(self.dataset, num_workers=1))
|
||||
|
|
@ -1435,6 +1431,7 @@ except RuntimeError as e:
|
|||
list(iter(ChainDataset([dataset1, self.dataset])))
|
||||
|
||||
@unittest.skipIf(IS_MACOS, "Not working on macos")
|
||||
@unittest.skipIf(IS_MACOS or IS_JETSON, "Not working on macos or Jetson")
|
||||
@skipIfRocm # https://github.com/pytorch/pytorch/issues/90940
|
||||
def test_multiprocessing_contexts(self):
|
||||
reference = [
|
||||
|
|
@ -1460,6 +1457,7 @@ except RuntimeError as e:
|
|||
reference, list(self._get_data_loader(ds_cls(counting_ds_n), multiprocessing_context=ctx, **dl_common_args)))
|
||||
|
||||
@skipIfNoNumpy
|
||||
@unittest.skipIf(IS_JETSON, "Not working on Jetson")
|
||||
def test_multiprocessing_iterdatapipe(self):
|
||||
# Testing to make sure that function from global scope (e.g. imported from library) can be serialized
|
||||
# and used with multiprocess DataLoader
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
import torch
|
||||
from torch.testing import make_tensor
|
||||
from torch.testing._internal.common_utils import TestCase, run_tests
|
||||
from torch.testing._internal.common_utils import TestCase, run_tests, IS_JETSON
|
||||
from torch.testing._internal.common_device_type import (
|
||||
instantiate_device_type_tests, onlyCUDA, dtypes, skipMeta,
|
||||
onlyNativeDeviceTypes)
|
||||
|
|
@ -52,6 +52,10 @@ class TestTorchDlPack(TestCase):
|
|||
# (hence data dependency) at the exchange boundary.
|
||||
# DLPack manages this synchronization for us, so we don't need to
|
||||
# explicitly wait until x is populated
|
||||
if IS_JETSON:
|
||||
# DLPack protocol that establishes correct stream order
|
||||
# does not behave as expected on Jetson
|
||||
stream.synchronize()
|
||||
stream = torch.cuda.Stream()
|
||||
with torch.cuda.stream(stream):
|
||||
z = from_dlpack(x)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from torch.testing._internal.common_device_type import (
|
|||
|
||||
from torch.testing._internal.common_utils import (
|
||||
IS_ARM64,
|
||||
IS_JETSON,
|
||||
parametrize,
|
||||
run_tests,
|
||||
TEST_WITH_ROCM,
|
||||
|
|
@ -114,6 +115,7 @@ class TestMatmulCuda(TestCase):
|
|||
|
||||
@onlyCUDA
|
||||
@unittest.skipIf(TEST_WITH_ROCM, "Only CUDA 11+ is supported")
|
||||
@unittest.skipIf(IS_JETSON, "Too large for Jetson")
|
||||
@toleranceOverride({torch.float32: xtol(atol=1e-5, rtol=1e-5)})
|
||||
@dtypes(*([torch.float32, torch.float16] +
|
||||
[torch.bfloat16] if TEST_WITH_ROCM or SM53OrLater else []))
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ from torch.testing._internal.common_utils import freeze_rng_state, run_tests, Te
|
|||
download_file, get_function_arglist, load_tests, skipIfMps,\
|
||||
TEST_WITH_UBSAN, IS_PPC, \
|
||||
parametrize as parametrize_test, subtest, instantiate_parametrized_tests, \
|
||||
skipIfTorchDynamo, IS_WINDOWS
|
||||
skipIfTorchDynamo, IS_WINDOWS, gcIfJetson
|
||||
from torch.testing._internal.common_cuda import TEST_CUDA, TEST_MULTIGPU, TEST_CUDNN, TEST_CUDNN_VERSION
|
||||
from torch.testing._internal.common_nn import NNTestCase, NewModuleTest, CriterionTest, \
|
||||
module_tests, criterion_tests, loss_reference_fns, _create_basic_net, \
|
||||
|
|
@ -9625,6 +9625,7 @@ class TestNNDeviceType(NNTestCase):
|
|||
)
|
||||
|
||||
@onlyCUDA
|
||||
@gcIfJetson
|
||||
def test_masked_softmax_devices_parity(self):
|
||||
# Test that softmax with mask type 0 (LxL attention mask), mask type 1 (BxL padding mask),
|
||||
# and mask type 2 (BxHxLxL generic mask) gives the same result on CPU and on CUDA.
|
||||
|
|
@ -10220,6 +10221,7 @@ class TestNNDeviceType(NNTestCase):
|
|||
self.assertEqual(out_ref, out)
|
||||
|
||||
@onlyCUDA
|
||||
@gcIfJetson
|
||||
def test_upsamplingNearest3d_launch_config(self, device):
|
||||
m = nn.Upsample(scale_factor=2)
|
||||
inp = torch.rand(2**25, 1, 1, 1, 1, device=device)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Owner(s): ["module: autograd"]
|
||||
|
||||
from torch.testing._internal.common_utils import TestCase, run_tests, IS_WINDOWS
|
||||
from torch.testing._internal.common_utils import TestCase, run_tests, IS_JETSON, IS_WINDOWS
|
||||
import pkgutil
|
||||
import torch
|
||||
import sys
|
||||
|
|
@ -271,7 +271,7 @@ class TestPublicBindings(TestCase):
|
|||
self.assertTrue(torch_C_bindings.issubset(torch_C_allowlist_superset), msg)
|
||||
|
||||
# AttributeError: module 'torch.distributed' has no attribute '_shard'
|
||||
@unittest.skipIf(IS_WINDOWS, "Distributed Attribute Error")
|
||||
@unittest.skipIf(IS_WINDOWS or IS_JETSON, "Distributed Attribute Error")
|
||||
def test_correct_module_names(self):
|
||||
'''
|
||||
An API is considered public, if its `__module__` starts with `torch.`
|
||||
|
|
|
|||
|
|
@ -7,11 +7,12 @@ from itertools import product, combinations, permutations, chain
|
|||
from functools import partial
|
||||
import random
|
||||
import warnings
|
||||
import unittest
|
||||
|
||||
from torch import nan
|
||||
from torch.testing import make_tensor
|
||||
from torch.testing._internal.common_utils import (
|
||||
TestCase, run_tests, skipIfTorchDynamo, torch_to_numpy_dtype_dict)
|
||||
TestCase, run_tests, skipIfTorchDynamo, torch_to_numpy_dtype_dict, IS_JETSON)
|
||||
from torch.testing._internal.common_device_type import (
|
||||
instantiate_device_type_tests, onlyCPU, onlyCUDA, dtypes, onlyNativeDeviceTypes,
|
||||
dtypesIfCUDA, largeTensorTest)
|
||||
|
|
@ -505,6 +506,7 @@ class TestShapeOps(TestCase):
|
|||
@onlyCUDA # CPU is too slow
|
||||
@largeTensorTest('17GB') # 4 tensors of 4GB (in, out) x (torch, numpy) + 1GB
|
||||
@largeTensorTest("81GB", "cpu") # even for CUDA test, sufficient system memory is required
|
||||
@unittest.skipIf(IS_JETSON, "Too large for Jetson")
|
||||
def test_flip_large_tensor(self, device):
|
||||
t_in = torch.empty(2**32 + 1, dtype=torch.uint8).random_()
|
||||
torch_fn = partial(torch.flip, dims=(0,))
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from torch.testing import make_tensor
|
|||
from torch.testing._internal.common_utils import (
|
||||
TestCase, run_tests, do_test_empty_full, TEST_WITH_ROCM, suppress_warnings,
|
||||
torch_to_numpy_dtype_dict, numpy_to_torch_dtype_dict, slowTest,
|
||||
TEST_SCIPY, IS_MACOS, IS_PPC, IS_WINDOWS, parametrize, skipIfTorchDynamo)
|
||||
TEST_SCIPY, IS_MACOS, IS_PPC, IS_JETSON, IS_WINDOWS, parametrize, skipIfTorchDynamo)
|
||||
from torch.testing._internal.common_device_type import (
|
||||
expectedFailureMeta, instantiate_device_type_tests, deviceCountAtLeast, onlyNativeDeviceTypes,
|
||||
onlyCPU, largeTensorTest, precisionOverride, dtypes,
|
||||
|
|
@ -953,8 +953,9 @@ class TestTensorCreation(TestCase):
|
|||
# errors with UBSAN. These casts are deliberate in PyTorch, however, and
|
||||
# NumPy has the same behavior.
|
||||
@onlyNativeDeviceTypes
|
||||
@unittest.skipIf(IS_MACOS, "Test is broken on MacOS, see https://github.com/pytorch/pytorch/issues/38752")
|
||||
@unittest.skipIf(IS_PPC, "Test is borken on PowerPC, see https://github.com/pytorch/pytorch/issues/39671")
|
||||
@unittest.skipIf(IS_MACOS or IS_JETSON, "Test is broken on MacOS and Jetson, \
|
||||
see https://github.com/pytorch/pytorch/issues/38752")
|
||||
@unittest.skipIf(IS_PPC, "Test is broken on PowerPC, see https://github.com/pytorch/pytorch/issues/39671")
|
||||
@dtypes(torch.bool, torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64)
|
||||
def test_float_to_int_conversion_finite(self, device, dtype):
|
||||
min = torch.finfo(torch.float).min
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ import torch
|
|||
|
||||
from torch.testing import make_tensor
|
||||
from torch.testing._internal.common_utils import \
|
||||
(IS_FBCODE, IS_MACOS, IS_SANDCASTLE, IS_WINDOWS, TestCase, run_tests, skipIfRocm, slowTest,
|
||||
(IS_FBCODE, IS_JETSON, IS_MACOS, IS_SANDCASTLE, IS_WINDOWS, TestCase, run_tests, skipIfRocm, slowTest,
|
||||
parametrize, subtest, instantiate_parametrized_tests, dtype_name, TEST_WITH_ROCM)
|
||||
from torch.testing._internal.common_device_type import \
|
||||
(PYTORCH_TESTING_DEVICE_EXCEPT_FOR_KEY, PYTORCH_TESTING_DEVICE_ONLY_FOR_KEY, dtypes,
|
||||
|
|
@ -1992,9 +1992,9 @@ class TestImports(TestCase):
|
|||
# See https://github.com/pytorch/pytorch/issues/77801
|
||||
if not sys.version_info >= (3, 9):
|
||||
ignored_modules.append("torch.utils.benchmark")
|
||||
if IS_WINDOWS or IS_MACOS:
|
||||
if IS_WINDOWS or IS_MACOS or IS_JETSON:
|
||||
# Distributed should be importable on Windows(except nn.api.), but not on Mac
|
||||
if IS_MACOS:
|
||||
if IS_MACOS or IS_JETSON:
|
||||
ignored_modules.append("torch.distributed.")
|
||||
else:
|
||||
ignored_modules.append("torch.distributed.nn.api.")
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ from functools import partial
|
|||
from torch import multiprocessing as mp
|
||||
from torch.testing import make_tensor
|
||||
from torch.testing._internal.common_utils import (
|
||||
TEST_WITH_TORCHINDUCTOR, TestCase, TEST_WITH_ROCM, run_tests,
|
||||
TEST_WITH_TORCHINDUCTOR, TestCase, TEST_WITH_ROCM, run_tests, IS_JETSON,
|
||||
IS_WINDOWS, IS_FILESYSTEM_UTF8_ENCODING, NO_MULTIPROCESSING_SPAWN,
|
||||
IS_SANDCASTLE, IS_FBCODE, IS_REMOTE_GPU, load_tests, skipIfTorchInductor, slowTest,
|
||||
TEST_WITH_CROSSREF, skipIfTorchDynamo,
|
||||
|
|
@ -2781,6 +2781,7 @@ else:
|
|||
torch.testing.assert_close(expected, actual)
|
||||
|
||||
@unittest.skipIf(IS_FBCODE and IS_REMOTE_GPU, "sandcastle OOM with current tpx gpu/re configuration")
|
||||
@unittest.skipIf(IS_JETSON, "psutil issue for largeTensorTest. Too large for Jetson.")
|
||||
@onlyCUDA
|
||||
@dtypes(torch.half) # only small dtype not to get oom
|
||||
@largeTensorTest('25GB', device='cpu')
|
||||
|
|
@ -2797,6 +2798,7 @@ else:
|
|||
@dtypes(torch.half) # only small dtype not to get oom
|
||||
@largeTensorTest('25GB', device='cpu')
|
||||
@largeTensorTest('4GB', device='cuda')
|
||||
@unittest.skipIf(IS_JETSON, "psutil issue for largeTensorTest. Too large for Jetson.")
|
||||
def test_large_cumprod(self, device, dtype):
|
||||
# initialization to avoid overflow and half caveats
|
||||
x = torch.empty(2**30 + 200, device=device, dtype=dtype)
|
||||
|
|
|
|||
|
|
@ -124,6 +124,18 @@ if os.getenv("DISABLED_TESTS_FILE", ""):
|
|||
|
||||
NATIVE_DEVICES = ('cpu', 'cuda', 'meta')
|
||||
|
||||
# Substrings looked for in the platform string to decide whether the tests
# are running on a Jetson board.
check_names = ['orin', 'concord', 'galen', 'xavier', 'nano', 'jetson', 'tegra']
IS_JETSON = any(name in platform.platform() for name in check_names)


def gcIfJetson(fn):
    """Decorate ``fn`` so memory is reclaimed before each call on Jetson.

    When ``IS_JETSON`` is true, the wrapper runs ``gc.collect()`` and
    ``torch.cuda.empty_cache()`` before delegating to ``fn``. On every other
    platform the wrapper is a plain pass-through.

    Args:
        fn: The callable (typically a test method) to wrap.

    Returns:
        A wrapper with ``fn``'s metadata (via ``functools.wraps``) that
        forwards all positional and keyword arguments to ``fn`` and returns
        whatever ``fn`` returns.
    """
    # Irregular Jetson host/device memory setup requires cleanup to avoid tests being killed
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        if IS_JETSON:
            gc.collect()
            torch.cuda.empty_cache()
        # Propagate the result instead of dropping it, so decorating a
        # callable never changes what its callers receive.
        return fn(*args, **kwargs)
    return wrapper
|
||||
|
||||
class _TestParametrizer:
|
||||
"""
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user