[BE][2/6] fix typos in test/ (test/test_*.py) (#157636)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/157636
Approved by: https://github.com/yewentao256, https://github.com/mlazos
ghstack dependencies: #156311, #156609
parent ffe11b2bf2
commit fc0376e8b1
@@ -1167,7 +1167,6 @@ exclude_patterns = [
 'aten/src/ATen/native/[a-pA-P]*/**',
 'aten/src/ATen/[a-mA-M]*/**',
 'test/**',
-'test/test_*',
 'test/[a-hA-h]*/**',
 'test/distributed/**',
 'torch/_*/**',
@@ -4129,7 +4129,7 @@ class TestAutograd(TestCase):
 self.assertIsNone(y.grad_fn)

 def test_backward_copy(self):
-# This tests checks backward engine for a very subtle bug that appreared
+# This tests checks backward engine for a very subtle bug that appeared
 # in one of the initial versions of autograd. Gradients tensors were
 # simply stored in lists while the function waited for all its gradients
 # to be computed. However, sometimes an output was used multiple times,
@@ -4312,7 +4312,7 @@ class TestAutograd(TestCase):
 ctx.output_var.sum().backward()
 return ctx.x.grad * grad_output

-# Reentrant starts on CPU thread, finishs on GPU thread
+# Reentrant starts on CPU thread, finishes on GPU thread
 x = torch.randn(2, 2, requires_grad=True)
 out = Reenter.apply(x)
 out.sum().backward()
@@ -10728,7 +10728,7 @@ class TestAutogradForwardMode(TestCase):
 dual = fwAD.make_dual(foo, tangent)
 self.assertFalse(tangent_ref.expired())

-# Make sure that the tangent we provided has been re-used as is
+# Make sure that the tangent we provided has been reused as is
 self.assertTrue(fwAD.unpack_dual(dual)[1] is tangent)

 # Make sure that dual is keeping the tangent alive
@@ -11087,7 +11087,7 @@ class TestAutogradForwardMode(TestCase):
 self.assertEqual(
 dual_tangent.storage().data_ptr(), bar.storage().data_ptr()
 )
-# And the tangent is actually re-used as-is so it is still the same Tensor
+# And the tangent is actually reused as-is so it is still the same Tensor
 self.assertIs(dual_tangent, bar)

 # Ensure we properly share the version counter
@@ -11969,19 +11969,19 @@ class TestAutogradDeviceType(TestCase):
 (new_param**2).sum().backward()
 return grad_output

-# Reentrant starts on GPU thread, finishs on GPU thread
+# Reentrant starts on GPU thread, finishes on GPU thread
 x = torch.randn(2, 2, device=device, requires_grad=True)
 out = ReentrantFunc.apply(x)
 out.sum().backward()

-# Reentrant starts on CPU thread, finishs on GPU thread
+# Reentrant starts on CPU thread, finishes on GPU thread
 x = torch.randn(2, 2, requires_grad=True)
 # set ReentrantFunc node to GPU to emit tasks to GPU queue
 ReentrantFunc._cpu_mode = False
 out = ReentrantFunc.apply(x)
 out.sum().backward()

-# Reentrant starts on GPU thread, finishs on CPU thread
+# Reentrant starts on GPU thread, finishes on CPU thread
 x = torch.randn(2, 2, device=device, requires_grad=True)
 # set ReentrantFunc node to CPU to emit tasks to CPU queue
 ReentrantFunc._cpu_mode = True
@@ -13665,7 +13665,7 @@ class TestMultithreadAutograd(TestCase):
 y = x * x
 if torch.cuda.device_count() >= 2:
 # DataParallel is calling the forward in different threads
-# without progating TLS, so hooks should not be called here
+# without propagating TLS, so hooks should not be called here
 _self.assertEqual(len(w), 0)
 else:
 # DataParallel only uses one thread
@@ -79,7 +79,7 @@ if TEST_SCIPY:
 class TestBinaryUfuncs(TestCase):
 # Generic tests for elementwise binary (AKA binary universal (u) functions (funcs))
 # TODO: below contiguous tensor results are compared with a variety of noncontiguous results.
-# It would be interesting to have the lhs and rhs have different discontiguities.
+# It would be interesting to have the lhs and rhs have different discontinuities.

 # Helper for comparing torch tensors and NumPy arrays
 # TODO: should this or assertEqual also validate that strides are equal?
@@ -2521,7 +2521,7 @@ class TestBinaryUfuncs(TestCase):
 # Verify Value
 self.assertEqual(torch_result, expected)
 # Verify Sign
-# Use double copysign to verify the correctnes of 0.0 and -0.0, since
+# Use double copysign to verify the correctness of 0.0 and -0.0, since
 # it always True for self.assertEqual(0.0 == -0.0). So, we use 1 as the
 # magnitude to verify the sign between torch and numpy results, elementwise.
 # Special case: NaN conversions between FP32 and FP16 is not bitwise
@@ -1031,7 +1031,7 @@ class TestCppExtensionJIT(common.TestCase):
 t = torch.rand(2).double()
 cpp_tensor_name = r"CPUDoubleType"

-# Without error handling, the warnings cannot be catched
+# Without error handling, the warnings cannot be caught
 warn_mod = torch.utils.cpp_extension.load_inline(
 name="warn_mod",
 cpp_sources=[source],
@@ -1065,23 +1065,23 @@ class TestCppExtensionJIT(common.TestCase):
 )

 with warnings.catch_warnings(record=True) as w:
-# Catched with no error should be detected
+# Caught with no error should be detected
 warn_mod.foo(t, 0)
 self.assertEqual(len(w), 1)

-# Catched with cpp error should also be detected
+# Caught with cpp error should also be detected
 with self.assertRaisesRegex(TypeError, t.type()):
 warn_mod.foo(t, 1)
 self.assertEqual(len(w), 2)

-# Catched with python error should also be detected
+# Caught with python error should also be detected
 with self.assertRaisesRegex(
 SystemError, "bad argument to internal function"
 ):
 warn_mod.foo(t, 2)
 self.assertEqual(len(w), 3)

-# Catched with pybind error should also be detected
+# Caught with pybind error should also be detected
 # Note that there is no type name translation for pybind errors
 with self.assertRaisesRegex(KeyError, cpp_tensor_name):
 warn_mod.foo(t, 3)
@@ -795,7 +795,7 @@ print(t.is_pinned())
 os.environ["TORCH_ALLOW_TF32_CUBLAS_OVERRIDE"]
 )
 # this is really just checking that the environment variable is respected during testing
-# and not overwritten by another function that doesn't revert it to the intitial value
+# and not overwritten by another function that doesn't revert it to the initial value
 if not skip_tf32_cublas:
 self.assertFalse(torch.backends.cuda.matmul.allow_tf32)
 self.assertEqual(torch.get_float32_matmul_precision(), "highest")
@@ -1143,7 +1143,7 @@ print(t.is_pinned())
 tmp2 = torch.cuda.FloatTensor(t.size())
 tmp2.zero_()
 self.assertNotEqual(
-tmp2.data_ptr(), ptr[0], msg="allocation re-used to soon"
+tmp2.data_ptr(), ptr[0], msg="allocation reused to soon"
 )

 self.assertEqual(result.tolist(), [1, 2, 3, 4])
@@ -1154,7 +1154,7 @@ print(t.is_pinned())
 torch.cuda.current_stream().synchronize()
 with torch.cuda.stream(stream):
 tmp3 = torch.cuda.FloatTensor(t.size())
-self.assertEqual(tmp3.data_ptr(), ptr[0], msg="allocation not re-used")
+self.assertEqual(tmp3.data_ptr(), ptr[0], msg="allocation not reused")

 def test_record_stream_on_shifted_view(self):
 # See issue #27366
@@ -1235,20 +1235,20 @@ print(t.is_pinned())
 def test_caching_pinned_memory(self):
 cycles_per_ms = get_cycles_per_ms()

-# check that allocations are re-used after deletion
+# check that allocations are reused after deletion
 t = torch.FloatTensor([1]).pin_memory()
 ptr = t.data_ptr()
 del t
 t = torch.FloatTensor([1]).pin_memory()
 self.assertEqual(t.data_ptr(), ptr, msg="allocation not reused")

-# check that the allocation is not re-used if it's in-use by a copy
+# check that the allocation is not reused if it's in-use by a copy
 gpu_tensor = torch.cuda.FloatTensor([0])
 torch.cuda._sleep(int(1000 * cycles_per_ms)) # delay the copy by 1s
 gpu_tensor.copy_(t, non_blocking=True)
 del t
 t = torch.FloatTensor([1]).pin_memory()
-self.assertNotEqual(t.data_ptr(), ptr, msg="allocation re-used too soon")
+self.assertNotEqual(t.data_ptr(), ptr, msg="allocation reused too soon")
 self.assertEqual(list(gpu_tensor), [1])

 def test_caching_allocator_record_stream_oom(self):
@@ -1263,7 +1263,7 @@ print(t.is_pinned())
 x = torch.empty(40 * 1024 * 1024, device="cuda")
 with torch.cuda.stream(stream):
 y += x
-# delays re-use of `x` until after all operations in `stream`
+# delays reuse of `x` until after all operations in `stream`
 x.record_stream(stream)
 del x

@@ -2970,7 +2970,7 @@ exit(2)
 current = postcapture_stats[stat] - precapture_stats[stat]

 # There will only ever be one expandable segment in each of the small and large pools. The way the
-# bookeeping is done in the allocator means that we never increment the number of segments.
+# bookkeeping is done in the allocator means that we never increment the number of segments.
 if self.expandable_segments and "segment" in stat:
 expected = 0
 # These two cases hit an edge case where the PyTorch allocator won't immediately unmap part of an
@@ -3011,7 +3011,7 @@ exit(2)
 current = postdel_stats[stat] - precapture_stats[stat]

 # There will only ever be one expandable segment in each of the small and large pools. The way the
-# bookeeping is done in the allocator means that we never increment the number of segments.
+# bookkeeping is done in the allocator means that we never increment the number of segments.
 if self.expandable_segments and "segment" in stat:
 expected = 0
 # These two cases hit an edge case where the PyTorch allocator won't immediately unmap part of an
@@ -3648,7 +3648,7 @@ exit(2)
 graph.replay()
 self.assertTrue(torch.all(x == 3.0))

-# Check that graph capture can succeed after reseting.
+# Check that graph capture can succeed after resetting.
 graph.reset()

 # Don't do x[:] = 0.0 because we want to capture a new address
@@ -5382,7 +5382,7 @@ class TestMemPool(TestCase):
 out_2 = torch.randn(nelem_1mb, device="cuda")

 # pool now should have 2 segments since the CUDACachingAllocator had
-# to make a new 2 MB buffer to accomodate out_2
+# to make a new 2 MB buffer to accommodate out_2
 self.assertEqual(len(pool.snapshot()), 2)

 self.assertEqual(len(pool.snapshot()), 2)
@@ -967,7 +967,7 @@ class TestCudaMultiGPU(TestCase):

 @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
 def test_caching_pinned_memory_multi_gpu(self):
-# checks that the events preventing pinned memory from being re-used
+# checks that the events preventing pinned memory from being reused
 # too early are recorded on the correct GPU
 cycles_per_ms = get_cycles_per_ms()

@@ -982,7 +982,7 @@ class TestCudaMultiGPU(TestCase):

 del t
 t = torch.FloatTensor([2]).pin_memory()
-self.assertNotEqual(t.data_ptr(), ptr, msg="allocation re-used too soon")
+self.assertNotEqual(t.data_ptr(), ptr, msg="allocation reused too soon")

 with torch.cuda.device(0):
 gpu_tensor0.copy_(t, non_blocking=True)
@@ -138,7 +138,7 @@ class TestVisibleDeviceParses(TestCase):
 _transform_uuid_to_ordinals(["GPU-9e8d35e3", "GPU-123", "GPU-47"], uuids),
 [1],
 )
-# First ambigous UUID aborts parsing
+# First ambiguous UUID aborts parsing
 self.assertEqual(
 _transform_uuid_to_ordinals(["GPU-9e8d35e3", "GPU-e", "GPU-47"], uuids), [1]
 )
@@ -42,7 +42,7 @@ class TestCudaPrimaryCtx(TestCase):
 self.assertFalse(torch._C._cuda_hasPrimaryContext(0))
 torch.cuda.set_device(0)
 if _get_torch_cuda_version() >= (12, 0):
-# Now after the device was set, the contex should present in CUDA 12.
+# Now after the device was set, the context should present in CUDA 12.
 self.assertTrue(torch._C._cuda_hasPrimaryContext(0))
 else:
 # In CUDA 11 the context should not be created.
@@ -630,7 +630,7 @@ def _(x):
 g(x)

 def test_invalid_schemas(self):
-# function schmea validation goes through torchgen, so this is just a
+# function schema validation goes through torchgen, so this is just a
 # basic test.
 with self.assertRaisesRegex(AssertionError, "Invalid function schema: foo"):
 custom_ops.custom_op(f"{TestCustomOp.test_ns}::foo", "(")
@@ -2712,7 +2712,7 @@ class TestCustomOpAPI(TestCase):
 self.assertEqual(ctx.needs_input_grad, expected)
 return list(grad.unbind(0))

-# call two applys, do a backward on the first
+# call two applies, do a backward on the first
 def t():
 return torch.randn([], requires_grad=True)

@@ -734,12 +734,12 @@ class SleepDataset(Dataset):
 def __init__(self, size, sleep_sec):
 self.size = size
 self.sleep_sec = sleep_sec
-self.sleeped = False
+self.slept = False

 def __getitem__(self, idx):
-if not self.sleeped:
+if not self.slept:
 time.sleep(self.sleep_sec)
-self.sleeped = True
+self.slept = True
 return idx

 def __len__(self):
@@ -573,7 +573,7 @@ class TestCaptureDataFrame(TestCase):

 class TestDataFramesPipes(TestCase):
 """
-Most of test will fail if pandas instaled, but no dill available.
+Most of test will fail if pandas installed, but no dill available.
 Need to rework them to avoid multiple skips.
 """

@@ -1887,7 +1887,7 @@ class TestFunctionalIterDataPipe(TestCase):
 with self.assertRaises(ValueError):
 list(filter_dp)

-# Funtional Test: Specify input_col
+# Functional Test: Specify input_col
 tuple_input_ds = dp.iter.IterableWrapper([(d - 1, d, d + 1) for d in range(10)])

 # Single input_col
@@ -3356,7 +3356,7 @@ class TestSharding(TestCase):
 with self.assertRaises(Exception):
 dp.apply_sharding(2, 1, sharding_group=SHARDING_PRIORITIES.DEFAULT)

-# Test tud.datapipes.iter.grouping.SHARDING_PRIORITIES for backward compatbility
+# Test tud.datapipes.iter.grouping.SHARDING_PRIORITIES for backward compatibility
 # TODO: Remove this test once tud.datapipes.iter.grouping.SHARDING_PRIORITIES is deprecated
 def test_sharding_groups_in_legacy_grouping_package(self):
 with self.assertWarnsRegex(
@@ -854,7 +854,7 @@ def forward(self, scores_1, mask_1, value_1):
 # de-functionalise the graph, as that would break AoTAutograd
 # We run the real function *after* the decomposition to make sure that the
 # decomposition does not modify any of the inputs in-place. If it does
-# real_out should be differen than decom_out so we should catch this
+# real_out should be different than decom_out so we should catch this
 real_out_unflat = func(*args, **kwargs)
 real_out = pytree.tree_leaves(real_out_unflat)

@@ -3286,7 +3286,7 @@ def forward(self, arg0_1: "i64[1][1]cpu", arg1_1: "Sym(u1)", arg2_1: "i64[u1][1]
 def test_unbacked_reshape2(self):
 cnt = CompileCounterWithBackend("inductor")

-# This reshape requires a clone when the input is not contiguous and we cant compute strides.
+# This reshape requires a clone when the input is not contiguous and we can't compute strides.
 # reshape (u2, u3) -> (u0, u1)
 def func(x, y):
 u0, u1 = y.tolist()
@@ -3421,7 +3421,7 @@ def forward(self, arg0_1: "i64[2][1]cpu", arg1_1: "Sym(u2)", arg2_1: "Sym(u3)",
 def test_invalid_view_unbacked_view(self):
 cnt = CompileCounterWithBackend("inductor")

-# This view (u2, u3) -> (u0, u1) cant happen in general unless we know that input is contigous or we have
+# This view (u2, u3) -> (u0, u1) can't happen in general unless we know that input is contiguous or we have
 # hints to to compute strides.
 def func(x, y):
 u0, u1 = y.tolist()
@@ -3452,7 +3452,7 @@ def forward(self, arg0_1: "i64[2][1]cpu", arg1_1: "Sym(u2)", arg2_1: "Sym(u3)",

 func(torch.ones(5, 6, 7, 8))
 self.assertEqual(cnt.frame_count, 1)
-# it can be dynamic in all dimentions except dim=2
+# it can be dynamic in all dimensions except dim=2
 func(torch.ones(4, 9, 7, 10))
 self.assertEqual(cnt.frame_count, 1)

@@ -97,7 +97,7 @@ class FakeTensorTest(TestCase):

 @unittest.skipIf(not RUN_CUDA, "requires cuda")
 def test_cuda_initialized(self):
-# doesnt error
+# doesn't error
 with FakeTensorMode():
 p = torch.randn(4, 2, requires_grad=True, device="cuda")
 x = torch.randn(8, 4, device="cuda")
@@ -1471,7 +1471,7 @@ class FakeTensorOperatorInvariants(TestCase):
 with torch._subclasses.CrossRefFakeMode():
 Repro()(*args)
 except MetadataMismatchError as e:
-# We expect the cross ref to succed for the first output to fail
+# We expect the cross ref to succeed for the first output to fail
 # for the rng state, see Note [Seed and Offset]
 self.assertTrue("output[0]" not in str(e))
 if self.__class__.__name__.startswith("PropagateRealTensors"):
@@ -2327,7 +2327,7 @@ class FakeTensorDispatchCache(TestCase):
 self.assertEqual(len(backend.fw_graphs), 1)
 mod = backend.fw_graphs[0]

-# Ensure that we see hits everytime
+# Ensure that we see hits every time
 with FakeTensorMode():
 x = torch.randn(6, 4)
 y = torch.randn(6, 4)
@@ -199,7 +199,7 @@ class TestFunctionalization(TestCase):
 y.set_(x.storage())
 return y

-# We should probaby get the crossref test to work,
+# We should probably get the crossref test to work,
 # but fixing it for Storage() objects is annoying.
 r = _functionalize(f, reapply_views=True, crossref=False)(torch.ones(2))
 self.assertEqual(str(r.device), "cpu")
@@ -2318,7 +2318,7 @@ def forward(self, arg0_1):
 ]
 )
 @unittest.skipIf(
-TEST_WITH_TORCHDYNAMO, "dynamo-ing code with proxy + fake doesnt work well"
+TEST_WITH_TORCHDYNAMO, "dynamo-ing code with proxy + fake doesn't work well"
 )
 class TestCrossRefFunctionalization(TestFunctionalization):
 crossref = True
@@ -302,7 +302,7 @@ class TestFunctionalizationRngOps(TestCase):
 fwd_compiler = functools.partial(count_philox_rand, freq=1)
 bwd_compiler = functools.partial(count_philox_rand, freq=0)
 aot_fn = aot_function(fn, fwd_compiler, bwd_compiler)
-# We cant check accuracy here because rand_like generated different rand numbers than dropout
+# We can't check accuracy here because rand_like generated different rand numbers than dropout
 res = aot_fn(x, y)
 res.sum().backward()

@@ -316,7 +316,7 @@ class TestFunctionalizationRngOps(TestCase):

 # Ensure the decomp is happening
 aot_fn = aot_function(fn, functools.partial(count_philox_rand, freq=1))
-# We cant check accuracy here because rand_like generated different rand numbers than dropout
+# We can't check accuracy here because rand_like generated different rand numbers than dropout
 aot_fn(x)

@@ -908,7 +908,7 @@ class TestFX(JitTestCase):
 wrapper = WrapperModule(interpreter)

 # Create a graph that: 1) Takes function arguments 2) Invokes the interpreter
-# 3) Returns the speficied return value
+# 3) Returns the specified return value

 # FIXME: The following code could be greatly simplified by symbolic_trace'ing
 # the wrapper with a Tracer that considers the Wrapper instance a root
@@ -2225,8 +2225,8 @@ class TestFX(JitTestCase):
 foo_scripted = torch.jit.script(Foo())
 foo_scripted(Pair(torch.rand(5), torch.rand(5)), torch.rand(5), 3)

-fxed = symbolic_trace(Foo())
-fxed_scripted = torch.jit.script(fxed)
+fixed = symbolic_trace(Foo())
+fxed_scripted = torch.jit.script(fixed)
 fxed_scripted(Pair(torch.rand(5), torch.rand(5)), torch.rand(5), 3)

 def test_fn_type_annotation_empty(self):
@@ -110,7 +110,7 @@ class TestPartitionFunctions:

 @staticmethod
 def forward6(a, b, c):
-# add should have its own partition, as neither branchs are supported
+# add should have its own partition, as neither branches are supported
 add = a + 1
 # left branch
 relu = add.relu()
@@ -283,7 +283,7 @@ class TestFXGraphPasses(JitTestCase):
 (TestPartitionFunctions.forward15, [['add_1', 'add', 'permute_1', 'view', 'permute_2', 'permute_3', 'permute']], False),
 (TestPartitionFunctions.forward16, [["permute_1", "add_1", "add"]], True),
 (TestPartitionFunctions.forward16, [['add_1', 'add', 'permute_1', 'view', 'permute_2', 'permute_3', 'permute']], False),
-# should be empty partition, not a partiton with empty nodes
+# should be empty partition, not a partition with empty nodes
 (TestPartitionFunctions.forward18, [], False),
 ])
 def test_partitioner(self, fn, expected_partition, bookend_non_compute_pass):
@@ -344,9 +344,9 @@ class TestFXGraphPasses(JitTestCase):
 [['add', 'add_1', 'add_2']], # vertical fusion
 [['add_2', 'add_3']], # horizontal fusion
 [['add_3', 'add_4']],
-[['add_6', 'add_5']], # arbitray node order
-[['add_4', 'add_1', 'add_3', 'add_2']], # arbitray node order
-[['add_5', 'add_6'], ['add_1', 'add_2', 'add_3', 'add_4']], # arbitray partition order
+[['add_6', 'add_5']], # arbitrary node order
+[['add_4', 'add_1', 'add_3', 'add_2']], # arbitrary node order
+[['add_5', 'add_6'], ['add_1', 'add_2', 'add_3', 'add_4']], # arbitrary partition order
 [['add_5', 'linear2']], # includes call_function + call_module node
 [['add_6', 'relu']], # includes call_function + call_module node
 [['param', 'add_2']], # includes get_attr + call_module nodes
@@ -43,7 +43,7 @@ def forward(self, x_1):
 def f(x):
 a = x.clone()
 a_view = a.view(-1)
-# We shouldn't re-inplace the first add(), because an alias of a is re-used later in the program
+# We shouldn't re-inplace the first add(), because an alias of a is reused later in the program
 b = a.add(1) # noqa: F841

 # Second add() is fine to re-inplace
@@ -231,7 +231,7 @@ class TestIndexing(TestCase):
 x[ri([0, 2, 4]),], torch.tensor([5, 4, 3], dtype=dtype, device=device)
 )

-# Only validates indexing and setting for halfs
+# Only validates indexing and setting for Halfs
 if dtype == torch.half:
 reference = consec((10,))
 validate_indexing(reference)
@@ -4764,7 +4764,7 @@ a")
 self.assertIsNot(fun_compiled, fun_compiled_2)
 self.assertEqual(fun_compiled_2(), 7)

-# caching doesnt increase refcounts to function (holds weak reference)
+# caching doesn't increase refcounts to function (holds weak reference)
 self.assertTrue(sys.getrefcount(fun), num_ref_counts)

 def test_string_ops(self):
@@ -7374,7 +7374,7 @@ a")
 # tensor from empty list is type float in python and annotated type in torchscript
 if "annotate" in li and "dtype" not in option:
 continue
-# Skip unsigned tensor initializaton for signed values on 3.10
+# Skip unsigned tensor initialization for signed values on 3.10
 if sys.version_info[:2] >= (3, 10) and "torch.uint8" in option and "-" in li:
 continue
 code = tensor_template.format(list_create=li, tensor_op=op, options=option)
@@ -7990,7 +7990,7 @@ dedent """
 m += k
 return m

-# use of k tests the pathway where we have to insert unitialized
+# use of k tests the pathway where we have to insert uninitialized
 self.checkScript(test_varexit, (3,))
 self.checkScript(test_varexit, (2,))

@@ -10066,7 +10066,7 @@ dedent """
 buffer = io.BytesIO()
 torch.jit.save(cm, buffer)
 buffer.seek(0)
-# when tensor is loaded as constant it isnt specialized
+# when tensor is loaded as constant it isn't specialized
 cm_load = torch.jit.load(buffer)
 FileCheck().check_not("Float(1, 3)").run(cm_load.forward.graph)

@@ -10300,7 +10300,7 @@ dedent """

 def test_type_inferred_from_empty_annotation(self):
 """
-Test that the type inferred from an empty or missing annotation is Torch.Tensor wtih `inferred=true`
+Test that the type inferred from an empty or missing annotation is Torch.Tensor with `inferred=true`
 """
 @torch.jit.script
 def fn(x):
@@ -15606,7 +15606,7 @@ dedent """
 a = hasattr(self, "fee")
 b = hasattr(self, "foo")
 c = hasattr(self, "hi")
-d = hasattr(self, "nonexistant")
+d = hasattr(self, "nonexistent")
 return (a, b, c, d)

 def foo(self):
@@ -16044,7 +16044,7 @@ EXCLUDE_TYPE_CHECK = {
 # chunk returns a list in scripting and we don't unpack the list,
 # Thus it won't be replaced by ConstantChunk and run AD.
 # It's explicitly checked in test_chunk_constant_script_ad
-# Similary for split, it's replaced by split_with_sizes in tracing,
+# Similarly for split, it's replaced by split_with_sizes in tracing,
 # but we don't have AD formula for aten::split(Tensor, int[], int),
 # an op registered in JIT so AD is not triggered in scripting.
 EXCLUDE_SCRIPT_AD_CHECK = {
@@ -319,7 +319,7 @@ class TestAutocast(JitTestCase):

 # TODO: fix and enable this test?
 # (we could technically fix this, but is it really worth it?)
-@unittest.skipIf(True, "unsuported autocast syntax")
+@unittest.skipIf(True, "unsupported autocast syntax")
 def test_reused_autocast_expr(self):
 @torch.jit.script
 def fn(a, b, c, d):
@@ -126,7 +126,7 @@ class TestTEFuser(JitTestCase):
 super().setUp()
 self.tensorexpr_options = TensorExprTestOptions()

-# note: `self.dynamic_shapes` instatiated in specialization of class
+# note: `self.dynamic_shapes` instantiated in specialization of class
 # defined below

 fusion_strategy = [("DYNAMIC", 20)] if self.dynamic_shapes else [("STATIC", 20)]
@@ -1679,7 +1679,7 @@ class TestVmapOperatorsLegacy(Namespace.TestVmapBaseLegacy):

 # Interesting case #2: Batch dim at end of tensor, success cases
 # view_as_complex requires that the dim with size 2 have stride 1
-# in order for the view to function propertly
+# in order for the view to function properly
 test(op, [get([B0, 2]).transpose(0, 1)], in_dims=1)
 test(vmap(op, in_dims=1), [get([B0, B1, 2]).movedim(1, 2)])
 test(vmap(op, in_dims=2), [get([B0, 3, B1, 2]).movedim(2, 3)])
@@ -45,7 +45,7 @@ class TestLicense(TestCase):
 'Found too many "torch-*dist-info" directories '
 f'in "{site_packages}, expected only one'
 )
-# setuptools renamed *dist-info/LICENSE to *dist-info/licenses/LICENSE sicne 77.0
+# setuptools renamed *dist-info/LICENSE to *dist-info/licenses/LICENSE since 77.0
 license_file = os.path.join(distinfo[0], "licenses", "LICENSE")
 if not os.path.exists(license_file):
 license_file = os.path.join(distinfo[0], "LICENSE")
@@ -135,7 +135,7 @@ class TestLinalg(TestCase):

 @contextlib.contextmanager
 def _tunableop_ctx(self):
-# Inialize and then tear down TunableOp
+# Initialize and then tear down TunableOp
 import glob
 import os
 self._set_tunableop_defaults()
@@ -4261,7 +4261,7 @@ class TestLinalg(TestCase):
 output = torch.einsum(equation, tensors)
 self.assertEqual(output, torch.tensor(expected_output, dtype=torch.float32, device=device))

-# Test equation variantions
+# Test equation variations
 check(' ', 1, expected_output=1)
 check(' -> ', 1, expected_output=1)
 check(' , ', 2, 2, expected_output=4)
@@ -4770,7 +4770,7 @@ class TestLinalg(TestCase):
 with self._tunableop_ctx():
 torch.cuda.tunable.set_rotating_buffer_size(0)
 # Numerical check adds significant overhead, unsure if this is needed
-# or if there was a transiet problem at the time.
+# or if there was a transient problem at the time.
 # if dtype is torch.half:
 # os.environ["PYTORCH_TUNABLEOP_NUMERICAL_CHECK"] = "1"
 ordinal = torch.cuda.current_device()
@@ -5009,7 +5009,7 @@ class TestLinalg(TestCase):
 torch.cuda.tunable.tune_gemm_in_file(untuned_filename)
 new_results = len(torch.cuda.tunable.get_results())

-# This stores total number of cummulative results
+# This stores total number of cumulative results
 total_num_results = new_results - ref_results

 # Rowwise case will have an extra solution
@@ -5202,7 +5202,7 @@ class TestLinalg(TestCase):
 # Validator,ROCBLAS_VERSION,X.Y,Z
 # Validator,HIPBLASLT_VERSION,X,Y.Z
 # Validator,ROCM_Version,X,Y.Z
-# Validator,GCN_ARCH_NAME,<architecutre name>
+# Validator,GCN_ARCH_NAME,<architecture name>
 validator_num_lines = 5

 with self._tunableop_ctx():
@@ -5242,7 +5242,7 @@ class TestLinalg(TestCase):
 B = torch.randn(K, M, device=device, dtype=dtype)
 C = torch.matmul(A, B)

-# This stores total number of cummulative results
+# This stores total number of cumulative results
 total_num_results = len(torch.cuda.tunable.get_results())

 # There must be a new tuning result
@@ -5270,7 +5270,7 @@ class TestLinalg(TestCase):
 B = torch.randn(K, M, device=device, dtype=dtype)
 C = torch.matmul(A, B)

-# This stores total number of cummulative results
+# This stores total number of cumulative results
 total_num_results = len(torch.cuda.tunable.get_results())

 # Take the difference to calculate the number of results from
@@ -5303,7 +5303,7 @@ class TestLinalg(TestCase):
 B = torch.randn(K, M, device=device, dtype=dtype)
 C = torch.matmul(A, B)

-# This stores total number of cummulative results
+# This stores total number of cumulative results
 total_num_results = len(torch.cuda.tunable.get_results())

 # Take the difference to calculate the number of results from
@@ -5326,7 +5326,7 @@ class TestLinalg(TestCase):

 # Take the difference to calculate the number of results from
 # this test. There should be no change in the number of results
-# since tuning is disabe.
+# since tuning is disable.
 self.assertEqual((total_num_results - ref_num_results), 0)

 @onlyCUDA
@@ -5335,7 +5335,7 @@ class TestLinalg(TestCase):
 # Test that the TunableOp results file is created
 # and is NOT empty.
 # To test this we create a subprocess and then
-# execut a matmul from within the subprocess
+# execute a matmul from within the subprocess
 import os
 import multiprocessing as mp

@@ -5384,7 +5384,7 @@ class TestLinalg(TestCase):

 torch.nn.functional.linear(X, matA, bias)

-# This stores total number of cummulative results
+# This stores total number of cumulative results
 total_num_results = len(torch.cuda.tunable.get_results())

 # There must be a new tuning result
@@ -5438,7 +5438,7 @@ class TestLinalg(TestCase):
 torch.cuda.tunable.tune_gemm_in_file(untuned_filename)
 new_results = len(torch.cuda.tunable.get_results())

-# This stores total number of cummulative results
+# This stores total number of cumulative results
 total_num_results = new_results - ref_results

 # There must be a new tuning results
@@ -5514,7 +5514,7 @@ class TestLinalg(TestCase):
 scaleB = torch.ones((1, matB.shape[1]), device=device)
 torch._scaled_mm(matA, matB, scale_a=scaleA, scale_b=scaleB, out_dtype=torch.bfloat16)

-# This stores total number of cummulative results
+# This stores total number of cumulative results
 total_num_results = len(torch.cuda.tunable.get_results())

 # Rowwise case will have an extra solution
@@ -5638,7 +5638,7 @@ class TestLinalg(TestCase):
 torch.cuda.tunable.tune_gemm_in_file(untuned_filename)
 new_results = len(torch.cuda.tunable.get_results())

-# This stores total number of cummulative results
+# This stores total number of cumulative results
 total_num_results = new_results - ref_results

 # There must be a new tuning results
@@ -5879,7 +5879,7 @@ class TestLinalg(TestCase):
 torch.cuda.tunable.tune_gemm_in_file(untuned_filename)
 new_results = len(torch.cuda.tunable.get_results())

-# This stores total number of cummulative results
+# This stores total number of cumulative results
 total_num_results = new_results - ref_results

 # There must be a new tuning results
@@ -6700,7 +6700,7 @@ class TestLinalg(TestCase):
 with self.assertRaisesRegex(RuntimeError, "torch.int32 dtype"):
 torch.lu_unpack(lu_data, lu_pivots.long())

-# check that onces flags are unset, Nones are returned
+# check that once flags are unset, Nones are returned
 p, l, u = torch.lu_unpack(lu_data, lu_pivots, unpack_data=False)
 self.assertTrue(l.numel() == 0 and u.numel() == 0)
 p, l, u = torch.lu_unpack(lu_data, lu_pivots, unpack_pivots=False)
@@ -6919,7 +6919,7 @@ class TestLinalg(TestCase):
 lambdas1.append(worker.E[:])

 tol = 1e-8
-# tol for scipy lobpcg will be choosed so that the number of
+# tol for scipy lobpcg will be chosen so that the number of
 # iterations will be equal or very close to pytorch lobpcg
 # (that is around 170-180)

@@ -6999,7 +6999,7 @@ scipy_lobpcg | {elapsed_scipy_ms:10.2f} | {elapsed_general_scipy_ms:10.2f} |
 -(input size: {m:4}, eigenpairs:{k:2}, units: ms per call)-
 ''')

-# Handling of very small tolerence
+# Handling of very small tolerance
 tol = 1e-100

 lambdas1 = []
@@ -8025,7 +8025,7 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2:
 if self.device_type == 'cuda' and dtype is torch.bfloat16 and not SM53OrLater:
 # cuBLAS does not guarantee BFloat16 support on SM < 53.
 # So on PyTorch, we consider BFloat16 support on SM < 53 as
-# undefined bahavior
+# undefined behavior
 return

 batch_sizes = [1, 10]
@@ -8138,7 +8138,7 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2:
 if self.device_type == 'cuda' and dtype is torch.bfloat16 and not SM53OrLater:
 # cuBLAS does not guarantee BFloat16 support on SM < 53.
 # So on PyTorch, we consider BFloat16 support on SM < 53 as
-# undefined bahavior
+# undefined behavior
 return

 num_batches = 2
@@ -8212,7 +8212,7 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2:
 if self.device_type == 'cuda' and dtype is torch.bfloat16 and not SM53OrLater:
 # cuBLAS does not guarantee BFloat16 support on SM < 53.
 # So on PyTorch, we consider BFloat16 support on SM < 53 as
-# undefined bahavior
+# undefined behavior
 return

 num_batches = 10
@@ -57,7 +57,7 @@ def apply_masked_reduction_along_dim(op, input, *args, **kwargs):
 [[op([1, 2], *args0, **kwargs, dim=None, keepdim=False)]
 [op([3, 4, 5], *args0, **kwargs, dim=None, keepdim=False)]]

-where args0 is args where dim value is replased with None if
+where args0 is args where dim value is replaced with None if
 present.

 Using the same example data, if the op is called with dim=(0, 1)
@@ -595,7 +595,7 @@ class TestMatmulCuda(TestCase):
 -2, -1
 )[:, :n, :]
 else:
-raise AssertionError(f"Invaild op: {op}")
+raise AssertionError(f"Invalid op: {op}")

 C_ref = f_ref(A, B.transpose(-2, -1), offs=offs)
 C = f(A, B.transpose(-2, -1), offs=offs)
@ -1284,7 +1284,7 @@ class TestFP8Matmul(TestCase):
out_dtype=torch.bfloat16,
)

# Note re.compile is used, not re.escape. This is to accomodate fn vs fnuz type message.
# Note re.compile is used, not re.escape. This is to accommodate fn vs fnuz type message.
with self.assertRaisesRegex(
RuntimeError,
r"Expected b\.dtype\(\) == at::kFloat8_e4m3fnu?z? to be true, but got false\.",

@ -1754,7 +1754,7 @@ class TestFP8Matmul(TestCase):

# Testing only _scaled_grouped_mm() with multiple shapes, as
# _scaled_mm() already has more combinations of parameters than
# _scaled_grouped_mm(), for supporing more than one inputs layout
# _scaled_grouped_mm(), for supporting more than one inputs layout
# combinations.

@unittest.skipIf(TEST_WITH_ROCM, "ROCm doesn't support CUTLASS")

@ -1502,7 +1502,7 @@ class TestMeta(TestCase):
def test_fill__alias_relationship(self):
inps = torch.rand(2**52, device='meta')
r = torch.ops.aten.fill_(inps, 1.0)
# aten.fill_ returns an aliase
# aten.fill_ returns an alias
self.assertEqual(id(inps), id(r))

# aten.fill returns a new tensor

@ -492,7 +492,7 @@ class TestMkldnn(TestCase):
C = torch.randint(1, 3, (1,)).item() * groups
x_shape = (N, C) + input_shapes[dim]
data = torch.randn(x_shape, dtype=torch.float32)
# conv: mkldnn tranpose conv fp32
# conv: mkldnn transpose conv fp32
# conv_ref: thnn transpose conv fp32
conv = conv_module[dim](in_channels=C,
out_channels=M,

@ -640,7 +640,7 @@ class MPSLeakyReluTest(TestCaseMPS):
mps_x = cpu_x.detach().clone().to('mps')

if not contiguous and not (0 in shape or len(shape) < 2):
# Tranposing will make the tensor non-contiguous
# Transposing will make the tensor non-contiguous
cpu_x = cpu_x.transpose(0, 1)
mps_x = mps_x.transpose(0, 1)
assert not mps_x.is_contiguous()

@ -940,7 +940,7 @@ class TestMPS(TestCaseMPS):
x.requires_grad = True
d = torch.cdist(x, y)
d.backward(dist_grad)
# Check that the backward passs does not contain invalid
# Check that the backward pass does not contain invalid
# values such as nan or inf
assert torch.isfinite(x.grad).all()

@ -1195,7 +1195,7 @@ class TestMPS(TestCaseMPS):
torch.nn.functional.linear(torch.rand(size, device='mps'),
torch.randint(-10, 10, size, dtype=torch.int8, device='mps'))

# Weigths on wrong device
# Weights on wrong device
with self.assertRaisesRegex(RuntimeError, "argument weight is on cpu but expected on mps"):
torch.nn.functional.linear(torch.rand(size, device='mps'),
torch.rand(size, device='cpu'))

@ -6285,7 +6285,7 @@ class TestMPS(TestCaseMPS):
x = cpu_x.detach().clone().to('mps')

if not contiguous and (0 not in shape and len(shape) >= 2):
# Tranposing will make the tensor non-contiguous
# Transposing will make the tensor non-contiguous
cpu_x = cpu_x.transpose(0, 1)
x = x.transpose(0, 1)
assert not x.is_contiguous()

@ -6441,7 +6441,7 @@ class TestMPS(TestCaseMPS):
x = cpu_x.detach().clone().to('mps')

if not contiguous and (0 not in shape and len(shape) >= 2):
# Tranposing will make the tensor non-contiguous
# Transposing will make the tensor non-contiguous
cpu_x = cpu_x.transpose(0, 1)
x = x.transpose(0, 1)
assert not x.is_contiguous()

@ -6481,7 +6481,7 @@ class TestMPS(TestCaseMPS):
x = cpu_x.detach().clone().to('mps')

if not contiguous and (0 not in shape and len(shape) >= 2):
# Tranposing will make the tensor non-contiguous
# Transposing will make the tensor non-contiguous
cpu_x = cpu_x.transpose(0, 1)
x = x.transpose(0, 1)
assert not x.is_contiguous()

@ -7706,13 +7706,13 @@ class TestMPS(TestCaseMPS):
# Test exponential
@unittest.skip("This does not test anything")
def test_exponential(self):
def helper(shape, lamda, dtype=torch.float32):
def helper(shape, lambda_, dtype=torch.float32):

mps_out = torch.zeros(shape, device='mps', dtype=dtype)
mps_out.exponential_(lamda)
mps_out.exponential_(lambda_)

print(mps_out.to('cpu').float().mean(), 1 / lamda)
print(mps_out.to('cpu').float().mean(), 1 / lambda_)
print(mps_out.to('cpu').float().std() ** 2, 1 / (lamda**2))
print(mps_out.to('cpu').float().std() ** 2, 1 / (lambda_**2))

for dtype in [torch.float32, torch.float16]:
helper([100, 100], 2, dtype)

@ -8179,7 +8179,7 @@ class TestLogical(TestCaseMPS):
self.assertEqual(torch.isin(x, 2.0), torch.tensor([False, False, True, False], device="mps"))
self.assertEqual(torch.isin(x, 1.0, invert=True), torch.tensor([True, False, True, True], device="mps"))
self.assertEqual(torch.isin(x, 8.0), torch.tensor([False, False, False, False], device="mps"))
# Scalar.Tensor varaiant(alaises to Scalar.Scalar), not covered by OpInfo
# Scalar.Tensor variant(alaises to Scalar.Scalar), not covered by OpInfo
self.assertEqual(torch.isin(2.0, x), torch.tensor(True, device="mps"))

def test_isin_asserts(self):

@ -10437,7 +10437,7 @@ class TestConvolutionMPS(TestCaseMPS):
grad_in_cl = torch.empty(1, f, oc, device="mps").transpose(1, 2)
grad_in_cl[:] = grad_in

# It does not matter whether grad_in contigous, or channels last, results should equal to each other
# It does not matter whether grad_in contiguous, or channels last, results should equal to each other
grad_rc = torch.autograd.grad((out,), (inp, conv.weight, conv.bias), (grad_in,), retain_graph=True)
grad_rc_cl = torch.autograd.grad((out,), (inp, conv.weight, conv.bias), (grad_in_cl,), retain_graph=True)

@ -7190,7 +7190,7 @@ torch.cuda.synchronize()

query = torch.rand(bs, d1, d3, device=device)
value = torch.rand(30, d2, requires_grad=True, device=device)
# total_length must > than max_length otherwise flash_attn backwark will fail
# total_length must > than max_length otherwise flash_attn backward will fail
offsets = torch.tensor([0, 2, 3, 30], device=device)

m = mha(use_legacy_api)

@ -2013,7 +2013,7 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""")
eval_out0 = wrapped_m(input)
# assert eval gives same result as last training iteration
self.assertEqual(eval_out0, last_train_out)
# assert doing more iteartion in eval don't change things
# assert doing more iteration in eval don't change things
self.assertEqual(eval_out0, wrapped_m(input))
self.assertEqual(last_train_u, m.weight_u)
self.assertEqual(last_train_v, m.weight_v)

@ -8911,7 +8911,7 @@ class TestNNDeviceType(NNTestCase):
# Should raise error when negative padding results in negative output shape
self.assertRaises(RuntimeError, lambda: F.pad(inputs, (-3, -2), mode='circular'))

# assert that relfection padding errors when pad >= input size
# assert that reflection padding errors when pad >= input size
expected_err_msg = r"Padding size should be less than the corresponding input dimension"
inputs = torch.randn(1, 1, 2, 3, device=device, dtype=dtype)
self.assertRaisesRegex(RuntimeError, expected_err_msg,

@ -11018,7 +11018,7 @@ class TestNNDeviceType(NNTestCase):
@onlyCUDA
@dtypes(torch.double)
def test_lstmcell_backward_only_one_output_grad(self, device, dtype):
# checks that undefined gradients doen't hamper the backward
# checks that undefined gradients doesn't hamper the backward
# see #11872
l = torch.nn.LSTMCell(2, 3).to(device).to(dtype=dtype)
s = torch.randn(1, 2, device=device, dtype=dtype, requires_grad=True)

@ -11967,7 +11967,7 @@ class TestNNDeviceType(NNTestCase):
def test_softmax_bfloat16(self, device):
for dim in [0, 1, 2, 3]:
_test_bfloat16_ops(self, torch.nn.Softmax(dim=dim), device, inp_dims=(16, 33, 15, 16), prec=1e-2)
# test softmax with large input value which casues exp() to overflow
# test softmax with large input value which causes exp() to overflow
_test_bfloat16_ops(self, torch.nn.Softmax(dim=dim), device, inp_dims=(16, 33, 15, 16), prec=0.05, scale_factor=1000.0)

def test_nll_loss_mismatched_batch(self, device):

@ -12298,7 +12298,7 @@ if __name__ == '__main__':
input = torch.randn(N, C, *other_dims, device=device, requires_grad=True)
target = torch.empty(N, *other_dims, dtype=torch.long, device=device).random_(0, C)

# construct target probablity that should have the same result as label_smoothing
# construct target probability that should have the same result as label_smoothing
target_proba = F.one_hot(target, num_classes=C)
# Need to put the C dim at index 1.
target_proba = target_proba.permute(0, -1, *range(1, target_proba.dim() - 1))

@ -205,7 +205,7 @@ class TestPrivateUse1(TestCase):


class TestOpenReg(TestCase):
"""Tests of mimick accelerator named OpenReg based on PrivateUse1"""
"""Tests of mimic accelerator named OpenReg based on PrivateUse1"""

# Stream & Event
def test_stream_synchronize(self):

@ -475,7 +475,7 @@ class TestOpenReg(TestCase):
with torch.serialization.skip_data():
torch.save(sd, f)

# Opeartors
# Operators
def test_factory(self):
x = torch.empty(3, device="openreg")
self.assertEqual(x.device.type, "openreg")

@ -87,7 +87,7 @@ _variant_ops = partial(
# Get names of all the operators which have ref in their entry in OpInfo (testing infra)
# except for elementwise unary operators (separately implemented in test/test_unary_ufuncs.py),
# elementwise binary operators (separately implemented in test_binary_ufuncs.py),
# reduction operations (separately impelemented in test_reductions.py),
# reduction operations (separately implemented in test_reductions.py),
# and Spectral Functions (separately implemented for only 1D as of now, in test/test_spectral_ops.py)
_ref_test_ops = tuple(
filter(

@ -373,7 +373,7 @@ class TestCommon(TestCase):

# output_process_fn_grad has a very unfortunate name
# We use this function in linalg extensively to postprocess the inputs of functions
# that are not completely well-defined. Think svd and muliplying the singular vectors by -1.
# that are not completely well-defined. Think svd and multiplying the singular vectors by -1.
# CPU and CUDA implementations of the SVD can return valid SVDs that are different.
# We use this function to compare them.
cuda_results = sample.output_process_fn_grad(cuda_results)

@ -580,7 +580,7 @@ class TestCommon(TestCase):

# Tests that experimental Python References perform the same computation
# as the operators they reference, when operator calls in the torch
# namesapce are remapped to the refs namespace (torch.foo becomes refs.foo).
# namespace are remapped to the refs namespace (torch.foo becomes refs.foo).
@onlyNativeDeviceTypesAnd(["hpu"])
@ops(python_ref_db)
@skipIfTorchInductor("Takes too long for inductor")

@ -759,7 +759,7 @@ class TestCommon(TestCase):
else tuple(n_inp) + n_args
)

# Filter the elemnts that are tensors that require grad
# Filter the elements that are tensors that require grad
t_input_tensors = [
t for t in t_inputs if isinstance(t, torch.Tensor) and t.requires_grad
]

@ -188,7 +188,7 @@ class TestJit(JitCommonTestCase):
# Note: only runs in float32 because schema isn't affected by dtype,
# so running it on all dtypes is would be excessive
if dtype == torch.float32:
# TODO: no reason why we cant run this with tracing graph
# TODO: no reason why we can't run this with tracing graph
if support_script and op.name != "rsub":
check_alias_annotation(
name,

@ -77,7 +77,7 @@ def quux(a):
# dictionary are function names in the torch API and the values are
# function implementations. Implementations are added to
# HANDLED_FUNCTION_DIAGONAL by decorating a python function with
# implements_diagonal. See the overrides immediately below the defintion
# implements_diagonal. See the overrides immediately below the definition
# of DiagonalTensor for usage examples.
HANDLED_FUNCTIONS_DIAGONAL = {}

@ -133,7 +133,7 @@ class DiagonalTensor:
https://numpy.org/devdocs/user/basics.dispatch.html
"""
# This is defined as a class attribute so that SubDiagonalTensor
# below which subclasses DiagonalTensor can re-use DiagonalTensor's
# below which subclasses DiagonalTensor can reuse DiagonalTensor's
# __torch_function__ implementation.
handled_functions = HANDLED_FUNCTIONS_DIAGONAL

@ -7,7 +7,7 @@ from torch.testing._internal.common_utils import run_tests, TestCase

class TestPerOverloadAPI(TestCase):
def test_basics_opoverloadpacket(self):
# add is ony used as an example here. It is ok to update the test
# add is only used as an example here. It is ok to update the test
# if the semantics of add are modified in the future.
add_packet = torch.ops.aten.add

@ -512,7 +512,7 @@ class TestPublicBindings(TestCase):
"does not have `__all__` defined"
)
fix_is_public = (
f"remove it from the modules's (`{modname}`) `__all__`"
f"remove it from the modules' (`{modname}`) `__all__`"
if is_all
else f"either define a `__all__` for `{modname}` or add a `_` at the beginning of the name"
)

@ -522,7 +522,7 @@ class TestPublicBindings(TestCase):
f"it is not inside the module's (`{modname}`) `__all__`"
)
fix_is_public = (
f"add it from the modules's (`{modname}`) `__all__`"
f"add it from the modules' (`{modname}`) `__all__`"
)
if looks_public:
why_looks_public = (

@ -156,7 +156,7 @@ class TestPythonRegistration(TestCase):
# New dispatcher call should hit the first callback again
self.assertFalse(first_called)
a, b = args
# Make a substraction here instead of add !
# Make a subtraction here instead of add !
c = a - b
self.assertTrue(first_called)
return c

@ -735,7 +735,7 @@ class TestReductions(TestCase):
res2 = x1.sum(axis=(0, 2), keepdims=True)
self.assertEqual(res1, res2)

# TODO: kill this ane replace with common creation ops
# TODO: kill this and replace with common creation ops
def _make_tensors(self, shape, val_range=(-100, 100), use_floating=True, use_integral=True,
use_complex=False) -> dict[str, list[torch.Tensor]]:
float_types = [torch.double,

@ -1629,7 +1629,7 @@ class TestReductions(TestCase):
RuntimeError, "only when boundaries tensor dimension is 1"):
torch.searchsorted(boundaries, 1)

# incompatiable output tensor's dtype
# incompatible output tensor's dtype
def test_output_dtype(dtype, is_int32):
output = values_1d.to(dtype)
with self.assertRaisesRegex(

@ -2018,7 +2018,7 @@ class TestReductions(TestCase):
with self.assertRaisesRegex(RuntimeError, error_msg):
op(x, dim=dim)

# TODO: update this test to comapre against NumPy
# TODO: update this test to compare against NumPy
@onlyCUDA
def test_var(self, device):
cpu_tensor = torch.randn(2, 3, 3)

@ -2513,7 +2513,7 @@ class TestReductions(TestCase):
k = int((t.numel() - 1) / 2)
self.assertEqual(res, t.view(-1).sort()[0][k])
if t.numel() % 2 == 1:
# We can only test agains numpy for odd reductions because numpy
# We can only test against numpy for odd reductions because numpy
# returns the mean of the two medians and torch returns the lower
self.assertEqual(res.cpu().numpy(), np.median(t_numpy))
for dim in range(t.ndim):

@ -2524,7 +2524,7 @@ class TestReductions(TestCase):
self.assertEqual(res[0], (t.sort(dim)[0]).select(dim, k).unsqueeze_(dim))
self.assertEqual(res[0], t.gather(dim, res[1]))
if size % 2 == 1:
# We can only test agains numpy for odd reductions because numpy
# We can only test against numpy for odd reductions because numpy
# returns the mean of the two medians and torch returns the lower
self.assertEqual(res[0].cpu().numpy(), np.median(t_numpy, dim, keepdims=True), exact_dtype=False)

@ -2548,7 +2548,7 @@ class TestReductions(TestCase):
k = int((t.numel() - num_nan - 1) / 2)
self.assertEqual(res, t.view(-1).sort()[0][k])
if (t.numel() - num_nan) % 2 == 1:
# We can only test agains numpy for odd reductions because numpy
# We can only test against numpy for odd reductions because numpy
# returns the mean of the two medians and torch returns the lower
self.assertEqual(res.item(), numpy_op(t.cpu().numpy()))
for dim in range(t.ndim):

@ -2561,7 +2561,7 @@ class TestReductions(TestCase):
k = ((size - num_nan - 1) / 2).type(torch.long)
self.assertEqual(res[0], (t.sort(dim)[0]).gather(dim, k))
self.assertEqual(res[0], t.gather(dim, res[1]))
# We can only test agains numpy for odd reductions because numpy
# We can only test against numpy for odd reductions because numpy
# returns the mean of the two medians and torch returns the lower
mask = (size - num_nan) % 2 == 1
res = res[0].masked_select(mask).cpu()

@ -3526,7 +3526,7 @@ as the input tensor excluding its innermost dimension'):
# raises an error if no `dim` parameter is specified. This exists separately from tests in
# test_tensot_compare_ops_empty because not specifying a `dim` parameter in the former tests does
# not throw errors. Also, checking the return type of argmax requires supplying a different dtype
# argument than that for the input tensor. There is also variantion in numpy testing.
# argument than that for the input tensor. There is also variation in numpy testing.
def test_tensor_compare_ops_argmax_argmix_kthvalue_dim_empty(self, device):
shape = (2, 0, 4)
master_input = torch.randn(shape, device=device)

@ -455,7 +455,7 @@ class TestScatterGather(TestCase):
helper([50, 8, 7], 100)
helper([50, 3, 4, 5], 100)

# Generic Device Test Framework instantation, see
# Generic Device Test Framework instantiation, see
# https://github.com/pytorch/pytorch/wiki/Running-and-writing-tests
# for details.
instantiate_device_type_tests(TestScatterGather, globals())

@ -558,7 +558,7 @@ class TestSegmentReductions(TestCase):
lengths = torch.tensor([0, 2, 3, 0], device=device, dtype=length_type)
data = torch.arange(6, dtype=torch.float, device=device)

# test for error on 1-D lenghts
# test for error on 1-D lengths
with self.assertRaisesRegex(RuntimeError, "Expected all rows of lengths along axis"):
torch._segment_reduce(data, 'sum', lengths=lengths, axis=0, unsafe=False)

@ -746,7 +746,7 @@ class SerializationMixin:
'readinto() stress test')

def test_serialization_filelike_uses_readinto(self):
# For maximum effiency, when reading a file-like object,
# For maximum efficiency, when reading a file-like object,
# ensure the C API calls readinto instead of read.
a = torch.randn(5, 4)

@ -458,7 +458,7 @@ class TestSparse(TestSparseBase):
torch.autograd.gradcheck(func, (t._indices(), t._values().requires_grad_(True), shape, True))

@dtypes(*floating_and_complex_types_and(torch.float16, torch.bfloat16))
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
@gradcheck_semantics()
def test_to_dense_with_gradcheck(self, device, dtype, gradcheck):

@ -594,7 +594,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(torch.empty((3, 0), dtype=dtype, device=device), self.safeToDense(x))

@dtypes(torch.double, torch.cdouble)
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
@gradcheck_semantics()
def test_to_dense_hybrid(self, device, dtype, gradcheck):

@ -950,7 +950,7 @@ class TestSparse(TestSparseBase):

@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
@gradcheck_semantics()
def test_permute(self, device, dtype, coalesced, gradcheck):
# trivial checks

@ -1240,7 +1240,7 @@ class TestSparse(TestSparseBase):
# NOTE: indices are negative
idx_dim_d_range = list(range(-sizes[d], 0))
for idx_len in range(sizes[d], sizes[d] + 1):
# creates all possible valid indices into dim d of lenght idx_len
# creates all possible valid indices into dim d of length idx_len
for idx in itertools.product(*itertools.repeat(idx_dim_d_range, idx_len)):
t_idx = torch.tensor(idx, dtype=torch.long, device=device)

@ -1619,7 +1619,7 @@ class TestSparse(TestSparseBase):

@coalescedonoff
@dtypes(torch.double)
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
def test_sparse_mm(self, device, dtype, coalesced):
def test_shape(d1, d2, d3, nnz, transposed):
if transposed:

@ -1641,7 +1641,7 @@ class TestSparse(TestSparseBase):

@coalescedonoff
@dtypes(torch.double)
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
@gradcheck_semantics()
def test_sparse_mul(self, device, dtype, coalesced, gradcheck):
# https://github.com/pytorch/pytorch/issues/79914

@ -3600,13 +3600,13 @@ class TestSparse(TestSparseBase):


@dtypes(torch.double, torch.float)
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
def test_softmax_zero_nnz(self, device, dtype):
self._check_zero_nnz_softmax_op(torch.sparse.softmax, 1, device, dtype)
self._check_zero_nnz_softmax_op(torch.sparse.softmax, 10, device, dtype)

@dtypes(torch.double, torch.float)
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
def test_log_softmax_zero_nnz(self, device, dtype):
self._check_zero_nnz_softmax_op(torch.sparse.log_softmax, 1, device, dtype)
self._check_zero_nnz_softmax_op(torch.sparse.log_softmax, 10, device, dtype)

@ -3985,11 +3985,11 @@ class TestSparse(TestSparseBase):
# some normal cases
yield (make_diags((1, 5)), make_offsets([0]), (5, 5))
yield (make_diags((3, 3)), make_offsets([-1, 0, 1]), (4, 4))
# noncontigous diags
# non-contiguous diags
yield (make_diags((5, 4), noncontiguous=True), make_offsets([-1, 1, 0, 2, -2]), (5, 5))
# noncontigous offsets
# non-contiguous offsets
yield (make_diags((3, 4)), make_offsets([1, -1, 0, -2, 2])[::2], (5, 5))
# noncontigous diags + offsets
# non-contiguous diags + offsets
yield (make_diags((3, 4), noncontiguous=True), make_offsets([1, -1, 0, -2, 2])[::2], (5, 5))
# correct dimensionality, 2d, 2d , and shapes match, but the number of diagonals is zero
yield (make_diags((0, 3)), make_offsets([]), (3, 3))

@ -4624,7 +4624,7 @@ class TestSparseAny(TestCase):

# However, invariants check can be disabled via
# constructor's optional argument so that the invalid
# tensor is succesfully constructed:
# tensor is successfully constructed:
r = create_invalid_tensor(check_invariants=False)
self.assertEqual(r.layout, layout)

@ -4646,7 +4646,7 @@ class TestSparseAny(TestCase):
self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())
self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())

# Test an attempt to re-use an activate context manager instance
# Test an attempt to reuse an activate context manager instance
check_ctx2 = torch.sparse.check_sparse_tensor_invariants(True)
with check_ctx:
self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())

@ -2791,7 +2791,7 @@ class TestSparseCSR(TestCase):
raise ValueError("Expected at least one 2D tensor in samples.")

for sample in samples:
# We must skip samples of low dimensionality, we can't covert them to sparsed compressed layouts
# We must skip samples of low dimensionality, we can't convert them to sparsed compressed layouts
if sample.input.ndim < 2:
continue
sparse_input = sample.input.to_sparse_csr().requires_grad_(True)

@ -3255,7 +3255,7 @@ class TestSparseCSR(TestCase):
# helpers

def _check_against_scipy_matrix(pt_matrix, dense, blocksize, **kwargs):
# scipy has no bsc layout, so we check against the bsr layout of the tranposed dense
# scipy has no bsc layout, so we check against the bsr layout of the transposed dense
if layout == torch.sparse_bsc:
sp_matrix = self._construct_sp_matrix(dense.t(), layout=torch.sparse_bsr, blocksize=blocksize[::-1])
else:

@ -3272,7 +3272,7 @@ class TestSparseCSR(TestCase):
self.assertEqual(torch.tensor(sp_matrix.indptr, dtype=torch.int64), compressed_indices_mth(pt_matrix))
self.assertEqual(torch.tensor(sp_matrix.indices, dtype=torch.int64), plain_indices_mth(pt_matrix))
if layout == torch.sparse_bsc:
# we must tranpose the blocks before comparing
# we must transpose the blocks before comparing
self.assertEqual(torch.tensor(sp_matrix.data), pt_matrix.values().transpose(-2, -1))
else:
self.assertEqual(torch.tensor(sp_matrix.data), pt_matrix.values())

@ -3371,7 +3371,7 @@ class TestSparseCSR(TestCase):

# special cases for batched tensors
if batched:
# batched sparse tensors need only have the same number of non-zeros in each batch not nessesarily the
# batched sparse tensors need only have the same number of non-zeros in each batch not necessarily the
# same sparsity pattern in each batch
sparse_shape = sparse_sizes[0]
hybrid_shape = hybrid_sizes[0]

@ -3382,7 +3382,7 @@ class TestSparseCSR(TestCase):
# number of elements/blocks in each batch (total not nnz)
batch_mask_shape = sparse_shape
if layout in blocked_layouts:
# if we are blocked the mask is genereated for the block valued elemetns
# if we are blocked the mask is generated for the block valued elements
batch_mask_shape = sparse_shape[0] // blocksize[0], sparse_shape[1] // blocksize[1]

# random bool vector w/ length equal to max possible nnz for the sparse_shape

@ -3815,7 +3815,7 @@ class TestSparseCompressedTritonKernels(TestCase):
input_broadcasted_clone.col_indices(),
# For testing `out=` let's make values to have "weird" strides
# so that if the kernel modifies values to it's needs, the result
# is being compied into out.values.
# is being copied into out.values.
input_broadcasted_clone.values().transpose(-3, -2).contiguous().transpose(-3, -2),
layout=input_broadcasted_clone.layout,
size=input_broadcasted_clone.shape

@ -3930,7 +3930,7 @@ class TestSparseCompressedTritonKernels(TestCase):
try:
result = bsr_scatter_mm(bsr, dense, indices_data=indices_data)
except triton.compiler.OutOfResources:
# ensure that there was at least one succesful test:
# ensure that there was at least one successful test:
assert SPLIT_N < SPLIT_N_list[0]
break

@ -210,7 +210,7 @@ class TestStatelessFunctionalAPI(TestCase):
prev_buffer = module.buffer.clone()
res = functional_call(module, parameters, x, tie_weights=False)
self.assertEqual(x, res)
# check that the weights remain unmodified and were correctly accesed
# check that the weights remain unmodified and were correctly accessed
cur_weight = module.l1.weight
cur_buffer = module.buffer
self.assertEqual(cur_weight, prev_weight)

@ -753,7 +753,7 @@ class TestStatelessFunctionalAPI(TestCase):
res = torch.func.functional_call(mod, (), x)
self.assertEqual(res, mod(x))

# three dictonaries
# three dictionaries
a = ({'l1.weight': torch.ones(1, 1)}, {'l1.bias': torch.ones(1)}, {'buffer': torch.zeros(1)})
res = torch.func.functional_call(mod, a, x)
self.assertEqual(res, x + 1)

@ -423,7 +423,7 @@ class TestSympyInterp(TestCase):
sargs = [sympy.sympify(a) for a in args]
sympy_expr = getattr(ReferenceAnalysis, fn)(*symbols)
ref_r = getattr(ReferenceAnalysis, fn)(*sargs)
# Yes, I know this is a longwinded way of saying xreplace; the
# Yes, I know this is a long-winded way of saying xreplace; the
# point is to test sympy_interp
r = sympy_interp(
ReferenceAnalysis, dict(zip(symbols, sargs)), sympy_expr

@ -1531,7 +1531,7 @@ class TestTensorCreation(TestCase):
expected = torch.empty(0, 5, dtype=a.dtype, device=device)
self.assertEqual(c, expected)

# test empty imput
# test empty input
a = torch.empty(0, device=device)
c1 = torch.combinations(a)
c2 = torch.combinations(a, with_replacement=True)

@ -695,12 +695,12 @@ class TestTensorExprFuser(BaseTestClass):
_atol = 2e-3
_rtol = 1e-5
if data_type is torch.bfloat16:
# Compared to aten logic, NNC coudl save addtional BF16/Fp32 conversion.
# Compared to aten logic, NNC could save additional BF16/Fp32 conversion.
# Take d = a + b - c as an example, the aten logic is as follows at
# operator level:
# tmp = to_bf16(to_fp32(a) + to_fp32(b))
# d = to_bf16(to_fp32(tmp) + to_fp32(c))
# But NNC could fuse the compression and remove the redudant conversions.
# But NNC could fuse the compression and remove the redundant conversions.
# The final statement is as follows
# d = to_bf16(to_fp32(a) + to_fp32(b) + to_fp32(c))
# Hence, we simulate NNC computation by feeding fp32 tensors and converting

@ -1091,7 +1091,7 @@ class TestTorchDeviceType(TestCase):
small2_expanded = small2.expand(*dims_full)

if small.is_cuda and fn in ['map', 'map2']:
# map and map2 are not implementd on CUDA tensors
# map and map2 are not implemented on CUDA tensors
return

if hasattr(large_expanded, fn):

@ -2677,7 +2677,7 @@ else:
x.requires_grad = True
d = torch.cdist(x, y)
d.backward(dist_grad)
# Check that the backward passs does not contain invalid
# Check that the backward pass does not contain invalid
# values such as nan or inf
assert torch.isfinite(x.grad).all()

@ -2709,7 +2709,7 @@ else:
[0, 0, 0],
[1, 2, 3]]))

# Check that cummulative sum over a zero length dimension doesn't crash on backprop.
# Check that cumulative sum over a zero length dimension doesn't crash on backprop.
# Also check that cumsum over other dimensions in a tensor with a zero-length
# dimensiuon also works
# Also include a basic suite of similar tests for other bases cases.

@ -2761,7 +2761,7 @@ else:
[0, 0, 0],
[1, 1, 1]]))

# Check that cummulative prod over a zero length dimension doesn't crash on backprop.
# Check that cumulative prod over a zero length dimension doesn't crash on backprop.
# Also check that cumprod over other dimensions in a tensor with a zero-length
# dimensiuon also works
# Also include a basic suite of similar tests for other bases cases.

@ -3806,7 +3806,7 @@ else:
# Test for parallel adds with accumulate == True
low_precision = dtype == torch.half or dtype == torch.bfloat16
# Less numbers to avoid overflow with low_precision
# Grainsize is 3000 for the for_loop to be parallized on CPU
# Grainsize is 3000 for the for_loop to be parallelized on CPU
sizes = ((100,)) if low_precision else ((200,), (3002,))
# Bfloat16 has a particularly bad performance here
# This operation is nondeterministic on GPU, so we are generous with the rtol

@ -7063,7 +7063,7 @@ class TestTorch(TestCase):
dest.index_add(0, index, source)

def test_linspace_logspace(self):
# Ensure the output does not require grad regardless of inputs requiring gard or not.
# Ensure the output does not require grad regardless of inputs requiring guard or not.
# The output of factory functions should not be part of any computational graph.
start = 0.0
end = 3.0

@ -8700,7 +8700,7 @@ tensor([[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
self.assertEqual(2 * size, (1, 2, 3, 1, 2, 3))

def test_Size_concat_non_tuple_sequence(self):
# check that TypeError get's raised on adding non-tuple sequences.
# check that TypeError gets raised on adding non-tuple sequences.
from collections.abc import Sequence

class DummySequence(Sequence):

@ -11104,7 +11104,7 @@ def add_neg_dim_tests():
assert not hasattr(TestTorch, test_name), "Duplicated test name: " + test_name
setattr(TestTorch, test_name, make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim))

# TODO: these empy classes are temporarily instantiated for XLA compatibility
# TODO: these empty classes are temporarily instantiated for XLA compatibility
# once XLA updates their test suite it should be removed
class TestViewOps(TestCase):
pass

@ -98,7 +98,7 @@ def _check_equal(
"""
Compare test tensor against golden and reference tensors.
Golden is the highest precision possible serving as the "ground truth"
Refernce is the same precision as test and should also serve as less precisie ground truth.
Reference is the same precision as test and should also serve as less precisie ground truth.
We calcculate the "reference error" by comparing the golden to reference and use this as the
measruing stick for the test tensor.

@ -1693,7 +1693,7 @@ class TestSDPAFailureModes(NNTestCase):
@onlyCUDA
@unittest.skipIf(not PLATFORM_SUPPORTS_FLASH_ATTENTION, "Does not support fused SDPA or pre-SM80 hardware")
def test_unaligned_tensors(self, device):
# The alignment is depdent on arch so we specifiy SM80OrLater
# The alignment is dependent on arch so we specify SM80OrLater
dtype = torch.float16
size = SdpaShape(2, 2, 8, 5)
make_tensor = partial(torch.rand, size, device=device, dtype=dtype)

@@ -3042,7 +3042,7 @@ class TestSDPACudaOnly(NNTestCase):

# Cast up and compare
# Since we are doing the compute on fp16 we have to bump the tolerance
- # Bump down the tolearnce for blfoat16
+ # Bump down the tolerance for blfoat16
atol = 7e-4 if dtype == torch.float16 else 7e-3
rtol = 7e-4 if dtype == torch.float16 else 7e-3
if TEST_WITH_ROCM:

@@ -3525,7 +3525,7 @@ class TestSDPACudaOnly(NNTestCase):
query, key, value, is_causal=is_causal, scale=scale, enable_gqa=enable_gqa)
else:
# Problem: We pad sizes in the composite region of the top level SDPA. But we need the
- # Debug mask when have dropout. So I am going to manualy pad up here when testing dropout
+ # Debug mask when have dropout. So I am going to manually pad up here when testing dropout
q_padded, q_og_size = pad_last_dim(query, 8)
k_padded, k_og_size = pad_last_dim(key, 8)
v_padded, v_og_size = pad_last_dim(value, 8)

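The pad_last_dim calls above pad the head dimension up to a multiple of 8 and keep the original size so the padding can be stripped afterwards. A sketch of that idea with a hypothetical helper (the real pad_last_dim lives in the test file and may differ):

    import torch
    import torch.nn.functional as F

    def pad_last_dim_to_multiple(t, alignment=8):
        # Hypothetical sketch: zero-pad the last dimension up to the next
        # multiple of `alignment` and remember the original size so the
        # extra columns can be sliced off after the SDPA call.
        og_size = t.size(-1)
        return F.pad(t, (0, (-og_size) % alignment)), og_size

    q = torch.randn(2, 2, 8, 5)
    q_padded, q_og = pad_last_dim_to_multiple(q, 8)
    assert q_padded.size(-1) == 8 and q_og == 5
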
@@ -1052,7 +1052,7 @@ class TestTypePromotion(TestCase):
torch.cat([x, y], out=out)
self.assertEqual(out, expected_out, exact_dtype=True)

- # Verfies that unary ops require matching out types
+ # Verifies that unary ops require matching out types
@onlyNativeDeviceTypes
@dtypes(*itertools.product((torch.int64,
torch.float32, torch.float64,

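The comment fixed above refers to unary ops rejecting a mismatched out dtype instead of silently downcasting. A small illustration, with made-up values:

    import torch

    x = torch.randn(3)                        # float32 input
    out = torch.empty(3, dtype=torch.int64)   # mismatched out dtype

    try:
        torch.neg(x, out=out)
    except RuntimeError:
        pass  # the float result is not silently cast down to long
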
@@ -35,7 +35,7 @@ CACHE_DIR = os.path.join(DATA_DIR, ".mypy_cache")


def _key_func(key: str) -> str:
- """Split at the first occurance of the ``:`` character.
+ """Split at the first occurrence of the ``:`` character.

Windows drive-letters (*e.g.* ``C:``) are ignored herein.
"""

@@ -135,7 +135,7 @@ def _parse_reveals(file: IO[str]) -> list[str]:
comments = "/n".join(comments_array)

# Only search for the `{*}` pattern within comments,
- # otherwise there is the risk of accidently grabbing dictionaries and sets
+ # otherwise there is the risk of accidentally grabbing dictionaries and sets
key_set = set(re.findall(r"\{(.*?)\}", comments))
kwargs = {
k: FORMAT_DICT.get(k, f"<UNRECOGNIZED FORMAT KEY {k!r}>") for k in key_set

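For reference, the findall call above extracts whatever sits between braces in the comment text; on simple inputs it behaves like this (the comment strings below are invented):

    import re

    comments = "\n".join([
        "# E: {str}",
        "# E: {float64}",
    ])
    key_set = set(re.findall(r"\{(.*?)\}", comments))
    assert key_set == {"str", "float64"}
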
@@ -1080,7 +1080,7 @@ class TestUnaryUfuncs(TestCase):
def test_silu_complex(self, device, dtype):
atol = 1e-6
rtol = 1e-6
- inouts = [
+ inp_outs = [
(0.2 + 0.3j, 0.08775215595960617065 + 0.18024823069572448730j),
(1e-19 + 1e-18j, 4.99999984132761269448e-20 + 5.00000022906852482872e-19j),
(-1.0 + 2.0j, -0.78546208143234252930 + -0.44626939296722412109j),

@@ -1088,7 +1088,7 @@ class TestUnaryUfuncs(TestCase):
(2.0j, -1.55740761756896972656 + 0.99999988079071044922j),
]

- for inp, out in inouts:
+ for inp, out in inp_outs:
res = torch.nn.functional.silu(
torch.tensor(inp, dtype=dtype, device=device)
)

@@ -1096,7 +1096,7 @@ class TestUnaryUfuncs(TestCase):
self.assertEqual(res.real, out.real, atol=atol, rtol=rtol)
self.assertEqual(res.imag, out.imag, atol=atol, rtol=rtol)

- for inp, out in inouts:
+ for inp, out in inp_outs:
res = torch.nn.functional.silu(
torch.tensor(inp, dtype=dtype, device=device), inplace=True
)

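For orientation, silu on a complex input is z * sigmoid(z) evaluated in complex arithmetic. A quick check against the first table entry above, assuming F.silu accepts complex tensors as the test implies:

    import torch

    def silu_ref(z):
        # silu(z) = z * sigmoid(z), here computed directly in complex arithmetic
        return z * (1.0 / (1.0 + torch.exp(-z)))

    z = torch.tensor(0.2 + 0.3j, dtype=torch.complex64)
    res = torch.nn.functional.silu(z)
    assert torch.allclose(res, silu_ref(z), atol=1e-6, rtol=1e-6)
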
@@ -1170,7 +1170,7 @@ class TestUnaryUfuncs(TestCase):
# Not using numpy's log1p here because by the time of writing this,
# np.log1p has precision problems for small complex input values, see here:
# https://github.com/numpy/numpy/issues/22609
- inouts = [
+ inp_outs = [
(0.2 + 0.3j, 0.21263386770217202 + 0.24497866312686414j),
(1e-19 + 1e-18j, 1e-19 + 1e-18j),
(1e-18 + 0.1j, 0.00497517 + 0.0996687j),

@@ -1184,7 +1184,7 @@ class TestUnaryUfuncs(TestCase):
]
# test the extreme values
if dtype == torch.complex128:
- inouts += [
+ inp_outs += [
(-1 + 1e250j, 575.6462732485114 + 1.5707963267948966j),
(1e250 + 1j, 575.6462732485114 + 1e-250j),
(1e250 + 1e250j, 575.9928468387914 + 0.7853981633974483j),

@@ -1193,7 +1193,7 @@ class TestUnaryUfuncs(TestCase):
(1e250 + 1e-250j, 575.6462732485114 + 0.0j),
]
elif dtype == torch.complex64:
- inouts += [
+ inp_outs += [
(-1 + 1e30j, 69.07755278982137 + 1.5707963267948966j),
(1e30 + 1j, 69.07755278982137 + 1e-30j),
(1e30 + 1e30j, 69.42412638010134 + 0.7853981633974483j),

@@ -1203,7 +1203,7 @@ class TestUnaryUfuncs(TestCase):
]

# test the log1p individually
- for inp, out in inouts:
+ for inp, out in inp_outs:
res = torch.log1p(torch.tensor(inp, dtype=dtype, device=device))
self.assertFalse(torch.any(torch.isnan(res)))
# setting up atol == 0.0 because some part has very small values

@@ -1211,7 +1211,7 @@ class TestUnaryUfuncs(TestCase):
self.assertEqual(res.imag, out.imag, atol=0.0, rtol=1e-6)

# test the log1p in tensor
- inp_lst, out_lst = (list(elmt) for elmt in zip(*inouts))
+ inp_lst, out_lst = (list(elmt) for elmt in zip(*inp_outs))
inp_tens = torch.tensor(inp_lst, dtype=dtype, device=device)
out_tens = torch.tensor(out_lst, dtype=dtype, device=device)
res_tens = torch.log1p(inp_tens)

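The point of log1p in the hunks above is precision for tiny inputs. A quick illustration in complex128, mirroring the (1e-19 + 1e-18j) table entry:

    import torch

    z = torch.tensor(1e-19 + 1e-18j, dtype=torch.complex128)

    # log1p keeps the tiny real part ...
    assert abs(torch.log1p(z).real.item() - 1e-19) < 1e-25

    # ... while the naive formulation loses it to rounding in (1 + z)
    assert torch.log(1 + z).real.item() < 1e-30
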
@@ -1292,7 +1292,7 @@ class TestUnaryUfuncs(TestCase):
zero_to_large = torch.tensor([0.0, 1.0, 1e3], **tkwargs)
small_to_inf = torch.tensor([1e-3, 1.0, float("inf")], **tkwargs)
nans = torch.zeros((3,), **tkwargs) + float("nan")
- inpouts = [
+ inp_outs = [
# (a , x), out
((zeros, small_to_inf), ones),
((small_to_inf, zeros), zeros),

@@ -1302,7 +1302,7 @@ class TestUnaryUfuncs(TestCase):
((infs, infs), nans),
((-small_to_inf, small_to_inf), nans),
]
- for inputs, output in inpouts:
+ for inputs, output in inp_outs:
input0, input1 = inputs
calc = torch.igamma(input0, input1)
if torch.all(torch.isnan(output)):

@@ -1321,7 +1321,7 @@ class TestUnaryUfuncs(TestCase):
zero_to_large = torch.tensor([0.0, 1.0, 1e3], **tkwargs)
small_to_inf = torch.tensor([1e-3, 1.0, float("inf")], **tkwargs)
nans = torch.zeros((3,), **tkwargs) + float("nan")
- inpouts = [
+ inp_outs = [
# (a , x), out
((zeros, small_to_inf), zeros),
((small_to_inf, zeros), ones),

@@ -1331,7 +1331,7 @@ class TestUnaryUfuncs(TestCase):
((infs, infs), nans),
((-small_to_inf, small_to_inf), nans),
]
- for inputs, output in inpouts:
+ for inputs, output in inp_outs:
input0, input1 = inputs
calc = torch.igammac(input0, input1)
if torch.all(torch.isnan(output)):

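The rows above tabulate boundary values of the regularized incomplete gamma functions. Restated directly on plain CPU float32 tensors (only the rows visible in the hunks are asserted):

    import torch

    zeros = torch.zeros(3)
    ones = torch.ones(3)
    small_to_inf = torch.tensor([1e-3, 1.0, float("inf")])

    assert torch.allclose(torch.igamma(zeros, small_to_inf), ones)    # P(0, x > 0) = 1
    assert torch.allclose(torch.igamma(small_to_inf, zeros), zeros)   # P(a > 0, 0) = 0
    assert torch.allclose(torch.igammac(zeros, small_to_inf), zeros)  # Q(0, x > 0) = 0
    assert torch.allclose(torch.igammac(small_to_inf, zeros), ones)   # Q(a > 0, 0) = 1
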
@@ -1955,7 +1955,7 @@ class TestOldViewOps(TestCase):
with self.assertRaises(numpy_err, msg=msg):
np.array_split(a.cpu().numpy(), sections_or_indices, dim)

- # addtional tests for tensor_split with tensor_indices_or_sections
+ # additional tests for tensor_split with tensor_indices_or_sections
with self.assertRaisesRegex(
RuntimeError,
r"tensor_split expected tensor_indices_or_sections to have dtype of long, but got Float",

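The error message above pins down the dtype requirement for tensor-valued split points. In short (example tensor is illustrative):

    import torch

    x = torch.arange(10)

    # A long index tensor selects split points ...
    parts = torch.tensor_split(x, torch.tensor([2, 5]))
    assert [p.tolist() for p in parts] == [[0, 1], [2, 3, 4], [5, 6, 7, 8, 9]]

    # ... while a float index tensor is rejected with the RuntimeError quoted above
    try:
        torch.tensor_split(x, torch.tensor([2.0, 5.0]))
    except RuntimeError:
        pass
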
@@ -159,7 +159,7 @@ class WeakTest(TestCase):
self.assertRaises(KeyError, d.__delitem__, o)
self.assertRaises(KeyError, d.__getitem__, o)

- # If a key isn't of a weakly referencable type, __getitem__ and
+ # If a key isn't of a weakly referenceable type, __getitem__ and
# __setitem__ raise TypeError. __delitem__ should too.
self.assertRaises(TypeError, d.__delitem__, 13)
self.assertRaises(TypeError, d.__getitem__, 13)

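The `d` in the hunk above appears to be the test's weak-key mapping; the same contract can be seen with the standard library's weakref.WeakKeyDictionary, used here purely as an illustration:

    import weakref

    class Obj:
        pass                     # plain instances are weakly referenceable

    d = weakref.WeakKeyDictionary()
    o = Obj()
    d[o] = "value"
    assert d[o] == "value"

    try:
        d[13] = "value"          # ints cannot be weakly referenced
    except TypeError:
        pass
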