pytorch/test/test_testing.py
Jane Xu 09ce9b5877 Store test file in S3 as well for every TestSuite (#52869)
Summary:
We want to store the file name that triggers each test suite so that we can use this data to categorize those test files.

~~After considering several solutions, this one is the most backwards compatible, and the current test cases in test_testing.py for print test stats don't break.~~

The previous plan did not work, as there are multiple Python test jobs that spawn the same suites. Instead, the new S3 format stores test files (e.g., `test_nn` and `distributed/test_distributed_fork`), each of which contains the suites it spawns, each of which in turn contains the test cases run within that suite. (Currently, there is no top layer of test files.)
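For illustration, a minimal sketch of the new nesting (the field layout mirrors the `Version2Report` structure exercised by the tests in this PR; the suite/case names here are hypothetical, and the `build_*` metadata fields are omitted):

```python
report = {
    'format_version': 2,
    'total_seconds': 4.781,
    'files': {
        'test_nn': {                   # test file
            'total_seconds': 4.781,
            'suites': {
                'TestNN': {            # suite spawned by that file
                    'total_seconds': 4.781,
                    'cases': {
                        'test_conv2d': {'seconds': 4.781, 'status': None},
                    },
                },
            },
        },
    },
}
```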

Because of this major structural change, test_history.py and print_test_stats.py have been substantially reworked (thank you, samestep!) to keep the new format backwards compatible.

Old test plan:
Make sure that the data is as expected in S3 after https://github.com/pytorch/pytorch/pull/52873 finishes.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/52869

Test Plan: Added tests to test_testing.py which pass, and CI.

Reviewed By: samestep

Differential Revision: D26672561

Pulled By: janeyx99

fbshipit-source-id: f46b91e16c1d9de5e0cb9bfa648b6448d979257e
2021-03-02 07:36:00 -08:00

import torch
import math
from pathlib import PurePosixPath
from torch.testing._internal.common_utils import \
(TestCase, make_tensor, run_tests, slowTest)
from torch.testing._internal.common_device_type import \
(instantiate_device_type_tests, onlyCUDA, onlyOnCPUAndCUDA, dtypes)
from torch.testing._internal import mypy_wrapper
from torch.testing._internal import print_test_stats
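# This file tests the testing utilities themselves: the TestCase comparison
# helpers and make_tensor (TestTesting), the mypy wrapper's glob matching
# (TestMypyWrapper), and the print_test_stats report analysis (TestPrintTestStats).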
# For testing TestCase methods and torch.testing functions
class TestTesting(TestCase):
# Ensure that assertEqual handles numpy arrays properly
@dtypes(*(torch.testing.get_all_dtypes(include_half=True, include_bfloat16=False,
include_bool=True, include_complex=True)))
def test_assertEqual_numpy(self, device, dtype):
S = 10
test_sizes = [
(),
(0,),
(S,),
(S, S),
(0, S),
(S, 0)]
for test_size in test_sizes:
a = make_tensor(test_size, device, dtype, low=-5, high=5)
a_n = a.cpu().numpy()
msg = f'size: {test_size}'
self.assertEqual(a_n, a, rtol=0, atol=0, msg=msg)
self.assertEqual(a, a_n, rtol=0, atol=0, msg=msg)
self.assertEqual(a_n, a_n, rtol=0, atol=0, msg=msg)
# Tests that when rtol or atol (including self.precision) is set, then
# the other is zeroed.
# TODO: this is legacy behavior and should be updated after test
# precisions are reviewed to be consistent with torch.isclose.
@onlyOnCPUAndCUDA
def test__comparetensors_legacy(self, device):
a = torch.tensor((10000000.,))
b = torch.tensor((10000002.,))
x = torch.tensor((1.,))
y = torch.tensor((1. + 1e-5,))
# Helper for reusing the tensor values as scalars
def _scalar_helper(a, b, rtol=None, atol=None):
return self._compareScalars(a.item(), b.item(), rtol=rtol, atol=atol)
for op in (self._compareTensors, _scalar_helper):
# Tests default
result, debug_msg = op(a, b)
self.assertTrue(result)
# Tests setting atol
result, debug_msg = op(a, b, atol=2, rtol=0)
self.assertTrue(result)
# Tests setting atol too small
result, debug_msg = op(a, b, atol=1, rtol=0)
self.assertFalse(result)
# Tests setting rtol just large enough
result, debug_msg = op(x, y, atol=0, rtol=1.05e-5)
self.assertTrue(result)
# Tests setting rtol too small
result, debug_msg = op(x, y, atol=0, rtol=1e-5)
self.assertFalse(result)
@onlyOnCPUAndCUDA
def test__comparescalars_debug_msg(self, device):
# float x float
result, debug_msg = self._compareScalars(4., 7.)
expected_msg = ("Comparing 4.0 and 7.0 gives a difference of 3.0, "
"but the allowed difference with rtol=1.3e-06 and "
"atol=1e-05 is only 1.9100000000000003e-05!")
self.assertEqual(debug_msg, expected_msg)
# complex x complex, real difference
result, debug_msg = self._compareScalars(complex(1, 3), complex(3, 1))
expected_msg = ("Comparing the real part 1.0 and 3.0 gives a difference "
"of 2.0, but the allowed difference with rtol=1.3e-06 "
"and atol=1e-05 is only 1.39e-05!")
self.assertEqual(debug_msg, expected_msg)
# complex x complex, imaginary difference
result, debug_msg = self._compareScalars(complex(1, 3), complex(1, 5.5))
expected_msg = ("Comparing the imaginary part 3.0 and 5.5 gives a "
"difference of 2.5, but the allowed difference with "
"rtol=1.3e-06 and atol=1e-05 is only 1.715e-05!")
self.assertEqual(debug_msg, expected_msg)
# complex x int
result, debug_msg = self._compareScalars(complex(1, -2), 1)
expected_msg = ("Comparing the imaginary part -2.0 and 0.0 gives a "
"difference of 2.0, but the allowed difference with "
"rtol=1.3e-06 and atol=1e-05 is only 1e-05!")
self.assertEqual(debug_msg, expected_msg)
# NaN x NaN, equal_nan=False
result, debug_msg = self._compareScalars(float('nan'), float('nan'), equal_nan=False)
expected_msg = ("Found nan and nan while comparing and either one is "
"nan and the other isn't, or both are nan and equal_nan "
"is False")
self.assertEqual(debug_msg, expected_msg)
# Checks that compareTensors provides the correct debug info
@onlyOnCPUAndCUDA
def test__comparetensors_debug_msg(self, device):
# Acquires atol that will be used
atol = max(1e-05, self.precision)
# Checks float tensor comparisons (2D tensor)
a = torch.tensor(((0, 6), (7, 9)), device=device, dtype=torch.float32)
b = torch.tensor(((0, 7), (7, 22)), device=device, dtype=torch.float32)
result, debug_msg = self._compareTensors(a, b)
expected_msg = ("With rtol=1.3e-06 and atol={0}, found 2 element(s) (out of 4) "
"whose difference(s) exceeded the margin of error (including 0 nan comparisons). "
"The greatest difference was 13.0 (9.0 vs. 22.0), "
"which occurred at index (1, 1).").format(atol)
self.assertEqual(debug_msg, expected_msg)
# Checks float tensor comparisons (with extremal values)
a = torch.tensor((float('inf'), 5, float('inf')), device=device, dtype=torch.float32)
b = torch.tensor((float('inf'), float('nan'), float('-inf')), device=device, dtype=torch.float32)
result, debug_msg = self._compareTensors(a, b)
expected_msg = ("With rtol=1.3e-06 and atol={0}, found 2 element(s) (out of 3) "
"whose difference(s) exceeded the margin of error (including 1 nan comparisons). "
"The greatest difference was nan (5.0 vs. nan), "
"which occurred at index 1.").format(atol)
self.assertEqual(debug_msg, expected_msg)
# Checks float tensor comparisons (with finite vs nan differences)
a = torch.tensor((20, -6), device=device, dtype=torch.float32)
b = torch.tensor((-1, float('nan')), device=device, dtype=torch.float32)
result, debug_msg = self._compareTensors(a, b)
expected_msg = ("With rtol=1.3e-06 and atol={0}, found 2 element(s) (out of 2) "
"whose difference(s) exceeded the margin of error (including 1 nan comparisons). "
"The greatest difference was nan (-6.0 vs. nan), "
"which occurred at index 1.").format(atol)
self.assertEqual(debug_msg, expected_msg)
# Checks int tensor comparisons (1D tensor)
a = torch.tensor((1, 2, 3, 4), device=device)
b = torch.tensor((2, 5, 3, 4), device=device)
result, debug_msg = self._compareTensors(a, b)
expected_msg = ("Found 2 different element(s) (out of 4), "
"with the greatest difference of 3 (2 vs. 5) "
"occuring at index 1.")
self.assertEqual(debug_msg, expected_msg)
# Checks bool tensor comparisons (0D tensor)
a = torch.tensor((True), device=device)
b = torch.tensor((False), device=device)
result, debug_msg = self._compareTensors(a, b)
expected_msg = ("Found 1 different element(s) (out of 1), "
"with the greatest difference of 1 (1 vs. 0) "
"occuring at index 0.")
self.assertEqual(debug_msg, expected_msg)
# Checks complex tensor comparisons (real part)
a = torch.tensor((1 - 1j, 4 + 3j), device=device)
b = torch.tensor((1 - 1j, 1 + 3j), device=device)
result, debug_msg = self._compareTensors(a, b)
expected_msg = ("Real parts failed to compare as equal! "
"With rtol=1.3e-06 and atol={0}, "
"found 1 element(s) (out of 2) whose difference(s) exceeded the "
"margin of error (including 0 nan comparisons). The greatest difference was "
"3.0 (4.0 vs. 1.0), which occurred at index 1.").format(atol)
self.assertEqual(debug_msg, expected_msg)
# Checks complex tensor comparisons (imaginary part)
a = torch.tensor((1 - 1j, 4 + 3j), device=device)
b = torch.tensor((1 - 1j, 4 - 21j), device=device)
result, debug_msg = self._compareTensors(a, b)
expected_msg = ("Imaginary parts failed to compare as equal! "
"With rtol=1.3e-06 and atol={0}, "
"found 1 element(s) (out of 2) whose difference(s) exceeded the "
"margin of error (including 0 nan comparisons). The greatest difference was "
"24.0 (3.0 vs. -21.0), which occurred at index 1.").format(atol)
self.assertEqual(debug_msg, expected_msg)
# Checks size mismatch
a = torch.tensor((1, 2), device=device)
b = torch.tensor((3), device=device)
result, debug_msg = self._compareTensors(a, b)
expected_msg = ("Attempted to compare equality of tensors "
"with different sizes. Got sizes torch.Size([2]) and torch.Size([]).")
self.assertEqual(debug_msg, expected_msg)
# Checks dtype mismatch
a = torch.tensor((1, 2), device=device, dtype=torch.long)
b = torch.tensor((1, 2), device=device, dtype=torch.float32)
result, debug_msg = self._compareTensors(a, b, exact_dtype=True)
expected_msg = ("Attempted to compare equality of tensors "
"with different dtypes. Got dtypes torch.int64 and torch.float32.")
self.assertEqual(debug_msg, expected_msg)
# Checks device mismatch
if self.device_type == 'cuda':
a = torch.tensor((5), device='cpu')
b = torch.tensor((5), device=device)
result, debug_msg = self._compareTensors(a, b, exact_device=True)
expected_msg = ("Attempted to compare equality of tensors "
"on different devices! Got devices cpu and cuda:0.")
self.assertEqual(debug_msg, expected_msg)
# Helper for testing _compareTensors and _compareScalars
# Works on single element tensors
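# Each entry in `tests` is an (lhs, rhs, expected_result) triple; the same
# convention is used by _isclose_helper below.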
def _comparetensors_helper(self, tests, device, dtype, equal_nan, exact_dtype=True, atol=1e-08, rtol=1e-05):
for test in tests:
a = torch.tensor((test[0],), device=device, dtype=dtype)
b = torch.tensor((test[1],), device=device, dtype=dtype)
# Tensor x Tensor comparison
compare_result, debug_msg = self._compareTensors(a, b, rtol=rtol, atol=atol,
equal_nan=equal_nan,
exact_dtype=exact_dtype)
self.assertEqual(compare_result, test[2])
# Scalar x Scalar comparison
compare_result, debug_msg = self._compareScalars(a.item(), b.item(),
rtol=rtol, atol=atol,
equal_nan=equal_nan)
self.assertEqual(compare_result, test[2])
def _isclose_helper(self, tests, device, dtype, equal_nan, atol=1e-08, rtol=1e-05):
for test in tests:
a = torch.tensor((test[0],), device=device, dtype=dtype)
b = torch.tensor((test[1],), device=device, dtype=dtype)
actual = torch.isclose(a, b, equal_nan=equal_nan, atol=atol, rtol=rtol)
expected = test[2]
self.assertEqual(actual.item(), expected)
# torch.isclose is not implemented for bool tensors
# see https://github.com/pytorch/pytorch/issues/33048
def test_isclose_comparetensors_bool(self, device):
tests = (
(True, True, True),
(False, False, True),
(True, False, False),
(False, True, False),
)
with self.assertRaises(RuntimeError):
self._isclose_helper(tests, device, torch.bool, False)
self._comparetensors_helper(tests, device, torch.bool, False)
@dtypes(torch.uint8,
torch.int8, torch.int16, torch.int32, torch.int64)
def test_isclose_comparetensors_integer(self, device, dtype):
tests = (
(0, 0, True),
(0, 1, False),
(1, 0, False),
)
self._isclose_helper(tests, device, dtype, False)
# atol and rtol tests
tests = [
(0, 1, True),
(1, 0, False),
(1, 3, True),
]
self._isclose_helper(tests, device, dtype, False, atol=.5, rtol=.5)
self._comparetensors_helper(tests, device, dtype, False, atol=.5, rtol=.5)
if dtype is torch.uint8:
tests = [
(-1, 1, False),
(1, -1, False)
]
else:
tests = [
(-1, 1, True),
(1, -1, True)
]
self._isclose_helper(tests, device, dtype, False, atol=1.5, rtol=.5)
self._comparetensors_helper(tests, device, dtype, False, atol=1.5, rtol=.5)
@onlyOnCPUAndCUDA
@dtypes(torch.float16, torch.float32, torch.float64)
def test_isclose_comparetensors_float(self, device, dtype):
tests = (
(0, 0, True),
(0, -1, False),
(float('inf'), float('inf'), True),
(-float('inf'), float('inf'), False),
(float('inf'), float('nan'), False),
(float('nan'), float('nan'), False),
(0, float('nan'), False),
(1, 1, True),
)
self._isclose_helper(tests, device, dtype, False)
self._comparetensors_helper(tests, device, dtype, False)
# atol and rtol tests
eps = 1e-2 if dtype is torch.half else 1e-6
tests = (
(0, 1, True),
(0, 1 + eps, False),
(1, 0, False),
(1, 3, True),
(1 - eps, 3, False),
(-.25, .5, True),
(-.25 - eps, .5, False),
(.25, -.5, True),
(.25 + eps, -.5, False),
)
self._isclose_helper(tests, device, dtype, False, atol=.5, rtol=.5)
self._comparetensors_helper(tests, device, dtype, False, atol=.5, rtol=.5)
# equal_nan = True tests
tests = (
(0, float('nan'), False),
(float('inf'), float('nan'), False),
(float('nan'), float('nan'), True),
)
self._isclose_helper(tests, device, dtype, True)
self._comparetensors_helper(tests, device, dtype, True)
# torch.isclose with equal_nan=True is not implemented for complex inputs
# see https://github.com/numpy/numpy/issues/15959
# Note: compareTensors compares the real and imaginary parts of complex
# tensors separately, unlike isclose.
@dtypes(torch.complex64, torch.complex128)
def test_isclose_comparetensors_complex(self, device, dtype):
tests = (
(complex(1, 1), complex(1, 1 + 1e-8), True),
(complex(0, 1), complex(1, 1), False),
(complex(1, 1), complex(1, 0), False),
(complex(1, 1), complex(1, float('nan')), False),
(complex(1, float('nan')), complex(1, float('nan')), False),
(complex(1, 1), complex(1, float('inf')), False),
(complex(float('inf'), 1), complex(1, float('inf')), False),
(complex(-float('inf'), 1), complex(1, float('inf')), False),
(complex(-float('inf'), 1), complex(float('inf'), 1), False),
(complex(float('inf'), 1), complex(float('inf'), 1), True),
(complex(float('inf'), 1), complex(float('inf'), 1 + 1e-4), False),
)
self._isclose_helper(tests, device, dtype, False)
self._comparetensors_helper(tests, device, dtype, False)
# atol and rtol tests
eps = 1e-6
tests = (
# Complex versions of float tests (real part)
(complex(0, 0), complex(1, 0), True),
(complex(0, 0), complex(1 + eps, 0), False),
(complex(1, 0), complex(0, 0), False),
(complex(1, 0), complex(3, 0), True),
(complex(1 - eps, 0), complex(3, 0), False),
(complex(-.25, 0), complex(.5, 0), True),
(complex(-.25 - eps, 0), complex(.5, 0), False),
(complex(.25, 0), complex(-.5, 0), True),
(complex(.25 + eps, 0), complex(-.5, 0), False),
# Complex versions of float tests (imaginary part)
(complex(0, 0), complex(0, 1), True),
(complex(0, 0), complex(0, 1 + eps), False),
(complex(0, 1), complex(0, 0), False),
(complex(0, 1), complex(0, 3), True),
(complex(0, 1 - eps), complex(0, 3), False),
(complex(0, -.25), complex(0, .5), True),
(complex(0, -.25 - eps), complex(0, .5), False),
(complex(0, .25), complex(0, -.5), True),
(complex(0, .25 + eps), complex(0, -.5), False),
)
self._isclose_helper(tests, device, dtype, False, atol=.5, rtol=.5)
self._comparetensors_helper(tests, device, dtype, False, atol=.5, rtol=.5)
# atol and rtol tests for isclose
tests = (
# Complex-specific tests
(complex(1, -1), complex(-1, 1), False),
(complex(1, -1), complex(2, -2), True),
(complex(-math.sqrt(2), math.sqrt(2)),
complex(-math.sqrt(.5), math.sqrt(.5)), True),
(complex(-math.sqrt(2), math.sqrt(2)),
complex(-math.sqrt(.501), math.sqrt(.499)), False),
(complex(2, 4), complex(1., 8.8523607), True),
(complex(2, 4), complex(1., 8.8523607 + eps), False),
(complex(1, 99), complex(4, 100), True),
)
self._isclose_helper(tests, device, dtype, False, atol=.5, rtol=.5)
# atol and rtol tests for compareTensors
tests = (
(complex(1, -1), complex(-1, 1), False),
(complex(1, -1), complex(2, -2), True),
(complex(1, 99), complex(4, 100), False),
)
self._comparetensors_helper(tests, device, dtype, False, atol=.5, rtol=.5)
# equal_nan = True tests
tests = (
(complex(1, 1), complex(1, float('nan')), False),
(complex(float('nan'), 1), complex(1, float('nan')), False),
(complex(float('nan'), 1), complex(float('nan'), 1), True),
)
with self.assertRaises(RuntimeError):
self._isclose_helper(tests, device, dtype, True)
self._comparetensors_helper(tests, device, dtype, True)
# Tests that isclose with rtol or atol values less than zero throws a
# RuntimeError
@dtypes(torch.bool, torch.uint8,
torch.int8, torch.int16, torch.int32, torch.int64,
torch.float16, torch.float32, torch.float64)
def test_isclose_atol_rtol_greater_than_zero(self, device, dtype):
t = torch.tensor((1,), device=device, dtype=dtype)
with self.assertRaises(RuntimeError):
torch.isclose(t, t, atol=-1, rtol=1)
with self.assertRaises(RuntimeError):
torch.isclose(t, t, atol=1, rtol=-1)
with self.assertRaises(RuntimeError):
torch.isclose(t, t, atol=-1, rtol=-1)
@dtypes(torch.bool, torch.long, torch.float, torch.cfloat)
def test_make_tensor(self, device, dtype):
def check(size, low, high, requires_grad, discontiguous):
t = make_tensor(size, device, dtype, low=low, high=high,
requires_grad=requires_grad, discontiguous=discontiguous)
self.assertEqual(t.shape, size)
self.assertEqual(t.device, torch.device(device))
self.assertEqual(t.dtype, dtype)
low = -9 if low is None else low
high = 9 if high is None else high
if t.numel() > 0 and dtype in [torch.long, torch.float]:
self.assertTrue(t.le(high).logical_and(t.ge(low)).all().item())
if dtype in [torch.float, torch.cfloat]:
self.assertEqual(t.requires_grad, requires_grad)
else:
self.assertFalse(t.requires_grad)
if t.numel() > 1:
self.assertEqual(t.is_contiguous(), not discontiguous)
else:
self.assertTrue(t.is_contiguous())
for size in (tuple(), (0,), (1,), (1, 1), (2,), (2, 3), (8, 16, 32)):
check(size, None, None, False, False)
check(size, 2, 4, True, True)
def test_assert_messages(self, device):
self.assertIsNone(self._get_assert_msg(msg=None))
self.assertEqual("\nno_debug_msg", self._get_assert_msg("no_debug_msg"))
self.assertEqual("no_user_msg", self._get_assert_msg(msg=None, debug_msg="no_user_msg"))
self.assertEqual("debug_msg\nuser_msg", self._get_assert_msg(msg="user_msg", debug_msg="debug_msg"))
# The following tests (test_cuda_assert_*) ensure the test suite terminates early
# when a CUDA assert is thrown, since all subsequent tests would fail otherwise.
# These tests are slow because each one spawns another process to run a test suite.
# See: https://github.com/pytorch/pytorch/issues/49019
@onlyCUDA
@slowTest
def test_cuda_assert_should_stop_common_utils_test_suite(self, device):
# Test that the common_utils.py override terminates the test suite early on a CUDA assert.
stderr = TestCase.runWithPytorchAPIUsageStderr("""\
#!/usr/bin/env python
import torch
from torch.testing._internal.common_utils import (TestCase, run_tests, slowTest)
class TestThatContainsCUDAAssertFailure(TestCase):
@slowTest
def test_throw_unrecoverable_cuda_exception(self):
x = torch.rand(10, device='cuda')
# cause unrecoverable CUDA exception, recoverable on CPU
y = x[torch.tensor([25])].cpu()
@slowTest
def test_trivial_passing_test_case_on_cpu_cuda(self):
x1 = torch.tensor([0., 1.], device='cuda')
x2 = torch.tensor([0., 1.], device='cpu')
self.assertEqual(x1, x2)
if __name__ == '__main__':
run_tests()
""")
# should capture CUDA error
self.assertIn('CUDA error: device-side assert triggered', stderr)
# should run only 1 test because it throws an unrecoverable error.
self.assertIn('Ran 1 test', stderr)
@onlyCUDA
@slowTest
def test_cuda_assert_should_stop_common_device_type_test_suite(self, device):
# Test that the common_device_type.py override terminates the test suite early on a CUDA assert.
stderr = TestCase.runWithPytorchAPIUsageStderr("""\
#!/usr/bin/env python
import torch
from torch.testing._internal.common_utils import (TestCase, run_tests, slowTest)
from torch.testing._internal.common_device_type import instantiate_device_type_tests
class TestThatContainsCUDAAssertFailure(TestCase):
@slowTest
def test_throw_unrecoverable_cuda_exception(self, device):
x = torch.rand(10, device=device)
# cause unrecoverable CUDA exception, recoverable on CPU
y = x[torch.tensor([25])].cpu()
@slowTest
def test_trivial_passing_test_case_on_cpu_cuda(self, device):
x1 = torch.tensor([0., 1.], device=device)
x2 = torch.tensor([0., 1.], device='cpu')
self.assertEqual(x1, x2)
instantiate_device_type_tests(
TestThatContainsCUDAAssertFailure,
globals(),
only_for='cuda'
)
if __name__ == '__main__':
run_tests()
""")
# should capture CUDA error
self.assertIn('CUDA error: device-side assert triggered', stderr)
# should run only 1 test because it throws an unrecoverable error.
self.assertIn('Ran 1 test', stderr)
@onlyCUDA
@slowTest
def test_cuda_assert_should_not_stop_common_distributed_test_suite(self, device):
# Test that the common_distributed.py override does not terminate early on a CUDA assert.
stderr = TestCase.runWithPytorchAPIUsageStderr("""\
#!/usr/bin/env python
import torch
from torch.testing._internal.common_utils import (run_tests, slowTest)
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_distributed import MultiProcessTestCase
class TestThatContainsCUDAAssertFailure(MultiProcessTestCase):
@slowTest
def test_throw_unrecoverable_cuda_exception(self, device):
x = torch.rand(10, device=device)
# cause unrecoverable CUDA exception, recoverable on CPU
y = x[torch.tensor([25])].cpu()
@slowTest
def test_trivial_passing_test_case_on_cpu_cuda(self, device):
x1 = torch.tensor([0., 1.], device=device)
x2 = torch.tensor([0., 1.], device='cpu')
self.assertEqual(x1, x2)
instantiate_device_type_tests(
TestThatContainsCUDAAssertFailure,
globals(),
only_for='cuda'
)
if __name__ == '__main__':
run_tests()
""")
# we are currently disabling CUDA early termination for distributed tests.
self.assertIn('Ran 2 test', stderr)
instantiate_device_type_tests(TestTesting, globals())
class TestMypyWrapper(TestCase):
def test_glob(self):
# can match individual files
self.assertTrue(mypy_wrapper.glob(
pattern='test/test_torch.py',
filename=PurePosixPath('test/test_torch.py'),
))
self.assertFalse(mypy_wrapper.glob(
pattern='test/test_torch.py',
filename=PurePosixPath('test/test_testing.py'),
))
# dir matters
self.assertFalse(mypy_wrapper.glob(
pattern='tools/codegen/utils.py',
filename=PurePosixPath('torch/nn/modules.py'),
))
self.assertTrue(mypy_wrapper.glob(
pattern='setup.py',
filename=PurePosixPath('setup.py'),
))
self.assertFalse(mypy_wrapper.glob(
pattern='setup.py',
filename=PurePosixPath('foo/setup.py'),
))
self.assertTrue(mypy_wrapper.glob(
pattern='foo/setup.py',
filename=PurePosixPath('foo/setup.py'),
))
# can match dirs
self.assertTrue(mypy_wrapper.glob(
pattern='torch',
filename=PurePosixPath('torch/random.py'),
))
self.assertTrue(mypy_wrapper.glob(
pattern='torch',
filename=PurePosixPath('torch/nn/cpp.py'),
))
self.assertFalse(mypy_wrapper.glob(
pattern='torch',
filename=PurePosixPath('tools/fast_nvcc/fast_nvcc.py'),
))
# can match wildcards
self.assertTrue(mypy_wrapper.glob(
pattern='tools/autograd/*.py',
filename=PurePosixPath('tools/autograd/gen_autograd.py'),
))
self.assertFalse(mypy_wrapper.glob(
pattern='tools/autograd/*.py',
filename=PurePosixPath('tools/autograd/deprecated.yaml'),
))
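# Produces a fake 40-character commit SHA by repeating one character, standing
# in for real git hashes in the report tests below.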
def fakehash(char):
return char * 40
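# Empty build metadata fields shared by every fake report below.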
def dummy_meta_meta() -> print_test_stats.ReportMetaMeta:
return {
'build_pr': '',
'build_tag': '',
'build_sha1': '',
'build_branch': '',
'build_job': '',
'build_workflow_id': '',
}
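# Builds a v1-format case record; errored/failed/skipped all default to False,
# i.e. a passing case.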
def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
return {
'name': name,
'seconds': seconds,
'errored': errored,
'failed': failed,
'skipped': skipped,
}
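# Builds a v1-format report: suites keyed by name at the top level, each with
# its cases and a total_seconds aggregated from them.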
def make_report_v1(tests) -> print_test_stats.Version1Report:
suites = {
suite_name: {
'total_seconds': sum(case['seconds'] for case in cases),
'cases': cases,
}
for suite_name, cases in tests.items()
}
return {
**dummy_meta_meta(),
'total_seconds': sum(s['total_seconds'] for s in suites.values()),
'suites': suites,
}
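# v2 cases carry only a duration and a status; status=None denotes a passing case.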
def make_case_v2(seconds, status=None) -> print_test_stats.Version2Case:
return {
'seconds': seconds,
'status': status,
}
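# Builds a v2-format report: like v1, but with an extra top-level 'files' layer
# keyed by test file, plus an explicit format_version field.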
def make_report_v2(tests) -> print_test_stats.Version2Report:
files = {}
for file_name, file_suites in tests.items():
suites = {
suite_name: {
'total_seconds': sum(case['seconds'] for case in cases.values()),
'cases': cases,
}
for suite_name, cases in file_suites.items()
}
files[file_name] = {
'suites': suites,
'total_seconds': sum(suite['total_seconds'] for suite in suites.values()),
}
return {
**dummy_meta_meta(),
'format_version': 2,
'total_seconds': sum(s['total_seconds'] for s in files.values()),
'files': files,
}
class TestPrintTestStats(TestCase):
maxDiff = None
version1_report: print_test_stats.Version1Report = make_report_v1({
# input ordering of the suites is ignored
'Grault': [
# not printed: status same and time similar
makecase('test_grault0', 4.78, failed=True),
# status same, but time increased a lot
makecase('test_grault2', 1.473, errored=True),
],
# individual test times changed, but not the overall suite time
'Qux': [
# input ordering of the test cases is ignored
makecase('test_qux1', 0.001, skipped=True),
makecase('test_qux6', 0.002, skipped=True),
# time in bounds, but status changed
makecase('test_qux4', 7.158, failed=True),
# not printed because it's the same as before
makecase('test_qux7', 0.003, skipped=True),
makecase('test_qux5', 11.968),
makecase('test_qux3', 23.496),
],
# new test suite
'Bar': [
makecase('test_bar2', 3.742, failed=True),
makecase('test_bar1', 50.447),
],
# overall suite time changed but no individual tests
'Norf': [
makecase('test_norf1', 3),
makecase('test_norf2', 3),
makecase('test_norf3', 3),
makecase('test_norf4', 3),
],
# suite doesn't show up if it doesn't change enough
'Foo': [
makecase('test_foo1', 42),
makecase('test_foo2', 56),
],
})
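# Roughly the same content as the v1 report above, redistributed across three
# test files (test_a, test_b, test_c) per the v2 format.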
version2_report: print_test_stats.Version2Report = make_report_v2(
{
'test_a': {
'Grault': {
'test_grault0': make_case_v2(4.78, 'failed'),
'test_grault2': make_case_v2(1.473, 'errored'),
},
'Qux': {
'test_qux1': make_case_v2(0.001, 'skipped'),
'test_qux6': make_case_v2(0.002, 'skipped'),
'test_qux4': make_case_v2(7.158, 'failed'),
'test_qux7': make_case_v2(0.003, 'skipped'),
'test_qux8': make_case_v2(11.968),
'test_qux3': make_case_v2(23.496),
}
},
'test_b': {
'Bar': {
'test_bar2': make_case_v2(3.742, 'failed'),
'test_bar1': make_case_v2(50.447),
},
# overall suite time changed but no individual tests
'Norf': {
'test_norf1': make_case_v2(3),
'test_norf2': make_case_v2(3),
'test_norf3': make_case_v2(3),
'test_norf4': make_case_v2(3),
},
},
'test_c': {
'Foo': {
'test_foo1': make_case_v2(42),
'test_foo2': make_case_v2(56),
},
}
})
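# simplify normalizes both report versions into the files -> suites -> cases
# shape; v1 reports, which lack a file layer, land under the empty-string file key.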
def test_simplify(self):
self.assertEqual(
{
'': {
'Bar': {
'test_bar1': {'seconds': 50.447, 'status': None},
'test_bar2': {'seconds': 3.742, 'status': 'failed'},
},
'Foo': {
'test_foo1': {'seconds': 42, 'status': None},
'test_foo2': {'seconds': 56, 'status': None},
},
'Grault': {
'test_grault0': {'seconds': 4.78, 'status': 'failed'},
'test_grault2': {'seconds': 1.473, 'status': 'errored'},
},
'Norf': {
'test_norf1': {'seconds': 3, 'status': None},
'test_norf3': {'seconds': 3, 'status': None},
'test_norf2': {'seconds': 3, 'status': None},
'test_norf4': {'seconds': 3, 'status': None},
},
'Qux': {
'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
'test_qux3': {'seconds': 23.496, 'status': None},
'test_qux4': {'seconds': 7.158, 'status': 'failed'},
'test_qux5': {'seconds': 11.968, 'status': None},
'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
},
},
},
print_test_stats.simplify(self.version1_report)
)
self.assertEqual(
{
'test_a': {
'Grault': {
'test_grault0': {'seconds': 4.78, 'status': 'failed'},
'test_grault2': {'seconds': 1.473, 'status': 'errored'},
},
'Qux': {
'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
'test_qux3': {'seconds': 23.496, 'status': None},
'test_qux4': {'seconds': 7.158, 'status': 'failed'},
'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
'test_qux8': {'seconds': 11.968, 'status': None},
},
},
'test_b': {
'Bar': {
'test_bar1': {'seconds': 50.447, 'status': None},
'test_bar2': {'seconds': 3.742, 'status': 'failed'},
},
'Norf': {
'test_norf1': {'seconds': 3, 'status': None},
'test_norf2': {'seconds': 3, 'status': None},
'test_norf3': {'seconds': 3, 'status': None},
'test_norf4': {'seconds': 3, 'status': None},
},
},
'test_c': {
'Foo': {
'test_foo1': {'seconds': 42, 'status': None},
'test_foo2': {'seconds': 56, 'status': None},
},
},
},
print_test_stats.simplify(self.version2_report),
)
def test_analysis(self):
head_report = self.version1_report
base_reports = {
# bbbb has no reports, so base is cccc instead
fakehash('b'): [],
fakehash('c'): [
make_report_v1({
'Baz': [
makecase('test_baz2', 13.605),
# no other recent report has this test as skipped
makecase('test_baz1', 0.004, skipped=True),
],
'Foo': [
makecase('test_foo1', 43),
# test added since dddd
makecase('test_foo2', 57),
],
'Grault': [
makecase('test_grault0', 4.88, failed=True),
makecase('test_grault1', 11.967, failed=True),
makecase('test_grault2', 0.395, errored=True),
makecase('test_grault3', 30.460),
],
'Norf': [
makecase('test_norf1', 2),
makecase('test_norf2', 2),
makecase('test_norf3', 2),
makecase('test_norf4', 2),
],
'Qux': [
makecase('test_qux3', 4.978, errored=True),
makecase('test_qux7', 0.002, skipped=True),
makecase('test_qux2', 5.618),
makecase('test_qux4', 7.766, errored=True),
makecase('test_qux6', 23.589, failed=True),
],
}),
],
fakehash('d'): [
make_report_v1({
'Foo': [
makecase('test_foo1', 40),
# removed in cccc
makecase('test_foo3', 17),
],
'Baz': [
# not skipped, so not included in stdev
makecase('test_baz1', 3.14),
],
'Qux': [
makecase('test_qux7', 0.004, skipped=True),
makecase('test_qux2', 6.02),
makecase('test_qux4', 20.932),
],
'Norf': [
makecase('test_norf1', 3),
makecase('test_norf2', 3),
makecase('test_norf3', 3),
makecase('test_norf4', 3),
],
'Grault': [
makecase('test_grault0', 5, failed=True),
makecase('test_grault1', 14.325, failed=True),
makecase('test_grault2', 0.31, errored=True),
],
}),
],
fakehash('e'): [],
fakehash('f'): [
make_report_v1({
'Foo': [
makecase('test_foo3', 24),
makecase('test_foo1', 43),
],
'Baz': [
makecase('test_baz2', 16.857),
],
'Qux': [
makecase('test_qux2', 6.422),
makecase('test_qux4', 6.382, errored=True),
],
'Norf': [
makecase('test_norf1', 0.9),
makecase('test_norf3', 0.9),
makecase('test_norf2', 0.9),
makecase('test_norf4', 0.9),
],
'Grault': [
makecase('test_grault0', 4.7, failed=True),
makecase('test_grault1', 13.146, failed=True),
makecase('test_grault2', 0.48, errored=True),
],
}),
],
}
simpler_head = print_test_stats.simplify(head_report)
simpler_base = {}
for commit, reports in base_reports.items():
simpler_base[commit] = [print_test_stats.simplify(r) for r in reports]
analysis = print_test_stats.analyze(
head_report=simpler_head,
base_reports=simpler_base,
)
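# In the anomalies output, '-' marks removed suites/tests, '!' modified ones,
# and '+' added ones.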
self.assertEqual(
'''\
- class Baz:
- # was 15.23s ± 2.30s
-
- def test_baz1: ...
- # was 0.004s (skipped)
-
- def test_baz2: ...
- # was 15.231s ± 2.300s
class Grault:
# was 48.86s ± 1.19s
# now 6.25s
- def test_grault1: ...
- # was 13.146s ± 1.179s (failed)
- def test_grault3: ...
- # was 30.460s
class Qux:
# was 41.66s ± 1.06s
# now 42.63s
- def test_qux2: ...
- # was 6.020s ± 0.402s
! def test_qux3: ...
! # was 4.978s (errored)
! # now 23.496s
! def test_qux4: ...
! # was 7.074s ± 0.979s (errored)
! # now 7.158s (failed)
! def test_qux6: ...
! # was 23.589s (failed)
! # now 0.002s (skipped)
+ def test_qux1: ...
+ # now 0.001s (skipped)
+ def test_qux5: ...
+ # now 11.968s
+ class Bar:
+ # now 54.19s
+
+ def test_bar1: ...
+ # now 50.447s
+
+ def test_bar2: ...
+ # now 3.742s (failed)
''',
print_test_stats.anomalies(analysis),
)
def test_graph(self):
# HEAD is on master
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
* aaaaaaaaaa (HEAD) total time 502.99s
* bbbbbbbbbb (base) 1 report, total time 47.84s
* cccccccccc 1 report, total time 332.50s
* dddddddddd 0 reports
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=502.99,
base_seconds={
fakehash('b'): [47.84],
fakehash('c'): [332.50],
fakehash('d'): [],
},
on_master=True,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 9988.77s
|/
* bbbbbbbbbb (base) 121 reports, total time 7654.32s ± 55.55s
* cccccccccc 20 reports, total time 5555.55s ± 253.19s
* dddddddddd 1 report, total time 1234.56s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=9988.77,
base_seconds={
fakehash('b'): [7598.77] * 60 + [7654.32] + [7709.87] * 60,
fakehash('c'): [5308.77] * 10 + [5802.33] * 10,
fakehash('d'): [1234.56],
},
on_master=False,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 25.52s
| |
| : (5 commits)
|/
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
* dddddddddd (base) 15 reports, total time 58.92s ± 25.82s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=25.52,
base_seconds={
fakehash('b'): [],
fakehash('c'): [],
fakehash('d'): [52.25] * 14 + [152.26],
},
on_master=False,
ancestry_path=5,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 0.08s
|/|
| : (1 commit)
|
* bbbbbbbbbb 0 reports
* cccccccccc (base) 1 report, total time 0.09s
* dddddddddd 3 reports, total time 0.10s ± 0.05s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=0.08,
base_seconds={
fakehash('b'): [],
fakehash('c'): [0.09],
fakehash('d'): [0.05, 0.10, 0.15],
},
on_master=False,
other_ancestors=1,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 5.98s
| |
| : (1 commit)
|/|
| : (7 commits)
|
* bbbbbbbbbb (base) 2 reports, total time 6.02s ± 1.71s
* cccccccccc 0 reports
* dddddddddd 10 reports, total time 5.84s ± 0.92s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=5.98,
base_seconds={
fakehash('b'): [4.81, 7.23],
fakehash('c'): [],
fakehash('d'): [4.97] * 5 + [6.71] * 5,
},
on_master=False,
ancestry_path=1,
other_ancestors=7,
)
)
def test_regression_info(self):
self.assertEqual(
'''\
----- Historic stats comparison result ------
job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
class Foo:
# was 42.50s ± 2.12s
# now 3.02s
- def test_bar: ...
- # was 1.000s
! def test_foo: ...
! # was 41.500s ± 2.121s
! # now 0.020s (skipped)
+ def test_baz: ...
+ # now 3.000s
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
|/
* bbbbbbbbbb (base) 1 report, total time 41.00s
* cccccccccc 1 report, total time 43.00s
|
:
Removed (across 1 suite) 1 test, totaling - 1.00s
Modified (across 1 suite) 1 test, totaling - 41.48s ± 2.12s
Added (across 1 suite) 1 test, totaling + 3.00s
''',
print_test_stats.regression_info(
head_sha=fakehash('a'),
head_report=make_report_v1({
'Foo': [
makecase('test_foo', 0.02, skipped=True),
makecase('test_baz', 3),
]}),
base_reports={
fakehash('b'): [
make_report_v1({
'Foo': [
makecase('test_foo', 40),
makecase('test_bar', 1),
],
}),
],
fakehash('c'): [
make_report_v1({
'Foo': [
makecase('test_foo', 43),
],
}),
],
},
job_name='foo_job',
on_master=False,
ancestry_path=0,
other_ancestors=0,
)
)
def test_regression_info_new_job(self):
self.assertEqual(
'''\
----- Historic stats comparison result ------
job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ class Foo:
+ # now 3.02s
+
+ def test_baz: ...
+ # now 3.000s
+
+ def test_foo: ...
+ # now 0.020s (skipped)
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
| |
| : (3 commits)
|/|
| : (2 commits)
|
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
|
:
Removed (across 0 suites) 0 tests, totaling 0.00s
Modified (across 0 suites) 0 tests, totaling 0.00s
Added (across 1 suite) 2 tests, totaling + 3.02s
''',
print_test_stats.regression_info(
head_sha=fakehash('a'),
head_report=make_report_v1({
'Foo': [
makecase('test_foo', 0.02, skipped=True),
makecase('test_baz', 3),
]}),
base_reports={
fakehash('b'): [],
fakehash('c'): [],
},
job_name='foo_job',
on_master=False,
ancestry_path=3,
other_ancestors=2,
)
)
if __name__ == '__main__':
run_tests()