Summary: This is a followup to https://github.com/pytorch/pytorch/issues/49190. Vaguely speaking, the goals are to make it easy to identify test time regressions introduced by PRs. Eventually the hope is to use this information to edit Dr CI comments, but this particular PR just does the analysis and prints it to stdout, so a followup PR would be needed to edit the actual comments on GitHub.

**Important:** for uninteresting reasons, this PR moves the `print_test_stats.py` file.

- *Before:* `test/print_test_stats.py`
- *After:* `torch/testing/_internal/print_test_stats.py`

Notes on the approach:

- Just getting the mean and stdev for the total job time of the last _N_ commits isn't sufficient, because e.g. if `master` was broken 5 commits ago, then a lot of those job times will be much shorter, breaking the statistics.
- We use the commit history to make better estimates for the mean and stdev of individual test (and suite) times, but only when the test in that historical commit is present and its status matches that of the base commit.
- We list all the tests that were removed or added, or whose status changed (e.g. skipped to not skipped, or vice versa), along with time (estimate) info for that test case and its containing suite.
- We don't list tests whose time changed a lot if their status didn't change, because there's a lot of noise and it's unclear how to do that well without too many false positives.
- We show a human-readable commit graph that indicates exactly how many commits are in the pool of commits that could be causing regressions (e.g. if a PR has multiple commits in it, or if the base commit on `master` doesn't have a report in S3).
- We don't show an overall estimate of whether the PR increased or decreased the total test job time, because it's noisy and it's a bit tricky to aggregate stdevs up from individual tests to the whole job level. This might change in a followup PR.
- Instead, we simply show a summary at the bottom which says how many tests were removed/added/modified (where "modified" means that the status changed), and our best estimates of the mean times (and stdevs) of those changes.
- Importantly, the summary at the bottom is only for the test cases that were already shown in the more verbose diff report, and does not include any information about tests whose status didn't change but whose running time got much longer.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/50171

Test Plan: To run the unit tests:

```
$ python test/test_testing.py
$ python test/print_test_stats.py
```

To verify that this works, check the [CircleCI logs](https://app.circleci.com/pipelines/github/pytorch/pytorch/258628/workflows/9cfadc34-e042-485e-b3b3-dc251f160307) for a test job run on this PR; for example:

- pytorch_linux_bionic_py3_6_clang9_test

To test locally, use the following steps. First run an arbitrary test suite (you need to have some XML reports so that `test/print_test_stats.py` runs, but we'll be ignoring them here via the `--use-json` CLI option):

```
$ DATA_DIR=/tmp
$ ARBITRARY_TEST=testing
$ python test/test_$ARBITRARY_TEST.py --save-xml=$DATA_DIR/test/test_$ARBITRARY_TEST
```

Now choose a commit and a test job (it has to be on `master` since we're going to grab the test time data from S3, and [we only upload test times to S3 on the `master`, `nightly`, and `release` branches](https://github.com/pytorch/pytorch/pull/49645)):

```
$ export CIRCLE_SHA1=c39fb9771d89632c5c3a163d3c00af3bef1bd489
$ export CIRCLE_JOB=pytorch_linux_bionic_py3_6_clang9_test
```

Download the `*.json.bz2` file(s) for that commit/job pair:

```
$ aws s3 cp s3://ossci-metrics/test_time/$CIRCLE_SHA1/$CIRCLE_JOB/ $DATA_DIR/ossci-metrics/test_time/$CIRCLE_SHA1/$CIRCLE_JOB --recursive
```

And feed everything into `test/print_test_stats.py`:

```
$ bzip2 -kdc $DATA_DIR/ossci-metrics/test_time/$CIRCLE_SHA1/$CIRCLE_JOB/*Z.json.bz2 | torch/testing/_internal/print_test_stats.py --compare-with-s3 --use-json=/dev/stdin $DATA_DIR/test/test_$ARBITRARY_TEST
```

The first part of the output should be the same as before this PR; here is the new part, at the end of the output:

- https://pastebin.com/Jj1svhAn

Reviewed By: walterddr

Differential Revision: D26232345

Pulled By: samestep

fbshipit-source-id: b687b1737519d2eed68fbd591a667e4e029de509
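To make the per-test estimation concrete, here is a minimal sketch of the idea described above (illustrative only, not the code in `torch/testing/_internal/print_test_stats.py`): for a given test case, collect its time from each historical report, but only when that report contains the test with the same status, and report the mean and population stdev of that sample. The simplified report shape and the helper name `estimate_case_time` are assumptions made for this example.

```python
# Illustrative sketch only: a simplified report maps suite name -> test name -> (seconds, status).
from statistics import mean, pstdev
from typing import Dict, List, Optional, Tuple

SimplifiedReport = Dict[str, Dict[str, Tuple[float, str]]]

def estimate_case_time(
    suite: str,
    case: str,
    status: str,
    base_reports: List[SimplifiedReport],
) -> Optional[Tuple[float, float]]:
    """Mean and stdev of a test's historical times, using only reports where
    the test is present and its status matches the given one."""
    samples = [
        report[suite][case][0]
        for report in base_reports
        if suite in report
        and case in report[suite]
        and report[suite][case][1] == status
    ]
    if not samples:
        return None  # nothing comparable in the history
    return mean(samples), pstdev(samples)

# Example: three historical reports; the skipped one is ignored.
history = [
    {'Foo': {'test_bar': (1.9, 'passed')}},
    {'Foo': {'test_bar': (2.1, 'passed')}},
    {'Foo': {'test_bar': (0.0, 'skipped')}},
]
print(estimate_case_time('Foo', 'test_bar', 'passed', history))  # (2.0, 0.1)
```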
import torch

import math
from pathlib import PurePosixPath

from torch.testing._internal.common_utils import \
    (TestCase, make_tensor, run_tests, slowTest)
from torch.testing._internal.common_device_type import \
    (instantiate_device_type_tests, onlyCUDA, onlyOnCPUAndCUDA, dtypes)
from torch.testing._internal import mypy_wrapper
from torch.testing._internal import print_test_stats

# For testing TestCase methods and torch.testing functions
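# Note: the tests in TestTesting are parametrized over devices (and, where
# decorated, dtypes) by the instantiate_device_type_tests call near the
# bottom of this file.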
class TestTesting(TestCase):
    # Ensure that assertEqual handles numpy arrays properly
    @dtypes(*(torch.testing.get_all_dtypes(include_half=True, include_bfloat16=False,
                                           include_bool=True, include_complex=True)))
    def test_assertEqual_numpy(self, device, dtype):
        S = 10
        test_sizes = [
            (),
            (0,),
            (S,),
            (S, S),
            (0, S),
            (S, 0)]
        for test_size in test_sizes:
            a = make_tensor(test_size, device, dtype, low=-5, high=5)
            a_n = a.cpu().numpy()
            msg = f'size: {test_size}'
            self.assertEqual(a_n, a, rtol=0, atol=0, msg=msg)
            self.assertEqual(a, a_n, rtol=0, atol=0, msg=msg)
            self.assertEqual(a_n, a_n, rtol=0, atol=0, msg=msg)
    # Tests that when rtol or atol (including self.precision) is set, then
    # the other is zeroed.
    # TODO: this is legacy behavior and should be updated after test
    # precisions are reviewed to be consistent with torch.isclose.
    @onlyOnCPUAndCUDA
    def test__comparetensors_legacy(self, device):
        a = torch.tensor((10000000.,))
        b = torch.tensor((10000002.,))

        x = torch.tensor((1.,))
        y = torch.tensor((1. + 1e-5,))

        # Helper for reusing the tensor values as scalars
        def _scalar_helper(a, b, rtol=None, atol=None):
            return self._compareScalars(a.item(), b.item(), rtol=rtol, atol=atol)

        for op in (self._compareTensors, _scalar_helper):
            # Tests default
            result, debug_msg = op(a, b)
            self.assertTrue(result)

            # Tests setting atol
            result, debug_msg = op(a, b, atol=2, rtol=0)
            self.assertTrue(result)

            # Tests setting atol too small
            result, debug_msg = op(a, b, atol=1, rtol=0)
            self.assertFalse(result)

            # Tests setting rtol
            result, debug_msg = op(x, y, atol=0, rtol=1.05e-5)
            self.assertTrue(result)

            # Tests setting rtol too small
            result, debug_msg = op(x, y, atol=0, rtol=1e-5)
            self.assertFalse(result)
    @onlyOnCPUAndCUDA
    def test__comparescalars_debug_msg(self, device):
        # float x float
        result, debug_msg = self._compareScalars(4., 7.)
        expected_msg = ("Comparing 4.0 and 7.0 gives a difference of 3.0, "
                        "but the allowed difference with rtol=1.3e-06 and "
                        "atol=1e-05 is only 1.9100000000000003e-05!")
        self.assertEqual(debug_msg, expected_msg)

        # complex x complex, real difference
        result, debug_msg = self._compareScalars(complex(1, 3), complex(3, 1))
        expected_msg = ("Comparing the real part 1.0 and 3.0 gives a difference "
                        "of 2.0, but the allowed difference with rtol=1.3e-06 "
                        "and atol=1e-05 is only 1.39e-05!")
        self.assertEqual(debug_msg, expected_msg)

        # complex x complex, imaginary difference
        result, debug_msg = self._compareScalars(complex(1, 3), complex(1, 5.5))
        expected_msg = ("Comparing the imaginary part 3.0 and 5.5 gives a "
                        "difference of 2.5, but the allowed difference with "
                        "rtol=1.3e-06 and atol=1e-05 is only 1.715e-05!")
        self.assertEqual(debug_msg, expected_msg)

        # complex x int
        result, debug_msg = self._compareScalars(complex(1, -2), 1)
        expected_msg = ("Comparing the imaginary part -2.0 and 0.0 gives a "
                        "difference of 2.0, but the allowed difference with "
                        "rtol=1.3e-06 and atol=1e-05 is only 1e-05!")
        self.assertEqual(debug_msg, expected_msg)

        # NaN x NaN, equal_nan=False
        result, debug_msg = self._compareScalars(float('nan'), float('nan'), equal_nan=False)
        expected_msg = ("Found nan and nan while comparing and either one is "
                        "nan and the other isn't, or both are nan and equal_nan "
                        "is False")
        self.assertEqual(debug_msg, expected_msg)

    # Checks that compareTensors provides the correct debug info
    @onlyOnCPUAndCUDA
    def test__comparetensors_debug_msg(self, device):
        # Acquires atol that will be used
        atol = max(1e-05, self.precision)

        # Checks float tensor comparisons (2D tensor)
        a = torch.tensor(((0, 6), (7, 9)), device=device, dtype=torch.float32)
        b = torch.tensor(((0, 7), (7, 22)), device=device, dtype=torch.float32)
        result, debug_msg = self._compareTensors(a, b)
        expected_msg = ("With rtol=1.3e-06 and atol={0}, found 2 element(s) (out of 4) "
                        "whose difference(s) exceeded the margin of error (including 0 nan comparisons). "
                        "The greatest difference was 13.0 (9.0 vs. 22.0), "
                        "which occurred at index (1, 1).").format(atol)
        self.assertEqual(debug_msg, expected_msg)

        # Checks float tensor comparisons (with extremal values)
        a = torch.tensor((float('inf'), 5, float('inf')), device=device, dtype=torch.float32)
        b = torch.tensor((float('inf'), float('nan'), float('-inf')), device=device, dtype=torch.float32)
        result, debug_msg = self._compareTensors(a, b)
        expected_msg = ("With rtol=1.3e-06 and atol={0}, found 2 element(s) (out of 3) "
                        "whose difference(s) exceeded the margin of error (including 1 nan comparisons). "
                        "The greatest difference was nan (5.0 vs. nan), "
                        "which occurred at index 1.").format(atol)
        self.assertEqual(debug_msg, expected_msg)

        # Checks float tensor comparisons (with finite vs nan differences)
        a = torch.tensor((20, -6), device=device, dtype=torch.float32)
        b = torch.tensor((-1, float('nan')), device=device, dtype=torch.float32)
        result, debug_msg = self._compareTensors(a, b)
        expected_msg = ("With rtol=1.3e-06 and atol={0}, found 2 element(s) (out of 2) "
                        "whose difference(s) exceeded the margin of error (including 1 nan comparisons). "
                        "The greatest difference was nan (-6.0 vs. nan), "
                        "which occurred at index 1.").format(atol)
        self.assertEqual(debug_msg, expected_msg)

        # Checks int tensor comparisons (1D tensor)
        a = torch.tensor((1, 2, 3, 4), device=device)
        b = torch.tensor((2, 5, 3, 4), device=device)
        result, debug_msg = self._compareTensors(a, b)
        expected_msg = ("Found 2 different element(s) (out of 4), "
                        "with the greatest difference of 3 (2 vs. 5) "
                        "occuring at index 1.")
        self.assertEqual(debug_msg, expected_msg)

        # Checks bool tensor comparisons (0D tensor)
        a = torch.tensor((True), device=device)
        b = torch.tensor((False), device=device)
        result, debug_msg = self._compareTensors(a, b)
        expected_msg = ("Found 1 different element(s) (out of 1), "
                        "with the greatest difference of 1 (1 vs. 0) "
                        "occuring at index 0.")
        self.assertEqual(debug_msg, expected_msg)

        # Checks complex tensor comparisons (real part)
        a = torch.tensor((1 - 1j, 4 + 3j), device=device)
        b = torch.tensor((1 - 1j, 1 + 3j), device=device)
        result, debug_msg = self._compareTensors(a, b)
        expected_msg = ("Real parts failed to compare as equal! "
                        "With rtol=1.3e-06 and atol={0}, "
                        "found 1 element(s) (out of 2) whose difference(s) exceeded the "
                        "margin of error (including 0 nan comparisons). The greatest difference was "
                        "3.0 (4.0 vs. 1.0), which occurred at index 1.").format(atol)
        self.assertEqual(debug_msg, expected_msg)

        # Checks complex tensor comparisons (imaginary part)
        a = torch.tensor((1 - 1j, 4 + 3j), device=device)
        b = torch.tensor((1 - 1j, 4 - 21j), device=device)
        result, debug_msg = self._compareTensors(a, b)
        expected_msg = ("Imaginary parts failed to compare as equal! "
                        "With rtol=1.3e-06 and atol={0}, "
                        "found 1 element(s) (out of 2) whose difference(s) exceeded the "
                        "margin of error (including 0 nan comparisons). The greatest difference was "
                        "24.0 (3.0 vs. -21.0), which occurred at index 1.").format(atol)
        self.assertEqual(debug_msg, expected_msg)

        # Checks size mismatch
        a = torch.tensor((1, 2), device=device)
        b = torch.tensor((3), device=device)
        result, debug_msg = self._compareTensors(a, b)
        expected_msg = ("Attempted to compare equality of tensors "
                        "with different sizes. Got sizes torch.Size([2]) and torch.Size([]).")
        self.assertEqual(debug_msg, expected_msg)

        # Checks dtype mismatch
        a = torch.tensor((1, 2), device=device, dtype=torch.long)
        b = torch.tensor((1, 2), device=device, dtype=torch.float32)
        result, debug_msg = self._compareTensors(a, b, exact_dtype=True)
        expected_msg = ("Attempted to compare equality of tensors "
                        "with different dtypes. Got dtypes torch.int64 and torch.float32.")
        self.assertEqual(debug_msg, expected_msg)

        # Checks device mismatch
        if self.device_type == 'cuda':
            a = torch.tensor((5), device='cpu')
            b = torch.tensor((5), device=device)
            result, debug_msg = self._compareTensors(a, b, exact_device=True)
            expected_msg = ("Attempted to compare equality of tensors "
                            "on different devices! Got devices cpu and cuda:0.")
            self.assertEqual(debug_msg, expected_msg)

    # Helper for testing _compareTensors and _compareScalars
    # Works on single element tensors
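    # Each entry in the `tests` sequences passed to the helpers below is a
    # triple (a, b, expected): the two values to compare and the boolean
    # result the comparison is expected to produce.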
    def _comparetensors_helper(self, tests, device, dtype, equal_nan, exact_dtype=True, atol=1e-08, rtol=1e-05):
        for test in tests:
            a = torch.tensor((test[0],), device=device, dtype=dtype)
            b = torch.tensor((test[1],), device=device, dtype=dtype)

            # Tensor x Tensor comparison
            compare_result, debug_msg = self._compareTensors(a, b, rtol=rtol, atol=atol,
                                                             equal_nan=equal_nan,
                                                             exact_dtype=exact_dtype)
            self.assertEqual(compare_result, test[2])

            # Scalar x Scalar comparison
            compare_result, debug_msg = self._compareScalars(a.item(), b.item(),
                                                             rtol=rtol, atol=atol,
                                                             equal_nan=equal_nan)
            self.assertEqual(compare_result, test[2])

    def _isclose_helper(self, tests, device, dtype, equal_nan, atol=1e-08, rtol=1e-05):
        for test in tests:
            a = torch.tensor((test[0],), device=device, dtype=dtype)
            b = torch.tensor((test[1],), device=device, dtype=dtype)

            actual = torch.isclose(a, b, equal_nan=equal_nan, atol=atol, rtol=rtol)
            expected = test[2]
            self.assertEqual(actual.item(), expected)

    # torch.isclose is not implemented for bool tensors
    # see https://github.com/pytorch/pytorch/issues/33048
    def test_isclose_comparetensors_bool(self, device):
        tests = (
            (True, True, True),
            (False, False, True),
            (True, False, False),
            (False, True, False),
        )

        with self.assertRaises(RuntimeError):
            self._isclose_helper(tests, device, torch.bool, False)

        self._comparetensors_helper(tests, device, torch.bool, False)

    @dtypes(torch.uint8,
            torch.int8, torch.int16, torch.int32, torch.int64)
    def test_isclose_comparetensors_integer(self, device, dtype):
        tests = (
            (0, 0, True),
            (0, 1, False),
            (1, 0, False),
        )

        self._isclose_helper(tests, device, dtype, False)

        # atol and rtol tests
        tests = [
            (0, 1, True),
            (1, 0, False),
            (1, 3, True),
        ]

        self._isclose_helper(tests, device, dtype, False, atol=.5, rtol=.5)
        self._comparetensors_helper(tests, device, dtype, False, atol=.5, rtol=.5)

        if dtype is torch.uint8:
            tests = [
                (-1, 1, False),
                (1, -1, False)
            ]
        else:
            tests = [
                (-1, 1, True),
                (1, -1, True)
            ]

        self._isclose_helper(tests, device, dtype, False, atol=1.5, rtol=.5)
        self._comparetensors_helper(tests, device, dtype, False, atol=1.5, rtol=.5)

    @onlyOnCPUAndCUDA
    @dtypes(torch.float16, torch.float32, torch.float64)
    def test_isclose_comparetensors_float(self, device, dtype):
        tests = (
            (0, 0, True),
            (0, -1, False),
            (float('inf'), float('inf'), True),
            (-float('inf'), float('inf'), False),
            (float('inf'), float('nan'), False),
            (float('nan'), float('nan'), False),
            (0, float('nan'), False),
            (1, 1, True),
        )

        self._isclose_helper(tests, device, dtype, False)
        self._comparetensors_helper(tests, device, dtype, False)

        # atol and rtol tests
        eps = 1e-2 if dtype is torch.half else 1e-6
        tests = (
            (0, 1, True),
            (0, 1 + eps, False),
            (1, 0, False),
            (1, 3, True),
            (1 - eps, 3, False),
            (-.25, .5, True),
            (-.25 - eps, .5, False),
            (.25, -.5, True),
            (.25 + eps, -.5, False),
        )

        self._isclose_helper(tests, device, dtype, False, atol=.5, rtol=.5)
        self._comparetensors_helper(tests, device, dtype, False, atol=.5, rtol=.5)

        # equal_nan = True tests
        tests = (
            (0, float('nan'), False),
            (float('inf'), float('nan'), False),
            (float('nan'), float('nan'), True),
        )

        self._isclose_helper(tests, device, dtype, True)

        self._comparetensors_helper(tests, device, dtype, True)
    # torch.isclose with equal_nan=True is not implemented for complex inputs
    # see https://github.com/numpy/numpy/issues/15959
    # Note: compareTensors will compare the real and imaginary parts of
    # complex tensors separately, unlike isclose.
    @dtypes(torch.complex64, torch.complex128)
    def test_isclose_comparetensors_complex(self, device, dtype):
        tests = (
            (complex(1, 1), complex(1, 1 + 1e-8), True),
            (complex(0, 1), complex(1, 1), False),
            (complex(1, 1), complex(1, 0), False),
            (complex(1, 1), complex(1, float('nan')), False),
            (complex(1, float('nan')), complex(1, float('nan')), False),
            (complex(1, 1), complex(1, float('inf')), False),
            (complex(float('inf'), 1), complex(1, float('inf')), False),
            (complex(-float('inf'), 1), complex(1, float('inf')), False),
            (complex(-float('inf'), 1), complex(float('inf'), 1), False),
            (complex(float('inf'), 1), complex(float('inf'), 1), True),
            (complex(float('inf'), 1), complex(float('inf'), 1 + 1e-4), False),
        )

        self._isclose_helper(tests, device, dtype, False)
        self._comparetensors_helper(tests, device, dtype, False)

        # atol and rtol tests
        eps = 1e-6
        tests = (
            # Complex versions of float tests (real part)
            (complex(0, 0), complex(1, 0), True),
            (complex(0, 0), complex(1 + eps, 0), False),
            (complex(1, 0), complex(0, 0), False),
            (complex(1, 0), complex(3, 0), True),
            (complex(1 - eps, 0), complex(3, 0), False),
            (complex(-.25, 0), complex(.5, 0), True),
            (complex(-.25 - eps, 0), complex(.5, 0), False),
            (complex(.25, 0), complex(-.5, 0), True),
            (complex(.25 + eps, 0), complex(-.5, 0), False),
            # Complex versions of float tests (imaginary part)
            (complex(0, 0), complex(0, 1), True),
            (complex(0, 0), complex(0, 1 + eps), False),
            (complex(0, 1), complex(0, 0), False),
            (complex(0, 1), complex(0, 3), True),
            (complex(0, 1 - eps), complex(0, 3), False),
            (complex(0, -.25), complex(0, .5), True),
            (complex(0, -.25 - eps), complex(0, .5), False),
            (complex(0, .25), complex(0, -.5), True),
            (complex(0, .25 + eps), complex(0, -.5), False),
        )

        self._isclose_helper(tests, device, dtype, False, atol=.5, rtol=.5)
        self._comparetensors_helper(tests, device, dtype, False, atol=.5, rtol=.5)

        # atol and rtol tests for isclose
        tests = (
            # Complex-specific tests
            (complex(1, -1), complex(-1, 1), False),
            (complex(1, -1), complex(2, -2), True),
            (complex(-math.sqrt(2), math.sqrt(2)),
             complex(-math.sqrt(.5), math.sqrt(.5)), True),
            (complex(-math.sqrt(2), math.sqrt(2)),
             complex(-math.sqrt(.501), math.sqrt(.499)), False),
            (complex(2, 4), complex(1., 8.8523607), True),
            (complex(2, 4), complex(1., 8.8523607 + eps), False),
            (complex(1, 99), complex(4, 100), True),
        )

        self._isclose_helper(tests, device, dtype, False, atol=.5, rtol=.5)

        # atol and rtol tests for compareTensors
        tests = (
            (complex(1, -1), complex(-1, 1), False),
            (complex(1, -1), complex(2, -2), True),
            (complex(1, 99), complex(4, 100), False),
        )

        self._comparetensors_helper(tests, device, dtype, False, atol=.5, rtol=.5)

        # equal_nan = True tests
        tests = (
            (complex(1, 1), complex(1, float('nan')), False),
            (complex(float('nan'), 1), complex(1, float('nan')), False),
            (complex(float('nan'), 1), complex(float('nan'), 1), True),
        )

        with self.assertRaises(RuntimeError):
            self._isclose_helper(tests, device, dtype, True)

        self._comparetensors_helper(tests, device, dtype, True)

    # Tests that isclose with rtol or atol values less than zero throws a
    # RuntimeError
    @dtypes(torch.bool, torch.uint8,
            torch.int8, torch.int16, torch.int32, torch.int64,
            torch.float16, torch.float32, torch.float64)
    def test_isclose_atol_rtol_greater_than_zero(self, device, dtype):
        t = torch.tensor((1,), device=device, dtype=dtype)

        with self.assertRaises(RuntimeError):
            torch.isclose(t, t, atol=-1, rtol=1)
        with self.assertRaises(RuntimeError):
            torch.isclose(t, t, atol=1, rtol=-1)
        with self.assertRaises(RuntimeError):
            torch.isclose(t, t, atol=-1, rtol=-1)
    def test_assert_messages(self, device):
        self.assertIsNone(self._get_assert_msg(msg=None))
        self.assertEqual("\nno_debug_msg", self._get_assert_msg("no_debug_msg"))
        self.assertEqual("no_user_msg", self._get_assert_msg(msg=None, debug_msg="no_user_msg"))
        self.assertEqual("debug_msg\nuser_msg", self._get_assert_msg(msg="user_msg", debug_msg="debug_msg"))

    @onlyCUDA
    @slowTest
    def test_cuda_assert_should_stop_test_suite(self, device):
        # This test is slow because it spawns another process to run another test suite.

        # Running a test suite that triggers a CUDA assert should terminate early.
        stderr = TestCase.runWithPytorchAPIUsageStderr("""\
#!/usr/bin/env python

import torch

from torch.testing._internal.common_utils import (TestCase, run_tests, slowTest)
from torch.testing._internal.common_device_type import instantiate_device_type_tests

# This test is added to ensure that the test suite terminates early when a
# CUDA assert is thrown, since all subsequent tests will fail.
# See: https://github.com/pytorch/pytorch/issues/49019
# This test file should be invoked from test_testing.py
class TestThatContainsCUDAAssertFailure(TestCase):

    @slowTest
    def test_throw_unrecoverable_cuda_exception(self, device):
        x = torch.rand(10, device=device)
        # cause unrecoverable CUDA exception, recoverable on CPU
        y = x[torch.tensor([25])].cpu()

    @slowTest
    def test_trivial_passing_test_case_on_cpu_cuda(self, device):
        x1 = torch.tensor([0., 1.], device=device)
        x2 = torch.tensor([0., 1.], device='cpu')
        self.assertEqual(x1, x2)

instantiate_device_type_tests(
    TestThatContainsCUDAAssertFailure,
    globals(),
    only_for='cuda'
)

if __name__ == '__main__':
    run_tests()
""")
        # should capture CUDA error
        self.assertIn('CUDA error: device-side assert triggered', stderr)
        # should run only 1 test because it throws an unrecoverable error.
        self.assertIn('Ran 1 test', stderr)


instantiate_device_type_tests(TestTesting, globals())
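# mypy_wrapper.glob(pattern, filename) reports whether a (POSIX) file path
# matches a glob pattern; the cases below cover exact file matches, directory
# prefixes, and wildcard patterns.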
class TestMypyWrapper(TestCase):
    def test_glob(self):
        # can match individual files
        self.assertTrue(mypy_wrapper.glob(
            pattern='test/test_torch.py',
            filename=PurePosixPath('test/test_torch.py'),
        ))
        self.assertFalse(mypy_wrapper.glob(
            pattern='test/test_torch.py',
            filename=PurePosixPath('test/test_testing.py'),
        ))

        # dir matters
        self.assertFalse(mypy_wrapper.glob(
            pattern='tools/codegen/utils.py',
            filename=PurePosixPath('torch/nn/modules.py'),
        ))
        self.assertTrue(mypy_wrapper.glob(
            pattern='setup.py',
            filename=PurePosixPath('setup.py'),
        ))
        self.assertFalse(mypy_wrapper.glob(
            pattern='setup.py',
            filename=PurePosixPath('foo/setup.py'),
        ))
        self.assertTrue(mypy_wrapper.glob(
            pattern='foo/setup.py',
            filename=PurePosixPath('foo/setup.py'),
        ))

        # can match dirs
        self.assertTrue(mypy_wrapper.glob(
            pattern='torch',
            filename=PurePosixPath('torch/random.py'),
        ))
        self.assertTrue(mypy_wrapper.glob(
            pattern='torch',
            filename=PurePosixPath('torch/nn/cpp.py'),
        ))
        self.assertFalse(mypy_wrapper.glob(
            pattern='torch',
            filename=PurePosixPath('tools/fast_nvcc/fast_nvcc.py'),
        ))

        # can match wildcards
        self.assertTrue(mypy_wrapper.glob(
            pattern='tools/autograd/*.py',
            filename=PurePosixPath('tools/autograd/gen_autograd.py'),
        ))
        self.assertFalse(mypy_wrapper.glob(
            pattern='tools/autograd/*.py',
            filename=PurePosixPath('tools/autograd/deprecated.yaml'),
        ))
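# Helpers for TestPrintTestStats: fabricate the minimal report structures that
# print_test_stats consumes. fakehash('a') stands in for a 40-character commit
# SHA, makecase builds a single test-case record, and makereport aggregates
# cases into suites (summing their times) and suites into a whole-job report.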
def fakehash(char):
    return char * 40


def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
    return {
        'name': name,
        'seconds': seconds,
        'errored': errored,
        'failed': failed,
        'skipped': skipped,
    }


def makereport(tests):
    suites = {
        suite_name: {
            'total_seconds': sum(case['seconds'] for case in cases),
            'cases': cases,
        }
        for suite_name, cases in tests.items()
    }
    return {
        'total_seconds': sum(s['total_seconds'] for s in suites.values()),
        'suites': suites,
    }


class TestPrintTestStats(TestCase):
    maxDiff = None
    def test_analysis(self):
        head_report = makereport({
            # input ordering of the suites is ignored
            'Grault': [
                # not printed: status same and time similar
                makecase('test_grault0', 4.78, failed=True),
                # status same, but time increased a lot
                makecase('test_grault2', 1.473, errored=True),
            ],
            # individual test times changed, but not the overall suite time
            'Qux': [
                # input ordering of the test cases is ignored
                makecase('test_qux1', 0.001, skipped=True),
                makecase('test_qux6', 0.002, skipped=True),
                # time in bounds, but status changed
                makecase('test_qux4', 7.158, failed=True),
                # not printed because it's the same as before
                makecase('test_qux7', 0.003, skipped=True),
                makecase('test_qux5', 11.968),
                makecase('test_qux3', 23.496),
            ],
            # new test suite
            'Bar': [
                makecase('test_bar2', 3.742, failed=True),
                makecase('test_bar1', 50.447),
            ],
            # overall suite time changed but no individual tests
            'Norf': [
                makecase('test_norf1', 3),
                makecase('test_norf2', 3),
                makecase('test_norf3', 3),
                makecase('test_norf4', 3),
            ],
            # suite doesn't show up if it doesn't change enough
            'Foo': [
                makecase('test_foo1', 42),
                makecase('test_foo2', 56),
            ],
        })

        base_reports = {
            # bbbb has no reports, so base is cccc instead
            fakehash('b'): [],
            fakehash('c'): [
                makereport({
                    'Baz': [
                        makecase('test_baz2', 13.605),
                        # no recent suites have & skip this test
                        makecase('test_baz1', 0.004, skipped=True),
                    ],
                    'Foo': [
                        makecase('test_foo1', 43),
                        # test added since dddd
                        makecase('test_foo2', 57),
                    ],
                    'Grault': [
                        makecase('test_grault0', 4.88, failed=True),
                        makecase('test_grault1', 11.967, failed=True),
                        makecase('test_grault2', 0.395, errored=True),
                        makecase('test_grault3', 30.460),
                    ],
                    'Norf': [
                        makecase('test_norf1', 2),
                        makecase('test_norf2', 2),
                        makecase('test_norf3', 2),
                        makecase('test_norf4', 2),
                    ],
                    'Qux': [
                        makecase('test_qux3', 4.978, errored=True),
                        makecase('test_qux7', 0.002, skipped=True),
                        makecase('test_qux2', 5.618),
                        makecase('test_qux4', 7.766, errored=True),
                        makecase('test_qux6', 23.589, failed=True),
                    ],
                }),
            ],
            fakehash('d'): [
                makereport({
                    'Foo': [
                        makecase('test_foo1', 40),
                        # removed in cccc
                        makecase('test_foo3', 17),
                    ],
                    'Baz': [
                        # not skipped, so not included in stdev
                        makecase('test_baz1', 3.14),
                    ],
                    'Qux': [
                        makecase('test_qux7', 0.004, skipped=True),
                        makecase('test_qux2', 6.02),
                        makecase('test_qux4', 20.932),
                    ],
                    'Norf': [
                        makecase('test_norf1', 3),
                        makecase('test_norf2', 3),
                        makecase('test_norf3', 3),
                        makecase('test_norf4', 3),
                    ],
                    'Grault': [
                        makecase('test_grault0', 5, failed=True),
                        makecase('test_grault1', 14.325, failed=True),
                        makecase('test_grault2', 0.31, errored=True),
                    ],
                }),
            ],
            fakehash('e'): [],
            fakehash('f'): [
                makereport({
                    'Foo': [
                        makecase('test_foo3', 24),
                        makecase('test_foo1', 43),
                    ],
                    'Baz': [
                        makecase('test_baz2', 16.857),
                    ],
                    'Qux': [
                        makecase('test_qux2', 6.422),
                        makecase('test_qux4', 6.382, errored=True),
                    ],
                    'Norf': [
                        makecase('test_norf1', 0.9),
                        makecase('test_norf3', 0.9),
                        makecase('test_norf2', 0.9),
                        makecase('test_norf4', 0.9),
                    ],
                    'Grault': [
                        makecase('test_grault0', 4.7, failed=True),
                        makecase('test_grault1', 13.146, failed=True),
                        makecase('test_grault2', 0.48, errored=True),
                    ],
                }),
            ],
        }

        simpler_head = print_test_stats.simplify(head_report)
        simpler_base = {}
        for commit, reports in base_reports.items():
            simpler_base[commit] = [print_test_stats.simplify(r) for r in reports]
        analysis = print_test_stats.analyze(
            head_report=simpler_head,
            base_reports=simpler_base,
        )
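        # In the expected report below, removed tests/suites are prefixed with
        # '-', added ones with '+', and status changes with '!'; the '# was' /
        # '# now' lines give the historical mean (with a stdev when more than
        # one base sample exists) and the time measured at HEAD.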
        self.assertEqual(
            '''\

- class Baz:
- # was 15.23s ± 2.30s
-
- def test_baz1: ...
- # was 0.004s (skipped)
-
- def test_baz2: ...
- # was 15.231s ± 2.300s


class Grault:
# was 48.86s ± 1.19s
# now 6.25s

- def test_grault1: ...
- # was 13.146s ± 1.179s (failed)

- def test_grault3: ...
- # was 30.460s


class Qux:
# was 41.66s ± 1.06s
# now 42.63s

- def test_qux2: ...
- # was 6.020s ± 0.402s

! def test_qux3: ...
! # was 4.978s (errored)
! # now 23.496s

! def test_qux4: ...
! # was 7.074s ± 0.979s (errored)
! # now 7.158s (failed)

! def test_qux6: ...
! # was 23.589s (failed)
! # now 0.002s (skipped)

+ def test_qux1: ...
+ # now 0.001s (skipped)

+ def test_qux5: ...
+ # now 11.968s


+ class Bar:
+ # now 54.19s
+
+ def test_bar1: ...
+ # now 50.447s
+
+ def test_bar2: ...
+ # now 3.742s (failed)

''',
            print_test_stats.anomalies(analysis),
        )
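    # graph() renders a small ASCII commit graph: HEAD, the most recent master
    # ancestor with at least one S3 report (the "base"), and the surrounding
    # commits with their report counts and total times (with a stdev when a
    # commit has more than one report). The cases below cover HEAD on master,
    # HEAD on a branch, and branches with extra ancestry-path or
    # other-ancestor commits.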
    def test_graph(self):
        # HEAD is on master
        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
* aaaaaaaaaa (HEAD) total time 502.99s
* bbbbbbbbbb (base) 1 report, total time 47.84s
* cccccccccc 1 report, total time 332.50s
* dddddddddd 0 reports
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=502.99,
                base_seconds={
                    fakehash('b'): [47.84],
                    fakehash('c'): [332.50],
                    fakehash('d'): [],
                },
                on_master=True,
            )
        )

        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 9988.77s
|/
* bbbbbbbbbb (base) 121 reports, total time 7654.32s ± 55.55s
* cccccccccc 20 reports, total time 5555.55s ± 253.19s
* dddddddddd 1 report, total time 1234.56s
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=9988.77,
                base_seconds={
                    fakehash('b'): [7598.77] * 60 + [7654.32] + [7709.87] * 60,
                    fakehash('c'): [5308.77] * 10 + [5802.33] * 10,
                    fakehash('d'): [1234.56],
                },
                on_master=False,
            )
        )

        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 25.52s
| |
| : (5 commits)
|/
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
* dddddddddd (base) 15 reports, total time 58.92s ± 25.82s
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=25.52,
                base_seconds={
                    fakehash('b'): [],
                    fakehash('c'): [],
                    fakehash('d'): [52.25] * 14 + [152.26],
                },
                on_master=False,
                ancestry_path=5,
            )
        )

        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 0.08s
|/|
| : (1 commit)
|
* bbbbbbbbbb 0 reports
* cccccccccc (base) 1 report, total time 0.09s
* dddddddddd 3 reports, total time 0.10s ± 0.05s
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=0.08,
                base_seconds={
                    fakehash('b'): [],
                    fakehash('c'): [0.09],
                    fakehash('d'): [0.05, 0.10, 0.15],
                },
                on_master=False,
                other_ancestors=1,
            )
        )

        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 5.98s
| |
| : (1 commit)
|/|
| : (7 commits)
|
* bbbbbbbbbb (base) 2 reports, total time 6.02s ± 1.71s
* cccccccccc 0 reports
* dddddddddd 10 reports, total time 5.84s ± 0.92s
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=5.98,
                base_seconds={
                    fakehash('b'): [4.81, 7.23],
                    fakehash('c'): [],
                    fakehash('d'): [4.97] * 5 + [6.71] * 5,
                },
                on_master=False,
                ancestry_path=1,
                other_ancestors=7,
            )
        )
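    # regression_info() stitches together the pieces exercised above: the
    # per-suite/per-test anomalies report, the commit graph, and a closing
    # summary of how many tests were removed, modified, and added along with
    # their estimated time impact.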
    def test_regression_info(self):
        self.assertEqual(
            '''\
----- Historic stats comparison result ------

job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa


class Foo:
# was 42.50s ± 2.12s
# now 3.02s

- def test_bar: ...
- # was 1.000s

! def test_foo: ...
! # was 41.500s ± 2.121s
! # now 0.020s (skipped)

+ def test_baz: ...
+ # now 3.000s


Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
|/
* bbbbbbbbbb (base) 1 report, total time 41.00s
* cccccccccc 1 report, total time 43.00s
|
:

Removed (across 1 suite) 1 test, totaling - 1.00s
Modified (across 1 suite) 1 test, totaling - 41.48s ± 2.12s
Added (across 1 suite) 1 test, totaling + 3.00s
''',
            print_test_stats.regression_info(
                head_sha=fakehash('a'),
                head_report=makereport({
                    'Foo': [
                        makecase('test_foo', 0.02, skipped=True),
                        makecase('test_baz', 3),
                    ]}),
                base_reports={
                    fakehash('b'): [
                        makereport({
                            'Foo': [
                                makecase('test_foo', 40),
                                makecase('test_bar', 1),
                            ],
                        }),
                    ],
                    fakehash('c'): [
                        makereport({
                            'Foo': [
                                makecase('test_foo', 43),
                            ],
                        }),
                    ],
                },
                job_name='foo_job',
                on_master=False,
                ancestry_path=0,
                other_ancestors=0,
            )
        )


if __name__ == '__main__':
    run_tests()