[BE] Fix bug in flaky test uploading

I noticed that the latest Windows test_c10d_gloo flakiness (238d01ec90) did not get caught by the flaky test bot. The issue was that test_c10d_gloo is a test file that intentionally spawns duplicate tests, and I had excluded those from my "duplicate flaky test rerun" calculation.

I've now updated the logic to detect flakiness in those intentionally duplicated tests as well.

This PR also cleans up some outdated logic left over from earlier changes.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/74712
Approved by: https://github.com/malfet
commit b0d7cd0111 (parent 9233af181f)
Authored by Jane Xu on 2022-03-30 19:20:44 +00:00; committed by PyTorch MergeBot


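As a rough sketch (not the actual upload tooling, just the condition the last hunk below lands on): a duplicated test is reported as flaky only when its reruns disagree, i.e. at least one run passed and at least one failed.

# Simplified sketch of the reporting rule: count green (passing) and red
# (failing) runs among a test's duplicates and flag it flaky only when both
# appear. All-green intentional duplicates and all-red real failures are not
# flagged.
from typing import List

def is_flaky(run_outcomes: List[bool]) -> bool:
    # run_outcomes: True for a passing run, False for a failing one
    num_green = sum(run_outcomes)
    num_red = len(run_outcomes) - num_green
    return num_green > 0 and num_red > 0

assert is_flaky([True, False])       # reruns disagree -> flaky
assert not is_flaky([True, True])    # intentional duplicates that all pass -> not flaky
assert not is_flaky([False, False])  # consistently failing -> a real failure, not flaky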
@@ -576,6 +576,10 @@ class TestCase:
         self.class_name = str(dom.attributes['classname'].value)
         self.name = str(dom.attributes['name'].value)
         self.time = float(dom.attributes['time'].value)
+        # The following attribute is currently ONLY used in process_intentional_test_runs for validation
+        # reasons. The test filename that populates TestFile is calculated and passed down through the test report path.
+        # The reason we don't just use this attribute is because it doesn't exist for cpp tests, e.g., in test_libtorch
+        self.file = str(dom.attributes['file'].value) if dom.hasAttribute('file') else 'N/A - probably a cpp test'
         error_elements = dom.getElementsByTagName('error')
         # DISCLAIMER: unexpected successes and expected failures are currently not reported in assemble_s3_object
         self.expected_failure = False
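For context on the new 'file' attribute, here is a minimal standalone sketch of the same fallback using xml.dom.minidom and a made-up junit-style element (the real parsing happens in the TestCase constructor above):

# Hypothetical report snippet; cpp test reports have no 'file' attribute and
# therefore get the placeholder value instead.
from xml.dom import minidom

report = minidom.parseString(
    '<testcase classname="SomeTestClass" name="test_example" '
    'time="0.01" file="distributed/test_c10d_gloo.py"/>'
)
case = report.getElementsByTagName('testcase')[0]
file_attr = (str(case.attributes['file'].value)
             if case.hasAttribute('file')
             else 'N/A - probably a cpp test')
print(file_attr)  # distributed/test_c10d_gloo.py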
@@ -601,9 +605,9 @@ class TestCase:
         return self.__str__()

     def __str__(self) -> str:
-        return f'[TestCase name: {self.name} | class_name: {self.class_name} | time: {self.time} | ' \
+        return f'[TestCase name: {self.name} | class_name: {self.class_name} | file: {self.file} | time: {self.time} | ' \
             f'expected_failure: {self.expected_failure} | skipped: {self.skipped} | errored: {self.errored} | ' \
-            f'unexpected_success: {self.unexpected_success} | failed: {self.failed}]'
+            f'unexpected_success: {self.unexpected_success} | failed: {self.failed}]\n'

 class TestSuite:
     def __init__(self, name: str) -> None:
@@ -644,6 +648,17 @@ class TestSuite:
         self.test_cases[name].expected_failure |= test_case.expected_failure

+# Tests that spawn duplicates (usually only twice) intentionally
+MULTITESTS = [
+    'test_cpp_extensions_aot',
+    'distributed/test_distributed_spawn',
+    'distributed\\test_distributed_spawn',  # for windows
+    'distributed/test_c10d_gloo',
+    'distributed\\test_c10d_gloo',  # for windows
+    'cpp'  # The caffe2 cpp tests spawn duplicate test cases as well.
+]
+
 DuplicatedDict = Dict[str, Dict[str, List[TestCase]]]

 class TestFile:
@@ -653,27 +668,20 @@ class TestFile:
         self.test_suites: Dict[str, TestSuite] = dict()

     def append(self, test_case: TestCase, test_type: str, duplicated_tests_dict: DuplicatedDict) -> None:
-        is_multi_test = self.name == 'test_cpp_extensions_aot' or \
-            self.name == 'distributed/test_distributed_spawn' or \
-            self.name == 'distributed/test_c10d_gloo' or \
-            self.name == 'cpp'  # The caffe2 cpp tests spawn duplicate test cases as well.
-        if is_multi_test:
-            suite_name = test_case.class_name + '__' + test_type
-        else:
-            suite_name = test_case.class_name
+        suite_name = test_case.class_name
         if suite_name not in self.test_suites:
             self.test_suites[suite_name] = TestSuite(suite_name)
         if test_case.name in self.test_suites[suite_name].test_cases:
-            if is_multi_test:
+            if self.name in MULTITESTS:
                 self.test_suites[suite_name].update(test_case)
                 self.total_time += test_case.time
-            else:
-                # Gather up duplicated test cases
-                if suite_name not in duplicated_tests_dict:
-                    duplicated_tests_dict[suite_name] = dict()
-                if test_case.name not in duplicated_tests_dict[suite_name]:
-                    duplicated_tests_dict[suite_name][test_case.name] = [self.test_suites[suite_name].test_cases[test_case.name]]
-                duplicated_tests_dict[suite_name][test_case.name].append(test_case)
+            # Gather up duplicated test cases to parse for flaky reruns
+            if suite_name not in duplicated_tests_dict:
+                duplicated_tests_dict[suite_name] = dict()
+            if test_case.name not in duplicated_tests_dict[suite_name]:
+                duplicated_tests_dict[suite_name][test_case.name] = [self.test_suites[suite_name].test_cases[test_case.name]]
+            duplicated_tests_dict[suite_name][test_case.name].append(test_case)
         else:
             self.test_suites[suite_name].append(test_case)
             self.total_time += test_case.time
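As a rough illustration of the bookkeeping above (Run is a hypothetical stand-in for the real TestCase; DuplicatedDict maps suite name to test name to the list of recorded runs):

# Simplified sketch of how duplicate runs are gathered for later flaky-rerun
# analysis. The first duplicate seeds the list with the run already stored in
# the suite; every further duplicate is appended.
from typing import Dict, List, NamedTuple

class Run(NamedTuple):  # stand-in for the real TestCase parsed from the XML report
    name: str
    failed: bool

DuplicatedRuns = Dict[str, Dict[str, List[Run]]]

def gather_duplicate(dup: DuplicatedRuns, suite: str, first_seen: Run, rerun: Run) -> None:
    dup.setdefault(suite, {}).setdefault(rerun.name, [first_seen]).append(rerun)

dups: DuplicatedRuns = {}
gather_duplicate(dups, 'TestDistBackendWithSpawn',
                 Run('test_broadcast', failed=True), Run('test_broadcast', failed=False))
# dups now holds both the original failing run and the passing rerun under
# dups['TestDistBackendWithSpawn']['test_broadcast']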
@@ -743,17 +751,9 @@ def process_intentional_test_runs(runs: List[TestCase]) -> Tuple[int, int]:
         else:
             num_pass += 1
-    REPEAT_TEST_FOR_TYPES_TESTS = [
-        "test_data_parallel_module",
-        "test_data_parallel_module_kwargs_only",
-        "test_data_parallel_module_kwargs_only_empty_list",
-        "test_data_parallel_module_kwargs_only_empty_dict",
-        "test_data_parallel_module_kwargs_only_empty_tuple"
-    ]
-    # Do not run checks for tests that use repeat_test_for_types decorator as they do not go well with our retry
-    # functionality. Once issue https://github.com/pytorch/pytorch/issues/69865 is fixed, we should remove the exception
-    if not any([x in test_run.name for x in REPEAT_TEST_FOR_TYPES_TESTS]):
+    # Do not run duplication checks for test files that spawn duplicate tests intentionally
+    # and are not necessarily flaky test reruns.
+    if not any(x in test_run.file for x in MULTITESTS):
         err_msg = f'Warning: unintentional test case duplicates found for {test_run.name} in suite {test_run.class_name}.'
         report_only = os.getenv('PYTORCH_OVERRIDE_FLAKY_SIGNAL') != '1'
         if report_only and num_fail + num_errored + num_unexpected_success < 1 or not report_only and num_expected_fail < 1:
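To make the new guard concrete, here is a small standalone sketch of the substring check on a report path (the MULTITESTS entries mirror the hunk above; the example paths are made up):

MULTITESTS = [
    'test_cpp_extensions_aot',
    'distributed/test_distributed_spawn',
    'distributed\\test_distributed_spawn',  # for windows
    'distributed/test_c10d_gloo',
    'distributed\\test_c10d_gloo',  # for windows
    'cpp',  # cpp reports carry the 'N/A - probably a cpp test' placeholder, which this also matches
]

def spawns_intentional_duplicates(test_file: str) -> bool:
    # Same substring test as in the diff: both '/' and '\\' variants are
    # listed so that Windows report paths are recognized too.
    return any(x in test_file for x in MULTITESTS)

assert spawns_intentional_duplicates('distributed\\test_c10d_gloo')  # windows path now matches
assert spawns_intentional_duplicates('N/A - probably a cpp test')    # cpp fallback matches via 'cpp'
assert not spawns_intentional_duplicates('test_nn')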
@@ -780,7 +780,7 @@ def assemble_flaky_test_stats(duplicated_tests_by_file: Dict[str, DuplicatedDict
         for suite_name, testcase_to_runs in suite_to_dict.items():
             for testcase_name, list_of_runs in testcase_to_runs.items():
                 num_green, num_red = process_intentional_test_runs(list_of_runs)
-                if num_green > 0:  # Otherwise, it's likely just a failing test
+                if num_green > 0 and num_red > 0:  # Flaky tests show different results in consecutive reruns
                     flaky_tests.append({
                         "name": testcase_name,
                         "suite": suite_name,