Add script to export a JSON of slow test case times (#54907)

Summary:
This PR introduces a script to spit out a list of slow tests into a file `.pytorch-slow-tests`. The format is currently JSON, and is simply a dictionary with entries that look like: `("test_case_name (__main__.test_suite)" -> average time in seconds)`. This is one of the steps in maintaining a list of slow tests so we could retire the manual slowTest labeling process.

The script reads the previous day's data for viable/strict (to ensure we have fully uploaded data), and aggregates the test times for **passed** test cases. It then filters the individual test cases to exclude those faster than 60 seconds.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/54907

Test Plan:
`python tools/export_slow_test.py`
Check that `.pytorch-slow-tests` contains data. Mine looks like:
```
{
    "test_matmul_4d_4d_complex_cpu (__main__.TestAutogradDeviceTypeCPU)": 91.22675,
    "test_unary_ops (__main__.TestTEFuser)": 68.6,
    "test_fn_gradgrad_unfold_cpu_complex128 (__main__.TestGradientsCPU)": 82.49153333333334,
    "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 94.0914375,
    "test_ddp_uneven_inputs (__main__.TestDistBackendWithFork)": 134.4995,
    "test_pdist_norm_large_cuda (__main__.TestTorchDeviceTypeCUDA)": 60.2634,
    "test_cusparse_multiple_threads_same_device (__main__.TestCuda)": 97.9022,
    "test_fn_gradgrad_unfold_cuda_complex128 (__main__.TestGradientsCUDA)": 130.7222,
    "test_ddp_uneven_inputs (__main__.TestDistBackendWithSpawn)": 136.08133333333333,
    "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 112.80733333333333,
    "test_lobpcg_ortho_cuda_float64 (__main__.TestLinalgCUDA)": 63.8312,
    "test_matmul_4d_4d_complex_cuda (__main__.TestAutogradDeviceTypeCUDA)": 62.1062,
    "test_inverse_many_batches_cuda_complex128 (__main__.TestLinalgCUDA)": 1434.505,
    "test_inverse_many_batches_cuda_complex64 (__main__.TestLinalgCUDA)": 1403.846,
    "test_inverse_many_batches_cuda_float32 (__main__.TestLinalgCUDA)": 2081.614,
    "test_inverse_many_batches_cuda_float64 (__main__.TestLinalgCUDA)": 1410.788,
    "test_matrix_exp_analytic_cuda_complex128 (__main__.TestLinalgCUDA)": 172.167,
    "test_matrix_exp_analytic_cuda_complex64 (__main__.TestLinalgCUDA)": 172.57,
    "test_matrix_exp_analytic_cuda_float32 (__main__.TestLinalgCUDA)": 258.61,
    "test_matrix_exp_analytic_cuda_float64 (__main__.TestLinalgCUDA)": 174.793,
    "test_inverse_many_batches_cpu_complex128 (__main__.TestLinalgCPU)": 666.464,
    "test_inverse_many_batches_cpu_complex64 (__main__.TestLinalgCPU)": 667.26,
    "test_inverse_many_batches_cpu_float32 (__main__.TestLinalgCPU)": 1100.719,
    "test_inverse_many_batches_cpu_float64 (__main__.TestLinalgCPU)": 651.037,
    "test_matrix_exp_analytic_cpu_complex128 (__main__.TestLinalgCPU)": 72.965,
    "test_matrix_exp_analytic_cpu_complex64 (__main__.TestLinalgCPU)": 74.184,
    "test_matrix_exp_analytic_cpu_float32 (__main__.TestLinalgCPU)": 128.768,
    "test_matrix_exp_analytic_cpu_float64 (__main__.TestLinalgCPU)": 72.138,
    "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 123.728,
    "test_fn_gradgrad_linalg_householder_product_cuda_complex128 (__main__.TestGradientsCUDA)": 60.708,
    "test_lobpcg (__main__.TestAutograd)": 120.408,
    "test_collect_callgrind (__main__.TestBenchmarkUtils)": 206.896,
    "test_collect_cpp_callgrind (__main__.TestBenchmarkUtils)": 122.507,
    "test_proper_exit (__main__.TestDataLoader)": 172.356,
    "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 172.02,
    "testNBit (__main__.operator_test.fused_nbit_rowwise_conversion_ops_test.TestNBitGreedyFused)": 96.9435,
    "IntegerDivider (__main__.TestCUDAIntegerDivider)": 156.73700000000002
}
```

Reviewed By: walterddr, malfet

Differential Revision: D27412861

Pulled By: janeyx99

fbshipit-source-id: ec3d327e0dc6c93093e8b1c8454e3166b0649909
This commit is contained in:
Jane Xu 2021-03-29 20:43:07 -07:00 committed by Facebook GitHub Bot
parent a1bd7918cc
commit 5c12d97d96
3 changed files with 71 additions and 1 deletions

1
.gitignore vendored
View File

@ -15,6 +15,7 @@ coverage.xml
.hypothesis
.mypy_cache
**/.pytorch-test-times
**/.pytorch-slow-tests
*/*.pyc
*/*.so*
*/**/__pycache__

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
import argparse
import json
import os
import statistics
from collections import defaultdict
from tools.stats_utils.s3_stat_parser import get_previous_reports_for_branch, Report, Version2Report
from typing import cast, DefaultDict, Dict, List
# Default path of the JSON file that the slow test times are written to.
SLOW_TESTS_FILE = '.pytorch-slow-tests'
# Minimum mean runtime, in seconds, for a test case to be exported as slow.
SLOW_TEST_CASE_THRESHOLD_SEC = 60.0
def get_test_case_times() -> Dict[str, float]:
    """Compute the mean runtime of every passing test case in earlier reports.

    Reports are fetched for ``origin/viable/strict`` with an empty CI job
    prefix. A test case contributes a timing sample only when its ``status``
    is ``None``.

    Returns:
        A mapping from ``"<case name> (__main__.<suite name>)"`` to the mean
        of the ``seconds`` values collected for that name.

    Raises:
        RuntimeError: If a report's ``format_version`` is not 2 (a missing
            ``format_version`` is treated as 1).
    """
    # e.g. "test_doc_examples (__main__.TestTypeHints)" -> [values]
    samples: DefaultDict[str, List[float]] = defaultdict(list)
    previous_reports: List[Report] = get_previous_reports_for_branch('origin/viable/strict', "")

    for raw_report in previous_reports:
        if raw_report.get('format_version', 1) != 2:
            raise RuntimeError("S3 format currently handled is version 2 only")
        typed_report = cast(Version2Report, raw_report)
        for file_entry in typed_report['files'].values():
            for suite_name, suite in file_entry['suites'].items():
                for case_name, case in suite['cases'].items():
                    if case['status'] is not None:
                        # Only cases without a recorded status are counted.
                        continue
                    # The "__main__." prefix matches the format of
                    # test.__class__ in common_utils.py (where this data will
                    # be used) and what a running test prints.
                    key = f'{case_name} (__main__.{suite_name})'
                    samples[key].append(case['seconds'])

    averages: Dict[str, float] = {}
    for key, durations in samples.items():
        averages[key] = statistics.mean(durations)
    return averages
def filter_slow_tests(test_cases_dict: Dict[str, float]) -> Dict[str, float]:
    """Return the entries whose time reaches the slow-test threshold.

    Args:
        test_cases_dict: Mapping from test case name to time in seconds.

    Returns:
        A new dict, in the same order, holding only the entries whose time is
        greater than or equal to ``SLOW_TEST_CASE_THRESHOLD_SEC``.
    """
    slow_cases: Dict[str, float] = {}
    for case_name, seconds in test_cases_dict.items():
        if seconds >= SLOW_TEST_CASE_THRESHOLD_SEC:
            slow_cases[case_name] = seconds
    return slow_cases
def export_slow_tests(filename: str) -> None:
    """Write the slow test cases and their mean times to ``filename`` as JSON.

    The data is gathered before the output file is opened, so a failure while
    collecting it cannot leave an existing file truncated and empty.

    Args:
        filename: Path of the JSON file to create or overwrite.
    """
    # Fetch and filter first: opening in write mode truncates the file at once.
    slow_test_times: Dict[str, float] = filter_slow_tests(get_test_case_times())
    if os.path.exists(filename):
        print(f'Overwriting existent file: {filename}')
    # The file is only written, never read back, so plain 'w' is sufficient.
    with open(filename, 'w') as file:
        json.dump(slow_test_times, file, indent=' ', separators=(',', ': '))
def parse_args():
    """Parse the command-line options of the slow-test export script.

    Returns:
        The namespace produced by ``argparse``. Its ``filename`` attribute is
        the output path; it is ``SLOW_TESTS_FILE`` both when ``-f/--filename``
        is absent and when it is given without a value.
    """
    filename_option = dict(
        nargs='?',
        type=str,
        default=SLOW_TESTS_FILE,
        const=SLOW_TESTS_FILE,
        help='Specify a file path to dump slow test times from previous S3 stats. Default file path: .pytorch-slow-tests',
    )
    arg_parser = argparse.ArgumentParser(
        description='Export a JSON of slow test cases in PyTorch unit test suite')
    arg_parser.add_argument('-f', '--filename', **filename_option)
    return arg_parser.parse_args()
def main():
    """Script entry point: parse the options, then export the slow tests."""
    export_slow_tests(parse_args().filename)


# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -189,6 +189,6 @@ def get_previous_reports_for_branch(branch: str, ci_job_prefix: str = "") -> Lis
for job_name, summary in summaries.items():
reports.append(summary[0])
if len(summary) > 1:
logger.info(f'Warning: multiple summary objects found for {commit}/{job_name}')
logger.warning(f'WARNING: Multiple summary objects found for {commit}/{job_name}')
commit_index += 1
return reports