Add script to export a JSON of slow test case times (#54907)

Summary: This PR introduces a script to spit out a list of slow tests into a file `.pytorch-slow-tests`. The format is currently JSON, and is simply a dictionary with entries that look like: `("test_case_name (__main__.test_suite)" -> average time in seconds)`. This is one of the steps in maintaining a list of slow tests so we can retire the manual slowTest labeling process. The script reads the previous day's viable/strict data (to ensure we have fully uploaded data), and aggregates the test times for **passed** test cases. It then filters the individual test cases to exclude those faster than 60 seconds. Pull Request resolved: https://github.com/pytorch/pytorch/pull/54907 Test Plan: `python tools/export_slow_tests.py` Check that `.pytorch-slow-tests` contains data. Mine looks like: ``` { "test_matmul_4d_4d_complex_cpu (__main__.TestAutogradDeviceTypeCPU)": 91.22675, "test_unary_ops (__main__.TestTEFuser)": 68.6, "test_fn_gradgrad_unfold_cpu_complex128 (__main__.TestGradientsCPU)": 82.49153333333334, "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 94.0914375, "test_ddp_uneven_inputs (__main__.TestDistBackendWithFork)": 134.4995, "test_pdist_norm_large_cuda (__main__.TestTorchDeviceTypeCUDA)": 60.2634, "test_cusparse_multiple_threads_same_device (__main__.TestCuda)": 97.9022, "test_fn_gradgrad_unfold_cuda_complex128 (__main__.TestGradientsCUDA)": 130.7222, "test_ddp_uneven_inputs (__main__.TestDistBackendWithSpawn)": 136.08133333333333, "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 112.80733333333333, "test_lobpcg_ortho_cuda_float64 (__main__.TestLinalgCUDA)": 63.8312, "test_matmul_4d_4d_complex_cuda (__main__.TestAutogradDeviceTypeCUDA)": 62.1062, "test_inverse_many_batches_cuda_complex128 (__main__.TestLinalgCUDA)": 1434.505, "test_inverse_many_batches_cuda_complex64 (__main__.TestLinalgCUDA)": 1403.846, "test_inverse_many_batches_cuda_float32 (__main__.TestLinalgCUDA)": 2081.614, "test_inverse_many_batches_cuda_float64 
(__main__.TestLinalgCUDA)": 1410.788, "test_matrix_exp_analytic_cuda_complex128 (__main__.TestLinalgCUDA)": 172.167, "test_matrix_exp_analytic_cuda_complex64 (__main__.TestLinalgCUDA)": 172.57, "test_matrix_exp_analytic_cuda_float32 (__main__.TestLinalgCUDA)": 258.61, "test_matrix_exp_analytic_cuda_float64 (__main__.TestLinalgCUDA)": 174.793, "test_inverse_many_batches_cpu_complex128 (__main__.TestLinalgCPU)": 666.464, "test_inverse_many_batches_cpu_complex64 (__main__.TestLinalgCPU)": 667.26, "test_inverse_many_batches_cpu_float32 (__main__.TestLinalgCPU)": 1100.719, "test_inverse_many_batches_cpu_float64 (__main__.TestLinalgCPU)": 651.037, "test_matrix_exp_analytic_cpu_complex128 (__main__.TestLinalgCPU)": 72.965, "test_matrix_exp_analytic_cpu_complex64 (__main__.TestLinalgCPU)": 74.184, "test_matrix_exp_analytic_cpu_float32 (__main__.TestLinalgCPU)": 128.768, "test_matrix_exp_analytic_cpu_float64 (__main__.TestLinalgCPU)": 72.138, "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 123.728, "test_fn_gradgrad_linalg_householder_product_cuda_complex128 (__main__.TestGradientsCUDA)": 60.708, "test_lobpcg (__main__.TestAutograd)": 120.408, "test_collect_callgrind (__main__.TestBenchmarkUtils)": 206.896, "test_collect_cpp_callgrind (__main__.TestBenchmarkUtils)": 122.507, "test_proper_exit (__main__.TestDataLoader)": 172.356, "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 172.02, "testNBit (__main__.operator_test.fused_nbit_rowwise_conversion_ops_test.TestNBitGreedyFused)": 96.9435, "IntegerDivider (__main__.TestCUDAIntegerDivider)": 156.73700000000002 } ``` Reviewed By: walterddr, malfet Differential Revision: D27412861 Pulled By: janeyx99 fbshipit-source-id: ec3d327e0dc6c93093e8b1c8454e3166b0649909
2025-12-06 12:20:52 +01:00 · 2021-03-29 20:43:07 -07:00 · 2021-03-29 20:43:07 -07:00 · 5c12d97d96
commit 5c12d97d96
parent a1bd7918cc
3 changed files with 71 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -15,6 +15,7 @@ coverage.xml
 .hypothesis
 .mypy_cache
 **/.pytorch-test-times
+**/.pytorch-slow-tests
 */*.pyc
 */*.so*
 */**/__pycache__
--- a/tools/export_slow_tests.py
+++ b/tools/export_slow_tests.py
@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import os
+import statistics
+from collections import defaultdict
+from tools.stats_utils.s3_stat_parser import get_previous_reports_for_branch, Report, Version2Report
+from typing import cast, DefaultDict, Dict, List
+
+# Default name of the JSON file that the slow test times are written to.
+SLOW_TESTS_FILE = '.pytorch-slow-tests'
+# Test cases whose mean time (in seconds) is at or above this value are exported as slow.
+SLOW_TEST_CASE_THRESHOLD_SEC = 60.0
+
+
+def get_test_case_times() -> Dict[str, float]:
+    """Return the mean time, in seconds, of every test case that passed.
+
+    The reports come from get_previous_reports_for_branch() for the
+    'origin/viable/strict' branch. Only test cases whose status is None
+    (i.e. that succeeded) contribute a time.
+
+    Returns:
+        A dictionary with entries like
+        "test_doc_examples (__main__.TestTypeHints)" -> mean of the recorded times.
+
+    Raises:
+        RuntimeError: If a report is not in format version 2.
+    """
+    durations_by_test: DefaultDict[str, List[float]] = defaultdict(list)
+    for raw_report in get_previous_reports_for_branch('origin/viable/strict', ""):
+        # Reports without an explicit format_version are treated as version 1.
+        if raw_report.get('format_version', 1) != 2:
+            raise RuntimeError("S3 format currently handled is version 2 only")
+        files = cast(Version2Report, raw_report)['files']
+        for file_stats in files.values():
+            for suitename, suite_stats in file_stats['suites'].items():
+                for casename, case_stats in suite_stats['cases'].items():
+                    # Anything other than a None status did not succeed; skip it.
+                    if case_stats['status'] is not None:
+                        continue
+                    # __main__ is attached because that matches the format of test.__class__
+                    # in common_utils.py (where this data will be used), and also matches
+                    # what the output of a running test would look like.
+                    key = f'{casename} (__main__.{suitename})'
+                    durations_by_test[key].append(case_stats['seconds'])
+    averages: Dict[str, float] = {}
+    for key, durations in durations_by_test.items():
+        averages[key] = statistics.mean(durations)
+    return averages
+
+
+def filter_slow_tests(test_cases_dict: Dict[str, float]) -> Dict[str, float]:
+    """Return the entries of ``test_cases_dict`` that count as slow.
+
+    An entry is kept when its time is greater than or equal to
+    SLOW_TEST_CASE_THRESHOLD_SEC; the input dictionary is not modified.
+    """
+    slow_cases: Dict[str, float] = {}
+    for name, seconds in test_cases_dict.items():
+        if seconds >= SLOW_TEST_CASE_THRESHOLD_SEC:
+            slow_cases[name] = seconds
+    return slow_cases
+
+
+def export_slow_tests(filename: str) -> None:
+    """Write the slow test cases and their mean times to ``filename`` as JSON.
+
+    The times are gathered and filtered before the output file is opened, so a
+    failure while collecting them (for example the RuntimeError raised for an
+    unsupported report format) leaves any existing file untouched instead of
+    truncating it.
+
+    Args:
+        filename: Path of the JSON file to create or overwrite.
+    """
+    slow_test_times: Dict[str, float] = filter_slow_tests(get_test_case_times())
+    if os.path.exists(filename):
+        print(f'Overwriting existent file: {filename}')
+    # Plain write mode is enough: the file is never read back here.
+    with open(filename, 'w') as file:
+        json.dump(slow_test_times, file, indent='    ', separators=(',', ': '))
+
+
+def parse_args():
+    """Parse the command-line options of the export script.
+
+    The only option is -f/--filename, which falls back to SLOW_TESTS_FILE both
+    when the flag is absent and when it is given without a value.
+
+    Returns:
+        The argparse namespace holding the ``filename`` attribute.
+    """
+    arg_parser = argparse.ArgumentParser(
+        description='Export a JSON of slow test cases in PyTorch unit test suite')
+    filename_options = dict(
+        nargs='?',
+        type=str,
+        default=SLOW_TESTS_FILE,
+        const=SLOW_TESTS_FILE,
+        help='Specify a file path to dump slow test times from previous S3 stats. Default file path: .pytorch-slow-tests',
+    )
+    arg_parser.add_argument('-f', '--filename', **filename_options)
+    return arg_parser.parse_args()
+
+
+def main():
+    """Script entry point: export the slow tests to the file chosen on the command line."""
+    export_slow_tests(parse_args().filename)
+
+
+# Run the export only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
--- a/tools/stats_utils/s3_stat_parser.py
+++ b/tools/stats_utils/s3_stat_parser.py
@ -189,6 +189,6 @@ def get_previous_reports_for_branch(branch: str, ci_job_prefix: str = "") -> Lis
        for job_name, summary in summaries.items():
            reports.append(summary[0])
            if len(summary) > 1:
-                logger.info(f'Warning: multiple summary objects found for {commit}/{job_name}')
+                logger.warning(f'WARNING: Multiple summary objects found for {commit}/{job_name}')
        commit_index += 1
    return reports