diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
index 364675b9011..0d5873d357f 100755
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@@ -172,19 +172,24 @@ test_libtorch() {
     # Start background download
     python tools/download_mnist.py --quiet -d test/cpp/api/mnist &
 
+    # Make test_reports directory
+    # NB: the ending test_libtorch must match the current function name for the current
+    # test reporting process (in print_test_stats.py) to function as expected.
+    TEST_REPORTS_DIR=test/test-reports/cpp-unittest/test_libtorch
+    mkdir -p $TEST_REPORTS_DIR
+
     # Run JIT cpp tests
-    mkdir -p test/test-reports/cpp-unittest
     python test/cpp/jit/tests_setup.py setup
     if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
-      build/bin/test_jit --gtest_output=xml:test/test-reports/cpp-unittest/test_jit.xml
+      build/bin/test_jit --gtest_output=xml:$TEST_REPORTS_DIR/test_jit.xml
     else
-      build/bin/test_jit --gtest_filter='-*CUDA' --gtest_output=xml:test/test-reports/cpp-unittest/test_jit.xml
+      build/bin/test_jit --gtest_filter='-*CUDA' --gtest_output=xml:$TEST_REPORTS_DIR/test_jit.xml
     fi
     python test/cpp/jit/tests_setup.py shutdown
     # Wait for background download to finish
     wait
-    OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api --gtest_output=xml:test/test-reports/cpp-unittest/test_api.xml
-    build/bin/test_tensorexpr --gtest_output=xml:test/test-reports/cpp-unittests/test_tensorexpr.xml
+    OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api --gtest_output=xml:$TEST_REPORTS_DIR/test_api.xml
+    build/bin/test_tensorexpr --gtest_output=xml:$TEST_REPORTS_DIR/test_tensorexpr.xml
     assert_git_not_dirty
   fi
 }
@@ -192,30 +197,39 @@ test_libtorch() {
 test_vulkan() {
   if [[ "$BUILD_ENVIRONMENT" == *vulkan-linux* ]]; then
     export VK_ICD_FILENAMES=/var/lib/jenkins/swiftshader/build/Linux/vk_swiftshader_icd.json
-    mkdir -p test/test-reports/cpp-vulkan
-    build/bin/vulkan_test --gtest_output=xml:test/test-reports/cpp-vulkan/vulkan_test.xml
+    # NB: the ending test_vulkan must match the current function name for the current
+    # test reporting process (in print_test_stats.py) to function as expected.
+    TEST_REPORTS_DIR=test/test-reports/cpp-vulkan/test_vulkan
+    mkdir -p $TEST_REPORTS_DIR
+    build/bin/vulkan_test --gtest_output=xml:$TEST_REPORTS_DIR/vulkan_test.xml
   fi
 }
 
 test_distributed() {
   if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
     echo "Testing distributed C++ tests"
-    mkdir -p test/test-reports/cpp-distributed
-    build/bin/FileStoreTest --gtest_output=xml:test/test-reports/cpp-distributed/FileStoreTest.xml
-    build/bin/HashStoreTest --gtest_output=xml:test/test-reports/cpp-distributed/HashStoreTest.xml
-    build/bin/TCPStoreTest --gtest_output=xml:test/test-reports/cpp-distributed/TCPStoreTest.xml
+    # NB: the ending test_distributed must match the current function name for the current
+    # test reporting process (in print_test_stats.py) to function as expected.
+    TEST_REPORTS_DIR=test/test-reports/cpp-distributed/test_distributed
+    mkdir -p $TEST_REPORTS_DIR
+    build/bin/FileStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/FileStoreTest.xml
+    build/bin/HashStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/HashStoreTest.xml
+    build/bin/TCPStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/TCPStoreTest.xml
 
-    build/bin/ProcessGroupGlooTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupGlooTest.xml
-    build/bin/ProcessGroupNCCLTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupNCCLTest.xml
-    build/bin/ProcessGroupNCCLErrorsTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupNCCLErrorsTest.xml
+    build/bin/ProcessGroupGlooTest --gtest_output=xml:$TEST_REPORTS_DIR/ProcessGroupGlooTest.xml
+    build/bin/ProcessGroupNCCLTest --gtest_output=xml:$TEST_REPORTS_DIR/ProcessGroupNCCLTest.xml
+    build/bin/ProcessGroupNCCLErrorsTest --gtest_output=xml:$TEST_REPORTS_DIR/ProcessGroupNCCLErrorsTest.xml
   fi
 }
 
 test_rpc() {
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
     echo "Testing RPC C++ tests"
-    mkdir -p test/test-reports/cpp-rpc
-    build/bin/test_cpp_rpc --gtest_output=xml:test/test-reports/cpp-rpc/test_cpp_rpc.xml
+    # NB: the ending test_rpc must match the current function name for the current
+    # test reporting process (in print_test_stats.py) to function as expected.
+    TEST_REPORTS_DIR=test/test-reports/cpp-rpc/test_rpc
+    mkdir -p $TEST_REPORTS_DIR
+    build/bin/test_cpp_rpc --gtest_output=xml:$TEST_REPORTS_DIR/test_cpp_rpc.xml
   fi
 }
diff --git a/test/test_testing.py b/test/test_testing.py
index 588979f652c..e1722ba785c 100644
--- a/test/test_testing.py
+++ b/test/test_testing.py
@@ -649,6 +649,17 @@ def fakehash(char):
     return char * 40
 
 
+def dummy_meta_meta() -> print_test_stats.ReportMetaMeta:
+    return {
+        'build_pr': '',
+        'build_tag': '',
+        'build_sha1': '',
+        'build_branch': '',
+        'build_job': '',
+        'build_workflow_id': '',
+    }
+
+
 def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
     return {
         'name': name,
@@ -659,7 +670,7 @@ def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
     }
 
 
-def makereport(tests):
+def make_report_v1(tests) -> print_test_stats.Version1Report:
     suites = {
         suite_name: {
             'total_seconds': sum(case['seconds'] for case in cases),
@@ -668,59 +679,201 @@
         for suite_name, cases in tests.items()
     }
     return {
+        **dummy_meta_meta(),
         'total_seconds': sum(s['total_seconds'] for s in suites.values()),
         'suites': suites,
     }
 
 
+def make_case_v2(seconds, status=None) -> print_test_stats.Version2Case:
+    return {
+        'seconds': seconds,
+        'status': status,
+    }
+
+
+def make_report_v2(tests) -> print_test_stats.Version2Report:
+    files = {}
+    for file_name, file_suites in tests.items():
+        suites = {
+            suite_name: {
+                'total_seconds': sum(case['seconds'] for case in cases.values()),
+                'cases': cases,
+            }
+            for suite_name, cases in file_suites.items()
+        }
+        files[file_name] = {
+            'suites': suites,
+            'total_seconds': sum(suite['total_seconds'] for suite in suites.values()),
+        }
+    return {
+        **dummy_meta_meta(),
+        'format_version': 2,
+        'total_seconds': sum(s['total_seconds'] for s in files.values()),
+        'files': files,
+    }
+
+
 class TestPrintTestStats(TestCase):
     maxDiff = None
 
-    def test_analysis(self):
-        head_report = makereport({
-            # input ordering of the suites is ignored
-            'Grault': [
-                # not printed: status same and time similar
-                makecase('test_grault0', 4.78, failed=True),
-                # status same, but time increased a lot
-                makecase('test_grault2', 1.473, errored=True),
-            ],
-            # individual tests times changed, not overall suite
-            'Qux': [
-                # input ordering of the test cases is ignored
-                makecase('test_qux1', 0.001, skipped=True),
-                makecase('test_qux6', 0.002, skipped=True),
-                # time in bounds, but status changed
-                makecase('test_qux4', 7.158, failed=True),
-                # not printed because it's the same as before
-                makecase('test_qux7', 0.003, skipped=True),
-                makecase('test_qux5', 11.968),
-                makecase('test_qux3', 23.496),
-            ],
-            # new test suite
-            'Bar': [
-                makecase('test_bar2', 3.742, failed=True),
-                makecase('test_bar1', 50.447),
-            ],
-            # overall suite time changed but no individual tests
-            'Norf': [
-                makecase('test_norf1', 3),
-                makecase('test_norf2', 3),
-                makecase('test_norf3', 3),
-                makecase('test_norf4', 3),
-            ],
-            # suite doesn't show up if it doesn't change enough
-            'Foo': [
-                makecase('test_foo1', 42),
-                makecase('test_foo2', 56),
-            ],
+    version1_report: print_test_stats.Version1Report = make_report_v1({
+        # input ordering of the suites is ignored
+        'Grault': [
+            # not printed: status same and time similar
+            makecase('test_grault0', 4.78, failed=True),
+            # status same, but time increased a lot
+            makecase('test_grault2', 1.473, errored=True),
+        ],
+        # individual tests times changed, not overall suite
+        'Qux': [
+            # input ordering of the test cases is ignored
+            makecase('test_qux1', 0.001, skipped=True),
+            makecase('test_qux6', 0.002, skipped=True),
+            # time in bounds, but status changed
+            makecase('test_qux4', 7.158, failed=True),
+            # not printed because it's the same as before
+            makecase('test_qux7', 0.003, skipped=True),
+            makecase('test_qux5', 11.968),
+            makecase('test_qux3', 23.496),
+        ],
+        # new test suite
+        'Bar': [
+            makecase('test_bar2', 3.742, failed=True),
+            makecase('test_bar1', 50.447),
+        ],
+        # overall suite time changed but no individual tests
+        'Norf': [
+            makecase('test_norf1', 3),
+            makecase('test_norf2', 3),
+            makecase('test_norf3', 3),
+            makecase('test_norf4', 3),
+        ],
+        # suite doesn't show up if it doesn't change enough
+        'Foo': [
+            makecase('test_foo1', 42),
+            makecase('test_foo2', 56),
+        ],
+    })
+
+    version2_report: print_test_stats.Version2Report = make_report_v2(
+        {
+            'test_a': {
+                'Grault': {
+                    'test_grault0': make_case_v2(4.78, 'failed'),
+                    'test_grault2': make_case_v2(1.473, 'errored'),
+                },
+                'Qux': {
+                    'test_qux1': make_case_v2(0.001, 'skipped'),
+                    'test_qux6': make_case_v2(0.002, 'skipped'),
+                    'test_qux4': make_case_v2(7.158, 'failed'),
+                    'test_qux7': make_case_v2(0.003, 'skipped'),
+                    'test_qux8': make_case_v2(11.968),
+                    'test_qux3': make_case_v2(23.496),
+                }
+            },
+            'test_b': {
+                'Bar': {
+                    'test_bar2': make_case_v2(3.742, 'failed'),
+                    'test_bar1': make_case_v2(50.447),
+                },
+                # overall suite time changed but no individual tests
+                'Norf': {
+                    'test_norf1': make_case_v2(3),
+                    'test_norf2': make_case_v2(3),
+                    'test_norf3': make_case_v2(3),
+                    'test_norf4': make_case_v2(3),
+                },
+            },
+            'test_c': {
+                'Foo': {
+                    'test_foo1': make_case_v2(42),
+                    'test_foo2': make_case_v2(56),
+                },
+            }
         })
 
+    def test_simplify(self):
+        self.assertEqual(
+            {
+                '': {
+                    'Bar': {
+                        'test_bar1': {'seconds': 50.447, 'status': None},
+                        'test_bar2': {'seconds': 3.742, 'status': 'failed'},
+                    },
+                    'Foo': {
+                        'test_foo1': {'seconds': 42, 'status': None},
+                        'test_foo2': {'seconds': 56, 'status': None},
+                    },
+                    'Grault': {
+                        'test_grault0': {'seconds': 4.78, 'status': 'failed'},
+                        'test_grault2': {'seconds': 1.473, 'status': 'errored'},
+                    },
+                    'Norf': {
+                        'test_norf1': {'seconds': 3, 'status': None},
+                        'test_norf3': {'seconds': 3, 'status': None},
+                        'test_norf2': {'seconds': 3, 'status': None},
+                        'test_norf4': {'seconds': 3, 'status': None},
+                    },
+                    'Qux': {
+                        'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
+                        'test_qux3': {'seconds': 23.496, 'status': None},
+                        'test_qux4': {'seconds': 7.158, 'status': 'failed'},
+                        'test_qux5': {'seconds': 11.968, 'status': None},
+                        'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
+                        'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
+                    },
+                },
+            },
+            print_test_stats.simplify(self.version1_report)
+        )
+
+        self.assertEqual(
+            {
+                'test_a': {
+                    'Grault': {
+                        'test_grault0': {'seconds': 4.78, 'status': 'failed'},
+                        'test_grault2': {'seconds': 1.473, 'status': 'errored'},
+                    },
+                    'Qux': {
+                        'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
+                        'test_qux3': {'seconds': 23.496, 'status': None},
+                        'test_qux4': {'seconds': 7.158, 'status': 'failed'},
+                        'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
+                        'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
+                        'test_qux8': {'seconds': 11.968, 'status': None},
+                    },
+                },
+                'test_b': {
+                    'Bar': {
+                        'test_bar1': {'seconds': 50.447, 'status': None},
+                        'test_bar2': {'seconds': 3.742, 'status': 'failed'},
+                    },
+                    'Norf': {
+                        'test_norf1': {'seconds': 3, 'status': None},
+                        'test_norf2': {'seconds': 3, 'status': None},
+                        'test_norf3': {'seconds': 3, 'status': None},
+                        'test_norf4': {'seconds': 3, 'status': None},
+                    },
+                },
+                'test_c': {
+                    'Foo': {
+                        'test_foo1': {'seconds': 42, 'status': None},
+                        'test_foo2': {'seconds': 56, 'status': None},
+                    },
+                },
+            },
+            print_test_stats.simplify(self.version2_report),
+        )
+
+    def test_analysis(self):
+        head_report = self.version1_report
+
         base_reports = {
             # bbbb has no reports, so base is cccc instead
             fakehash('b'): [],
             fakehash('c'): [
-                makereport({
+                make_report_v1({
                     'Baz': [
                         makecase('test_baz2', 13.605),
                         # no recent suites have & skip this test
@@ -753,7 +906,7 @@ class TestPrintTestStats(TestCase):
                 }),
             ],
             fakehash('d'): [
-                makereport({
+                make_report_v1({
                     'Foo': [
                         makecase('test_foo1', 40),
                         # removed in cccc
@@ -783,7 +936,7 @@
             ],
             fakehash('e'): [],
             fakehash('f'): [
-                makereport({
+                make_report_v1({
                     'Foo': [
                         makecase('test_foo3', 24),
                         makecase('test_foo1', 43),
@@ -1066,14 +1219,14 @@ Added (across 1 suite) 1 test, totaling + 3.00s
 ''',
             print_test_stats.regression_info(
                 head_sha=fakehash('a'),
-                head_report=makereport({
+                head_report=make_report_v1({
                     'Foo': [
                         makecase('test_foo', 0.02, skipped=True),
                         makecase('test_baz', 3),
                     ]}),
                 base_reports={
                     fakehash('b'): [
-                        makereport({
+                        make_report_v1({
                             'Foo': [
                                 makecase('test_foo', 40),
                                 makecase('test_bar', 1),
@@ -1081,7 +1234,7 @@
                         }),
                     ],
                     fakehash('c'): [
-                        makereport({
+                        make_report_v1({
                            'Foo': [
                                 makecase('test_foo', 43),
                             ],
@@ -1135,7 +1288,7 @@ Added (across 1 suite) 2 tests, totaling + 3.02s
 ''',
             print_test_stats.regression_info(
                 head_sha=fakehash('a'),
-                head_report=makereport({
+                head_report=make_report_v1({
                     'Foo': [
                         makecase('test_foo', 0.02, skipped=True),
                         makecase('test_baz', 3),
diff --git a/test/test_utils.py b/test/test_utils.py
index 49d662d8f4c..78ca0fc8b3e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -289,7 +289,7 @@ class TestCheckpoint(TestCase):
         out = checkpoint(run_fn2, input_var, input_var2)
         out.sum().backward()
 
-class TestDataLoader(TestCase):
+class TestDataLoaderUtils(TestCase):
     def setUp(self):
         self.dataset = torch.randn(5, 3, 3, 2)
         self.batch_size = 3
diff --git a/tools/test_history.py b/tools/test_history.py
index e941f148bfe..352d9c7b17d 100755
--- a/tools/test_history.py
+++ b/tools/test_history.py
@@ -6,11 +6,11 @@ import json
 import subprocess
 from collections import defaultdict
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
 
 import boto3  # type: ignore[import]
 import botocore  # type: ignore[import]
-from typing_extensions import TypedDict
+from typing_extensions import Literal, TypedDict
 
 
 def get_git_commit_history(
@@ -36,31 +36,70 @@ def get_object_summaries(*, bucket: Any, sha: str) -> Dict[str, List[Any]]:
     return dict(by_job)
 
 
-class Case(TypedDict):
-    name: str
+# TODO: consolidate these typedefs with the identical ones in
+# torch/testing/_internal/print_test_stats.py
+
+Commit = str  # 40-digit SHA-1 hex string
+Status = Optional[Literal['errored', 'failed', 'skipped']]
+
+
+class CaseMeta(TypedDict):
     seconds: float
+
+
+class Version1Case(CaseMeta):
+    name: str
     errored: bool
     failed: bool
     skipped: bool
 
 
-class Suite(TypedDict):
+class Version1Suite(TypedDict):
     total_seconds: float
-    cases: List[Case]
+    cases: List[Version1Case]
 
 
-class ReportMeta(TypedDict):
+class ReportMetaMeta(TypedDict):
     build_pr: str
     build_tag: str
-    build_sha1: str
+    build_sha1: Commit
     build_branch: str
     build_job: str
     build_workflow_id: str
 
 
-class Report(ReportMeta):
+class ReportMeta(ReportMetaMeta):
     total_seconds: float
-    suites: Dict[str, Suite]
+
+
+class Version1Report(ReportMeta):
+    suites: Dict[str, Version1Suite]
+
+
+class Version2Case(CaseMeta):
+    status: Status
+
+
+class Version2Suite(TypedDict):
+    total_seconds: float
+    cases: Dict[str, Version2Case]
+
+
+class Version2File(TypedDict):
+    total_seconds: float
+    suites: Dict[str, Version2Suite]
+
+
+class VersionedReport(ReportMeta):
+    format_version: int
+
+
+# report: Version2Report implies report['format_version'] == 2
+class Version2Report(VersionedReport):
+    files: Dict[str, Version2File]
+
+
+Report = Union[Version1Report, VersionedReport]
 
 
 def get_jsons(
@@ -77,32 +116,63 @@
     }
 
 
+# TODO: consolidate this with the case_status function from
+# torch/testing/_internal/print_test_stats.py
+def case_status(case: Version1Case) -> Status:
+    for k in {'errored', 'failed', 'skipped'}:
+        if case[k]:  # type: ignore[misc]
+            return cast(Status, k)
+    return None
+
+
+# TODO: consolidate this with the newify_case function from
+# torch/testing/_internal/print_test_stats.py
+def newify_case(case: Version1Case) -> Version2Case:
+    return {
+        'seconds': case['seconds'],
+        'status': case_status(case),
+    }
+
+
+# TODO: consolidate this with the simplify function from
+# torch/testing/_internal/print_test_stats.py
 def get_cases(
     *,
     data: Report,
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
-) -> List[Case]:
-    cases = []
-    suites = data['suites']
-    for name, suite in suites.items():
-        if name == suite_name or not suite_name:
-            for case in suite['cases']:
-                if case['name'] == test_name:
-                    cases.append(case)
+) -> List[Version2Case]:
+    cases: List[Version2Case] = []
+    if 'format_version' not in data:  # version 1 implicitly
+        v1report = cast(Version1Report, data)
+        suites = v1report['suites']
+        for sname, v1suite in suites.items():
+            if sname == suite_name or not suite_name:
+                for v1case in v1suite['cases']:
+                    if v1case['name'] == test_name:
+                        cases.append(newify_case(v1case))
+    else:
+        v_report = cast(VersionedReport, data)
+        version = v_report['format_version']
+        if version == 2:
+            v2report = cast(Version2Report, v_report)
+            for fname, v2file in v2report['files'].items():
+                if fname == filename or not filename:
+                    for sname, v2suite in v2file['suites'].items():
+                        if sname == suite_name or not suite_name:
+                            v2case = v2suite['cases'].get(test_name)
+                            if v2case:
+                                cases.append(v2case)
+        else:
+            raise RuntimeError(f'Unknown format version: {version}')
     return cases
 
 
-def case_status(case: Case) -> Optional[str]:
-    for k in {'errored', 'failed', 'skipped'}:
-        if case[k]:  # type: ignore[misc]
-            return k
-    return None
-
-
 def make_column(
     *,
     data: Optional[Report],
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
     digits: int,
@@ -112,12 +182,13 @@
     if data:
         cases = get_cases(
             data=data,
+            filename=filename,
             suite_name=suite_name,
             test_name=test_name
         )
         if cases:
            case = cases[0]
-            status = case_status(case)
+            status = case['status']
            omitted = len(cases) - 1
            if status:
                return f'{status.rjust(num_length)} ', omitted
@@ -134,6 +205,7 @@ def make_columns(
     jobs: List[str],
     jsons: Dict[str, Report],
     omitted: Dict[str, int],
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
     digits: int,
@@ -145,6 +217,7 @@
         data = jsons.get(job)
         column, omitted_suites = make_column(
             data=data,
+            filename=filename,
             suite_name=suite_name,
             test_name=test_name,
             digits=digits,
@@ -165,6 +238,7 @@ def make_lines(
     jobs: Set[str],
     jsons: Dict[str, Report],
     omitted: Dict[str, int],
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
 ) -> List[str]:
@@ -172,12 +246,13 @@
     for job, data in jsons.items():
         cases = get_cases(
             data=data,
+            filename=filename,
             suite_name=suite_name,
             test_name=test_name,
         )
         if cases:
             case = cases[0]
-            status = case_status(case)
+            status = case['status']
             line = f'{job} {case["seconds"]}s{f" {status}" if status else ""}'
             if job in omitted and omitted[job] > 0:
                 line += f' ({omitted[job]} S3 reports omitted)'
@@ -197,6 +272,7 @@ def display_history(
     bucket: Any,
     commits: List[Tuple[str, datetime]],
     jobs: Optional[List[str]],
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
     delta: int,
@@ -226,6 +302,7 @@
                 jobs=jobs,
                 jsons=jsons,
                 omitted=omitted,
+                filename=filename,
                 suite_name=suite_name,
                 test_name=test_name,
                 digits=digits,
@@ -236,6 +313,7 @@
             jobs=set(jobs or []),
             jsons=jsons,
             omitted=omitted,
+            filename=filename,
             suite_name=suite_name,
             test_name=test_name,
         )
@@ -352,6 +430,10 @@ indicated test was not found in that report.
         action='store_true',
         help='(multiline) ignore listed jobs, show all jobs for each commit',
     )
+    parser.add_argument(
+        '--file',
+        help='name of the file containing the test',
+    )
     parser.add_argument(
         '--suite',
         help='name of the suite containing the test',
@@ -381,6 +463,7 @@ indicated test was not found in that report.
         bucket=bucket,
         commits=commits,
         jobs=jobs,
+        filename=args.file,
         suite_name=args.suite,
         test_name=args.test,
         delta=args.delta,
diff --git a/torch/testing/_internal/print_test_stats.py b/torch/testing/_internal/print_test_stats.py
index 062c9bf7ffb..ce5d4e806f7 100755
--- a/torch/testing/_internal/print_test_stats.py
+++ b/torch/testing/_internal/print_test_stats.py
@@ -5,6 +5,7 @@ import datetime
 import json
 import math
 import os
+import re
 import statistics
 import subprocess
 import time
@@ -12,11 +13,11 @@ from collections import defaultdict
 from glob import glob
 from pathlib import Path
 from typing import (Any, DefaultDict, Dict, Iterable, Iterator, List, Optional,
-                    Tuple)
+                    Set, Tuple, Union, cast)
 from xml.dom import minidom  # type: ignore[import]
 
 import requests
-from typing_extensions import TypedDict
+from typing_extensions import Literal, TypedDict
 
 try:
     import boto3  # type: ignore[import]
@@ -24,29 +25,30 @@ try:
 except ImportError:
     HAVE_BOTO3 = False
 
+# TODO: consolidate these typedefs with the identical ones in
+# tools/test_history.py
+
 Commit = str  # 40-digit SHA-1 hex string
-Status = Optional[str]  # errored, failed, skipped, or None
-
-# represent suite as dict because indexing is useful
-SimplerCase = Tuple[float, Status]
-SimplerSuite = Dict[str, SimplerCase]
-SimplerReport = Dict[str, SimplerSuite]
+Status = Optional[Literal['errored', 'failed', 'skipped']]
 
 
-class Case(TypedDict):
-    name: str
+class CaseMeta(TypedDict):
     seconds: float
+
+
+class Version1Case(CaseMeta):
+    name: str
     errored: bool
     failed: bool
     skipped: bool
 
 
-class Suite(TypedDict):
+class Version1Suite(TypedDict):
     total_seconds: float
-    cases: List[Case]
+    cases: List[Version1Case]
 
 
-class ReportMeta(TypedDict, total=False):
+class ReportMetaMeta(TypedDict):
     build_pr: str
     build_tag: str
     build_sha1: Commit
@@ -55,9 +57,42 @@ class ReportMeta(TypedDict, total=False):
     build_branch: str
     build_job: str
     build_workflow_id: str
 
 
-class Report(ReportMeta):
+class ReportMeta(ReportMetaMeta):
     total_seconds: float
-    suites: Dict[str, Suite]
+
+
+class Version1Report(ReportMeta):
+    suites: Dict[str, Version1Suite]
+
+
+class Version2Case(CaseMeta):
+    status: Status
+
+
+class Version2Suite(TypedDict):
+    total_seconds: float
+    cases: Dict[str, Version2Case]
+
+
+class Version2File(TypedDict):
+    total_seconds: float
+    suites: Dict[str, Version2Suite]
+
+
+class VersionedReport(ReportMeta):
+    format_version: int
+
+
+# report: Version2Report implies report['format_version'] == 2
+class Version2Report(VersionedReport):
+    files: Dict[str, Version2File]
+
+
+Report = Union[Version1Report, VersionedReport]
+
+SimplerSuite = Dict[str, Version2Case]
+SimplerFile = Dict[str, SimplerSuite]
+SimplerReport = Dict[str, SimplerFile]
 
 
 class Stat(TypedDict):
@@ -69,7 +104,7 @@ class CaseDiff(TypedDict):
     margin: str
     name: str
     was: Optional[Tuple[Stat, Status]]
-    now: Optional[SimplerCase]
+    now: Optional[Version2Case]
 
 
 class SuiteDiff(TypedDict):
@@ -80,23 +115,78 @@
     cases: List[CaseDiff]
 
 
-def case_status(case: Case) -> Status:
+# TODO: consolidate this with the case_status function from
+# tools/test_history.py
+def case_status(case: Version1Case) -> Status:
     for k in {'errored', 'failed', 'skipped'}:
         if case[k]:  # type: ignore[misc]
-            return k
+            return cast(Status, k)
     return None
 
 
-def simplify(report: Report) -> SimplerReport:
+# TODO: consolidate this with the newify_case function from
+# tools/test_history.py
+def newify_case(case: Version1Case) -> Version2Case:
     return {
-        suite_name: {
-            case['name']: (case['seconds'], case_status(case))
-            for case in suite['cases']
-        }
-        for suite_name, suite in report['suites'].items()
+        'seconds': case['seconds'],
+        'status': case_status(case),
     }
 
 
+# TODO: consolidate this with the get_cases function from
+# tools/test_history.py
+
+# Here we translate to a three-layer format (file -> suite -> case)
+# rather than a two-layer format (suite -> case) because as mentioned in
+# a comment in the body of this function, if we consolidate suites that
+# share a name, there will be test case name collisions, and once we
+# have those, there's no clean way to deal with it in the diffing logic.
+# It's not great to have to add a dummy empty string for the filename
+# for version 1 reports, but it's better than either losing cases that
+# share a name (for version 2 reports) or using a list of cases rather
+# than a dict.
+def simplify(report: Report) -> SimplerReport:
+    if 'format_version' not in report:  # version 1 implicitly
+        v1report = cast(Version1Report, report)
+        return {
+            # we just don't have test filename information sadly, so we
+            # just make one fake filename that is the empty string
+            '': {
+                suite_name: {
+                    # This clobbers some cases that have duplicate names
+                    # because in version 1, we would merge together all
+                    # the suites with a given name (even if they came
+                    # from different files), so there were actually
+                    # situations in which two cases in the same suite
+                    # shared a name (because they actually originally
+                    # came from two suites that were then merged). It
+                    # would probably be better to warn about the cases
+                    # that we're silently discarding here, but since
+                    # we're only uploading in the new format (where
+                    # everything is also keyed by filename) going
+                    # forward, it shouldn't matter too much.
+                    case['name']: newify_case(case)
+                    for case in suite['cases']
+                }
+                for suite_name, suite in v1report['suites'].items()
+            }
+        }
+    else:
+        v_report = cast(VersionedReport, report)
+        version = v_report['format_version']
+        if version == 2:
+            v2report = cast(Version2Report, v_report)
+            return {
+                filename: {
+                    suite_name: suite['cases']
+                    for suite_name, suite in file_data['suites'].items()
+                }
+                for filename, file_data in v2report['files'].items()
+            }
+        else:
+            raise RuntimeError(f'Unknown format version: {version}')
+
+
 def plural(n: int) -> str:
     return '' if n == 1 else 's'
@@ -165,7 +255,9 @@ def unlines(lines: List[str]) -> str:
 
 
 def matching_test_times(
+    *,
     base_reports: Dict[Commit, List[SimplerReport]],
+    filename: str,
     suite_name: str,
     case_name: str,
     status: Status,
@@ -173,13 +265,16 @@
     times: List[float] = []
     for reports in base_reports.values():
         for report in reports:
-            suite = report.get(suite_name)
-            if suite:
-                case = suite.get(case_name)
-                if case:
-                    t, s = case
-                    if s == status:
-                        times.append(t)
+            file_data = report.get(filename)
+            if file_data:
+                suite = file_data.get(suite_name)
+                if suite:
+                    case = suite.get(case_name)
+                    if case:
+                        t = case['seconds']
+                        s = case['status']
+                        if s == status:
+                            times.append(t)
     return times
@@ -195,30 +290,43 @@ def analyze(
     # find all relevant suites (those in either base or head or both)
     all_reports = [head_report] + base_report
-    all_suites = {k for r in all_reports for k in r.keys()}
+    all_suites: Set[Tuple[str, str]] = {
+        (filename, suite_name)
+        for r in all_reports
+        for filename, file_data in r.items()
+        for suite_name in file_data.keys()
+    }
 
     removed_suites: List[SuiteDiff] = []
     modified_suites: List[SuiteDiff] = []
     added_suites: List[SuiteDiff] = []
 
-    for suite_name in sorted(all_suites):
+    for filename, suite_name in sorted(all_suites):
         case_diffs: List[CaseDiff] = []
-        head_suite = head_report.get(suite_name)
+        head_suite = head_report.get(filename, {}).get(suite_name)
         base_cases: Dict[str, Status] = dict(sorted(set.intersection(*[
-            {(n, s) for n, (_, s) in report.get(suite_name, {}).items()}
+            {
+                (n, case['status'])
+                for n, case
+                in report.get(filename, {}).get(suite_name, {}).items()
+            }
             for report in base_report
         ] or [set()])))
 
         case_stats: Dict[str, Stat] = {}
         if head_suite:
-            now = sum(case[0] for case in head_suite.values())
-            if any(suite_name in report for report in base_report):
+            now = sum(case['seconds'] for case in head_suite.values())
+            if any(
+                filename in report and suite_name in report[filename]
+                for report in base_report
+            ):
                 removed_cases: List[CaseDiff] = []
                 for case_name, case_status in base_cases.items():
                     case_stats[case_name] = list_stat(matching_test_times(
-                        base_reports,
-                        suite_name,
-                        case_name,
-                        case_status,
+                        base_reports=base_reports,
+                        filename=filename,
+                        suite_name=suite_name,
+                        case_name=case_name,
+                        status=case_status,
                     ))
                     if case_name not in head_suite:
                         removed_cases.append({
@@ -234,7 +342,7 @@
                 if head_case_name in base_cases:
                     stat = case_stats[head_case_name]
                     base_status = base_cases[head_case_name]
-                    if head_case[1] != base_status:
+                    if head_case['status'] != base_status:
                         modified_cases.append({
                             'margin': '!',
                             'name': head_case_name,
@@ -278,10 +386,11 @@
         else:
             for case_name, case_status in base_cases.items():
                 case_stats[case_name] = list_stat(matching_test_times(
-                    base_reports,
-                    suite_name,
-                    case_name,
-                    case_status,
+                    base_reports=base_reports,
+                    filename=filename,
+                    suite_name=suite_name,
+                    case_name=case_name,
+                    status=case_status,
                 ))
                 case_diffs.append({
                     'margin': ' ',
@@ -316,9 +425,9 @@ def case_diff_lines(diff: CaseDiff) -> List[str]:
     now = diff['now']
     if now:
-        now_stat: Stat = {'center': now[0], 'spread': None}
+        now_stat: Stat = {'center': now['seconds'], 'spread': None}
         now_line = f'  # now {display_stat(now_stat, case_fmt)}'
-        now_status = now[1]
+        now_status = now['status']
         if now_status:
             now_line += f' ({now_status})'
         lines.append(now_line)
@@ -410,7 +519,7 @@ def case_delta(case: CaseDiff) -> Stat:
     now = case['now']
     return recenter(
         was[0] if was else zero_stat(),
-        now[0] if now else 0,
+        now['seconds'] if now else 0,
     )
@@ -542,7 +651,7 @@ class TestCase:
 class TestSuite:
     def __init__(self, name: str) -> None:
         self.name = name
-        self.test_cases: List[TestCase] = []
+        self.test_cases: Dict[str, TestCase] = dict()
         self.failed_count = 0
         self.skipped_count = 0
         self.errored_count = 0
@@ -555,14 +664,14 @@
         return f'TestSuite({rc})'
 
     def append(self, test_case: TestCase) -> None:
-        self.test_cases.append(test_case)
+        self.test_cases[test_case.name] = test_case
         self.total_time += test_case.time
         self.failed_count += 1 if test_case.failed else 0
         self.skipped_count += 1 if test_case.skipped else 0
         self.errored_count += 1 if test_case.errored else 0
 
     def print_report(self, num_longest: int = 3) -> None:
-        sorted_tests = sorted(self.test_cases, key=lambda x: x.time)
+        sorted_tests = sorted(self.test_cases.values(), key=lambda x: x.time)
         test_count = len(sorted_tests)
         print(f"class {self.name}:")
         print(f"    tests: {test_count} failed: {self.failed_count} skipped: {self.skipped_count} errored: {self.errored_count}")
@@ -577,25 +686,48 @@
         print("")
 
 
+class TestFile:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.total_time = 0.0
+        self.test_suites: Dict[str, TestSuite] = dict()
+
+    def append(self, test_case: TestCase) -> None:
+        suite_name = test_case.class_name
+        if suite_name not in self.test_suites:
+            self.test_suites[suite_name] = TestSuite(suite_name)
+        if test_case.name in self.test_suites[suite_name].test_cases:
+            # This behaviour is expected for test_cpp_extensions_aot, distributed/test_distributed_fork,
+            # and distributed/test_distributed_spawn. In these cases, we just lump the duplicate tests
+            # together, which is admittedly inaccurate for test_cpp_extensions_aot, though negligible
+            # there since the test is short. For other unexpected cases, we should raise a warning.
+            if self.name != 'test_cpp_extensions_aot' and \
+               self.name != 'distributed/test_distributed_fork' and \
+               self.name != 'distributed/test_distributed_spawn' and \
+               self.name != 'cpp':  # also allow this cpp one, as it runs twice in caffe2 ort jobs
+                raise RuntimeWarning(f'Duplicate test case {test_case.name} in suite {suite_name} called from {self.name}')
+        self.test_suites[suite_name].append(test_case)
+        self.total_time += test_case.time
+
+
 def parse_report(path: str) -> Iterator[TestCase]:
     dom = minidom.parse(path)
     for test_case in dom.getElementsByTagName('testcase'):
         yield TestCase(test_case)
 
 
-def parse_reports(folder: str) -> Dict[str, TestSuite]:
+def parse_reports(folder: str) -> Dict[str, TestFile]:
     reports = glob(os.path.join(folder, '**', '*.xml'), recursive=True)
-    tests_by_class = dict()
+    tests_by_file = dict()
     for report in reports:
+        test_filename = re.sub(r'\.', '/', os.path.basename(os.path.dirname(report)))
+        if test_filename not in tests_by_file:
+            tests_by_file[test_filename] = TestFile(test_filename)
         for test_case in parse_report(report):
-            class_name = test_case.class_name
-            if class_name not in tests_by_class:
-                tests_by_class[class_name] = TestSuite(class_name)
-            tests_by_class[class_name].append(test_case)
-    return tests_by_class
+            tests_by_file[test_filename].append(test_case)
+    return tests_by_file
 
-
-def build_info() -> ReportMeta:
+def build_info() -> ReportMetaMeta:
     return {
         "build_pr": os.environ.get("CIRCLE_PR_NUMBER", ""),
         "build_tag": os.environ.get("CIRCLE_TAG", ""),
@@ -624,7 +756,7 @@ def build_message(test_case: TestCase) -> Dict[str, Dict[str, Any]]:
     }
 
 
-def send_report_to_scribe(reports: Dict[str, TestSuite]) -> None:
+def send_report_to_scribe(reports: Dict[str, TestFile]) -> None:
     access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN")
 
     if not access_token:
@@ -643,8 +775,9 @@
                         "message": json.dumps(build_message(test_case)),
                         "line_escape": False,
                     }
-                    for name in sorted(reports.keys())
-                    for test_case in reports[name].test_cases
+                    for test_file in reports.values()
+                    for test_suite in test_file.test_suites.values()
+                    for test_case in test_suite.test_cases.values()
                 ]
             ),
         },
@@ -653,33 +786,40 @@
 
 
 def assemble_s3_object(
-    reports: Dict[str, TestSuite],
+    reports: Dict[str, TestFile],
     *,
     total_seconds: float,
-) -> Report:
+) -> Version2Report:
     return {
         **build_info(),  # type: ignore[misc]
         'total_seconds': total_seconds,
-        'suites': {
+        'format_version': 2,
+        'files' : {
             name: {
-                'total_seconds': suite.total_time,
-                'cases': [
-                    {
-                        'name': case.name,
-                        'seconds': case.time,
-                        'errored': case.errored,
-                        'failed': case.failed,
-                        'skipped': case.skipped,
+                'total_seconds': test_file.total_time,
+                'filename': test_file.name,
+                'suites': {
+                    name: {
+                        'total_seconds': suite.total_time,
+                        'cases': {
+                            name: {
+                                'seconds': case.time,
+                                'status': 'skipped' if case.skipped else
+                                          'errored' if case.errored else
+                                          'failed' if case.failed else None
+                            }
+                            for name, case in suite.test_cases.items()
+                        },
                     }
-                    for case in suite.test_cases
-                ],
+                    for name, suite in test_file.test_suites.items()
+                }
             }
-            for name, suite in reports.items()
+            for name, test_file in reports.items()
         }
     }
 
 
-def send_report_to_s3(head_report: Report) -> None:
+def send_report_to_s3(head_report: Version2Report) -> None:
     job = os.environ.get('CIRCLE_JOB')
     sha1 = os.environ.get('CIRCLE_SHA1')
     branch = os.environ.get('CIRCLE_BRANCH', '')
@@ -773,6 +913,13 @@ def positive_float(value: str) -> float:
     return parsed
 
 
+def reports_has_no_tests(reports: Dict[str, TestFile]) -> bool:
+    for test_file in reports.values():
+        for test_suite in test_file.test_suites.values():
+            if len(test_suite.test_cases) > 0:
+                return False
+    return True
+
 if __name__ == '__main__':
     import argparse
     import sys
@@ -830,24 +977,25 @@
     )
     args = parser.parse_args()
 
-    reports = parse_reports(args.folder)
-    if len(reports) == 0:
-        print(f"No test reports found in {args.folder}")
+    reports_by_file = parse_reports(args.folder)
+    if reports_has_no_tests(reports_by_file):
+        print(f"No tests found in reports in {args.folder}")
         sys.exit(0)
 
-    send_report_to_scribe(reports)
+    send_report_to_scribe(reports_by_file)
 
-    longest_tests = []
+    # longest_tests can contain duplicates, as the same test can be spawned from different files
+    longest_tests: List[TestCase] = []
     total_time = 0.0
-    for name in sorted(reports.keys()):
-        test_suite = reports[name]
-        if test_suite.total_time >= args.class_print_threshold:
-            test_suite.print_report(args.longest_of_class)
-            total_time += test_suite.total_time
-            longest_tests.extend(test_suite.test_cases)
+    for filename, test_filename in reports_by_file.items():
+        for suite_name, test_suite in test_filename.test_suites.items():
+            if test_suite.total_time >= args.class_print_threshold:
+                test_suite.print_report(args.longest_of_class)
+                total_time += test_suite.total_time
+                longest_tests.extend(test_suite.test_cases.values())
    longest_tests = sorted(longest_tests, key=lambda x: x.time)[-args.longest_of_run:]
 
-    obj = assemble_s3_object(reports, total_seconds=total_time)
+    obj = assemble_s3_object(reports_by_file, total_seconds=total_time)
     if args.upload_to_s3:
         send_report_to_s3(obj)
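
Note on the version 2 schema: a report now nests file -> suite -> case, and the three
per-case booleans of version 1 are collapsed into the single Status field. A minimal
sketch of the conversion performed by the case_status/newify_case pair defined above
(the sample case dict is illustrative, not taken from a real report):

    # a version 1 case records each outcome as a separate boolean on the case
    v1_case = {'name': 'test_foo', 'seconds': 1.5,
               'errored': False, 'failed': True, 'skipped': False}

    # newify_case drops the name (version 2 keys cases by name in a dict)
    # and collapses the booleans into one status value
    assert newify_case(v1_case) == {'seconds': 1.5, 'status': 'failed'}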
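Relatedly, the NB comments added to test.sh exist because parse_reports derives each
report's test "filename" from the directory that directly contains the XML file,
converting dots to slashes, so a report written under .../cpp-rpc/test_rpc/ is
attributed to test_rpc. A small sketch of that derivation (the example paths here are
hypothetical):

    import os
    import re

    paths = [
        'test/test-reports/cpp-rpc/test_rpc/test_cpp_rpc.xml',
        'test/test-reports/python-unittest/distributed.test_store/report.xml',
    ]
    for path in paths:
        # same expression as in parse_reports above
        test_filename = re.sub(r'\.', '/', os.path.basename(os.path.dirname(path)))
        print(test_filename)  # -> test_rpc, then distributed/test_store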