Store test file in S3 as well for every TestSuite (#52869)

Summary:
We want to store the file names that trigger each test suite so that we can use this data for categorizing those test files.

~~After considering several solutions, this one is the most backwards compatible, and the current test cases in test_testing.py for print test stats don't break.~~

The previous plan did not work, as there are multiple Python test jobs that spawn the same suites. Instead, the new S3 format will store test files (e.g., `test_nn` and `distributed/test_distributed_fork`) which will contain the suites they spawn, which will contain the test cases run within the suite. (Currently, there is no top layer of test files.)

Because of this major structural change, a lot of changes have now been made (thank you samestep!) to test_history.py and print_test_stats.py to make this new format backwards compatible.

Old test plan:
Make sure that the data is as expected in S3 after https://github.com/pytorch/pytorch/pull/52873 finishes.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/52869

Test Plan: Added tests to test_testing.py which pass, and CI.

Reviewed By: samestep

Differential Revision: D26672561

Pulled By: janeyx99

fbshipit-source-id: f46b91e16c1d9de5e0cb9bfa648b6448d979257e
This commit is contained in:
Jane Xu 2021-03-02 07:33:57 -08:00 committed by Facebook GitHub Bot
parent 931100f829
commit 09ce9b5877
5 changed files with 579 additions and 181 deletions

View File

@ -172,19 +172,24 @@ test_libtorch() {
# Start background download
python tools/download_mnist.py --quiet -d test/cpp/api/mnist &
# Make test_reports directory
# NB: the ending test_libtorch must match the current function name for the current
# test reporting process (in print_test_stats.py) to function as expected.
TEST_REPORTS_DIR=test/test-reports/cpp-unittest/test_libtorch
mkdir -p $TEST_REPORTS_DIR
# Run JIT cpp tests
mkdir -p test/test-reports/cpp-unittest
python test/cpp/jit/tests_setup.py setup
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
build/bin/test_jit --gtest_output=xml:test/test-reports/cpp-unittest/test_jit.xml
build/bin/test_jit --gtest_output=xml:$TEST_REPORTS_DIR/test_jit.xml
else
build/bin/test_jit --gtest_filter='-*CUDA' --gtest_output=xml:test/test-reports/cpp-unittest/test_jit.xml
build/bin/test_jit --gtest_filter='-*CUDA' --gtest_output=xml:$TEST_REPORTS_DIR/test_jit.xml
fi
python test/cpp/jit/tests_setup.py shutdown
# Wait for background download to finish
wait
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api --gtest_output=xml:test/test-reports/cpp-unittest/test_api.xml
build/bin/test_tensorexpr --gtest_output=xml:test/test-reports/cpp-unittests/test_tensorexpr.xml
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api --gtest_output=xml:$TEST_REPORTS_DIR/test_api.xml
build/bin/test_tensorexpr --gtest_output=xml:$TEST_REPORTS_DIR/test_tensorexpr.xml
assert_git_not_dirty
fi
}
@ -192,30 +197,39 @@ test_libtorch() {
test_vulkan() {
if [[ "$BUILD_ENVIRONMENT" == *vulkan-linux* ]]; then
export VK_ICD_FILENAMES=/var/lib/jenkins/swiftshader/build/Linux/vk_swiftshader_icd.json
mkdir -p test/test-reports/cpp-vulkan
build/bin/vulkan_test --gtest_output=xml:test/test-reports/cpp-vulkan/vulkan_test.xml
# NB: the ending test_vulkan must match the current function name for the current
# test reporting process (in print_test_stats.py) to function as expected.
TEST_REPORTS_DIR=test/test-reports/cpp-vulkan/test_vulkan
mkdir -p $TEST_REPORTS_DIR
build/bin/vulkan_test --gtest_output=xml:$TEST_REPORTS_DIR/vulkan_test.xml
fi
}
test_distributed() {
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
echo "Testing distributed C++ tests"
mkdir -p test/test-reports/cpp-distributed
build/bin/FileStoreTest --gtest_output=xml:test/test-reports/cpp-distributed/FileStoreTest.xml
build/bin/HashStoreTest --gtest_output=xml:test/test-reports/cpp-distributed/HashStoreTest.xml
build/bin/TCPStoreTest --gtest_output=xml:test/test-reports/cpp-distributed/TCPStoreTest.xml
# NB: the ending test_distributed must match the current function name for the current
# test reporting process (in print_test_stats.py) to function as expected.
TEST_REPORTS_DIR=test/test-reports/cpp-distributed/test_distributed
mkdir -p $TEST_REPORTS_DIR
build/bin/FileStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/FileStoreTest.xml
build/bin/HashStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/HashStoreTest.xml
build/bin/TCPStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/TCPStoreTest.xml
build/bin/ProcessGroupGlooTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupGlooTest.xml
build/bin/ProcessGroupNCCLTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupNCCLTest.xml
build/bin/ProcessGroupNCCLErrorsTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupNCCLErrorsTest.xml
build/bin/ProcessGroupGlooTest --gtest_output=xml:$TEST_REPORTS_DIR/ProcessGroupGlooTest.xml
build/bin/ProcessGroupNCCLTest --gtest_output=xml:$TEST_REPORTS_DIR/ProcessGroupNCCLTest.xml
build/bin/ProcessGroupNCCLErrorsTest --gtest_output=xml:$TEST_REPORTS_DIR/ProcessGroupNCCLErrorsTest.xml
fi
}
test_rpc() {
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
echo "Testing RPC C++ tests"
mkdir -p test/test-reports/cpp-rpc
build/bin/test_cpp_rpc --gtest_output=xml:test/test-reports/cpp-rpc/test_cpp_rpc.xml
# NB: the ending test_rpc must match the current function name for the current
# test reporting process (in print_test_stats.py) to function as expected.
TEST_REPORTS_DIR=test/test-reports/cpp-rpc/test_rpc
mkdir -p $TEST_REPORTS_DIR
build/bin/test_cpp_rpc --gtest_output=xml:$TEST_REPORTS_DIR/test_cpp_rpc.xml
fi
}

View File

@ -649,6 +649,17 @@ def fakehash(char):
return char * 40
def dummy_meta_meta() -> print_test_stats.ReportMetaMeta:
return {
'build_pr': '',
'build_tag': '',
'build_sha1': '',
'build_branch': '',
'build_job': '',
'build_workflow_id': '',
}
def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
return {
'name': name,
@ -659,7 +670,7 @@ def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
}
def makereport(tests):
def make_report_v1(tests) -> print_test_stats.Version1Report:
suites = {
suite_name: {
'total_seconds': sum(case['seconds'] for case in cases),
@ -668,59 +679,201 @@ def makereport(tests):
for suite_name, cases in tests.items()
}
return {
**dummy_meta_meta(),
'total_seconds': sum(s['total_seconds'] for s in suites.values()),
'suites': suites,
}
def make_case_v2(seconds, status=None) -> print_test_stats.Version2Case:
return {
'seconds': seconds,
'status': status,
}
def make_report_v2(tests) -> print_test_stats.Version2Report:
files = {}
for file_name, file_suites in tests.items():
suites = {
suite_name: {
'total_seconds': sum(case['seconds'] for case in cases.values()),
'cases': cases,
}
for suite_name, cases in file_suites.items()
}
files[file_name] = {
'suites': suites,
'total_seconds': sum(suite['total_seconds'] for suite in suites.values()),
}
return {
**dummy_meta_meta(),
'format_version': 2,
'total_seconds': sum(s['total_seconds'] for s in files.values()),
'files': files,
}
class TestPrintTestStats(TestCase):
maxDiff = None
def test_analysis(self):
head_report = makereport({
# input ordering of the suites is ignored
'Grault': [
# not printed: status same and time similar
makecase('test_grault0', 4.78, failed=True),
# status same, but time increased a lot
makecase('test_grault2', 1.473, errored=True),
],
# individual tests times changed, not overall suite
'Qux': [
# input ordering of the test cases is ignored
makecase('test_qux1', 0.001, skipped=True),
makecase('test_qux6', 0.002, skipped=True),
# time in bounds, but status changed
makecase('test_qux4', 7.158, failed=True),
# not printed because it's the same as before
makecase('test_qux7', 0.003, skipped=True),
makecase('test_qux5', 11.968),
makecase('test_qux3', 23.496),
],
# new test suite
'Bar': [
makecase('test_bar2', 3.742, failed=True),
makecase('test_bar1', 50.447),
],
# overall suite time changed but no individual tests
'Norf': [
makecase('test_norf1', 3),
makecase('test_norf2', 3),
makecase('test_norf3', 3),
makecase('test_norf4', 3),
],
# suite doesn't show up if it doesn't change enough
'Foo': [
makecase('test_foo1', 42),
makecase('test_foo2', 56),
],
version1_report: print_test_stats.Version1Report = make_report_v1({
# input ordering of the suites is ignored
'Grault': [
# not printed: status same and time similar
makecase('test_grault0', 4.78, failed=True),
# status same, but time increased a lot
makecase('test_grault2', 1.473, errored=True),
],
# individual tests times changed, not overall suite
'Qux': [
# input ordering of the test cases is ignored
makecase('test_qux1', 0.001, skipped=True),
makecase('test_qux6', 0.002, skipped=True),
# time in bounds, but status changed
makecase('test_qux4', 7.158, failed=True),
# not printed because it's the same as before
makecase('test_qux7', 0.003, skipped=True),
makecase('test_qux5', 11.968),
makecase('test_qux3', 23.496),
],
# new test suite
'Bar': [
makecase('test_bar2', 3.742, failed=True),
makecase('test_bar1', 50.447),
],
# overall suite time changed but no individual tests
'Norf': [
makecase('test_norf1', 3),
makecase('test_norf2', 3),
makecase('test_norf3', 3),
makecase('test_norf4', 3),
],
# suite doesn't show up if it doesn't change enough
'Foo': [
makecase('test_foo1', 42),
makecase('test_foo2', 56),
],
})
version2_report: print_test_stats.Version2Report = make_report_v2(
{
'test_a': {
'Grault': {
'test_grault0': make_case_v2(4.78, 'failed'),
'test_grault2': make_case_v2(1.473, 'errored'),
},
'Qux': {
'test_qux1': make_case_v2(0.001, 'skipped'),
'test_qux6': make_case_v2(0.002, 'skipped'),
'test_qux4': make_case_v2(7.158, 'failed'),
'test_qux7': make_case_v2(0.003, 'skipped'),
'test_qux8': make_case_v2(11.968),
'test_qux3': make_case_v2(23.496),
}
},
'test_b': {
'Bar': {
'test_bar2': make_case_v2(3.742, 'failed'),
'test_bar1': make_case_v2(50.447),
},
# overall suite time changed but no individual tests
'Norf': {
'test_norf1': make_case_v2(3),
'test_norf2': make_case_v2(3),
'test_norf3': make_case_v2(3),
'test_norf4': make_case_v2(3),
},
},
'test_c': {
'Foo': {
'test_foo1': make_case_v2(42),
'test_foo2': make_case_v2(56),
},
}
})
def test_simplify(self):
self.assertEqual(
{
'': {
'Bar': {
'test_bar1': {'seconds': 50.447, 'status': None},
'test_bar2': {'seconds': 3.742, 'status': 'failed'},
},
'Foo': {
'test_foo1': {'seconds': 42, 'status': None},
'test_foo2': {'seconds': 56, 'status': None},
},
'Grault': {
'test_grault0': {'seconds': 4.78, 'status': 'failed'},
'test_grault2': {'seconds': 1.473, 'status': 'errored'},
},
'Norf': {
'test_norf1': {'seconds': 3, 'status': None},
'test_norf3': {'seconds': 3, 'status': None},
'test_norf2': {'seconds': 3, 'status': None},
'test_norf4': {'seconds': 3, 'status': None},
},
'Qux': {
'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
'test_qux3': {'seconds': 23.496, 'status': None},
'test_qux4': {'seconds': 7.158, 'status': 'failed'},
'test_qux5': {'seconds': 11.968, 'status': None},
'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
},
},
},
print_test_stats.simplify(self.version1_report)
)
self.assertEqual(
{
'test_a': {
'Grault': {
'test_grault0': {'seconds': 4.78, 'status': 'failed'},
'test_grault2': {'seconds': 1.473, 'status': 'errored'},
},
'Qux': {
'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
'test_qux3': {'seconds': 23.496, 'status': None},
'test_qux4': {'seconds': 7.158, 'status': 'failed'},
'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
'test_qux8': {'seconds': 11.968, 'status': None},
},
},
'test_b': {
'Bar': {
'test_bar1': {'seconds': 50.447, 'status': None},
'test_bar2': {'seconds': 3.742, 'status': 'failed'},
},
'Norf': {
'test_norf1': {'seconds': 3, 'status': None},
'test_norf2': {'seconds': 3, 'status': None},
'test_norf3': {'seconds': 3, 'status': None},
'test_norf4': {'seconds': 3, 'status': None},
},
},
'test_c': {
'Foo': {
'test_foo1': {'seconds': 42, 'status': None},
'test_foo2': {'seconds': 56, 'status': None},
},
},
},
print_test_stats.simplify(self.version2_report),
)
def test_analysis(self):
head_report = self.version1_report
base_reports = {
# bbbb has no reports, so base is cccc instead
fakehash('b'): [],
fakehash('c'): [
makereport({
make_report_v1({
'Baz': [
makecase('test_baz2', 13.605),
# no recent suites have & skip this test
@ -753,7 +906,7 @@ class TestPrintTestStats(TestCase):
}),
],
fakehash('d'): [
makereport({
make_report_v1({
'Foo': [
makecase('test_foo1', 40),
# removed in cccc
@ -783,7 +936,7 @@ class TestPrintTestStats(TestCase):
],
fakehash('e'): [],
fakehash('f'): [
makereport({
make_report_v1({
'Foo': [
makecase('test_foo3', 24),
makecase('test_foo1', 43),
@ -1066,14 +1219,14 @@ Added (across 1 suite) 1 test, totaling + 3.00s
''',
print_test_stats.regression_info(
head_sha=fakehash('a'),
head_report=makereport({
head_report=make_report_v1({
'Foo': [
makecase('test_foo', 0.02, skipped=True),
makecase('test_baz', 3),
]}),
base_reports={
fakehash('b'): [
makereport({
make_report_v1({
'Foo': [
makecase('test_foo', 40),
makecase('test_bar', 1),
@ -1081,7 +1234,7 @@ Added (across 1 suite) 1 test, totaling + 3.00s
}),
],
fakehash('c'): [
makereport({
make_report_v1({
'Foo': [
makecase('test_foo', 43),
],
@ -1135,7 +1288,7 @@ Added (across 1 suite) 2 tests, totaling + 3.02s
''',
print_test_stats.regression_info(
head_sha=fakehash('a'),
head_report=makereport({
head_report=make_report_v1({
'Foo': [
makecase('test_foo', 0.02, skipped=True),
makecase('test_baz', 3),

View File

@ -289,7 +289,7 @@ class TestCheckpoint(TestCase):
out = checkpoint(run_fn2, input_var, input_var2)
out.sum().backward()
class TestDataLoader(TestCase):
class TestDataLoaderUtils(TestCase):
def setUp(self):
self.dataset = torch.randn(5, 3, 3, 2)
self.batch_size = 3

View File

@ -6,11 +6,11 @@ import json
import subprocess
from collections import defaultdict
from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple
from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
import boto3 # type: ignore[import]
import botocore # type: ignore[import]
from typing_extensions import TypedDict
from typing_extensions import Literal, TypedDict
def get_git_commit_history(
@ -36,31 +36,70 @@ def get_object_summaries(*, bucket: Any, sha: str) -> Dict[str, List[Any]]:
return dict(by_job)
class Case(TypedDict):
name: str
# TODO: consolidate these typedefs with the identical ones in
# torch/testing/_internal/print_test_stats.py
Commit = str # 40-digit SHA-1 hex string
Status = Optional[Literal['errored', 'failed', 'skipped']]
class CaseMeta(TypedDict):
seconds: float
class Version1Case(CaseMeta):
name: str
errored: bool
failed: bool
skipped: bool
class Suite(TypedDict):
class Version1Suite(TypedDict):
total_seconds: float
cases: List[Case]
cases: List[Version1Case]
class ReportMeta(TypedDict):
class ReportMetaMeta(TypedDict):
build_pr: str
build_tag: str
build_sha1: str
build_sha1: Commit
build_branch: str
build_job: str
build_workflow_id: str
class Report(ReportMeta):
class ReportMeta(ReportMetaMeta):
total_seconds: float
suites: Dict[str, Suite]
class Version1Report(ReportMeta):
suites: Dict[str, Version1Suite]
class Version2Case(CaseMeta):
status: Status
class Version2Suite(TypedDict):
total_seconds: float
cases: Dict[str, Version2Case]
class Version2File(TypedDict):
total_seconds: float
suites: Dict[str, Version2Suite]
class VersionedReport(ReportMeta):
format_version: int
# report: Version2Report implies report['format_version'] == 2
class Version2Report(VersionedReport):
files: Dict[str, Version2File]
Report = Union[Version1Report, VersionedReport]
def get_jsons(
@ -77,32 +116,63 @@ def get_jsons(
}
# TODO: consolidate this with the case_status function from
# torch/testing/_internal/print_test_stats.py
def case_status(case: Version1Case) -> Status:
for k in {'errored', 'failed', 'skipped'}:
if case[k]: # type: ignore[misc]
return cast(Status, k)
return None
# TODO: consolidate this with the newify_case function from
# torch/testing/_internal/print_test_stats.py
def newify_case(case: Version1Case) -> Version2Case:
return {
'seconds': case['seconds'],
'status': case_status(case),
}
# TODO: consolidate this with the simplify function from
# torch/testing/_internal/print_test_stats.py
def get_cases(
*,
data: Report,
filename: Optional[str],
suite_name: Optional[str],
test_name: str,
) -> List[Case]:
cases = []
suites = data['suites']
for name, suite in suites.items():
if name == suite_name or not suite_name:
for case in suite['cases']:
if case['name'] == test_name:
cases.append(case)
) -> List[Version2Case]:
cases: List[Version2Case] = []
if 'format_version' not in data: # version 1 implicitly
v1report = cast(Version1Report, data)
suites = v1report['suites']
for sname, v1suite in suites.items():
if sname == suite_name or not suite_name:
for v1case in v1suite['cases']:
if v1case['name'] == test_name:
cases.append(newify_case(v1case))
else:
v_report = cast(VersionedReport, data)
version = v_report['format_version']
if version == 2:
v2report = cast(Version2Report, v_report)
for fname, v2file in v2report['files'].items():
if fname == filename or not filename:
for sname, v2suite in v2file['suites'].items():
if sname == suite_name or not suite_name:
v2case = v2suite['cases'].get(test_name)
if v2case:
cases.append(v2case)
else:
raise RuntimeError(f'Unknown format version: {version}')
return cases
def case_status(case: Case) -> Optional[str]:
for k in {'errored', 'failed', 'skipped'}:
if case[k]: # type: ignore[misc]
return k
return None
def make_column(
*,
data: Optional[Report],
filename: Optional[str],
suite_name: Optional[str],
test_name: str,
digits: int,
@ -112,12 +182,13 @@ def make_column(
if data:
cases = get_cases(
data=data,
filename=filename,
suite_name=suite_name,
test_name=test_name
)
if cases:
case = cases[0]
status = case_status(case)
status = case['status']
omitted = len(cases) - 1
if status:
return f'{status.rjust(num_length)} ', omitted
@ -134,6 +205,7 @@ def make_columns(
jobs: List[str],
jsons: Dict[str, Report],
omitted: Dict[str, int],
filename: Optional[str],
suite_name: Optional[str],
test_name: str,
digits: int,
@ -145,6 +217,7 @@ def make_columns(
data = jsons.get(job)
column, omitted_suites = make_column(
data=data,
filename=filename,
suite_name=suite_name,
test_name=test_name,
digits=digits,
@ -165,6 +238,7 @@ def make_lines(
jobs: Set[str],
jsons: Dict[str, Report],
omitted: Dict[str, int],
filename: Optional[str],
suite_name: Optional[str],
test_name: str,
) -> List[str]:
@ -172,12 +246,13 @@ def make_lines(
for job, data in jsons.items():
cases = get_cases(
data=data,
filename=filename,
suite_name=suite_name,
test_name=test_name,
)
if cases:
case = cases[0]
status = case_status(case)
status = case['status']
line = f'{job} {case["seconds"]}s{f" {status}" if status else ""}'
if job in omitted and omitted[job] > 0:
line += f' ({omitted[job]} S3 reports omitted)'
@ -197,6 +272,7 @@ def display_history(
bucket: Any,
commits: List[Tuple[str, datetime]],
jobs: Optional[List[str]],
filename: Optional[str],
suite_name: Optional[str],
test_name: str,
delta: int,
@ -226,6 +302,7 @@ def display_history(
jobs=jobs,
jsons=jsons,
omitted=omitted,
filename=filename,
suite_name=suite_name,
test_name=test_name,
digits=digits,
@ -236,6 +313,7 @@ def display_history(
jobs=set(jobs or []),
jsons=jsons,
omitted=omitted,
filename=filename,
suite_name=suite_name,
test_name=test_name,
)
@ -352,6 +430,10 @@ indicated test was not found in that report.
action='store_true',
help='(multiline) ignore listed jobs, show all jobs for each commit',
)
parser.add_argument(
'--file',
help='name of the file containing the test',
)
parser.add_argument(
'--suite',
help='name of the suite containing the test',
@ -381,6 +463,7 @@ indicated test was not found in that report.
bucket=bucket,
commits=commits,
jobs=jobs,
filename=args.file,
suite_name=args.suite,
test_name=args.test,
delta=args.delta,

View File

@ -5,6 +5,7 @@ import datetime
import json
import math
import os
import re
import statistics
import subprocess
import time
@ -12,11 +13,11 @@ from collections import defaultdict
from glob import glob
from pathlib import Path
from typing import (Any, DefaultDict, Dict, Iterable, Iterator, List, Optional,
Tuple)
Set, Tuple, Union, cast)
from xml.dom import minidom # type: ignore[import]
import requests
from typing_extensions import TypedDict
from typing_extensions import Literal, TypedDict
try:
import boto3 # type: ignore[import]
@ -24,29 +25,30 @@ try:
except ImportError:
HAVE_BOTO3 = False
# TODO: consolidate these typedefs with the identical ones in
# tools/test_history.py
Commit = str # 40-digit SHA-1 hex string
Status = Optional[str] # errored, failed, skipped, or None
# represent suite as dict because indexing is useful
SimplerCase = Tuple[float, Status]
SimplerSuite = Dict[str, SimplerCase]
SimplerReport = Dict[str, SimplerSuite]
Status = Optional[Literal['errored', 'failed', 'skipped']]
class Case(TypedDict):
name: str
class CaseMeta(TypedDict):
seconds: float
class Version1Case(CaseMeta):
name: str
errored: bool
failed: bool
skipped: bool
class Suite(TypedDict):
class Version1Suite(TypedDict):
total_seconds: float
cases: List[Case]
cases: List[Version1Case]
class ReportMeta(TypedDict, total=False):
class ReportMetaMeta(TypedDict):
build_pr: str
build_tag: str
build_sha1: Commit
@ -55,9 +57,42 @@ class ReportMeta(TypedDict, total=False):
build_workflow_id: str
class Report(ReportMeta):
class ReportMeta(ReportMetaMeta):
total_seconds: float
suites: Dict[str, Suite]
class Version1Report(ReportMeta):
suites: Dict[str, Version1Suite]
class Version2Case(CaseMeta):
status: Status
class Version2Suite(TypedDict):
total_seconds: float
cases: Dict[str, Version2Case]
class Version2File(TypedDict):
total_seconds: float
suites: Dict[str, Version2Suite]
class VersionedReport(ReportMeta):
format_version: int
# report: Version2Report implies report['format_version'] == 2
class Version2Report(VersionedReport):
files: Dict[str, Version2File]
Report = Union[Version1Report, VersionedReport]
SimplerSuite = Dict[str, Version2Case]
SimplerFile = Dict[str, SimplerSuite]
SimplerReport = Dict[str, SimplerFile]
class Stat(TypedDict):
@ -69,7 +104,7 @@ class CaseDiff(TypedDict):
margin: str
name: str
was: Optional[Tuple[Stat, Status]]
now: Optional[SimplerCase]
now: Optional[Version2Case]
class SuiteDiff(TypedDict):
@ -80,23 +115,78 @@ class SuiteDiff(TypedDict):
cases: List[CaseDiff]
def case_status(case: Case) -> Status:
# TODO: consolidate this with the case_status function from
# tools/test_history.py
def case_status(case: Version1Case) -> Status:
for k in {'errored', 'failed', 'skipped'}:
if case[k]: # type: ignore[misc]
return k
return cast(Status, k)
return None
def simplify(report: Report) -> SimplerReport:
# TODO: consolidate this with the newify_case function from
# tools/test_history.py
def newify_case(case: Version1Case) -> Version2Case:
return {
suite_name: {
case['name']: (case['seconds'], case_status(case))
for case in suite['cases']
}
for suite_name, suite in report['suites'].items()
'seconds': case['seconds'],
'status': case_status(case),
}
# TODO: consolidate this with the get_cases function from
# tools/test_history.py
# Here we translate to a three-layer format (file -> suite -> case)
# rather than a two-layer format (suite -> case) because as mentioned in
# a comment in the body of this function, if we consolidate suites that
# share a name, there will be test case name collisions, and once we
# have those, there's no clean way to deal with it in the diffing logic.
# It's not great to have to add a dummy empty string for the filename
# for version 1 reports, but it's better than either losing cases that
# share a name (for version 2 reports) or using a list of cases rather
# than a dict.
def simplify(report: Report) -> SimplerReport:
if 'format_version' not in report: # version 1 implicitly
v1report = cast(Version1Report, report)
return {
# we just don't have test filename information sadly, so we
# just make one fake filename that is the empty string
'': {
suite_name: {
# This clobbers some cases that have duplicate names
# because in version 1, we would merge together all
# the suites with a given name (even if they came
# from different files), so there were actually
# situations in which two cases in the same suite
# shared a name (because they actually originally
# came from two suites that were then merged). It
# would probably be better to warn about the cases
# that we're silently discarding here, but since
# we're only uploading in the new format (where
# everything is also keyed by filename) going
# forward, it shouldn't matter too much.
case['name']: newify_case(case)
for case in suite['cases']
}
for suite_name, suite in v1report['suites'].items()
}
}
else:
v_report = cast(VersionedReport, report)
version = v_report['format_version']
if version == 2:
v2report = cast(Version2Report, v_report)
return {
filename: {
suite_name: suite['cases']
for suite_name, suite in file_data['suites'].items()
}
for filename, file_data in v2report['files'].items()
}
else:
raise RuntimeError(f'Unknown format version: {version}')
def plural(n: int) -> str:
return '' if n == 1 else 's'
@ -165,7 +255,9 @@ def unlines(lines: List[str]) -> str:
def matching_test_times(
*,
base_reports: Dict[Commit, List[SimplerReport]],
filename: str,
suite_name: str,
case_name: str,
status: Status,
@ -173,13 +265,16 @@ def matching_test_times(
times: List[float] = []
for reports in base_reports.values():
for report in reports:
suite = report.get(suite_name)
if suite:
case = suite.get(case_name)
if case:
t, s = case
if s == status:
times.append(t)
file_data = report.get(filename)
if file_data:
suite = file_data.get(suite_name)
if suite:
case = suite.get(case_name)
if case:
t = case['seconds']
s = case['status']
if s == status:
times.append(t)
return times
@ -195,30 +290,43 @@ def analyze(
# find all relevant suites (those in either base or head or both)
all_reports = [head_report] + base_report
all_suites = {k for r in all_reports for k in r.keys()}
all_suites: Set[Tuple[str, str]] = {
(filename, suite_name)
for r in all_reports
for filename, file_data in r.items()
for suite_name in file_data.keys()
}
removed_suites: List[SuiteDiff] = []
modified_suites: List[SuiteDiff] = []
added_suites: List[SuiteDiff] = []
for suite_name in sorted(all_suites):
for filename, suite_name in sorted(all_suites):
case_diffs: List[CaseDiff] = []
head_suite = head_report.get(suite_name)
head_suite = head_report.get(filename, {}).get(suite_name)
base_cases: Dict[str, Status] = dict(sorted(set.intersection(*[
{(n, s) for n, (_, s) in report.get(suite_name, {}).items()}
{
(n, case['status'])
for n, case
in report.get(filename, {}).get(suite_name, {}).items()
}
for report in base_report
] or [set()])))
case_stats: Dict[str, Stat] = {}
if head_suite:
now = sum(case[0] for case in head_suite.values())
if any(suite_name in report for report in base_report):
now = sum(case['seconds'] for case in head_suite.values())
if any(
filename in report and suite_name in report[filename]
for report in base_report
):
removed_cases: List[CaseDiff] = []
for case_name, case_status in base_cases.items():
case_stats[case_name] = list_stat(matching_test_times(
base_reports,
suite_name,
case_name,
case_status,
base_reports=base_reports,
filename=filename,
suite_name=suite_name,
case_name=case_name,
status=case_status,
))
if case_name not in head_suite:
removed_cases.append({
@ -234,7 +342,7 @@ def analyze(
if head_case_name in base_cases:
stat = case_stats[head_case_name]
base_status = base_cases[head_case_name]
if head_case[1] != base_status:
if head_case['status'] != base_status:
modified_cases.append({
'margin': '!',
'name': head_case_name,
@ -278,10 +386,11 @@ def analyze(
else:
for case_name, case_status in base_cases.items():
case_stats[case_name] = list_stat(matching_test_times(
base_reports,
suite_name,
case_name,
case_status,
base_reports=base_reports,
filename=filename,
suite_name=suite_name,
case_name=case_name,
status=case_status,
))
case_diffs.append({
'margin': ' ',
@ -316,9 +425,9 @@ def case_diff_lines(diff: CaseDiff) -> List[str]:
now = diff['now']
if now:
now_stat: Stat = {'center': now[0], 'spread': None}
now_stat: Stat = {'center': now['seconds'], 'spread': None}
now_line = f' # now {display_stat(now_stat, case_fmt)}'
now_status = now[1]
now_status = now['status']
if now_status:
now_line += f' ({now_status})'
lines.append(now_line)
@ -410,7 +519,7 @@ def case_delta(case: CaseDiff) -> Stat:
now = case['now']
return recenter(
was[0] if was else zero_stat(),
now[0] if now else 0,
now['seconds'] if now else 0,
)
@ -542,7 +651,7 @@ class TestCase:
class TestSuite:
def __init__(self, name: str) -> None:
self.name = name
self.test_cases: List[TestCase] = []
self.test_cases: Dict[str, TestCase] = dict()
self.failed_count = 0
self.skipped_count = 0
self.errored_count = 0
@ -555,14 +664,14 @@ class TestSuite:
return f'TestSuite({rc})'
def append(self, test_case: TestCase) -> None:
self.test_cases.append(test_case)
self.test_cases[test_case.name] = test_case
self.total_time += test_case.time
self.failed_count += 1 if test_case.failed else 0
self.skipped_count += 1 if test_case.skipped else 0
self.errored_count += 1 if test_case.errored else 0
def print_report(self, num_longest: int = 3) -> None:
sorted_tests = sorted(self.test_cases, key=lambda x: x.time)
sorted_tests = sorted(self.test_cases.values(), key=lambda x: x.time)
test_count = len(sorted_tests)
print(f"class {self.name}:")
print(f" tests: {test_count} failed: {self.failed_count} skipped: {self.skipped_count} errored: {self.errored_count}")
@ -577,25 +686,48 @@ class TestSuite:
print("")
class TestFile:
def __init__(self, name: str) -> None:
self.name = name
self.total_time = 0.0
self.test_suites: Dict[str, TestSuite] = dict()
def append(self, test_case: TestCase) -> None:
suite_name = test_case.class_name
if suite_name not in self.test_suites:
self.test_suites[suite_name] = TestSuite(suite_name)
if test_case.name in self.test_suites[suite_name].test_cases:
# This behaviour is expected for test_cpp_extensions_aot, distributed/test_distributed_fork,
# and distributed/test_distributed_spawn. In these cases, we just lump the duplicate tests together--
# which is admittedly inaccurate for test_cpp_extensions_aot, though this is negligible as the test is short.
# For other unexpected cases, we should raise a warning.
if self.name != 'test_cpp_extensions_aot' and \
self.name != 'distributed/test_distributed_fork' and \
self.name != 'distributed/test_distributed_spawn' and \
self.name != 'cpp': # Also allow this cpp one, as it runs twice in caffe2 ort jobs
raise RuntimeWarning(f'Duplicate test case {test_case.name} in suite {suite_name} called from {self.name}')
self.test_suites[suite_name].append(test_case)
self.total_time += test_case.time
def parse_report(path: str) -> Iterator[TestCase]:
dom = minidom.parse(path)
for test_case in dom.getElementsByTagName('testcase'):
yield TestCase(test_case)
def parse_reports(folder: str) -> Dict[str, TestSuite]:
def parse_reports(folder: str) -> Dict[str, TestFile]:
reports = glob(os.path.join(folder, '**', '*.xml'), recursive=True)
tests_by_class = dict()
tests_by_file = dict()
for report in reports:
test_filename = re.sub(r'\.', '/', os.path.basename(os.path.dirname(report)))
if test_filename not in tests_by_file:
tests_by_file[test_filename] = TestFile(test_filename)
for test_case in parse_report(report):
class_name = test_case.class_name
if class_name not in tests_by_class:
tests_by_class[class_name] = TestSuite(class_name)
tests_by_class[class_name].append(test_case)
return tests_by_class
tests_by_file[test_filename].append(test_case)
return tests_by_file
def build_info() -> ReportMeta:
def build_info() -> ReportMetaMeta:
return {
"build_pr": os.environ.get("CIRCLE_PR_NUMBER", ""),
"build_tag": os.environ.get("CIRCLE_TAG", ""),
@ -624,7 +756,7 @@ def build_message(test_case: TestCase) -> Dict[str, Dict[str, Any]]:
}
def send_report_to_scribe(reports: Dict[str, TestSuite]) -> None:
def send_report_to_scribe(reports: Dict[str, TestFile]) -> None:
access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN")
if not access_token:
@ -643,8 +775,9 @@ def send_report_to_scribe(reports: Dict[str, TestSuite]) -> None:
"message": json.dumps(build_message(test_case)),
"line_escape": False,
}
for name in sorted(reports.keys())
for test_case in reports[name].test_cases
for test_file in reports.values()
for test_suite in test_file.test_suites.values()
for test_case in test_suite.test_cases.values()
]
),
},
@ -653,33 +786,40 @@ def send_report_to_scribe(reports: Dict[str, TestSuite]) -> None:
def assemble_s3_object(
reports: Dict[str, TestSuite],
reports: Dict[str, TestFile],
*,
total_seconds: float,
) -> Report:
) -> Version2Report:
return {
**build_info(), # type: ignore[misc]
'total_seconds': total_seconds,
'suites': {
'format_version': 2,
'files' : {
name: {
'total_seconds': suite.total_time,
'cases': [
{
'name': case.name,
'seconds': case.time,
'errored': case.errored,
'failed': case.failed,
'skipped': case.skipped,
'total_seconds': test_file.total_time,
'filename': test_file.name,
'suites': {
name: {
'total_seconds': suite.total_time,
'cases': {
name: {
'seconds': case.time,
'status': 'skipped' if case.skipped else
'errored' if case.errored else
'failed' if case.failed else None
}
for name, case in suite.test_cases.items()
},
}
for case in suite.test_cases
],
for name, suite in test_file.test_suites.items()
}
}
for name, suite in reports.items()
for name, test_file in reports.items()
}
}
def send_report_to_s3(head_report: Report) -> None:
def send_report_to_s3(head_report: Version2Report) -> None:
job = os.environ.get('CIRCLE_JOB')
sha1 = os.environ.get('CIRCLE_SHA1')
branch = os.environ.get('CIRCLE_BRANCH', '')
@ -773,6 +913,13 @@ def positive_float(value: str) -> float:
return parsed
def reports_has_no_tests(reports: Dict[str, TestFile]) -> bool:
for test_file in reports.values():
for test_suite in test_file.test_suites.values():
if len(test_suite.test_cases) > 0:
return False
return True
if __name__ == '__main__':
import argparse
import sys
@ -830,24 +977,25 @@ if __name__ == '__main__':
)
args = parser.parse_args()
reports = parse_reports(args.folder)
if len(reports) == 0:
print(f"No test reports found in {args.folder}")
reports_by_file = parse_reports(args.folder)
if reports_has_no_tests(reports_by_file):
print(f"No tests in reports found in {args.folder}")
sys.exit(0)
send_report_to_scribe(reports)
send_report_to_scribe(reports_by_file)
longest_tests = []
# longest_tests can contain duplicates, as the same tests can be spawned from different files
longest_tests : List[TestCase] = []
total_time = 0.0
for name in sorted(reports.keys()):
test_suite = reports[name]
if test_suite.total_time >= args.class_print_threshold:
test_suite.print_report(args.longest_of_class)
total_time += test_suite.total_time
longest_tests.extend(test_suite.test_cases)
for filename, test_filename in reports_by_file.items():
for suite_name, test_suite in test_filename.test_suites.items():
if test_suite.total_time >= args.class_print_threshold:
test_suite.print_report(args.longest_of_class)
total_time += test_suite.total_time
longest_tests.extend(test_suite.test_cases.values())
longest_tests = sorted(longest_tests, key=lambda x: x.time)[-args.longest_of_run:]
obj = assemble_s3_object(reports, total_seconds=total_time)
obj = assemble_s3_object(reports_by_file, total_seconds=total_time)
if args.upload_to_s3:
send_report_to_s3(obj)