Store test file in S3 as well for every TestSuite (#52869)

Summary:
We want to store the file names that trigger each test suite so that we can use this data to categorize those test files.

~~After considering several solutions, this one is the most backwards compatible, and the current test cases in test_testing.py for print test stats don't break.~~

The previous plan did not work because multiple Python test jobs spawn the same suites. Instead, the new S3 format stores test files (e.g., `test_nn` and `distributed/test_distributed_fork`), each of which contains the suites it spawns, each of which in turn contains the test cases run within that suite. (The current format has no top layer of test files.)
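
For concreteness, here is a rough sketch of the shape of a version 2 S3 object (the values are invented for illustration, the build metadata keys such as `build_pr` are elided, and the authoritative schema is the `Version2Report` TypedDict in the diffs below):

```python
# Hypothetical example of the new three-layer format: file -> suite -> case.
example_v2_report = {
    'format_version': 2,   # absent in version 1 reports
    'total_seconds': 12.4,
    'files': {
        'distributed/test_distributed_fork': {
            'total_seconds': 12.4,
            'suites': {
                'TestDistBackend': {
                    'total_seconds': 12.4,
                    'cases': {
                        'test_broadcast': {'seconds': 12.3, 'status': None},
                        'test_barrier': {'seconds': 0.1, 'status': 'skipped'},
                    },
                },
            },
        },
    },
}
```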

Because of this major structural change, substantial changes have been made (thank you, samestep!) to test_history.py and print_test_stats.py to keep this new format backwards compatible.
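
The compatibility story is easiest to see as a sketch: readers check for the `format_version` key, which version 1 reports lack. (The `handle_v1`/`handle_v2` helpers here are hypothetical stand-ins; the real dispatch lives in `simplify` in print_test_stats.py and `get_cases` in test_history.py, shown below.)

```python
def read_report(report):
    # Version 1 reports predate the 'format_version' field, so its absence
    # implicitly means version 1 (suite -> case, no file layer).
    if 'format_version' not in report:
        return handle_v1(report)  # hypothetical: walk suites -> cases
    version = report['format_version']
    if version == 2:
        return handle_v2(report)  # hypothetical: walk files -> suites -> cases
    raise RuntimeError(f'Unknown format version: {version}')
```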

Old test plan:
Make sure that the data is as expected in S3 after https://github.com/pytorch/pytorch/pull/52873 finishes.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/52869

Test Plan: Added tests to test_testing.py (which pass), plus CI.

Reviewed By: samestep

Differential Revision: D26672561

Pulled By: janeyx99

fbshipit-source-id: f46b91e16c1d9de5e0cb9bfa648b6448d979257e
Jane Xu, 2021-03-02 07:33:57 -08:00, committed by Facebook GitHub Bot
parent 931100f829
commit 09ce9b5877
5 changed files with 579 additions and 181 deletions

View File: .jenkins/pytorch/test.sh

@@ -172,19 +172,24 @@ test_libtorch() {
     # Start background download
     python tools/download_mnist.py --quiet -d test/cpp/api/mnist &
 
+    # Make test_reports directory
+    # NB: the ending test_libtorch must match the current function name for the current
+    # test reporting process (in print_test_stats.py) to function as expected.
+    TEST_REPORTS_DIR=test/test-reports/cpp-unittest/test_libtorch
+    mkdir -p $TEST_REPORTS_DIR
+
     # Run JIT cpp tests
-    mkdir -p test/test-reports/cpp-unittest
     python test/cpp/jit/tests_setup.py setup
     if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
-      build/bin/test_jit --gtest_output=xml:test/test-reports/cpp-unittest/test_jit.xml
+      build/bin/test_jit --gtest_output=xml:$TEST_REPORTS_DIR/test_jit.xml
     else
-      build/bin/test_jit --gtest_filter='-*CUDA' --gtest_output=xml:test/test-reports/cpp-unittest/test_jit.xml
+      build/bin/test_jit --gtest_filter='-*CUDA' --gtest_output=xml:$TEST_REPORTS_DIR/test_jit.xml
     fi
     python test/cpp/jit/tests_setup.py shutdown
     # Wait for background download to finish
     wait
-    OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api --gtest_output=xml:test/test-reports/cpp-unittest/test_api.xml
-    build/bin/test_tensorexpr --gtest_output=xml:test/test-reports/cpp-unittests/test_tensorexpr.xml
+    OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api --gtest_output=xml:$TEST_REPORTS_DIR/test_api.xml
+    build/bin/test_tensorexpr --gtest_output=xml:$TEST_REPORTS_DIR/test_tensorexpr.xml
     assert_git_not_dirty
   fi
 }
@@ -192,30 +197,39 @@ test_libtorch() {
 test_vulkan() {
   if [[ "$BUILD_ENVIRONMENT" == *vulkan-linux* ]]; then
     export VK_ICD_FILENAMES=/var/lib/jenkins/swiftshader/build/Linux/vk_swiftshader_icd.json
-    mkdir -p test/test-reports/cpp-vulkan
-    build/bin/vulkan_test --gtest_output=xml:test/test-reports/cpp-vulkan/vulkan_test.xml
+    # NB: the ending test_vulkan must match the current function name for the current
+    # test reporting process (in print_test_stats.py) to function as expected.
+    TEST_REPORTS_DIR=test/test-reports/cpp-vulkan/test_vulkan
+    mkdir -p $TEST_REPORTS_DIR
+    build/bin/vulkan_test --gtest_output=xml:$TEST_REPORTS_DIR/vulkan_test.xml
   fi
 }
 
 test_distributed() {
   if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
     echo "Testing distributed C++ tests"
-    mkdir -p test/test-reports/cpp-distributed
-    build/bin/FileStoreTest --gtest_output=xml:test/test-reports/cpp-distributed/FileStoreTest.xml
-    build/bin/HashStoreTest --gtest_output=xml:test/test-reports/cpp-distributed/HashStoreTest.xml
-    build/bin/TCPStoreTest --gtest_output=xml:test/test-reports/cpp-distributed/TCPStoreTest.xml
-    build/bin/ProcessGroupGlooTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupGlooTest.xml
-    build/bin/ProcessGroupNCCLTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupNCCLTest.xml
-    build/bin/ProcessGroupNCCLErrorsTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupNCCLErrorsTest.xml
+    # NB: the ending test_distributed must match the current function name for the current
+    # test reporting process (in print_test_stats.py) to function as expected.
+    TEST_REPORTS_DIR=test/test-reports/cpp-distributed/test_distributed
+    mkdir -p $TEST_REPORTS_DIR
+    build/bin/FileStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/FileStoreTest.xml
+    build/bin/HashStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/HashStoreTest.xml
+    build/bin/TCPStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/TCPStoreTest.xml
+    build/bin/ProcessGroupGlooTest --gtest_output=xml:$TEST_REPORTS_DIR/ProcessGroupGlooTest.xml
+    build/bin/ProcessGroupNCCLTest --gtest_output=xml:$TEST_REPORTS_DIR/ProcessGroupNCCLTest.xml
+    build/bin/ProcessGroupNCCLErrorsTest --gtest_output=xml:$TEST_REPORTS_DIR/ProcessGroupNCCLErrorsTest.xml
   fi
 }
 
 test_rpc() {
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
     echo "Testing RPC C++ tests"
-    mkdir -p test/test-reports/cpp-rpc
-    build/bin/test_cpp_rpc --gtest_output=xml:test/test-reports/cpp-rpc/test_cpp_rpc.xml
+    # NB: the ending test_rpc must match the current function name for the current
+    # test reporting process (in print_test_stats.py) to function as expected.
+    TEST_REPORTS_DIR=test/test-reports/cpp-rpc/test_rpc
+    mkdir -p $TEST_REPORTS_DIR
+    build/bin/test_cpp_rpc --gtest_output=xml:$TEST_REPORTS_DIR/test_cpp_rpc.xml
   fi
 }

View File: test/test_testing.py

@@ -649,6 +649,17 @@ def fakehash(char):
     return char * 40
 
 
+def dummy_meta_meta() -> print_test_stats.ReportMetaMeta:
+    return {
+        'build_pr': '',
+        'build_tag': '',
+        'build_sha1': '',
+        'build_branch': '',
+        'build_job': '',
+        'build_workflow_id': '',
+    }
+
+
 def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
     return {
         'name': name,
@@ -659,7 +670,7 @@ def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
     }
 
 
-def makereport(tests):
+def make_report_v1(tests) -> print_test_stats.Version1Report:
     suites = {
         suite_name: {
             'total_seconds': sum(case['seconds'] for case in cases),
@@ -668,16 +679,45 @@ def make_report_v1(tests) -> print_test_stats.Version1Report:
         for suite_name, cases in tests.items()
     }
     return {
+        **dummy_meta_meta(),
         'total_seconds': sum(s['total_seconds'] for s in suites.values()),
         'suites': suites,
     }
 
 
+def make_case_v2(seconds, status=None) -> print_test_stats.Version2Case:
+    return {
+        'seconds': seconds,
+        'status': status,
+    }
+
+
+def make_report_v2(tests) -> print_test_stats.Version2Report:
+    files = {}
+    for file_name, file_suites in tests.items():
+        suites = {
+            suite_name: {
+                'total_seconds': sum(case['seconds'] for case in cases.values()),
+                'cases': cases,
+            }
+            for suite_name, cases in file_suites.items()
+        }
+        files[file_name] = {
+            'suites': suites,
+            'total_seconds': sum(suite['total_seconds'] for suite in suites.values()),
+        }
+    return {
+        **dummy_meta_meta(),
+        'format_version': 2,
+        'total_seconds': sum(s['total_seconds'] for s in files.values()),
+        'files': files,
+    }
+
+
 class TestPrintTestStats(TestCase):
     maxDiff = None
 
-    def test_analysis(self):
-        head_report = makereport({
+    version1_report: print_test_stats.Version1Report = make_report_v1({
         # input ordering of the suites is ignored
         'Grault': [
             # not printed: status same and time similar
@@ -716,11 +756,124 @@ class TestPrintTestStats(TestCase):
         ],
     })
 
+    version2_report: print_test_stats.Version2Report = make_report_v2(
+        {
+            'test_a': {
+                'Grault': {
+                    'test_grault0': make_case_v2(4.78, 'failed'),
+                    'test_grault2': make_case_v2(1.473, 'errored'),
+                },
+                'Qux': {
+                    'test_qux1': make_case_v2(0.001, 'skipped'),
+                    'test_qux6': make_case_v2(0.002, 'skipped'),
+                    'test_qux4': make_case_v2(7.158, 'failed'),
+                    'test_qux7': make_case_v2(0.003, 'skipped'),
+                    'test_qux8': make_case_v2(11.968),
+                    'test_qux3': make_case_v2(23.496),
+                }
+            },
+            'test_b': {
+                'Bar': {
+                    'test_bar2': make_case_v2(3.742, 'failed'),
+                    'test_bar1': make_case_v2(50.447),
+                },
+                # overall suite time changed but no individual tests
+                'Norf': {
+                    'test_norf1': make_case_v2(3),
+                    'test_norf2': make_case_v2(3),
+                    'test_norf3': make_case_v2(3),
+                    'test_norf4': make_case_v2(3),
+                },
+            },
+            'test_c': {
+                'Foo': {
+                    'test_foo1': make_case_v2(42),
+                    'test_foo2': make_case_v2(56),
+                },
+            }
+        })
+
+    def test_simplify(self):
+        self.assertEqual(
+            {
+                '': {
+                    'Bar': {
+                        'test_bar1': {'seconds': 50.447, 'status': None},
+                        'test_bar2': {'seconds': 3.742, 'status': 'failed'},
+                    },
+                    'Foo': {
+                        'test_foo1': {'seconds': 42, 'status': None},
+                        'test_foo2': {'seconds': 56, 'status': None},
+                    },
+                    'Grault': {
+                        'test_grault0': {'seconds': 4.78, 'status': 'failed'},
+                        'test_grault2': {'seconds': 1.473, 'status': 'errored'},
+                    },
+                    'Norf': {
+                        'test_norf1': {'seconds': 3, 'status': None},
+                        'test_norf3': {'seconds': 3, 'status': None},
+                        'test_norf2': {'seconds': 3, 'status': None},
+                        'test_norf4': {'seconds': 3, 'status': None},
+                    },
+                    'Qux': {
+                        'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
+                        'test_qux3': {'seconds': 23.496, 'status': None},
+                        'test_qux4': {'seconds': 7.158, 'status': 'failed'},
+                        'test_qux5': {'seconds': 11.968, 'status': None},
+                        'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
+                        'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
+                    },
+                },
+            },
+            print_test_stats.simplify(self.version1_report)
+        )
+
+        self.assertEqual(
+            {
+                'test_a': {
+                    'Grault': {
+                        'test_grault0': {'seconds': 4.78, 'status': 'failed'},
+                        'test_grault2': {'seconds': 1.473, 'status': 'errored'},
+                    },
+                    'Qux': {
+                        'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
+                        'test_qux3': {'seconds': 23.496, 'status': None},
+                        'test_qux4': {'seconds': 7.158, 'status': 'failed'},
+                        'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
+                        'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
+                        'test_qux8': {'seconds': 11.968, 'status': None},
+                    },
+                },
+                'test_b': {
+                    'Bar': {
+                        'test_bar1': {'seconds': 50.447, 'status': None},
+                        'test_bar2': {'seconds': 3.742, 'status': 'failed'},
+                    },
+                    'Norf': {
+                        'test_norf1': {'seconds': 3, 'status': None},
+                        'test_norf2': {'seconds': 3, 'status': None},
+                        'test_norf3': {'seconds': 3, 'status': None},
+                        'test_norf4': {'seconds': 3, 'status': None},
+                    },
+                },
+                'test_c': {
+                    'Foo': {
+                        'test_foo1': {'seconds': 42, 'status': None},
+                        'test_foo2': {'seconds': 56, 'status': None},
+                    },
+                },
+            },
+            print_test_stats.simplify(self.version2_report),
+        )
+
+    def test_analysis(self):
+        head_report = self.version1_report
+
         base_reports = {
             # bbbb has no reports, so base is cccc instead
             fakehash('b'): [],
             fakehash('c'): [
-                makereport({
+                make_report_v1({
                     'Baz': [
                         makecase('test_baz2', 13.605),
                         # no recent suites have & skip this test
@@ -753,7 +906,7 @@ class TestPrintTestStats(TestCase):
                 }),
             ],
             fakehash('d'): [
-                makereport({
+                make_report_v1({
                     'Foo': [
                         makecase('test_foo1', 40),
                         # removed in cccc
@@ -783,7 +936,7 @@ class TestPrintTestStats(TestCase):
             ],
             fakehash('e'): [],
             fakehash('f'): [
-                makereport({
+                make_report_v1({
                     'Foo': [
                         makecase('test_foo3', 24),
                         makecase('test_foo1', 43),
@@ -1066,14 +1219,14 @@ Added (across 1 suite) 1 test, totaling + 3.00s
 ''',
             print_test_stats.regression_info(
                 head_sha=fakehash('a'),
-                head_report=makereport({
+                head_report=make_report_v1({
                     'Foo': [
                         makecase('test_foo', 0.02, skipped=True),
                         makecase('test_baz', 3),
                     ]}),
                 base_reports={
                     fakehash('b'): [
-                        makereport({
+                        make_report_v1({
                             'Foo': [
                                 makecase('test_foo', 40),
                                 makecase('test_bar', 1),
@@ -1081,7 +1234,7 @@ Added (across 1 suite) 1 test, totaling + 3.00s
                         }),
                     ],
                     fakehash('c'): [
-                        makereport({
+                        make_report_v1({
                             'Foo': [
                                 makecase('test_foo', 43),
                             ],
@@ -1135,7 +1288,7 @@ Added (across 1 suite) 2 tests, totaling + 3.02s
 ''',
             print_test_stats.regression_info(
                 head_sha=fakehash('a'),
-                head_report=makereport({
+                head_report=make_report_v1({
                     'Foo': [
                         makecase('test_foo', 0.02, skipped=True),
                         makecase('test_baz', 3),

View File: test/test_utils.py

@@ -289,7 +289,7 @@ class TestCheckpoint(TestCase):
         out = checkpoint(run_fn2, input_var, input_var2)
         out.sum().backward()
 
 
-class TestDataLoader(TestCase):
+class TestDataLoaderUtils(TestCase):
     def setUp(self):
         self.dataset = torch.randn(5, 3, 3, 2)
         self.batch_size = 3

View File: tools/test_history.py

@@ -6,11 +6,11 @@ import json
 import subprocess
 from collections import defaultdict
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
 
 import boto3  # type: ignore[import]
 import botocore  # type: ignore[import]
-from typing_extensions import TypedDict
+from typing_extensions import Literal, TypedDict
 
 
 def get_git_commit_history(
@@ -36,31 +36,70 @@ def get_object_summaries(*, bucket: Any, sha: str) -> Dict[str, List[Any]]:
     return dict(by_job)
 
 
-class Case(TypedDict):
-    name: str
+# TODO: consolidate these typedefs with the identical ones in
+# torch/testing/_internal/print_test_stats.py
+
+Commit = str  # 40-digit SHA-1 hex string
+Status = Optional[Literal['errored', 'failed', 'skipped']]
+
+
+class CaseMeta(TypedDict):
     seconds: float
+
+
+class Version1Case(CaseMeta):
+    name: str
     errored: bool
     failed: bool
    skipped: bool
 
 
-class Suite(TypedDict):
+class Version1Suite(TypedDict):
     total_seconds: float
-    cases: List[Case]
+    cases: List[Version1Case]
 
 
-class ReportMeta(TypedDict):
+class ReportMetaMeta(TypedDict):
     build_pr: str
     build_tag: str
-    build_sha1: str
+    build_sha1: Commit
     build_branch: str
     build_job: str
     build_workflow_id: str
 
 
-class Report(ReportMeta):
+class ReportMeta(ReportMetaMeta):
     total_seconds: float
-    suites: Dict[str, Suite]
+
+
+class Version1Report(ReportMeta):
+    suites: Dict[str, Version1Suite]
+
+
+class Version2Case(CaseMeta):
+    status: Status
+
+
+class Version2Suite(TypedDict):
+    total_seconds: float
+    cases: Dict[str, Version2Case]
+
+
+class Version2File(TypedDict):
+    total_seconds: float
+    suites: Dict[str, Version2Suite]
+
+
+class VersionedReport(ReportMeta):
+    format_version: int
+
+
+# report: Version2Report implies report['format_version'] == 2
+class Version2Report(VersionedReport):
+    files: Dict[str, Version2File]
+
+
+Report = Union[Version1Report, VersionedReport]
 
 
 def get_jsons(
@@ -77,32 +116,63 @@ def get_jsons(
     }
 
 
+# TODO: consolidate this with the case_status function from
+# torch/testing/_internal/print_test_stats.py
+def case_status(case: Version1Case) -> Status:
+    for k in {'errored', 'failed', 'skipped'}:
+        if case[k]:  # type: ignore[misc]
+            return cast(Status, k)
+    return None
+
+
+# TODO: consolidate this with the newify_case function from
+# torch/testing/_internal/print_test_stats.py
+def newify_case(case: Version1Case) -> Version2Case:
+    return {
+        'seconds': case['seconds'],
+        'status': case_status(case),
+    }
+
+
+# TODO: consolidate this with the simplify function from
+# torch/testing/_internal/print_test_stats.py
 def get_cases(
     *,
     data: Report,
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
-) -> List[Case]:
-    cases = []
-    suites = data['suites']
-    for name, suite in suites.items():
-        if name == suite_name or not suite_name:
-            for case in suite['cases']:
-                if case['name'] == test_name:
-                    cases.append(case)
+) -> List[Version2Case]:
+    cases: List[Version2Case] = []
+    if 'format_version' not in data:  # version 1 implicitly
+        v1report = cast(Version1Report, data)
+        suites = v1report['suites']
+        for sname, v1suite in suites.items():
+            if sname == suite_name or not suite_name:
+                for v1case in v1suite['cases']:
+                    if v1case['name'] == test_name:
+                        cases.append(newify_case(v1case))
+    else:
+        v_report = cast(VersionedReport, data)
+        version = v_report['format_version']
+        if version == 2:
+            v2report = cast(Version2Report, v_report)
+            for fname, v2file in v2report['files'].items():
+                if fname == filename or not filename:
+                    for sname, v2suite in v2file['suites'].items():
+                        if sname == suite_name or not suite_name:
+                            v2case = v2suite['cases'].get(test_name)
+                            if v2case:
+                                cases.append(v2case)
+        else:
+            raise RuntimeError(f'Unknown format version: {version}')
     return cases
 
 
-def case_status(case: Case) -> Optional[str]:
-    for k in {'errored', 'failed', 'skipped'}:
-        if case[k]:  # type: ignore[misc]
-            return k
-    return None
-
-
 def make_column(
     *,
     data: Optional[Report],
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
     digits: int,
@@ -112,12 +182,13 @@ def make_column(
     if data:
         cases = get_cases(
             data=data,
+            filename=filename,
            suite_name=suite_name,
            test_name=test_name
         )
         if cases:
             case = cases[0]
-            status = case_status(case)
+            status = case['status']
             omitted = len(cases) - 1
             if status:
                 return f'{status.rjust(num_length)} ', omitted
@@ -134,6 +205,7 @@ def make_columns(
     jobs: List[str],
     jsons: Dict[str, Report],
     omitted: Dict[str, int],
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
     digits: int,
@@ -145,6 +217,7 @@ def make_columns(
         data = jsons.get(job)
         column, omitted_suites = make_column(
             data=data,
+            filename=filename,
             suite_name=suite_name,
             test_name=test_name,
             digits=digits,
@@ -165,6 +238,7 @@ def make_lines(
     jobs: Set[str],
     jsons: Dict[str, Report],
     omitted: Dict[str, int],
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
 ) -> List[str]:
@@ -172,12 +246,13 @@ def make_lines(
     for job, data in jsons.items():
         cases = get_cases(
             data=data,
+            filename=filename,
             suite_name=suite_name,
             test_name=test_name,
         )
         if cases:
             case = cases[0]
-            status = case_status(case)
+            status = case['status']
             line = f'{job} {case["seconds"]}s{f" {status}" if status else ""}'
             if job in omitted and omitted[job] > 0:
                 line += f' ({omitted[job]} S3 reports omitted)'
@@ -197,6 +272,7 @@ def display_history(
     bucket: Any,
     commits: List[Tuple[str, datetime]],
     jobs: Optional[List[str]],
+    filename: Optional[str],
     suite_name: Optional[str],
     test_name: str,
     delta: int,
@@ -226,6 +302,7 @@ def display_history(
             jobs=jobs,
             jsons=jsons,
             omitted=omitted,
+            filename=filename,
             suite_name=suite_name,
             test_name=test_name,
             digits=digits,
@@ -236,6 +313,7 @@ def display_history(
             jobs=set(jobs or []),
             jsons=jsons,
             omitted=omitted,
+            filename=filename,
             suite_name=suite_name,
             test_name=test_name,
         )
@@ -352,6 +430,10 @@ indicated test was not found in that report.
         action='store_true',
         help='(multiline) ignore listed jobs, show all jobs for each commit',
     )
+    parser.add_argument(
+        '--file',
+        help='name of the file containing the test',
+    )
     parser.add_argument(
         '--suite',
         help='name of the suite containing the test',
@@ -381,6 +463,7 @@ indicated test was not found in that report.
         bucket=bucket,
         commits=commits,
         jobs=jobs,
+        filename=args.file,
         suite_name=args.suite,
         test_name=args.test,
         delta=args.delta,

View File: torch/testing/_internal/print_test_stats.py

@@ -5,6 +5,7 @@ import datetime
 import json
 import math
 import os
+import re
 import statistics
 import subprocess
 import time
@@ -12,11 +13,11 @@ from collections import defaultdict
 from glob import glob
 from pathlib import Path
 from typing import (Any, DefaultDict, Dict, Iterable, Iterator, List, Optional,
-                    Tuple)
+                    Set, Tuple, Union, cast)
 from xml.dom import minidom  # type: ignore[import]
 
 import requests
-from typing_extensions import TypedDict
+from typing_extensions import Literal, TypedDict
 
 try:
     import boto3  # type: ignore[import]
@@ -24,29 +25,30 @@ try:
 except ImportError:
     HAVE_BOTO3 = False
 
+# TODO: consolidate these typedefs with the identical ones in
+# tools/test_history.py
+
 Commit = str  # 40-digit SHA-1 hex string
-Status = Optional[str]  # errored, failed, skipped, or None
-
-# represent suite as dict because indexing is useful
-SimplerCase = Tuple[float, Status]
-SimplerSuite = Dict[str, SimplerCase]
-SimplerReport = Dict[str, SimplerSuite]
+Status = Optional[Literal['errored', 'failed', 'skipped']]
 
 
-class Case(TypedDict):
-    name: str
+class CaseMeta(TypedDict):
     seconds: float
+
+
+class Version1Case(CaseMeta):
+    name: str
     errored: bool
     failed: bool
     skipped: bool
 
 
-class Suite(TypedDict):
+class Version1Suite(TypedDict):
     total_seconds: float
-    cases: List[Case]
+    cases: List[Version1Case]
 
 
-class ReportMeta(TypedDict, total=False):
+class ReportMetaMeta(TypedDict):
     build_pr: str
     build_tag: str
     build_sha1: Commit
@@ -55,9 +57,42 @@ class ReportMeta(TypedDict, total=False):
     build_workflow_id: str
 
 
-class Report(ReportMeta):
+class ReportMeta(ReportMetaMeta):
     total_seconds: float
-    suites: Dict[str, Suite]
+
+
+class Version1Report(ReportMeta):
+    suites: Dict[str, Version1Suite]
+
+
+class Version2Case(CaseMeta):
+    status: Status
+
+
+class Version2Suite(TypedDict):
+    total_seconds: float
+    cases: Dict[str, Version2Case]
+
+
+class Version2File(TypedDict):
+    total_seconds: float
+    suites: Dict[str, Version2Suite]
+
+
+class VersionedReport(ReportMeta):
+    format_version: int
+
+
+# report: Version2Report implies report['format_version'] == 2
+class Version2Report(VersionedReport):
+    files: Dict[str, Version2File]
+
+
+Report = Union[Version1Report, VersionedReport]
+
+SimplerSuite = Dict[str, Version2Case]
+SimplerFile = Dict[str, SimplerSuite]
+SimplerReport = Dict[str, SimplerFile]
 
 
 class Stat(TypedDict):
@@ -69,7 +104,7 @@ class CaseDiff(TypedDict):
     margin: str
     name: str
     was: Optional[Tuple[Stat, Status]]
-    now: Optional[SimplerCase]
+    now: Optional[Version2Case]
 
 
 class SuiteDiff(TypedDict):
@@ -80,21 +115,76 @@ class SuiteDiff(TypedDict):
     cases: List[CaseDiff]
 
 
-def case_status(case: Case) -> Status:
-    for k in {'errored', 'failed', 'skipped'}:
-        if case[k]:  # type: ignore[misc]
-            return k
-    return None
-
-
-def simplify(report: Report) -> SimplerReport:
-    return {
-        suite_name: {
-            case['name']: (case['seconds'], case_status(case))
-            for case in suite['cases']
-        }
-        for suite_name, suite in report['suites'].items()
-    }
+# TODO: consolidate this with the case_status function from
+# tools/test_history.py
+def case_status(case: Version1Case) -> Status:
+    for k in {'errored', 'failed', 'skipped'}:
+        if case[k]:  # type: ignore[misc]
+            return cast(Status, k)
+    return None
+
+
+# TODO: consolidate this with the newify_case function from
+# tools/test_history.py
+def newify_case(case: Version1Case) -> Version2Case:
+    return {
+        'seconds': case['seconds'],
+        'status': case_status(case),
+    }
+
+
+# TODO: consolidate this with the get_cases function from
+# tools/test_history.py
+# Here we translate to a three-layer format (file -> suite -> case)
+# rather than a two-layer format (suite -> case) because as mentioned in
+# a comment in the body of this function, if we consolidate suites that
+# share a name, there will be test case name collisions, and once we
+# have those, there's no clean way to deal with it in the diffing logic.
+# It's not great to have to add a dummy empty string for the filename
+# for version 1 reports, but it's better than either losing cases that
+# share a name (for version 2 reports) or using a list of cases rather
+# than a dict.
+def simplify(report: Report) -> SimplerReport:
+    if 'format_version' not in report:  # version 1 implicitly
+        v1report = cast(Version1Report, report)
+        return {
+            # we just don't have test filename information sadly, so we
+            # just make one fake filename that is the empty string
+            '': {
+                suite_name: {
+                    # This clobbers some cases that have duplicate names
+                    # because in version 1, we would merge together all
+                    # the suites with a given name (even if they came
+                    # from different files), so there were actually
+                    # situations in which two cases in the same suite
+                    # shared a name (because they actually originally
+                    # came from two suites that were then merged). It
+                    # would probably be better to warn about the cases
+                    # that we're silently discarding here, but since
+                    # we're only uploading in the new format (where
+                    # everything is also keyed by filename) going
+                    # forward, it shouldn't matter too much.
+                    case['name']: newify_case(case)
+                    for case in suite['cases']
+                }
+                for suite_name, suite in v1report['suites'].items()
+            }
+        }
+    else:
+        v_report = cast(VersionedReport, report)
+        version = v_report['format_version']
+        if version == 2:
+            v2report = cast(Version2Report, v_report)
+            return {
+                filename: {
+                    suite_name: suite['cases']
+                    for suite_name, suite in file_data['suites'].items()
+                }
+                for filename, file_data in v2report['files'].items()
+            }
+        else:
+            raise RuntimeError(f'Unknown format version: {version}')
 
 
 def plural(n: int) -> str:
@@ -165,7 +255,9 @@ def unlines(lines: List[str]) -> str:
 def matching_test_times(
+    *,
     base_reports: Dict[Commit, List[SimplerReport]],
+    filename: str,
     suite_name: str,
     case_name: str,
     status: Status,
@@ -173,11 +265,14 @@ def matching_test_times(
     times: List[float] = []
     for reports in base_reports.values():
         for report in reports:
-            suite = report.get(suite_name)
-            if suite:
-                case = suite.get(case_name)
-                if case:
-                    t, s = case
-                    if s == status:
-                        times.append(t)
+            file_data = report.get(filename)
+            if file_data:
+                suite = file_data.get(suite_name)
+                if suite:
+                    case = suite.get(case_name)
+                    if case:
+                        t = case['seconds']
+                        s = case['status']
+                        if s == status:
+                            times.append(t)
     return times
@@ -195,30 +290,43 @@ def analyze(
     # find all relevant suites (those in either base or head or both)
     all_reports = [head_report] + base_report
-    all_suites = {k for r in all_reports for k in r.keys()}
+    all_suites: Set[Tuple[str, str]] = {
+        (filename, suite_name)
+        for r in all_reports
+        for filename, file_data in r.items()
+        for suite_name in file_data.keys()
+    }
 
     removed_suites: List[SuiteDiff] = []
     modified_suites: List[SuiteDiff] = []
     added_suites: List[SuiteDiff] = []
 
-    for suite_name in sorted(all_suites):
+    for filename, suite_name in sorted(all_suites):
         case_diffs: List[CaseDiff] = []
-        head_suite = head_report.get(suite_name)
+        head_suite = head_report.get(filename, {}).get(suite_name)
         base_cases: Dict[str, Status] = dict(sorted(set.intersection(*[
-            {(n, s) for n, (_, s) in report.get(suite_name, {}).items()}
+            {
+                (n, case['status'])
+                for n, case
+                in report.get(filename, {}).get(suite_name, {}).items()
+            }
             for report in base_report
         ] or [set()])))
         case_stats: Dict[str, Stat] = {}
         if head_suite:
-            now = sum(case[0] for case in head_suite.values())
-            if any(suite_name in report for report in base_report):
+            now = sum(case['seconds'] for case in head_suite.values())
+            if any(
+                filename in report and suite_name in report[filename]
+                for report in base_report
+            ):
                 removed_cases: List[CaseDiff] = []
                 for case_name, case_status in base_cases.items():
                     case_stats[case_name] = list_stat(matching_test_times(
-                        base_reports,
-                        suite_name,
-                        case_name,
-                        case_status,
+                        base_reports=base_reports,
+                        filename=filename,
+                        suite_name=suite_name,
+                        case_name=case_name,
+                        status=case_status,
                     ))
                     if case_name not in head_suite:
                         removed_cases.append({
@@ -234,7 +342,7 @@ def analyze(
                     if head_case_name in base_cases:
                         stat = case_stats[head_case_name]
                         base_status = base_cases[head_case_name]
-                        if head_case[1] != base_status:
+                        if head_case['status'] != base_status:
                             modified_cases.append({
                                 'margin': '!',
                                 'name': head_case_name,
@@ -278,10 +386,11 @@ def analyze(
         else:
             for case_name, case_status in base_cases.items():
                 case_stats[case_name] = list_stat(matching_test_times(
-                    base_reports,
-                    suite_name,
-                    case_name,
-                    case_status,
+                    base_reports=base_reports,
+                    filename=filename,
+                    suite_name=suite_name,
+                    case_name=case_name,
+                    status=case_status,
                 ))
                 case_diffs.append({
                     'margin': ' ',
@@ -316,9 +425,9 @@ def case_diff_lines(diff: CaseDiff) -> List[str]:
     now = diff['now']
 
     if now:
-        now_stat: Stat = {'center': now[0], 'spread': None}
+        now_stat: Stat = {'center': now['seconds'], 'spread': None}
         now_line = f'  # now {display_stat(now_stat, case_fmt)}'
-        now_status = now[1]
+        now_status = now['status']
         if now_status:
             now_line += f' ({now_status})'
         lines.append(now_line)
@@ -410,7 +519,7 @@ def case_delta(case: CaseDiff) -> Stat:
     now = case['now']
     return recenter(
         was[0] if was else zero_stat(),
-        now[0] if now else 0,
+        now['seconds'] if now else 0,
     )
@@ -542,7 +651,7 @@ class TestCase:
 class TestSuite:
     def __init__(self, name: str) -> None:
         self.name = name
-        self.test_cases: List[TestCase] = []
+        self.test_cases: Dict[str, TestCase] = dict()
         self.failed_count = 0
         self.skipped_count = 0
         self.errored_count = 0
@@ -555,14 +664,14 @@ class TestSuite:
         return f'TestSuite({rc})'
 
     def append(self, test_case: TestCase) -> None:
-        self.test_cases.append(test_case)
+        self.test_cases[test_case.name] = test_case
         self.total_time += test_case.time
         self.failed_count += 1 if test_case.failed else 0
         self.skipped_count += 1 if test_case.skipped else 0
         self.errored_count += 1 if test_case.errored else 0
 
     def print_report(self, num_longest: int = 3) -> None:
-        sorted_tests = sorted(self.test_cases, key=lambda x: x.time)
+        sorted_tests = sorted(self.test_cases.values(), key=lambda x: x.time)
         test_count = len(sorted_tests)
         print(f"class {self.name}:")
         print(f"    tests: {test_count} failed: {self.failed_count} skipped: {self.skipped_count} errored: {self.errored_count}")
@@ -577,25 +686,48 @@ class TestSuite:
         print("")
 
 
+class TestFile:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.total_time = 0.0
+        self.test_suites: Dict[str, TestSuite] = dict()
+
+    def append(self, test_case: TestCase) -> None:
+        suite_name = test_case.class_name
+        if suite_name not in self.test_suites:
+            self.test_suites[suite_name] = TestSuite(suite_name)
+        if test_case.name in self.test_suites[suite_name].test_cases:
+            # This behaviour is expected for test_cpp_extensions_aot, distributed/test_distributed_fork,
+            # and distributed/test_distributed_spawn. In these cases, we just lump the duplicate tests
+            # together, which is admittedly inaccurate for test_cpp_extensions_aot, though this is
+            # negligible as the test is short. For other unexpected cases, we should raise a warning.
+            if self.name != 'test_cpp_extensions_aot' and \
+               self.name != 'distributed/test_distributed_fork' and \
+               self.name != 'distributed/test_distributed_spawn' and \
+               self.name != 'cpp':  # also allow 'cpp', as it runs twice in caffe2 ORT jobs
+                raise RuntimeWarning(f'Duplicate test case {test_case.name} in suite {suite_name} called from {self.name}')
+        self.test_suites[suite_name].append(test_case)
+        self.total_time += test_case.time
+
+
 def parse_report(path: str) -> Iterator[TestCase]:
     dom = minidom.parse(path)
     for test_case in dom.getElementsByTagName('testcase'):
         yield TestCase(test_case)
 
 
-def parse_reports(folder: str) -> Dict[str, TestSuite]:
+def parse_reports(folder: str) -> Dict[str, TestFile]:
     reports = glob(os.path.join(folder, '**', '*.xml'), recursive=True)
-    tests_by_class = dict()
+    tests_by_file = dict()
     for report in reports:
+        test_filename = re.sub(r'\.', '/', os.path.basename(os.path.dirname(report)))
+        if test_filename not in tests_by_file:
+            tests_by_file[test_filename] = TestFile(test_filename)
         for test_case in parse_report(report):
-            class_name = test_case.class_name
-            if class_name not in tests_by_class:
-                tests_by_class[class_name] = TestSuite(class_name)
-            tests_by_class[class_name].append(test_case)
-    return tests_by_class
+            tests_by_file[test_filename].append(test_case)
+    return tests_by_file
 
 
-def build_info() -> ReportMeta:
+def build_info() -> ReportMetaMeta:
     return {
         "build_pr": os.environ.get("CIRCLE_PR_NUMBER", ""),
         "build_tag": os.environ.get("CIRCLE_TAG", ""),
@@ -624,7 +756,7 @@ def build_message(test_case: TestCase) -> Dict[str, Dict[str, Any]]:
     }
 
 
-def send_report_to_scribe(reports: Dict[str, TestSuite]) -> None:
+def send_report_to_scribe(reports: Dict[str, TestFile]) -> None:
     access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN")
 
     if not access_token:
@@ -643,8 +775,9 @@ def send_report_to_scribe(reports: Dict[str, TestFile]) -> None:
                         "message": json.dumps(build_message(test_case)),
                         "line_escape": False,
                     }
-                    for name in sorted(reports.keys())
-                    for test_case in reports[name].test_cases
+                    for test_file in reports.values()
+                    for test_suite in test_file.test_suites.values()
+                    for test_case in test_suite.test_cases.values()
                 ]
             ),
         },
@@ -653,33 +786,40 @@ def send_report_to_scribe(reports: Dict[str, TestFile]) -> None:
 def assemble_s3_object(
-    reports: Dict[str, TestSuite],
+    reports: Dict[str, TestFile],
     *,
     total_seconds: float,
-) -> Report:
+) -> Version2Report:
     return {
         **build_info(),  # type: ignore[misc]
         'total_seconds': total_seconds,
-        'suites': {
-            name: {
-                'total_seconds': suite.total_time,
-                'cases': [
-                    {
-                        'name': case.name,
-                        'seconds': case.time,
-                        'errored': case.errored,
-                        'failed': case.failed,
-                        'skipped': case.skipped,
-                    }
-                    for case in suite.test_cases
-                ],
-            }
-            for name, suite in reports.items()
-        }
+        'format_version': 2,
+        'files': {
+            name: {
+                'total_seconds': test_file.total_time,
+                'filename': test_file.name,
+                'suites': {
+                    name: {
+                        'total_seconds': suite.total_time,
+                        'cases': {
+                            name: {
+                                'seconds': case.time,
+                                'status': 'skipped' if case.skipped else
+                                          'errored' if case.errored else
+                                          'failed' if case.failed else None
+                            }
+                            for name, case in suite.test_cases.items()
+                        },
+                    }
+                    for name, suite in test_file.test_suites.items()
+                }
+            }
+            for name, test_file in reports.items()
+        }
     }
 
 
-def send_report_to_s3(head_report: Report) -> None:
+def send_report_to_s3(head_report: Version2Report) -> None:
     job = os.environ.get('CIRCLE_JOB')
     sha1 = os.environ.get('CIRCLE_SHA1')
     branch = os.environ.get('CIRCLE_BRANCH', '')
@@ -773,6 +913,13 @@ def positive_float(value: str) -> float:
     return parsed
 
 
+def reports_has_no_tests(reports: Dict[str, TestFile]) -> bool:
+    for test_file in reports.values():
+        for test_suite in test_file.test_suites.values():
+            if len(test_suite.test_cases) > 0:
+                return False
+    return True
+
+
 if __name__ == '__main__':
     import argparse
     import sys
@@ -830,24 +977,25 @@ if __name__ == '__main__':
     )
     args = parser.parse_args()
 
-    reports = parse_reports(args.folder)
-    if len(reports) == 0:
-        print(f"No test reports found in {args.folder}")
+    reports_by_file = parse_reports(args.folder)
+    if reports_has_no_tests(reports_by_file):
+        print(f"No tests in reports found in {args.folder}")
         sys.exit(0)
 
-    send_report_to_scribe(reports)
+    send_report_to_scribe(reports_by_file)
 
-    longest_tests = []
+    # longest_tests can contain duplicates, as the same tests can be spawned from different files
+    longest_tests: List[TestCase] = []
     total_time = 0.0
-    for name in sorted(reports.keys()):
-        test_suite = reports[name]
-        if test_suite.total_time >= args.class_print_threshold:
-            test_suite.print_report(args.longest_of_class)
-        total_time += test_suite.total_time
-        longest_tests.extend(test_suite.test_cases)
+    for filename, test_filename in reports_by_file.items():
+        for suite_name, test_suite in test_filename.test_suites.items():
+            if test_suite.total_time >= args.class_print_threshold:
+                test_suite.print_report(args.longest_of_class)
+            total_time += test_suite.total_time
+            longest_tests.extend(test_suite.test_cases.values())
     longest_tests = sorted(longest_tests, key=lambda x: x.time)[-args.longest_of_run:]
 
     obj = assemble_s3_object(reports_by_file, total_seconds=total_time)
 
     if args.upload_to_s3:
         send_report_to_s3(obj)