First step toward refactoring S3 reading logic (#53755)

Summary:
This is an initial attempt at refactoring and consolidating our S3 read logic for print_test_stats.py, test_history.py, and run_test.py. This way, boto3 and botocore no longer need to be imported in various places throughout the code base, and duplicated logic (such as the many type definitions) can live in one place: `tools/stats_utils/s3_stat_parser.py`. walterddr contributed to this PR by moving print_test_stats.py to the tools folder and the corresponding tests to a subfolder within tools.
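For illustration, this is the shape of the consolidated pattern (a minimal sketch based on the diff below, not the full module): the boto3/botocore imports live in a single guarded block inside the parser module, and callers only touch the shared helpers and the `HAVE_BOTO3` flag.

```python
# Sketch of the pattern now used in tools/stats_utils/s3_stat_parser.py:
# boto3/botocore are imported exactly once, guarded by HAVE_BOTO3.
from typing import Any

try:
    import boto3  # type: ignore[import]
    import botocore  # type: ignore[import]
    HAVE_BOTO3 = True
except ImportError:
    HAVE_BOTO3 = False


def get_S3_bucket_readonly(bucket_name: str) -> Any:
    # unsigned requests suffice because the stats bucket is world-readable
    s3 = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED))
    return s3.Bucket(bucket_name)


# Callers (print_test_stats.py, test_history.py, run_test.py) then just do:
#     from tools.stats_utils.s3_stat_parser import get_S3_bucket_readonly, HAVE_BOTO3
```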

**NOTE: this removes those tests from CI, as the new `tools/test/test_stats.py` does not live in the test/ directory like the other tests listed in TESTS in run_test.py.**

Pull Request resolved: https://github.com/pytorch/pytorch/pull/53755

Test Plan:
This is a refactoring change and should not break anything: the scripts should run exactly as they did before.
To verify that print_test_stats.py still functions, run `python tools/test/test_stats.py` and make sure all tests pass.
To verify that test_history.py works, run the example commands from `tools/test_history.py --help` and check that their output matches what is shown there. Note that the script keeps printing for a while, so don't be alarmed.

Some next steps:
- Identifying actual similarities among the three current use cases and further refactoring/consolidating functions (e.g., combining simplify and get_cases; see the sketch after this list)
- Moving more parsing logic into s3_stat_parser.py for better abstraction between our files
- Adding tests for s3_stat_parser.py once it contains more functionality
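To make the first bullet concrete, here is a hypothetical shape such a consolidation could take (not part of this PR): both `simplify` and `get_cases` walk the same version-1/version-2 report structures, so a single traversal helper in s3_stat_parser.py could feed both.

```python
# Hypothetical helper (an illustration only): yield every case in a report
# as (filename, suite_name, case_name, Version2Case), normalizing v1 cases
# via newify_case. simplify() and get_cases() could then become thin
# filters over this one walk.
from typing import Iterator, Tuple, cast

from tools.stats_utils.s3_stat_parser import (Report, Version1Report, Version2Report,
                                              Version2Case, newify_case)


def walk_cases(data: Report) -> Iterator[Tuple[str, str, str, Version2Case]]:
    if 'format_version' not in data:  # version 1 implicitly
        v1report = cast(Version1Report, data)
        for sname, v1suite in v1report['suites'].items():
            for v1case in v1suite['cases']:
                yield '', sname, v1case['name'], newify_case(v1case)
    else:  # assumes format_version == 2 for brevity, as get_cases does after its check
        v2report = cast(Version2Report, data)
        for fname, v2file in v2report['files'].items():
            for sname, v2suite in v2file['suites'].items():
                for cname, v2case in v2suite['cases'].items():
                    yield fname, sname, cname, v2case
```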

Reviewed By: agolynski, samestep

Differential Revision: D27030285

Pulled By: janeyx99

fbshipit-source-id: e664781324ef7c0c30943bfd7f17c895075ef7a7
Jane Xu authored on 2021-03-17 12:30:21 -07:00, committed by Facebook GitHub Bot
parent ccdcfba5de
commit 2e7311ef25
11 changed files with 808 additions and 886 deletions


@@ -636,7 +636,8 @@ jobs:
export CIRCLE_JOB="$CIRCLE_JOB"
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
cd workspace
python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
export PYTHONPATH="\${PWD}"
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
EOL
echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
unbuffer bash command.sh | ts
@@ -800,8 +801,9 @@ jobs:
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
export PYTHONPATH="$PWD"
pip install typing_extensions boto3
python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports


@@ -198,7 +198,8 @@ jobs:
export CIRCLE_JOB="$CIRCLE_JOB"
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
cd workspace
python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
export PYTHONPATH="\${PWD}"
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
EOL
echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
unbuffer bash command.sh | ts
@@ -362,8 +363,9 @@ jobs:
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
export PYTHONPATH="$PWD"
pip install typing_extensions boto3
python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports


@@ -34,13 +34,15 @@ warn_return_any = True
implicit_reexport = False
strict_equality = True
files = tools/codegen/gen.py,
files =
tools/autograd/*.py,
tools/codegen/gen.py,
tools/print_test_stats.py,
tools/pyi/*.py,
tools/stats_utils/*.py,
tools/test_history.py,
torch/testing/_internal/framework_utils.py,
torch/testing/_internal/mypy_wrapper.py,
torch/testing/_internal/print_test_stats.py,
torch/utils/benchmark/utils/common.py,
torch/utils/benchmark/utils/timer.py,
torch/utils/benchmark/utils/valgrind_wrapper/*.py,


@@ -34,8 +34,9 @@ files =
test/test_type_hints.py,
test/test_type_info.py,
test/test_utils.py,
tools/clang_format_utils.py,
tools/generate_torch_version.py,
tools/clang_format_utils.py
tools/stats_utils/*.py
# Minimum version supported - variable annotations were introduced


@@ -22,11 +22,10 @@ from typing import Dict, Optional, Tuple, List, Any
from typing_extensions import TypedDict
try:
import boto3 # type: ignore[import]
import botocore # type: ignore[import]
import botocore.exceptions # type: ignore[import]
HAVE_BOTO3 = True
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
from tools.stats_utils.s3_stat_parser import (get_S3_bucket_readonly, HAVE_BOTO3)
except ImportError:
print("Unable to import s3_stat_parser from tools. Running without S3 stats...")
HAVE_BOTO3 = False
@@ -378,25 +377,19 @@ def get_test_time_reports_from_S3() -> List[Dict[str, Any]]:
job = os.environ.get("CIRCLE_JOB", "")
job_minus_shard_number = job.rstrip('0123456789')
try:
s3 = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED))
bucket = s3.Bucket(name="ossci-metrics")
reports = []
commit_index = 0
while len(reports) == 0 and commit_index < len(nightly_commits):
nightly_commit = nightly_commits[commit_index]
print(f'Grabbing reports from nightly commit: {nightly_commit}')
summaries = bucket.objects.filter(Prefix=f"test_time/{nightly_commit}/{job_minus_shard_number}")
for summary in summaries:
binary = summary.get()["Body"].read()
string = bz2.decompress(binary).decode("utf-8")
reports.append(json.loads(string))
commit_index += 1
return reports
except botocore.exceptions.ClientError as err:
print('Error Message: {}'.format(err.response['Error']['Message']))
return []
bucket = get_S3_bucket_readonly('ossci-metrics')
reports = []
commit_index = 0
while len(reports) == 0 and commit_index < len(nightly_commits):
nightly_commit = nightly_commits[commit_index]
print(f'Grabbing reports from nightly commit: {nightly_commit}')
summaries = bucket.objects.filter(Prefix=f"test_time/{nightly_commit}/{job_minus_shard_number}")
for summary in summaries:
binary = summary.get()["Body"].read()
string = bz2.decompress(binary).decode("utf-8")
reports.append(json.loads(string))
commit_index += 1
return reports
def calculate_job_times(reports: List[Dict[str, Any]]) -> Dict[str, Tuple[float, int]]:
@@ -431,7 +424,8 @@ def pull_job_times_from_S3() -> Dict[str, Tuple[float, int]]:
if HAVE_BOTO3:
s3_reports = get_test_time_reports_from_S3()
else:
print('Please install boto3 to enable using S3 test times for automatic sharding and test categorization.')
print('Uh oh, boto3 is not found. Either it is not installed or we failed to import s3_stat_parser.')
print('If not installed, please install boto3 for automatic sharding and test categorization.')
s3_reports = []
if len(s3_reports) == 0:


@@ -10,7 +10,6 @@ from torch.testing._internal.framework_utils import calculate_shards
from torch.testing._internal.common_device_type import \
(instantiate_device_type_tests, onlyCUDA, onlyOnCPUAndCUDA, dtypes)
from torch.testing._internal import mypy_wrapper
from torch.testing._internal import print_test_stats
# For testing TestCase methods and torch.testing functions
class TestTesting(TestCase):
@@ -647,639 +646,6 @@ class TestMypyWrapper(TestCase):
))
def fakehash(char):
return char * 40
def dummy_meta_meta() -> print_test_stats.ReportMetaMeta:
return {
'build_pr': '',
'build_tag': '',
'build_sha1': '',
'build_branch': '',
'build_job': '',
'build_workflow_id': '',
}
def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
return {
'name': name,
'seconds': seconds,
'errored': errored,
'failed': failed,
'skipped': skipped,
}
def make_report_v1(tests) -> print_test_stats.Version1Report:
suites = {
suite_name: {
'total_seconds': sum(case['seconds'] for case in cases),
'cases': cases,
}
for suite_name, cases in tests.items()
}
return {
**dummy_meta_meta(),
'total_seconds': sum(s['total_seconds'] for s in suites.values()),
'suites': suites,
}
def make_case_v2(seconds, status=None) -> print_test_stats.Version2Case:
return {
'seconds': seconds,
'status': status,
}
def make_report_v2(tests) -> print_test_stats.Version2Report:
files = {}
for file_name, file_suites in tests.items():
suites = {
suite_name: {
'total_seconds': sum(case['seconds'] for case in cases.values()),
'cases': cases,
}
for suite_name, cases in file_suites.items()
}
files[file_name] = {
'suites': suites,
'total_seconds': sum(suite['total_seconds'] for suite in suites.values()),
}
return {
**dummy_meta_meta(),
'format_version': 2,
'total_seconds': sum(s['total_seconds'] for s in files.values()),
'files': files,
}
class TestPrintTestStats(TestCase):
maxDiff = None
version1_report: print_test_stats.Version1Report = make_report_v1({
# input ordering of the suites is ignored
'Grault': [
# not printed: status same and time similar
makecase('test_grault0', 4.78, failed=True),
# status same, but time increased a lot
makecase('test_grault2', 1.473, errored=True),
],
# individual tests times changed, not overall suite
'Qux': [
# input ordering of the test cases is ignored
makecase('test_qux1', 0.001, skipped=True),
makecase('test_qux6', 0.002, skipped=True),
# time in bounds, but status changed
makecase('test_qux4', 7.158, failed=True),
# not printed because it's the same as before
makecase('test_qux7', 0.003, skipped=True),
makecase('test_qux5', 11.968),
makecase('test_qux3', 23.496),
],
# new test suite
'Bar': [
makecase('test_bar2', 3.742, failed=True),
makecase('test_bar1', 50.447),
],
# overall suite time changed but no individual tests
'Norf': [
makecase('test_norf1', 3),
makecase('test_norf2', 3),
makecase('test_norf3', 3),
makecase('test_norf4', 3),
],
# suite doesn't show up if it doesn't change enough
'Foo': [
makecase('test_foo1', 42),
makecase('test_foo2', 56),
],
})
version2_report: print_test_stats.Version2Report = make_report_v2(
{
'test_a': {
'Grault': {
'test_grault0': make_case_v2(4.78, 'failed'),
'test_grault2': make_case_v2(1.473, 'errored'),
},
'Qux': {
'test_qux1': make_case_v2(0.001, 'skipped'),
'test_qux6': make_case_v2(0.002, 'skipped'),
'test_qux4': make_case_v2(7.158, 'failed'),
'test_qux7': make_case_v2(0.003, 'skipped'),
'test_qux8': make_case_v2(11.968),
'test_qux3': make_case_v2(23.496),
}
},
'test_b': {
'Bar': {
'test_bar2': make_case_v2(3.742, 'failed'),
'test_bar1': make_case_v2(50.447),
},
# overall suite time changed but no individual tests
'Norf': {
'test_norf1': make_case_v2(3),
'test_norf2': make_case_v2(3),
'test_norf3': make_case_v2(3),
'test_norf4': make_case_v2(3),
},
},
'test_c': {
'Foo': {
'test_foo1': make_case_v2(42),
'test_foo2': make_case_v2(56),
},
}
})
def test_simplify(self):
self.assertEqual(
{
'': {
'Bar': {
'test_bar1': {'seconds': 50.447, 'status': None},
'test_bar2': {'seconds': 3.742, 'status': 'failed'},
},
'Foo': {
'test_foo1': {'seconds': 42, 'status': None},
'test_foo2': {'seconds': 56, 'status': None},
},
'Grault': {
'test_grault0': {'seconds': 4.78, 'status': 'failed'},
'test_grault2': {'seconds': 1.473, 'status': 'errored'},
},
'Norf': {
'test_norf1': {'seconds': 3, 'status': None},
'test_norf3': {'seconds': 3, 'status': None},
'test_norf2': {'seconds': 3, 'status': None},
'test_norf4': {'seconds': 3, 'status': None},
},
'Qux': {
'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
'test_qux3': {'seconds': 23.496, 'status': None},
'test_qux4': {'seconds': 7.158, 'status': 'failed'},
'test_qux5': {'seconds': 11.968, 'status': None},
'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
},
},
},
print_test_stats.simplify(self.version1_report)
)
self.assertEqual(
{
'test_a': {
'Grault': {
'test_grault0': {'seconds': 4.78, 'status': 'failed'},
'test_grault2': {'seconds': 1.473, 'status': 'errored'},
},
'Qux': {
'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
'test_qux3': {'seconds': 23.496, 'status': None},
'test_qux4': {'seconds': 7.158, 'status': 'failed'},
'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
'test_qux8': {'seconds': 11.968, 'status': None},
},
},
'test_b': {
'Bar': {
'test_bar1': {'seconds': 50.447, 'status': None},
'test_bar2': {'seconds': 3.742, 'status': 'failed'},
},
'Norf': {
'test_norf1': {'seconds': 3, 'status': None},
'test_norf2': {'seconds': 3, 'status': None},
'test_norf3': {'seconds': 3, 'status': None},
'test_norf4': {'seconds': 3, 'status': None},
},
},
'test_c': {
'Foo': {
'test_foo1': {'seconds': 42, 'status': None},
'test_foo2': {'seconds': 56, 'status': None},
},
},
},
print_test_stats.simplify(self.version2_report),
)
def test_analysis(self):
head_report = self.version1_report
base_reports = {
# bbbb has no reports, so base is cccc instead
fakehash('b'): [],
fakehash('c'): [
make_report_v1({
'Baz': [
makecase('test_baz2', 13.605),
# no recent suites have & skip this test
makecase('test_baz1', 0.004, skipped=True),
],
'Foo': [
makecase('test_foo1', 43),
# test added since dddd
makecase('test_foo2', 57),
],
'Grault': [
makecase('test_grault0', 4.88, failed=True),
makecase('test_grault1', 11.967, failed=True),
makecase('test_grault2', 0.395, errored=True),
makecase('test_grault3', 30.460),
],
'Norf': [
makecase('test_norf1', 2),
makecase('test_norf2', 2),
makecase('test_norf3', 2),
makecase('test_norf4', 2),
],
'Qux': [
makecase('test_qux3', 4.978, errored=True),
makecase('test_qux7', 0.002, skipped=True),
makecase('test_qux2', 5.618),
makecase('test_qux4', 7.766, errored=True),
makecase('test_qux6', 23.589, failed=True),
],
}),
],
fakehash('d'): [
make_report_v1({
'Foo': [
makecase('test_foo1', 40),
# removed in cccc
makecase('test_foo3', 17),
],
'Baz': [
# not skipped, so not included in stdev
makecase('test_baz1', 3.14),
],
'Qux': [
makecase('test_qux7', 0.004, skipped=True),
makecase('test_qux2', 6.02),
makecase('test_qux4', 20.932),
],
'Norf': [
makecase('test_norf1', 3),
makecase('test_norf2', 3),
makecase('test_norf3', 3),
makecase('test_norf4', 3),
],
'Grault': [
makecase('test_grault0', 5, failed=True),
makecase('test_grault1', 14.325, failed=True),
makecase('test_grault2', 0.31, errored=True),
],
}),
],
fakehash('e'): [],
fakehash('f'): [
make_report_v1({
'Foo': [
makecase('test_foo3', 24),
makecase('test_foo1', 43),
],
'Baz': [
makecase('test_baz2', 16.857),
],
'Qux': [
makecase('test_qux2', 6.422),
makecase('test_qux4', 6.382, errored=True),
],
'Norf': [
makecase('test_norf1', 0.9),
makecase('test_norf3', 0.9),
makecase('test_norf2', 0.9),
makecase('test_norf4', 0.9),
],
'Grault': [
makecase('test_grault0', 4.7, failed=True),
makecase('test_grault1', 13.146, failed=True),
makecase('test_grault2', 0.48, errored=True),
],
}),
],
}
simpler_head = print_test_stats.simplify(head_report)
simpler_base = {}
for commit, reports in base_reports.items():
simpler_base[commit] = [print_test_stats.simplify(r) for r in reports]
analysis = print_test_stats.analyze(
head_report=simpler_head,
base_reports=simpler_base,
)
self.assertEqual(
'''\
- class Baz:
- # was 15.23s ± 2.30s
-
- def test_baz1: ...
- # was 0.004s (skipped)
-
- def test_baz2: ...
- # was 15.231s ± 2.300s
class Grault:
# was 48.86s ± 1.19s
# now 6.25s
- def test_grault1: ...
- # was 13.146s ± 1.179s (failed)
- def test_grault3: ...
- # was 30.460s
class Qux:
# was 41.66s ± 1.06s
# now 42.63s
- def test_qux2: ...
- # was 6.020s ± 0.402s
! def test_qux3: ...
! # was 4.978s (errored)
! # now 23.496s
! def test_qux4: ...
! # was 7.074s ± 0.979s (errored)
! # now 7.158s (failed)
! def test_qux6: ...
! # was 23.589s (failed)
! # now 0.002s (skipped)
+ def test_qux1: ...
+ # now 0.001s (skipped)
+ def test_qux5: ...
+ # now 11.968s
+ class Bar:
+ # now 54.19s
+
+ def test_bar1: ...
+ # now 50.447s
+
+ def test_bar2: ...
+ # now 3.742s (failed)
''',
print_test_stats.anomalies(analysis),
)
def test_graph(self):
# HEAD is on master
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
* aaaaaaaaaa (HEAD) total time 502.99s
* bbbbbbbbbb (base) 1 report, total time 47.84s
* cccccccccc 1 report, total time 332.50s
* dddddddddd 0 reports
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=502.99,
base_seconds={
fakehash('b'): [47.84],
fakehash('c'): [332.50],
fakehash('d'): [],
},
on_master=True,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 9988.77s
|/
* bbbbbbbbbb (base) 121 reports, total time 7654.32s ± 55.55s
* cccccccccc 20 reports, total time 5555.55s ± 253.19s
* dddddddddd 1 report, total time 1234.56s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=9988.77,
base_seconds={
fakehash('b'): [7598.77] * 60 + [7654.32] + [7709.87] * 60,
fakehash('c'): [5308.77] * 10 + [5802.33] * 10,
fakehash('d'): [1234.56],
},
on_master=False,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 25.52s
| |
| : (5 commits)
|/
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
* dddddddddd (base) 15 reports, total time 58.92s ± 25.82s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=25.52,
base_seconds={
fakehash('b'): [],
fakehash('c'): [],
fakehash('d'): [52.25] * 14 + [152.26],
},
on_master=False,
ancestry_path=5,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 0.08s
|/|
| : (1 commit)
|
* bbbbbbbbbb 0 reports
* cccccccccc (base) 1 report, total time 0.09s
* dddddddddd 3 reports, total time 0.10s ± 0.05s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=0.08,
base_seconds={
fakehash('b'): [],
fakehash('c'): [0.09],
fakehash('d'): [0.05, 0.10, 0.15],
},
on_master=False,
other_ancestors=1,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 5.98s
| |
| : (1 commit)
|/|
| : (7 commits)
|
* bbbbbbbbbb (base) 2 reports, total time 6.02s ± 1.71s
* cccccccccc 0 reports
* dddddddddd 10 reports, total time 5.84s ± 0.92s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=5.98,
base_seconds={
fakehash('b'): [4.81, 7.23],
fakehash('c'): [],
fakehash('d'): [4.97] * 5 + [6.71] * 5,
},
on_master=False,
ancestry_path=1,
other_ancestors=7,
)
)
def test_regression_info(self):
self.assertEqual(
'''\
----- Historic stats comparison result ------
job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
|/
* bbbbbbbbbb (base) 1 report, total time 41.00s
* cccccccccc 1 report, total time 43.00s
|
:
Removed (across 1 suite) 1 test, totaling - 1.00s
Modified (across 1 suite) 1 test, totaling - 41.48s ± 2.12s
Added (across 1 suite) 1 test, totaling + 3.00s
''',
print_test_stats.regression_info(
head_sha=fakehash('a'),
head_report=make_report_v1({
'Foo': [
makecase('test_foo', 0.02, skipped=True),
makecase('test_baz', 3),
]}),
base_reports={
fakehash('b'): [
make_report_v1({
'Foo': [
makecase('test_foo', 40),
makecase('test_bar', 1),
],
}),
],
fakehash('c'): [
make_report_v1({
'Foo': [
makecase('test_foo', 43),
],
}),
],
},
job_name='foo_job',
on_master=False,
ancestry_path=0,
other_ancestors=0,
)
)
def test_regression_info_new_job(self):
self.assertEqual(
'''\
----- Historic stats comparison result ------
job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
| |
| : (3 commits)
|/|
| : (2 commits)
|
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
|
:
Removed (across 0 suites) 0 tests, totaling 0.00s
Modified (across 0 suites) 0 tests, totaling 0.00s
Added (across 1 suite) 2 tests, totaling + 3.02s
''',
print_test_stats.regression_info(
head_sha=fakehash('a'),
head_report=make_report_v1({
'Foo': [
makecase('test_foo', 0.02, skipped=True),
makecase('test_baz', 3),
]}),
base_reports={
fakehash('b'): [],
fakehash('c'): [],
},
job_name='foo_job',
on_master=False,
ancestry_path=3,
other_ancestors=2,
)
)
class TestFrameworkUtils(TestCase):
tests = [
'super_long_test',


@@ -13,82 +13,16 @@ from collections import defaultdict
from glob import glob
from pathlib import Path
from typing import (Any, DefaultDict, Dict, Iterable, Iterator, List, Optional,
Set, Tuple, Union, cast)
Set, Tuple, cast)
from xml.dom import minidom # type: ignore[import]
import requests
from typing_extensions import Literal, TypedDict
try:
import boto3 # type: ignore[import]
HAVE_BOTO3 = True
except ImportError:
HAVE_BOTO3 = False
# TODO: consolidate these typedefs with the identical ones in
# tools/test_history.py
Commit = str # 40-digit SHA-1 hex string
Status = Optional[Literal['errored', 'failed', 'skipped']]
from typing_extensions import TypedDict
from tools.stats_utils.s3_stat_parser import (newify_case, get_S3_object_from_bucket, get_S3_bucket_readonly,
Report, Status, Commit, HAVE_BOTO3, Version2Case, VersionedReport,
Version1Report, Version2Report, ReportMetaMeta)
class CaseMeta(TypedDict):
seconds: float
class Version1Case(CaseMeta):
name: str
errored: bool
failed: bool
skipped: bool
class Version1Suite(TypedDict):
total_seconds: float
cases: List[Version1Case]
class ReportMetaMeta(TypedDict):
build_pr: str
build_tag: str
build_sha1: Commit
build_branch: str
build_job: str
build_workflow_id: str
class ReportMeta(ReportMetaMeta):
total_seconds: float
class Version1Report(ReportMeta):
suites: Dict[str, Version1Suite]
class Version2Case(CaseMeta):
status: Status
class Version2Suite(TypedDict):
total_seconds: float
cases: Dict[str, Version2Case]
class Version2File(TypedDict):
total_seconds: float
suites: Dict[str, Version2Suite]
class VersionedReport(ReportMeta):
format_version: int
# report: Version2Report implies report['format_version'] == 2
class Version2Report(VersionedReport):
files: Dict[str, Version2File]
Report = Union[Version1Report, VersionedReport]
SimplerSuite = Dict[str, Version2Case]
SimplerFile = Dict[str, SimplerSuite]
@@ -115,24 +49,6 @@ class SuiteDiff(TypedDict):
cases: List[CaseDiff]
# TODO: consolidate this with the case_status function from
# tools/test_history.py
def case_status(case: Version1Case) -> Status:
for k in {'errored', 'failed', 'skipped'}:
if case[k]: # type: ignore[misc]
return cast(Status, k)
return None
# TODO: consolidate this with the newify_case function from
# tools/test_history.py
def newify_case(case: Version1Case) -> Version2Case:
return {
'seconds': case['seconds'],
'status': case_status(case),
}
# TODO: consolidate this with the get_cases function from
# tools/test_history.py
@@ -848,8 +764,7 @@ def send_report_to_s3(head_report: Version2Report) -> None:
return
now = datetime.datetime.utcnow().isoformat()
key = f'test_time/{sha1}/{job}/{now}Z.json.bz2' # Z meaning UTC
s3 = boto3.resource('s3')
obj = s3.Object('ossci-metrics', key)
obj = get_S3_object_from_bucket('ossci-metrics', key)
# use bz2 because the results are smaller than gzip, and the
# compression time penalty we pay is only about half a second for
# input files of a few megabytes in size like these JSON files, and
@@ -890,8 +805,7 @@ def print_regressions(head_report: Report, *, num_prev_commits: int) -> None:
commits = commits[:-1]
job = os.environ.get("CIRCLE_JOB", "")
s3 = boto3.resource("s3")
bucket = s3.Bucket(name="ossci-metrics")
bucket = get_S3_bucket_readonly('ossci-metrics')
index = {}
for commit in commits:
summaries = bucket.objects.filter(Prefix=f"test_time/{commit}/{job}/")



@@ -0,0 +1,129 @@
from typing import Dict, List, Optional, Union, Any, cast
from typing_extensions import Literal, TypedDict
try:
import boto3 # type: ignore[import]
import botocore # type: ignore[import]
HAVE_BOTO3 = True
except ImportError:
HAVE_BOTO3 = False
Commit = str # 40-digit SHA-1 hex string
Status = Optional[Literal['errored', 'failed', 'skipped']]
class CaseMeta(TypedDict):
seconds: float
class Version1Case(CaseMeta):
name: str
errored: bool
failed: bool
skipped: bool
class Version1Suite(TypedDict):
total_seconds: float
cases: List[Version1Case]
class ReportMetaMeta(TypedDict):
build_pr: str
build_tag: str
build_sha1: Commit
build_branch: str
build_job: str
build_workflow_id: str
class ReportMeta(ReportMetaMeta):
total_seconds: float
class Version1Report(ReportMeta):
suites: Dict[str, Version1Suite]
class Version2Case(CaseMeta):
status: Status
class Version2Suite(TypedDict):
total_seconds: float
cases: Dict[str, Version2Case]
class Version2File(TypedDict):
total_seconds: float
suites: Dict[str, Version2Suite]
class VersionedReport(ReportMeta):
format_version: int
# report: Version2Report implies report['format_version'] == 2
class Version2Report(VersionedReport):
files: Dict[str, Version2File]
Report = Union[Version1Report, VersionedReport]
def get_S3_bucket_readonly(bucket_name: str) -> Any:
s3 = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED))
return s3.Bucket(bucket_name)
def get_S3_object_from_bucket(bucket_name: str, object: str) -> Any:
s3 = boto3.resource('s3')
return s3.Object(bucket_name, object)
def case_status(case: Version1Case) -> Status:
for k in {'errored', 'failed', 'skipped'}:
if case[k]: # type: ignore[misc]
return cast(Status, k)
return None
def newify_case(case: Version1Case) -> Version2Case:
return {
'seconds': case['seconds'],
'status': case_status(case),
}
def get_cases(
*,
data: Report,
filename: Optional[str],
suite_name: Optional[str],
test_name: str,
) -> List[Version2Case]:
cases: List[Version2Case] = []
if 'format_version' not in data: # version 1 implicitly
v1report = cast(Version1Report, data)
suites = v1report['suites']
for sname, v1suite in suites.items():
if sname == suite_name or not suite_name:
for v1case in v1suite['cases']:
if v1case['name'] == test_name:
cases.append(newify_case(v1case))
else:
v_report = cast(VersionedReport, data)
version = v_report['format_version']
if version == 2:
v2report = cast(Version2Report, v_report)
for fname, v2file in v2report['files'].items():
if fname == filename or not filename:
for sname, v2suite in v2file['suites'].items():
if sname == suite_name or not suite_name:
v2case = v2suite['cases'].get(test_name)
if v2case:
cases.append(v2case)
else:
raise RuntimeError(f'Unknown format version: {version}')
return cases
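For orientation, here is a minimal usage sketch of the new module (a hedged example: it assumes boto3 is installed, and the SHA and job name below are placeholders, not real values). It mirrors the fetch-decompress-parse pattern used by the callers above.

```python
# Hypothetical end-to-end use of the consolidated helpers: list one commit's
# reports in the public ossci-metrics bucket and extract one test's timings.
import bz2
import json

from tools.stats_utils.s3_stat_parser import get_S3_bucket_readonly, get_cases

sha = 'a' * 40            # placeholder 40-char commit SHA
job = 'some_ci_job'       # placeholder CI job name

bucket = get_S3_bucket_readonly('ossci-metrics')
for summary in bucket.objects.filter(Prefix=f'test_time/{sha}/{job}/'):
    binary = summary.get()['Body'].read()
    report = json.loads(bz2.decompress(binary).decode('utf-8'))
    # get_cases handles both version-1 and version-2 report formats
    print(get_cases(data=report, filename=None, suite_name=None, test_name='test_foo1'))
```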

tools/test/test_stats.py (new file, 637 lines)

@@ -0,0 +1,637 @@
import unittest
from tools import print_test_stats
def fakehash(char):
return char * 40
def dummy_meta_meta() -> print_test_stats.ReportMetaMeta:
return {
'build_pr': '',
'build_tag': '',
'build_sha1': '',
'build_branch': '',
'build_job': '',
'build_workflow_id': '',
}
def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
return {
'name': name,
'seconds': seconds,
'errored': errored,
'failed': failed,
'skipped': skipped,
}
def make_report_v1(tests) -> print_test_stats.Version1Report:
suites = {
suite_name: {
'total_seconds': sum(case['seconds'] for case in cases),
'cases': cases,
}
for suite_name, cases in tests.items()
}
return {
**dummy_meta_meta(),
'total_seconds': sum(s['total_seconds'] for s in suites.values()),
'suites': suites,
}
def make_case_v2(seconds, status=None) -> print_test_stats.Version2Case:
return {
'seconds': seconds,
'status': status,
}
def make_report_v2(tests) -> print_test_stats.Version2Report:
files = {}
for file_name, file_suites in tests.items():
suites = {
suite_name: {
'total_seconds': sum(case['seconds'] for case in cases.values()),
'cases': cases,
}
for suite_name, cases in file_suites.items()
}
files[file_name] = {
'suites': suites,
'total_seconds': sum(suite['total_seconds'] for suite in suites.values()),
}
return {
**dummy_meta_meta(),
'format_version': 2,
'total_seconds': sum(s['total_seconds'] for s in files.values()),
'files': files,
}
maxDiff = None
class TestPrintTestStats(unittest.TestCase):
version1_report: print_test_stats.Version1Report = make_report_v1({
# input ordering of the suites is ignored
'Grault': [
# not printed: status same and time similar
makecase('test_grault0', 4.78, failed=True),
# status same, but time increased a lot
makecase('test_grault2', 1.473, errored=True),
],
# individual tests times changed, not overall suite
'Qux': [
# input ordering of the test cases is ignored
makecase('test_qux1', 0.001, skipped=True),
makecase('test_qux6', 0.002, skipped=True),
# time in bounds, but status changed
makecase('test_qux4', 7.158, failed=True),
# not printed because it's the same as before
makecase('test_qux7', 0.003, skipped=True),
makecase('test_qux5', 11.968),
makecase('test_qux3', 23.496),
],
# new test suite
'Bar': [
makecase('test_bar2', 3.742, failed=True),
makecase('test_bar1', 50.447),
],
# overall suite time changed but no individual tests
'Norf': [
makecase('test_norf1', 3),
makecase('test_norf2', 3),
makecase('test_norf3', 3),
makecase('test_norf4', 3),
],
# suite doesn't show up if it doesn't change enough
'Foo': [
makecase('test_foo1', 42),
makecase('test_foo2', 56),
],
})
version2_report: print_test_stats.Version2Report = make_report_v2(
{
'test_a': {
'Grault': {
'test_grault0': make_case_v2(4.78, 'failed'),
'test_grault2': make_case_v2(1.473, 'errored'),
},
'Qux': {
'test_qux1': make_case_v2(0.001, 'skipped'),
'test_qux6': make_case_v2(0.002, 'skipped'),
'test_qux4': make_case_v2(7.158, 'failed'),
'test_qux7': make_case_v2(0.003, 'skipped'),
'test_qux8': make_case_v2(11.968),
'test_qux3': make_case_v2(23.496),
}
},
'test_b': {
'Bar': {
'test_bar2': make_case_v2(3.742, 'failed'),
'test_bar1': make_case_v2(50.447),
},
# overall suite time changed but no individual tests
'Norf': {
'test_norf1': make_case_v2(3),
'test_norf2': make_case_v2(3),
'test_norf3': make_case_v2(3),
'test_norf4': make_case_v2(3),
},
},
'test_c': {
'Foo': {
'test_foo1': make_case_v2(42),
'test_foo2': make_case_v2(56),
},
}
})
def test_simplify(self):
self.assertEqual(
{
'': {
'Bar': {
'test_bar1': {'seconds': 50.447, 'status': None},
'test_bar2': {'seconds': 3.742, 'status': 'failed'},
},
'Foo': {
'test_foo1': {'seconds': 42, 'status': None},
'test_foo2': {'seconds': 56, 'status': None},
},
'Grault': {
'test_grault0': {'seconds': 4.78, 'status': 'failed'},
'test_grault2': {'seconds': 1.473, 'status': 'errored'},
},
'Norf': {
'test_norf1': {'seconds': 3, 'status': None},
'test_norf3': {'seconds': 3, 'status': None},
'test_norf2': {'seconds': 3, 'status': None},
'test_norf4': {'seconds': 3, 'status': None},
},
'Qux': {
'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
'test_qux3': {'seconds': 23.496, 'status': None},
'test_qux4': {'seconds': 7.158, 'status': 'failed'},
'test_qux5': {'seconds': 11.968, 'status': None},
'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
},
},
},
print_test_stats.simplify(self.version1_report)
)
self.assertEqual(
{
'test_a': {
'Grault': {
'test_grault0': {'seconds': 4.78, 'status': 'failed'},
'test_grault2': {'seconds': 1.473, 'status': 'errored'},
},
'Qux': {
'test_qux1': {'seconds': 0.001, 'status': 'skipped'},
'test_qux3': {'seconds': 23.496, 'status': None},
'test_qux4': {'seconds': 7.158, 'status': 'failed'},
'test_qux6': {'seconds': 0.002, 'status': 'skipped'},
'test_qux7': {'seconds': 0.003, 'status': 'skipped'},
'test_qux8': {'seconds': 11.968, 'status': None},
},
},
'test_b': {
'Bar': {
'test_bar1': {'seconds': 50.447, 'status': None},
'test_bar2': {'seconds': 3.742, 'status': 'failed'},
},
'Norf': {
'test_norf1': {'seconds': 3, 'status': None},
'test_norf2': {'seconds': 3, 'status': None},
'test_norf3': {'seconds': 3, 'status': None},
'test_norf4': {'seconds': 3, 'status': None},
},
},
'test_c': {
'Foo': {
'test_foo1': {'seconds': 42, 'status': None},
'test_foo2': {'seconds': 56, 'status': None},
},
},
},
print_test_stats.simplify(self.version2_report),
)
def test_analysis(self):
head_report = self.version1_report
base_reports = {
# bbbb has no reports, so base is cccc instead
fakehash('b'): [],
fakehash('c'): [
make_report_v1({
'Baz': [
makecase('test_baz2', 13.605),
# no recent suites have & skip this test
makecase('test_baz1', 0.004, skipped=True),
],
'Foo': [
makecase('test_foo1', 43),
# test added since dddd
makecase('test_foo2', 57),
],
'Grault': [
makecase('test_grault0', 4.88, failed=True),
makecase('test_grault1', 11.967, failed=True),
makecase('test_grault2', 0.395, errored=True),
makecase('test_grault3', 30.460),
],
'Norf': [
makecase('test_norf1', 2),
makecase('test_norf2', 2),
makecase('test_norf3', 2),
makecase('test_norf4', 2),
],
'Qux': [
makecase('test_qux3', 4.978, errored=True),
makecase('test_qux7', 0.002, skipped=True),
makecase('test_qux2', 5.618),
makecase('test_qux4', 7.766, errored=True),
makecase('test_qux6', 23.589, failed=True),
],
}),
],
fakehash('d'): [
make_report_v1({
'Foo': [
makecase('test_foo1', 40),
# removed in cccc
makecase('test_foo3', 17),
],
'Baz': [
# not skipped, so not included in stdev
makecase('test_baz1', 3.14),
],
'Qux': [
makecase('test_qux7', 0.004, skipped=True),
makecase('test_qux2', 6.02),
makecase('test_qux4', 20.932),
],
'Norf': [
makecase('test_norf1', 3),
makecase('test_norf2', 3),
makecase('test_norf3', 3),
makecase('test_norf4', 3),
],
'Grault': [
makecase('test_grault0', 5, failed=True),
makecase('test_grault1', 14.325, failed=True),
makecase('test_grault2', 0.31, errored=True),
],
}),
],
fakehash('e'): [],
fakehash('f'): [
make_report_v1({
'Foo': [
makecase('test_foo3', 24),
makecase('test_foo1', 43),
],
'Baz': [
makecase('test_baz2', 16.857),
],
'Qux': [
makecase('test_qux2', 6.422),
makecase('test_qux4', 6.382, errored=True),
],
'Norf': [
makecase('test_norf1', 0.9),
makecase('test_norf3', 0.9),
makecase('test_norf2', 0.9),
makecase('test_norf4', 0.9),
],
'Grault': [
makecase('test_grault0', 4.7, failed=True),
makecase('test_grault1', 13.146, failed=True),
makecase('test_grault2', 0.48, errored=True),
],
}),
],
}
simpler_head = print_test_stats.simplify(head_report)
simpler_base = {}
for commit, reports in base_reports.items():
simpler_base[commit] = [print_test_stats.simplify(r) for r in reports]
analysis = print_test_stats.analyze(
head_report=simpler_head,
base_reports=simpler_base,
)
self.assertEqual(
'''\
- class Baz:
- # was 15.23s ± 2.30s
-
- def test_baz1: ...
- # was 0.004s (skipped)
-
- def test_baz2: ...
- # was 15.231s ± 2.300s
class Grault:
# was 48.86s ± 1.19s
# now 6.25s
- def test_grault1: ...
- # was 13.146s ± 1.179s (failed)
- def test_grault3: ...
- # was 30.460s
class Qux:
# was 41.66s ± 1.06s
# now 42.63s
- def test_qux2: ...
- # was 6.020s ± 0.402s
! def test_qux3: ...
! # was 4.978s (errored)
! # now 23.496s
! def test_qux4: ...
! # was 7.074s ± 0.979s (errored)
! # now 7.158s (failed)
! def test_qux6: ...
! # was 23.589s (failed)
! # now 0.002s (skipped)
+ def test_qux1: ...
+ # now 0.001s (skipped)
+ def test_qux5: ...
+ # now 11.968s
+ class Bar:
+ # now 54.19s
+
+ def test_bar1: ...
+ # now 50.447s
+
+ def test_bar2: ...
+ # now 3.742s (failed)
''',
print_test_stats.anomalies(analysis),
)
def test_graph(self):
# HEAD is on master
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
* aaaaaaaaaa (HEAD) total time 502.99s
* bbbbbbbbbb (base) 1 report, total time 47.84s
* cccccccccc 1 report, total time 332.50s
* dddddddddd 0 reports
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=502.99,
base_seconds={
fakehash('b'): [47.84],
fakehash('c'): [332.50],
fakehash('d'): [],
},
on_master=True,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 9988.77s
|/
* bbbbbbbbbb (base) 121 reports, total time 7654.32s ± 55.55s
* cccccccccc 20 reports, total time 5555.55s ± 253.19s
* dddddddddd 1 report, total time 1234.56s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=9988.77,
base_seconds={
fakehash('b'): [7598.77] * 60 + [7654.32] + [7709.87] * 60,
fakehash('c'): [5308.77] * 10 + [5802.33] * 10,
fakehash('d'): [1234.56],
},
on_master=False,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 25.52s
| |
| : (5 commits)
|/
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
* dddddddddd (base) 15 reports, total time 58.92s ± 25.82s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=25.52,
base_seconds={
fakehash('b'): [],
fakehash('c'): [],
fakehash('d'): [52.25] * 14 + [152.26],
},
on_master=False,
ancestry_path=5,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 0.08s
|/|
| : (1 commit)
|
* bbbbbbbbbb 0 reports
* cccccccccc (base) 1 report, total time 0.09s
* dddddddddd 3 reports, total time 0.10s ± 0.05s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=0.08,
base_seconds={
fakehash('b'): [],
fakehash('c'): [0.09],
fakehash('d'): [0.05, 0.10, 0.15],
},
on_master=False,
other_ancestors=1,
)
)
self.assertEqual(
'''\
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 5.98s
| |
| : (1 commit)
|/|
| : (7 commits)
|
* bbbbbbbbbb (base) 2 reports, total time 6.02s ± 1.71s
* cccccccccc 0 reports
* dddddddddd 10 reports, total time 5.84s ± 0.92s
|
:
''',
print_test_stats.graph(
head_sha=fakehash('a'),
head_seconds=5.98,
base_seconds={
fakehash('b'): [4.81, 7.23],
fakehash('c'): [],
fakehash('d'): [4.97] * 5 + [6.71] * 5,
},
on_master=False,
ancestry_path=1,
other_ancestors=7,
)
)
def test_regression_info(self):
self.assertEqual(
'''\
----- Historic stats comparison result ------
job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
|/
* bbbbbbbbbb (base) 1 report, total time 41.00s
* cccccccccc 1 report, total time 43.00s
|
:
Removed (across 1 suite) 1 test, totaling - 1.00s
Modified (across 1 suite) 1 test, totaling - 41.48s ± 2.12s
Added (across 1 suite) 1 test, totaling + 3.00s
''',
print_test_stats.regression_info(
head_sha=fakehash('a'),
head_report=make_report_v1({
'Foo': [
makecase('test_foo', 0.02, skipped=True),
makecase('test_baz', 3),
]}),
base_reports={
fakehash('b'): [
make_report_v1({
'Foo': [
makecase('test_foo', 40),
makecase('test_bar', 1),
],
}),
],
fakehash('c'): [
make_report_v1({
'Foo': [
makecase('test_foo', 43),
],
}),
],
},
job_name='foo_job',
on_master=False,
ancestry_path=0,
other_ancestors=0,
)
)
def test_regression_info_new_job(self):
self.assertEqual(
'''\
----- Historic stats comparison result ------
job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
Commit graph (base is most recent master ancestor with at least one S3 report):
: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
| |
| : (3 commits)
|/|
| : (2 commits)
|
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
|
:
Removed (across 0 suites) 0 tests, totaling 0.00s
Modified (across 0 suites) 0 tests, totaling 0.00s
Added (across 1 suite) 2 tests, totaling + 3.02s
''',
print_test_stats.regression_info(
head_sha=fakehash('a'),
head_report=make_report_v1({
'Foo': [
makecase('test_foo', 0.02, skipped=True),
makecase('test_baz', 3),
]}),
base_reports={
fakehash('b'): [],
fakehash('c'): [],
},
job_name='foo_job',
on_master=False,
ancestry_path=3,
other_ancestors=2,
)
)
if __name__ == '__main__':
unittest.main()


@@ -6,11 +6,8 @@ import json
import subprocess
from collections import defaultdict
from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
import boto3 # type: ignore[import]
import botocore # type: ignore[import]
from typing_extensions import Literal, TypedDict
from typing import Any, Dict, List, Optional, Set, Tuple
from tools.stats_utils.s3_stat_parser import (get_S3_bucket_readonly, get_cases, Report)
def get_git_commit_history(
@@ -35,73 +32,6 @@ def get_object_summaries(*, bucket: Any, sha: str) -> Dict[str, List[Any]]:
by_job[job].append(summary)
return dict(by_job)
# TODO: consolidate these typedefs with the identical ones in
# torch/testing/_internal/print_test_stats.py
Commit = str # 40-digit SHA-1 hex string
Status = Optional[Literal['errored', 'failed', 'skipped']]
class CaseMeta(TypedDict):
seconds: float
class Version1Case(CaseMeta):
name: str
errored: bool
failed: bool
skipped: bool
class Version1Suite(TypedDict):
total_seconds: float
cases: List[Version1Case]
class ReportMetaMeta(TypedDict):
build_pr: str
build_tag: str
build_sha1: Commit
build_branch: str
build_job: str
build_workflow_id: str
class ReportMeta(ReportMetaMeta):
total_seconds: float
class Version1Report(ReportMeta):
suites: Dict[str, Version1Suite]
class Version2Case(CaseMeta):
status: Status
class Version2Suite(TypedDict):
total_seconds: float
cases: Dict[str, Version2Case]
class Version2File(TypedDict):
total_seconds: float
suites: Dict[str, Version2Suite]
class VersionedReport(ReportMeta):
format_version: int
# report: Version2Report implies report['format_version'] == 2
class Version2Report(VersionedReport):
files: Dict[str, Version2File]
Report = Union[Version1Report, VersionedReport]
def get_jsons(
jobs: Optional[List[str]],
summaries: Dict[str, Any],
@@ -116,59 +46,6 @@ def get_jsons(
}
# TODO: consolidate this with the case_status function from
# torch/testing/_internal/print_test_stats.py
def case_status(case: Version1Case) -> Status:
for k in {'errored', 'failed', 'skipped'}:
if case[k]: # type: ignore[misc]
return cast(Status, k)
return None
# TODO: consolidate this with the newify_case function from
# torch/testing/_internal/print_test_stats.py
def newify_case(case: Version1Case) -> Version2Case:
return {
'seconds': case['seconds'],
'status': case_status(case),
}
# TODO: consolidate this with the simplify function from
# torch/testing/_internal/print_test_stats.py
def get_cases(
*,
data: Report,
filename: Optional[str],
suite_name: Optional[str],
test_name: str,
) -> List[Version2Case]:
cases: List[Version2Case] = []
if 'format_version' not in data: # version 1 implicitly
v1report = cast(Version1Report, data)
suites = v1report['suites']
for sname, v1suite in suites.items():
if sname == suite_name or not suite_name:
for v1case in v1suite['cases']:
if v1case['name'] == test_name:
cases.append(newify_case(v1case))
else:
v_report = cast(VersionedReport, data)
version = v_report['format_version']
if version == 2:
v2report = cast(Version2Report, v_report)
for fname, v2file in v2report['files'].items():
if fname == filename or not filename:
for sname, v2suite in v2file['suites'].items():
if sname == suite_name or not suite_name:
v2case = v2suite['cases'].get(test_name)
if v2case:
cases.append(v2case)
else:
raise RuntimeError(f'Unknown format version: {version}')
return cases
def make_column(
*,
data: Optional[Report],
@@ -455,9 +332,7 @@ indicated test was not found in that report.
parser.error('No jobs specified.')
commits = get_git_commit_history(path=args.pytorch, ref=args.ref)
s3 = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED))
bucket = s3.Bucket('ossci-metrics')
bucket = get_S3_bucket_readonly('ossci-metrics')
display_history(
bucket=bucket,