mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
Revert D26232345: [pytorch][PR] Report test time regressions

Test Plan: revert-hammer

Differential Revision: D26232345 (7467f90b13)

Original commit changeset: b687b1737519

fbshipit-source-id: 10a031c5500b083f7c82f2ae2743b671c5a07bff

This commit is contained in:
parent 1aaddd83a5
commit 21dccbca62

@@ -642,9 +642,9 @@ jobs:
 export CIRCLE_JOB="$CIRCLE_JOB"
 export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
 cd workspace
-python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
+python test/print_test_stats.py --upload-to-s3 test
 EOL
-echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
+echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
 unbuffer bash command.sh | ts
 
 echo "Retrieving test reports"

@@ -201,9 +201,9 @@ jobs:
 export CIRCLE_JOB="$CIRCLE_JOB"
 export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
 cd workspace
-python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
+python test/print_test_stats.py --upload-to-s3 test
 EOL
-echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
+echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
 unbuffer bash command.sh | ts
 
 echo "Retrieving test reports"

mypy.ini (6 lines changed)

@@ -242,9 +242,3 @@ ignore_missing_imports = True
 
 [mypy-mypy.*]
 ignore_missing_imports = True
-
-[mypy-xml.*]
-ignore_missing_imports = True
-
-[mypy-boto3.*]
-ignore_missing_imports = True

test/print_test_stats.py (new executable file, 254 lines)
@@ -0,0 +1,254 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Read and print test results statistics
from xml.dom import minidom
from glob import glob
import bz2
import json
import os
import statistics
import time

import boto3
import datetime
import requests


class TestCase:
    def __init__(self, dom):
        self.class_name = str(dom.attributes['classname'].value)
        self.name = str(dom.attributes['name'].value)
        self.time = float(dom.attributes['time'].value)
        self.errored = len(dom.getElementsByTagName('error')) > 0
        self.failed = len(dom.getElementsByTagName('failure')) > 0
        self.skipped = len(dom.getElementsByTagName('skipped')) > 0


class TestSuite:
    def __init__(self, name):
        self.name = name
        self.test_cases = []
        self.failed_count = 0
        self.skipped_count = 0
        self.errored_count = 0
        self.total_time = 0.0

    def __repr__(self):
        rc = f'{self.name} run_time: {self.total_time:.2f} tests: {len(self.test_cases)}'
        if self.skipped_count > 0:
            rc += f' skipped: {self.skipped_count}'
        return f'TestSuite({rc})'

    def append(self, test_case):
        self.test_cases.append(test_case)
        self.total_time += test_case.time
        self.failed_count += 1 if test_case.failed else 0
        self.skipped_count += 1 if test_case.skipped else 0
        self.errored_count += 1 if test_case.errored else 0

    def print_report(self, num_longest=3):
        sorted_tests = sorted(self.test_cases, key=lambda x: x.time)
        test_count = len(sorted_tests)
        print(f"class {self.name}:")
        print(f" tests: {test_count} failed: {self.failed_count} skipped: {self.skipped_count} errored: {self.errored_count}")
        print(f" run_time: {self.total_time:.2f} seconds")
        print(f" avg_time: {self.total_time/test_count:.2f} seconds")
        if test_count >= 2:
            print(f" median_time: {statistics.median(x.time for x in sorted_tests):.2f} seconds")
        sorted_tests = sorted_tests[-num_longest:]
        print(f" {len(sorted_tests)} longest tests:")
        for test in reversed(sorted_tests):
            print(f" {test.name} time: {test.time:.2f} seconds")
        print("")


def parse_report(path):
    dom = minidom.parse(path)
    for test_case in dom.getElementsByTagName('testcase'):
        yield TestCase(test_case)


def parse_reports(folder):
    reports = glob(os.path.join(folder, '**', '*.xml'), recursive=True)
    tests_by_class = dict()
    for report in reports:
        for test_case in parse_report(report):
            class_name = test_case.class_name
            if class_name not in tests_by_class:
                tests_by_class[class_name] = TestSuite(class_name)
            tests_by_class[class_name].append(test_case)
    return tests_by_class


def build_info():
    return {
        "build_pr": os.environ.get("CIRCLE_PR_NUMBER"),
        "build_tag": os.environ.get("CIRCLE_TAG"),
        "build_sha1": os.environ.get("CIRCLE_SHA1"),
        "build_branch": os.environ.get("CIRCLE_BRANCH"),
        "build_job": os.environ.get("CIRCLE_JOB"),
        "build_workflow_id": os.environ.get("CIRCLE_WORKFLOW_ID"),
    }


def build_message(test_case):
    return {
        "normal": {
            **build_info(),
            "test_suite_name": test_case.class_name,
            "test_case_name": test_case.name,
        },
        "int": {
            "time": int(time.time()),
            "test_total_count": 1,
            "test_total_time": int(test_case.time * 1000),
            "test_failed_count": 1 if test_case.failed > 0 else 0,
            "test_skipped_count": 1 if test_case.skipped > 0 else 0,
            "test_errored_count": 1 if test_case.errored > 0 else 0,
        },
    }


def send_report_to_scribe(reports):
    access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN")

    if not access_token:
        print("No scribe access token provided, skip sending report!")
        return
    print("Scribe access token provided, sending report...")
    url = "https://graph.facebook.com/scribe_logs"
    r = requests.post(
        url,
        data={
            "access_token": access_token,
            "logs": json.dumps(
                [
                    {
                        "category": "perfpipe_pytorch_test_times",
                        "message": json.dumps(build_message(test_case)),
                        "line_escape": False,
                    }
                    for name in sorted(reports.keys())
                    for test_case in reports[name].test_cases
                ]
            ),
        },
    )
    r.raise_for_status()


def send_report_to_s3(reports, *, total_seconds):
    job = os.environ.get('CIRCLE_JOB')
    sha1 = os.environ.get('CIRCLE_SHA1')
    branch = os.environ.get('CIRCLE_BRANCH', '')
    if branch not in ['master', 'nightly'] and not branch.startswith("release/"):
        print("S3 upload only enabled on master, nightly and release branches.")
        print(f"skipping test report on branch: {branch}")
        return
    now = datetime.datetime.utcnow().isoformat()
    key = f'test_time/{sha1}/{job}/{now}Z.json.bz2'  # Z meaning UTC
    s3 = boto3.resource('s3')
    try:
        s3.get_bucket_acl(Bucket='ossci-metrics')
    except Exception as e:
        print(f"AWS ACL failed: {e}")
    print("AWS credential found, uploading to S3...")

    obj = s3.Object('ossci-metrics', key)
    print("")
    # use bz2 because the results are smaller than gzip, and the
    # compression time penalty we pay is only about half a second for
    # input files of a few megabytes in size like these JSON files, and
    # because for some reason zlib doesn't seem to play nice with the
    # gunzip command whereas Python's bz2 does work with bzip2
    obj.put(Body=bz2.compress(json.dumps({
        **build_info(),
        'total_seconds': total_seconds,
        'suites': {
            name: {
                'total_seconds': suite.total_time,
                'cases': [
                    {
                        'name': case.name,
                        'seconds': case.time,
                        'errored': case.errored,
                        'failed': case.failed,
                        'skipped': case.skipped,
                    }
                    for case in suite.test_cases
                ],
            }
            for name, suite in reports.items()
        }
    }).encode()))
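
# Editorial sketch, not part of the diff: a round trip for the bz2-compressed
# JSON blob that send_report_to_s3 writes above. The payload is a made-up
# example, not real report data; bz2 and json are already imported here.
_payload = {'total_seconds': 1.23, 'suites': {}}
_blob = bz2.compress(json.dumps(_payload).encode())
assert json.loads(bz2.decompress(_blob).decode()) == _payload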


def positive_integer(value):
    parsed = int(value)
    if parsed < 1:
        raise argparse.ArgumentTypeError(f"{value} is not a natural number")
    return parsed


def positive_float(value):
    parsed = float(value)
    if parsed <= 0.0:
        raise argparse.ArgumentTypeError(f"{value} is not a positive rational number")
    return parsed


if __name__ == '__main__':
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        "Print statistics from test XML output.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--longest-of-class",
        type=positive_integer,
        default=3,
        metavar="N",
        help="how many longest tests to show for each class",
    )
    parser.add_argument(
        "--class-print-threshold",
        type=positive_float,
        default=1.0,
        metavar="N",
        help="Minimal total time to warrant class report",
    )
    parser.add_argument(
        "--longest-of-run",
        type=positive_integer,
        default=10,
        metavar="N",
        help="how many longest tests to show from the entire run",
    )
    parser.add_argument(
        "--upload-to-s3",
        action="store_true",
        help="upload test time to S3 bucket",
    )
    parser.add_argument(
        "folder",
        help="test report folder",
    )
    args = parser.parse_args()

    reports = parse_reports(args.folder)
    if len(reports) == 0:
        print(f"No test reports found in {args.folder}")
        sys.exit(0)

    send_report_to_scribe(reports)

    longest_tests = []
    total_time = 0
    for name in sorted(reports.keys()):
        test_suite = reports[name]
        if test_suite.total_time >= args.class_print_threshold:
            test_suite.print_report(args.longest_of_class)
        total_time += test_suite.total_time
        longest_tests.extend(test_suite.test_cases)
    longest_tests = sorted(longest_tests, key=lambda x: x.time)[-args.longest_of_run:]

    if args.upload_to_s3:
        send_report_to_s3(reports, total_seconds=total_time)

    print(f"Total runtime is {datetime.timedelta(seconds=int(total_time))}")
    print(f"{len(longest_tests)} longest tests of entire run:")
    for test_case in reversed(longest_tests):
        print(f" {test_case.class_name}.{test_case.name} time: {test_case.time:.2f} seconds")
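
A minimal sketch of how the restored script's pieces compose when used as a library rather than via its CLI. This is an editorial illustration, not part of the commit: it assumes the script is importable from a checkout, and the 'test/test-reports' folder path is hypothetical.

import sys
sys.path.insert(0, 'test')
from print_test_stats import parse_reports

reports = parse_reports('test/test-reports')
for name in sorted(reports):
    suite = reports[name]
    if suite.total_time >= 1.0:            # mirrors --class-print-threshold
        suite.print_report(num_longest=3)  # mirrors --longest-of-class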

@@ -8,7 +8,6 @@ from torch.testing._internal.common_utils import \
 from torch.testing._internal.common_device_type import \
     (instantiate_device_type_tests, onlyCUDA, onlyOnCPUAndCUDA, dtypes)
 from torch.testing._internal import mypy_wrapper
-from torch.testing._internal import print_test_stats
 
 # For testing TestCase methods and torch.testing functions
 class TestTesting(TestCase):

@@ -546,456 +545,5 @@ class TestMypyWrapper(TestCase):
        ))


def fakehash(char):
    return char * 40


def makecase(name, seconds, *, errored=False, failed=False, skipped=False):
    return {
        'name': name,
        'seconds': seconds,
        'errored': errored,
        'failed': failed,
        'skipped': skipped,
    }


def makereport(tests):
    suites = {
        suite_name: {
            'total_seconds': sum(case['seconds'] for case in cases),
            'cases': cases,
        }
        for suite_name, cases in tests.items()
    }
    return {
        'total_seconds': sum(s['total_seconds'] for s in suites.values()),
        'suites': suites,
    }


class TestPrintTestStats(TestCase):
    maxDiff = None

    def test_analysis(self):
        head_report = makereport({
            # input ordering of the suites is ignored
            'Grault': [
                # not printed: status same and time similar
                makecase('test_grault0', 4.78, failed=True),
                # status same, but time increased a lot
                makecase('test_grault2', 1.473, errored=True),
            ],
            # individual tests times changed, not overall suite
            'Qux': [
                # input ordering of the test cases is ignored
                makecase('test_qux1', 0.001, skipped=True),
                makecase('test_qux6', 0.002, skipped=True),
                # time in bounds, but status changed
                makecase('test_qux4', 7.158, failed=True),
                # not printed because it's the same as before
                makecase('test_qux7', 0.003, skipped=True),
                makecase('test_qux5', 11.968),
                makecase('test_qux3', 23.496),
            ],
            # new test suite
            'Bar': [
                makecase('test_bar2', 3.742, failed=True),
                makecase('test_bar1', 50.447),
            ],
            # overall suite time changed but no individual tests
            'Norf': [
                makecase('test_norf1', 3),
                makecase('test_norf2', 3),
                makecase('test_norf3', 3),
                makecase('test_norf4', 3),
            ],
            # suite doesn't show up if it doesn't change enough
            'Foo': [
                makecase('test_foo1', 42),
                makecase('test_foo2', 56),
            ],
        })

        base_reports = {
            # bbbb has no reports, so base is cccc instead
            fakehash('b'): [],
            fakehash('c'): [
                makereport({
                    'Baz': [
                        makecase('test_baz2', 13.605),
                        # no recent suites have & skip this test
                        makecase('test_baz1', 0.004, skipped=True),
                    ],
                    'Foo': [
                        makecase('test_foo1', 43),
                        # test added since dddd
                        makecase('test_foo2', 57),
                    ],
                    'Grault': [
                        makecase('test_grault0', 4.88, failed=True),
                        makecase('test_grault1', 11.967, failed=True),
                        makecase('test_grault2', 0.395, errored=True),
                        makecase('test_grault3', 30.460),
                    ],
                    'Norf': [
                        makecase('test_norf1', 2),
                        makecase('test_norf2', 2),
                        makecase('test_norf3', 2),
                        makecase('test_norf4', 2),
                    ],
                    'Qux': [
                        makecase('test_qux3', 4.978, errored=True),
                        makecase('test_qux7', 0.002, skipped=True),
                        makecase('test_qux2', 5.618),
                        makecase('test_qux4', 7.766, errored=True),
                        makecase('test_qux6', 23.589, failed=True),
                    ],
                }),
            ],
            fakehash('d'): [
                makereport({
                    'Foo': [
                        makecase('test_foo1', 40),
                        # removed in cccc
                        makecase('test_foo3', 17),
                    ],
                    'Baz': [
                        # not skipped, so not included in stdev
                        makecase('test_baz1', 3.14),
                    ],
                    'Qux': [
                        makecase('test_qux7', 0.004, skipped=True),
                        makecase('test_qux2', 6.02),
                        makecase('test_qux4', 20.932),
                    ],
                    'Norf': [
                        makecase('test_norf1', 3),
                        makecase('test_norf2', 3),
                        makecase('test_norf3', 3),
                        makecase('test_norf4', 3),
                    ],
                    'Grault': [
                        makecase('test_grault0', 5, failed=True),
                        makecase('test_grault1', 14.325, failed=True),
                        makecase('test_grault2', 0.31, errored=True),
                    ],
                }),
            ],
            fakehash('e'): [],
            fakehash('f'): [
                makereport({
                    'Foo': [
                        makecase('test_foo3', 24),
                        makecase('test_foo1', 43),
                    ],
                    'Baz': [
                        makecase('test_baz2', 16.857),
                    ],
                    'Qux': [
                        makecase('test_qux2', 6.422),
                        makecase('test_qux4', 6.382, errored=True),
                    ],
                    'Norf': [
                        makecase('test_norf1', 0.9),
                        makecase('test_norf3', 0.9),
                        makecase('test_norf2', 0.9),
                        makecase('test_norf4', 0.9),
                    ],
                    'Grault': [
                        makecase('test_grault0', 4.7, failed=True),
                        makecase('test_grault1', 13.146, failed=True),
                        makecase('test_grault2', 0.48, errored=True),
                    ],
                }),
            ],
        }

        simpler_head = print_test_stats.simplify(head_report)
        simpler_base = {}
        for commit, reports in base_reports.items():
            simpler_base[commit] = [print_test_stats.simplify(r) for r in reports]
        analysis = print_test_stats.analyze(
            head_report=simpler_head,
            base_reports=simpler_base,
        )

        self.assertEqual(
            '''\

- class Baz:
- # was 15.23s ± 2.30s
-
- def test_baz1: ...
- # was 0.004s (skipped)
-
- def test_baz2: ...
- # was 15.231s ± 2.300s


class Grault:
# was 48.86s ± 1.19s
# now 6.25s

- def test_grault1: ...
- # was 13.146s ± 1.179s (failed)

- def test_grault3: ...
- # was 30.460s


class Qux:
# was 41.66s ± 1.06s
# now 42.63s

- def test_qux2: ...
- # was 6.020s ± 0.402s

! def test_qux3: ...
! # was 4.978s (errored)
! # now 23.496s

! def test_qux4: ...
! # was 7.074s ± 0.979s (errored)
! # now 7.158s (failed)

! def test_qux6: ...
! # was 23.589s (failed)
! # now 0.002s (skipped)

+ def test_qux1: ...
+ # now 0.001s (skipped)

+ def test_qux5: ...
+ # now 11.968s


+ class Bar:
+ # now 54.19s
+
+ def test_bar1: ...
+ # now 50.447s
+
+ def test_bar2: ...
+ # now 3.742s (failed)

''',
            print_test_stats.anomalies(analysis),
        )

    def test_graph(self):
        # HEAD is on master
        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
* aaaaaaaaaa (HEAD) total time 502.99s
* bbbbbbbbbb (base) 1 report, total time 47.84s
* cccccccccc 1 report, total time 332.50s
* dddddddddd 0 reports
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=502.99,
                base_seconds={
                    fakehash('b'): [47.84],
                    fakehash('c'): [332.50],
                    fakehash('d'): [],
                },
                on_master=True,
            )
        )

        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 9988.77s
|/
* bbbbbbbbbb (base) 121 reports, total time 7654.32s ± 55.55s
* cccccccccc 20 reports, total time 5555.55s ± 253.19s
* dddddddddd 1 report, total time 1234.56s
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=9988.77,
                base_seconds={
                    fakehash('b'): [7598.77] * 60 + [7654.32] + [7709.87] * 60,
                    fakehash('c'): [5308.77] * 10 + [5802.33] * 10,
                    fakehash('d'): [1234.56],
                },
                on_master=False,
            )
        )

        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 25.52s
| |
| : (5 commits)
|/
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
* dddddddddd (base) 15 reports, total time 58.92s ± 25.82s
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=25.52,
                base_seconds={
                    fakehash('b'): [],
                    fakehash('c'): [],
                    fakehash('d'): [52.25] * 14 + [152.26],
                },
                on_master=False,
                ancestry_path=5,
            )
        )

        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 0.08s
|/|
| : (1 commit)
|
* bbbbbbbbbb 0 reports
* cccccccccc (base) 1 report, total time 0.09s
* dddddddddd 3 reports, total time 0.10s ± 0.05s
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=0.08,
                base_seconds={
                    fakehash('b'): [],
                    fakehash('c'): [0.09],
                    fakehash('d'): [0.05, 0.10, 0.15],
                },
                on_master=False,
                other_ancestors=1,
            )
        )

        self.assertEqual(
            '''\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 5.98s
| |
| : (1 commit)
|/|
| : (7 commits)
|
* bbbbbbbbbb (base) 2 reports, total time 6.02s ± 1.71s
* cccccccccc 0 reports
* dddddddddd 10 reports, total time 5.84s ± 0.92s
|
:
''',
            print_test_stats.graph(
                head_sha=fakehash('a'),
                head_seconds=5.98,
                base_seconds={
                    fakehash('b'): [4.81, 7.23],
                    fakehash('c'): [],
                    fakehash('d'): [4.97] * 5 + [6.71] * 5,
                },
                on_master=False,
                ancestry_path=1,
                other_ancestors=7,
            )
        )

    def test_regression_info(self):
        self.assertEqual(
            '''\
----- Historic stats comparison result ------

job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa


class Foo:
# was 42.50s ± 2.12s
# now 3.02s

- def test_bar: ...
- # was 1.000s

! def test_foo: ...
! # was 41.500s ± 2.121s
! # now 0.020s (skipped)

+ def test_baz: ...
+ # now 3.000s


Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
|/
* bbbbbbbbbb (base) 1 report, total time 41.00s
* cccccccccc 1 report, total time 43.00s
|
:

Removed (across 1 suite) 1 test, totaling - 1.00s
Modified (across 1 suite) 1 test, totaling - 41.48s ± 2.12s
Added (across 1 suite) 1 test, totaling + 3.00s
''',
            print_test_stats.regression_info(
                head_sha=fakehash('a'),
                head_report=makereport({
                    'Foo': [
                        makecase('test_foo', 0.02, skipped=True),
                        makecase('test_baz', 3),
                    ]}),
                base_reports={
                    fakehash('b'): [
                        makereport({
                            'Foo': [
                                makecase('test_foo', 40),
                                makecase('test_bar', 1),
                            ],
                        }),
                    ],
                    fakehash('c'): [
                        makereport({
                            'Foo': [
                                makecase('test_foo', 43),
                            ],
                        }),
                    ],
                },
                job_name='foo_job',
                on_master=False,
                ancestry_path=0,
                other_ancestors=0,
            )
        )


if __name__ == '__main__':
    run_tests()

@@ -31,6 +31,7 @@ from .utils import (
 
 BLOCKED_PYTHON_TESTS = {
     "run_test.py",
+    "print_test_stats.py",
     "test_dataloader.py",
     "test_multiprocessing.py",
     "test_multiprocessing_spawn.py",

@@ -1,864 +0,0 @@
#!/usr/bin/env python

import bz2
import datetime
import json
import math
import os
import statistics
import subprocess
import time
from collections import defaultdict
from glob import glob
from pathlib import Path
from typing import DefaultDict, Dict, Iterable, List, Optional, Tuple
from xml.dom import minidom

import requests
from typing_extensions import TypedDict

try:
    import boto3
    HAVE_BOTO3 = True
except ImportError:
    HAVE_BOTO3 = False

Commit = str  # 40-digit SHA-1 hex string
Status = Optional[str]  # errored, failed, skipped, or None

# represent suite as dict because indexing is useful
SimplerCase = Tuple[float, Status]
SimplerSuite = Dict[str, SimplerCase]
SimplerReport = Dict[str, SimplerSuite]


class Case(TypedDict):
    name: str
    seconds: float
    errored: bool
    failed: bool
    skipped: bool


class Suite(TypedDict):
    total_seconds: float
    cases: List[Case]


class ReportMeta(TypedDict, total=False):
    build_pr: str
    build_tag: str
    build_sha1: Commit
    build_branch: str
    build_job: str
    build_workflow_id: str


class Report(ReportMeta):
    total_seconds: float
    suites: Dict[str, Suite]


class Stat(TypedDict):
    center: float
    spread: Optional[float]


class CaseDiff(TypedDict):
    margin: str
    name: str
    was: Optional[Tuple[Stat, Status]]
    now: Optional[SimplerCase]


class SuiteDiff(TypedDict):
    margin: str
    name: str
    was: Optional[Stat]
    now: Optional[float]
    cases: List[CaseDiff]


def case_status(case: Case) -> Status:
    for k in {'errored', 'failed', 'skipped'}:
        if case[k]:  # type: ignore
            return k
    return None


def simplify(report: Report) -> SimplerReport:
    return {
        suite_name: {
            case['name']: (case['seconds'], case_status(case))
            for case in suite['cases']
        }
        for suite_name, suite in report['suites'].items()
    }
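
# Editorial sketch, not part of the diff: the shape simplify() maps between,
# using a made-up one-case report.
_example_report: Report = {
    'total_seconds': 1.5,
    'suites': {
        'Foo': {
            'total_seconds': 1.5,
            'cases': [{'name': 'test_a', 'seconds': 1.5,
                       'errored': False, 'failed': True, 'skipped': False}],
        },
    },
}
assert simplify(_example_report) == {'Foo': {'test_a': (1.5, 'failed')}}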


def plural(n: int) -> str:
    return '' if n == 1 else 's'


def display_stat(
    x: Stat,
    format: Tuple[Tuple[int, int], Tuple[int, int]],
) -> str:
    spread_len = format[1][0] + 1 + format[1][1]
    spread = x['spread']
    if spread is not None:
        spread_str = f' ± {spread:{spread_len}.{format[1][1]}f}s'
    else:
        spread_str = ' ' * (3 + spread_len + 1)
    mean_len = format[0][0] + 1 + format[0][1]
    return f'{x["center"]:{mean_len}.{format[0][1]}f}s{spread_str}'
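
# Editorial sketch, not part of the diff: the nested format tuple reads as
# ((mean_digits, mean_decimals), (spread_digits, spread_decimals)), so the
# suite-level format ((4, 2), (3, 2)) pads both numbers to fixed widths.
assert display_stat({'center': 15.23, 'spread': 2.3},
                    ((4, 2), (3, 2))) == '  15.23s ±   2.30s'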


def list_stat(l: List[float]) -> Stat:
    return {
        'center': statistics.mean(l),
        'spread': statistics.stdev(l) if len(l) > 1 else None
    }


def zero_stat() -> Stat:
    return {'center': 0, 'spread': None}


def recenter(was: Stat, now: float) -> Stat:
    return {'center': now - was['center'], 'spread': was['spread']}


def sum_normals(stats: Iterable[Stat]) -> Stat:
    """
    Returns a stat corresponding to the sum of the given stats.

    Assumes that the center and spread for each of the given stats are
    mean and stdev, respectively.
    """
    l = list(stats)
    spread: Optional[float]
    if any(stat['spread'] is not None for stat in l):
        spread = math.sqrt(sum((stat['spread'] or 0)**2 for stat in l))
    else:
        spread = None
    return {
        'center': sum(stat['center'] for stat in l),
        'spread': spread,
    }
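
# Editorial sketch, not part of the diff: centers add linearly while spreads
# (stdevs) combine in quadrature, e.g. 0.3 and 0.4 combine to 0.5.
_combined = sum_normals([
    {'center': 2.0, 'spread': 0.3},
    {'center': 5.0, 'spread': 0.4},
])
assert _combined['center'] == 7.0
assert math.isclose(_combined['spread'], 0.5)  # sqrt(0.3**2 + 0.4**2)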


def format_seconds(seconds: List[float]) -> str:
    if len(seconds) > 0:
        x = list_stat(seconds)
        return f'total time {display_stat(x, ((5, 2), (4, 2)))}'.strip()
    return ''


def show_ancestors(num_commits: int) -> str:
    return f' | : ({num_commits} commit{plural(num_commits)})'


def unlines(lines: List[str]) -> str:
    return ''.join(f'{line}\n' for line in lines)


def matching_test_times(
    base_reports: Dict[Commit, List[SimplerReport]],
    suite_name: str,
    case_name: str,
    status: Status,
) -> List[float]:
    times: List[float] = []
    for reports in base_reports.values():
        for report in reports:
            suite = report.get(suite_name)
            if suite:
                case = suite.get(case_name)
                if case:
                    t, s = case
                    if s == status:
                        times.append(t)
    return times


def analyze(
    *,
    head_report: SimplerReport,
    base_reports: Dict[Commit, List[SimplerReport]],
) -> List[SuiteDiff]:
    # most recent master ancestor with at least one S3 report
    base_sha = next(sha for sha, reports in base_reports.items() if reports)

    # find all relevant suites (those in either base or head or both)
    all_reports = [head_report] + base_reports[base_sha]
    all_suites = {k for r in all_reports for k in r.keys()}

    removed_suites: List[SuiteDiff] = []
    modified_suites: List[SuiteDiff] = []
    added_suites: List[SuiteDiff] = []

    for suite_name in sorted(all_suites):
        case_diffs: List[CaseDiff] = []
        head_suite = head_report.get(suite_name)
        base_cases: Dict[str, Status] = dict(sorted(set.intersection(*[
            {(n, s) for n, (_, s) in report.get(suite_name, {}).items()}
            for report in base_reports[base_sha]
        ] or [set()])))
        case_stats: Dict[str, Stat] = {}
        if head_suite:
            now = sum(case[0] for case in head_suite.values())
            if any(suite_name in report for report in base_reports[base_sha]):
                removed_cases: List[CaseDiff] = []
                for case_name, case_status in base_cases.items():
                    case_stats[case_name] = list_stat(matching_test_times(
                        base_reports,
                        suite_name,
                        case_name,
                        case_status,
                    ))
                    if case_name not in head_suite:
                        removed_cases.append({
                            'margin': '-',
                            'name': case_name,
                            'was': (case_stats[case_name], case_status),
                            'now': None,
                        })
                modified_cases: List[CaseDiff] = []
                added_cases: List[CaseDiff] = []
                for head_case_name in sorted(head_suite):
                    head_case = head_suite[head_case_name]
                    if head_case_name in base_cases:
                        stat = case_stats[head_case_name]
                        base_status = base_cases[head_case_name]
                        if head_case[1] != base_status:
                            modified_cases.append({
                                'margin': '!',
                                'name': head_case_name,
                                'was': (stat, base_status),
                                'now': head_case,
                            })
                    else:
                        added_cases.append({
                            'margin': '+',
                            'name': head_case_name,
                            'was': None,
                            'now': head_case,
                        })
                # there might be a bug calculating this stdev, not sure
                was = sum_normals(case_stats.values())
                case_diffs = removed_cases + modified_cases + added_cases
                if case_diffs:
                    modified_suites.append({
                        'margin': ' ',
                        'name': suite_name,
                        'was': was,
                        'now': now,
                        'cases': case_diffs,
                    })
            else:
                for head_case_name in sorted(head_suite):
                    head_case = head_suite[head_case_name]
                    case_diffs.append({
                        'margin': ' ',
                        'name': head_case_name,
                        'was': None,
                        'now': head_case,
                    })
                added_suites.append({
                    'margin': '+',
                    'name': suite_name,
                    'was': None,
                    'now': now,
                    'cases': case_diffs,
                })
        else:
            for case_name, case_status in base_cases.items():
                case_stats[case_name] = list_stat(matching_test_times(
                    base_reports,
                    suite_name,
                    case_name,
                    case_status,
                ))
                case_diffs.append({
                    'margin': ' ',
                    'name': case_name,
                    'was': (case_stats[case_name], case_status),
                    'now': None,
                })
            removed_suites.append({
                'margin': '-',
                'name': suite_name,
                # there might be a bug calculating this stdev, not sure
                'was': sum_normals(case_stats.values()),
                'now': None,
                'cases': case_diffs,
            })

    return removed_suites + modified_suites + added_suites


def case_diff_lines(diff: CaseDiff) -> List[str]:
    lines = [f'def {diff["name"]}: ...']

    case_fmt = ((3, 3), (2, 3))

    was = diff['was']
    if was:
        was_line = f' # was {display_stat(was[0], case_fmt)}'
        was_status = was[1]
        if was_status:
            was_line += f' ({was_status})'
        lines.append(was_line)

    now = diff['now']
    if now:
        now_stat: Stat = {'center': now[0], 'spread': None}
        now_line = f' # now {display_stat(now_stat, case_fmt)}'
        now_status = now[1]
        if now_status:
            now_line += f' ({now_status})'
        lines.append(now_line)

    return [''] + [f'{diff["margin"]} {l}' for l in lines]


def display_suite_diff(diff: SuiteDiff) -> str:
    lines = [f'class {diff["name"]}:']

    suite_fmt = ((4, 2), (3, 2))

    was = diff['was']
    if was:
        lines.append(f' # was {display_stat(was, suite_fmt)}')

    now = diff['now']
    if now is not None:
        now_stat: Stat = {'center': now, 'spread': None}
        lines.append(f' # now {display_stat(now_stat, suite_fmt)}')

    for case_diff in diff['cases']:
        lines.extend([f' {l}' for l in case_diff_lines(case_diff)])

    return unlines([''] + [f'{diff["margin"]} {l}'.rstrip() for l in lines] + [''])


def anomalies(diffs: List[SuiteDiff]) -> str:
    return ''.join(map(display_suite_diff, diffs))


def graph(
    *,
    head_sha: Commit,
    head_seconds: float,
    base_seconds: Dict[Commit, List[float]],
    on_master: bool,
    ancestry_path: int = 0,
    other_ancestors: int = 0,
) -> str:
    lines = [
        'Commit graph (base is most recent master ancestor with at least one S3 report):',
        '',
        ' : (master)',
        ' |',
    ]

    head_time_str = f' {format_seconds([head_seconds])}'
    if on_master:
        lines.append(f' * {head_sha[:10]} (HEAD) {head_time_str}')
    else:
        lines.append(f' | * {head_sha[:10]} (HEAD) {head_time_str}')

        if ancestry_path > 0:
            lines += [
                ' | |',
                show_ancestors(ancestry_path),
            ]

        if other_ancestors > 0:
            lines += [
                ' |/|',
                show_ancestors(other_ancestors),
                ' |',
            ]
        else:
            lines.append(' |/')

    is_first = True
    for sha, seconds in base_seconds.items():
        num_runs = len(seconds)
        prefix = str(num_runs).rjust(3)
        base = '(base)' if is_first and num_runs > 0 else ' '
        if num_runs > 0:
            is_first = False
        t = format_seconds(seconds)
        p = plural(num_runs)
        if t:
            p = f'{p}, '.ljust(3)
        lines.append(f' * {sha[:10]} {base} {prefix} report{p}{t}')

    lines.extend([' |', ' :'])

    return unlines(lines)


def case_delta(case: CaseDiff) -> Stat:
    was = case['was']
    now = case['now']
    return recenter(
        was[0] if was else zero_stat(),
        now[0] if now else 0,
    )


def display_final_stat(stat: Stat) -> str:
    center = stat['center']
    spread = stat['spread']
    displayed = display_stat(
        {'center': abs(center), 'spread': spread},
        ((4, 2), (3, 2)),
    )
    if center < 0:
        sign = '-'
    elif center > 0:
        sign = '+'
    else:
        sign = ' '
    return f'{sign}{displayed}'.rstrip()


def summary_line(message: str, d: DefaultDict[str, List[CaseDiff]]) -> str:
    all_cases = [c for cs in d.values() for c in cs]
    tests = len(all_cases)
    suites = len(d)
    sp = f'{plural(suites)})'.ljust(2)
    tp = f'{plural(tests)},'.ljust(2)
    # there might be a bug calculating this stdev, not sure
    stat = sum_normals(case_delta(c) for c in all_cases)
    return ''.join([
        f'{message} (across {suites:>4} suite{sp}',
        f'{tests:>6} test{tp}',
        f' totaling {display_final_stat(stat)}',
    ])


def summary(analysis: List[SuiteDiff]) -> str:
    removed_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list)
    modified_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list)
    added_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list)

    for diff in analysis:
        # the use of 'margin' here is not the most elegant
        name = diff['name']
        margin = diff['margin']
        cases = diff['cases']
        if margin == '-':
            removed_tests[name] += cases
        elif margin == '+':
            added_tests[name] += cases
        else:
            removed = list(filter(lambda c: c['margin'] == '-', cases))
            added = list(filter(lambda c: c['margin'] == '+', cases))
            modified = list(filter(lambda c: c['margin'] == '!', cases))
            if removed:
                removed_tests[name] += removed
            if added:
                added_tests[name] += added
            if modified:
                modified_tests[name] += modified

    return unlines([
        summary_line('Removed ', removed_tests),
        summary_line('Modified', modified_tests),
        summary_line('Added ', added_tests),
    ])


def regression_info(
    *,
    head_sha: Commit,
    head_report: Report,
    base_reports: Dict[Commit, List[Report]],
    job_name: str,
    on_master: bool,
    ancestry_path: int,
    other_ancestors: int,
) -> str:
    """
    Return a human-readable report describing any test time regressions.

    The head_sha and head_report args give info about the current commit
    and its test times. Since Python dicts maintain insertion order
    (guaranteed as part of the language spec since 3.7), the
    base_reports argument must list the head's several most recent
    master commits, from newest to oldest (so the merge-base is
    list(base_reports)[0]).
    """
    simpler_head = simplify(head_report)
    simpler_base: Dict[Commit, List[SimplerReport]] = {}
    for commit, reports in base_reports.items():
        simpler_base[commit] = [simplify(r) for r in reports]
    analysis = analyze(
        head_report=simpler_head,
        base_reports=simpler_base,
    )

    return '\n'.join([
        unlines([
            '----- Historic stats comparison result ------',
            '',
            f' job: {job_name}',
            f' commit: {head_sha}',
        ]),
        anomalies(analysis),
        graph(
            head_sha=head_sha,
            head_seconds=head_report['total_seconds'],
            base_seconds={
                c: [r['total_seconds'] for r in rs]
                for c, rs in base_reports.items()
            },
            on_master=on_master,
            ancestry_path=ancestry_path,
            other_ancestors=other_ancestors,
        ),
        summary(analysis),
    ])


class TestCase:
    def __init__(self, dom) -> None:
        self.class_name = str(dom.attributes['classname'].value)
        self.name = str(dom.attributes['name'].value)
        self.time = float(dom.attributes['time'].value)
        self.errored = len(dom.getElementsByTagName('error')) > 0
        self.failed = len(dom.getElementsByTagName('failure')) > 0
        self.skipped = len(dom.getElementsByTagName('skipped')) > 0


class TestSuite:
    def __init__(self, name) -> None:
        self.name = name
        self.test_cases: List[TestCase] = []
        self.failed_count = 0
        self.skipped_count = 0
        self.errored_count = 0
        self.total_time = 0.0

    def __repr__(self):
        rc = f'{self.name} run_time: {self.total_time:.2f} tests: {len(self.test_cases)}'
        if self.skipped_count > 0:
            rc += f' skipped: {self.skipped_count}'
        return f'TestSuite({rc})'

    def append(self, test_case):
        self.test_cases.append(test_case)
        self.total_time += test_case.time
        self.failed_count += 1 if test_case.failed else 0
        self.skipped_count += 1 if test_case.skipped else 0
        self.errored_count += 1 if test_case.errored else 0

    def print_report(self, num_longest=3):
        sorted_tests = sorted(self.test_cases, key=lambda x: x.time)
        test_count = len(sorted_tests)
        print(f"class {self.name}:")
        print(f" tests: {test_count} failed: {self.failed_count} skipped: {self.skipped_count} errored: {self.errored_count}")
        print(f" run_time: {self.total_time:.2f} seconds")
        print(f" avg_time: {self.total_time/test_count:.2f} seconds")
        if test_count >= 2:
            print(f" median_time: {statistics.median(x.time for x in sorted_tests):.2f} seconds")
        sorted_tests = sorted_tests[-num_longest:]
        print(f" {len(sorted_tests)} longest tests:")
        for test in reversed(sorted_tests):
            print(f" {test.name} time: {test.time:.2f} seconds")
        print("")


def parse_report(path):
    dom = minidom.parse(path)
    for test_case in dom.getElementsByTagName('testcase'):
        yield TestCase(test_case)


def parse_reports(folder):
    reports = glob(os.path.join(folder, '**', '*.xml'), recursive=True)
    tests_by_class = dict()
    for report in reports:
        for test_case in parse_report(report):
            class_name = test_case.class_name
            if class_name not in tests_by_class:
                tests_by_class[class_name] = TestSuite(class_name)
            tests_by_class[class_name].append(test_case)
    return tests_by_class


def build_info():
    return {
        "build_pr": os.environ.get("CIRCLE_PR_NUMBER"),
        "build_tag": os.environ.get("CIRCLE_TAG"),
        "build_sha1": os.environ.get("CIRCLE_SHA1"),
        "build_branch": os.environ.get("CIRCLE_BRANCH"),
        "build_job": os.environ.get("CIRCLE_JOB"),
        "build_workflow_id": os.environ.get("CIRCLE_WORKFLOW_ID"),
    }


def build_message(test_case):
    return {
        "normal": {
            **build_info(),
            "test_suite_name": test_case.class_name,
            "test_case_name": test_case.name,
        },
        "int": {
            "time": int(time.time()),
            "test_total_count": 1,
            "test_total_time": int(test_case.time * 1000),
            "test_failed_count": 1 if test_case.failed > 0 else 0,
            "test_skipped_count": 1 if test_case.skipped > 0 else 0,
            "test_errored_count": 1 if test_case.errored > 0 else 0,
        },
    }


def send_report_to_scribe(reports):
    access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN")

    if not access_token:
        print("No scribe access token provided, skip sending report!")
        return
    print("Scribe access token provided, sending report...")
    url = "https://graph.facebook.com/scribe_logs"
    r = requests.post(
        url,
        data={
            "access_token": access_token,
            "logs": json.dumps(
                [
                    {
                        "category": "perfpipe_pytorch_test_times",
                        "message": json.dumps(build_message(test_case)),
                        "line_escape": False,
                    }
                    for name in sorted(reports.keys())
                    for test_case in reports[name].test_cases
                ]
            ),
        },
    )
    r.raise_for_status()


def assemble_s3_object(reports, *, total_seconds):
    return {
        **build_info(),
        'total_seconds': total_seconds,
        'suites': {
            name: {
                'total_seconds': suite.total_time,
                'cases': [
                    {
                        'name': case.name,
                        'seconds': case.time,
                        'errored': case.errored,
                        'failed': case.failed,
                        'skipped': case.skipped,
                    }
                    for case in suite.test_cases
                ],
            }
            for name, suite in reports.items()
        }
    }


def send_report_to_s3(obj):
    job = os.environ.get('CIRCLE_JOB')
    sha1 = os.environ.get('CIRCLE_SHA1')
    branch = os.environ.get('CIRCLE_BRANCH', '')
    if branch not in ['master', 'nightly'] and not branch.startswith("release/"):
        print("S3 upload only enabled on master, nightly and release branches.")
        print(f"skipping test report on branch: {branch}")
        return
    now = datetime.datetime.utcnow().isoformat()
    key = f'test_time/{sha1}/{job}/{now}Z.json.bz2'  # Z meaning UTC
    s3 = boto3.resource('s3')
    try:
        s3.get_bucket_acl(Bucket='ossci-metrics')
    except Exception as e:
        print(f"AWS ACL failed: {e}")
    print("AWS credential found, uploading to S3...")

    # keep the S3 handle distinct from the report dict passed in as `obj`,
    # which is what actually gets serialized below
    s3_obj = s3.Object('ossci-metrics', key)
    print("")
    # use bz2 because the results are smaller than gzip, and the
    # compression time penalty we pay is only about half a second for
    # input files of a few megabytes in size like these JSON files, and
    # because for some reason zlib doesn't seem to play nice with the
    # gunzip command whereas Python's bz2 does work with bzip2
    s3_obj.put(Body=bz2.compress(json.dumps(obj).encode()))


def print_regressions(obj, *, num_prev_commits):
    sha1 = os.environ.get("CIRCLE_SHA1", "HEAD")

    base = subprocess.check_output(
        ["git", "merge-base", sha1, "origin/master"],
        encoding="ascii",
    ).strip()

    count_spec = f"{base}..{sha1}"
    intermediate_commits = int(subprocess.check_output(
        ["git", "rev-list", "--count", count_spec],
        encoding="ascii"
    ))
    ancestry_path = int(subprocess.check_output(
        ["git", "rev-list", "--ancestry-path", "--count", count_spec],
        encoding="ascii",
    ))

    # if current commit is already on master, we need to exclude it from
    # this history; otherwise we include the merge-base
    commits = subprocess.check_output(
        ["git", "rev-list", f"--max-count={num_prev_commits+1}", base],
        encoding="ascii",
    ).splitlines()
    on_master = False
    if base == sha1:
        on_master = True
        commits = commits[1:]
    else:
        commits = commits[:-1]

    job = os.environ.get("CIRCLE_JOB", "")
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(name="ossci-metrics")
    index = {}
    for commit in commits:
        summaries = bucket.objects.filter(Prefix=f"test_time/{commit}/{job}/")
        index[commit] = list(summaries)

    objects: Dict[Commit, List[Report]] = {}
    # should we do these in parallel?
    for commit, summaries in index.items():
        objects[commit] = []
        for summary in summaries:
            binary = summary.get()["Body"].read()
            string = bz2.decompress(binary).decode("utf-8")
            objects[commit].append(json.loads(string))

    print()
    print(regression_info(
        head_sha=sha1,
        head_report=obj,
        base_reports=objects,
        job_name=job,
        on_master=on_master,
        ancestry_path=ancestry_path - 1,
        other_ancestors=intermediate_commits - ancestry_path,
    ), end="")
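
# Editorial sketch, not part of the diff: the two rev-list counts used above,
# runnable from any checkout that has an origin/master ref. `--ancestry-path`
# counts only commits lying on an ancestry path from base to HEAD; the
# difference from the plain count is the "other ancestors" reached via merges.
def _count_commits(*args):
    return int(subprocess.check_output(
        ["git", "rev-list", "--count", *args], encoding="ascii",
    ))

_base = subprocess.check_output(
    ["git", "merge-base", "HEAD", "origin/master"], encoding="ascii",
).strip()
_intermediate = _count_commits(f"{_base}..HEAD")
_on_path = _count_commits("--ancestry-path", f"{_base}..HEAD")
# these are the values handed to regression_info above:
print(_on_path - 1, _intermediate - _on_path)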


def positive_integer(value):
    parsed = int(value)
    if parsed < 1:
        raise argparse.ArgumentTypeError(f"{value} is not a natural number")
    return parsed


def positive_float(value):
    parsed = float(value)
    if parsed <= 0.0:
        raise argparse.ArgumentTypeError(f"{value} is not a positive rational number")
    return parsed


if __name__ == '__main__':
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        "Print statistics from test XML output.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--longest-of-class",
        type=positive_integer,
        default=3,
        metavar="N",
        help="how many longest tests to show for each class",
    )
    parser.add_argument(
        "--class-print-threshold",
        type=positive_float,
        default=1.0,
        metavar="N",
        help="Minimal total time to warrant class report",
    )
    parser.add_argument(
        "--longest-of-run",
        type=positive_integer,
        default=10,
        metavar="N",
        help="how many longest tests to show from the entire run",
    )
    if HAVE_BOTO3:
        parser.add_argument(
            "--upload-to-s3",
            action="store_true",
            help="upload test time to S3 bucket",
        )
        parser.add_argument(
            "--compare-with-s3",
            action="store_true",
            help="download test times for base commits and compare",
        )
    parser.add_argument(
        "--num-prev-commits",
        type=positive_integer,
        default=10,
        metavar="N",
        help="how many previous commits to compare test times with",
    )
    parser.add_argument(
        "--use-json",
        metavar="FILE.json",
        help="compare S3 with JSON file, instead of the test report folder",
    )
    parser.add_argument(
        "folder",
        help="test report folder",
    )
    args = parser.parse_args()

    reports = parse_reports(args.folder)
    if len(reports) == 0:
        print(f"No test reports found in {args.folder}")
        sys.exit(0)

    send_report_to_scribe(reports)

    longest_tests = []
    total_time = 0
    for name in sorted(reports.keys()):
        test_suite = reports[name]
        if test_suite.total_time >= args.class_print_threshold:
            test_suite.print_report(args.longest_of_class)
        total_time += test_suite.total_time
        longest_tests.extend(test_suite.test_cases)
    longest_tests = sorted(longest_tests, key=lambda x: x.time)[-args.longest_of_run:]

    obj = assemble_s3_object(reports, total_seconds=total_time)

    if args.upload_to_s3:
        send_report_to_s3(obj)

    print(f"Total runtime is {datetime.timedelta(seconds=int(total_time))}")
    print(f"{len(longest_tests)} longest tests of entire run:")
    for test_case in reversed(longest_tests):
        print(f" {test_case.class_name}.{test_case.name} time: {test_case.time:.2f} seconds")

    if args.compare_with_s3:
        head_json = obj
        if args.use_json:
            head_json = json.loads(Path(args.use_json).read_text())
        print_regressions(head_json, num_prev_commits=args.num_prev_commits)