Summary:
Up until this PR, the top-level `total_seconds` stat we've been uploading to S3 has only included suites longer than one second. This PR corrects that issue, and also clarifies the script's textual output for "longest tests of entire run". (Note that the `total_time` local variable is passed as the `total_seconds` parameter in the call to `assemble_s3_object`.)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/56040

Test Plan:
Create a simple test file (call it `test_quick_maths.py`) with these contents:
```py
from torch.testing._internal.common_utils import TestCase, run_tests

class TestQuickMaths(TestCase):
    def test_two_plus_two(self):
        self.assertEqual(2 + 2, 4)

if __name__ == '__main__':
    run_tests()
```
Run it and save the test results:
```sh
rm -r /tmp/reports ; python3 test_quick_maths.py --save-xml=/tmp/reports
```
Then display them using the script:
```sh
tools/print_test_stats.py /tmp/reports
```

- Before this PR:
  ```
  No scribe access token provided, skip sending report!
  Total runtime is 0:00:00
  0 longest tests of entire run:
  ```
- With this PR:
  ```
  No scribe access token provided, skip sending report!
  Total runtime is 0:00:00.108000
  0 longest tests of entire run (ignoring suites totaling less than 1.0 seconds):
  ```

If you were to upload this to S3 (see https://github.com/pytorch/pytorch/issues/49190 for an example of how to do this manually), the top-level `total_seconds` field should also change from `0` to `0.108`.

Reviewed By: janeyx99

Differential Revision: D27770666

Pulled By: samestep

fbshipit-source-id: 8255a4726ab3a692bbeff4c48974fbb3c6375142
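For context, a minimal sketch of the corrected accumulation in the script's `__main__` block (names as in this file): the total now counts every suite, and `--class-print-threshold` only gates the per-class printing.

```py
total_time = 0.0
for test_file in reports_by_file.values():
    for test_suite in test_file.test_suites.values():
        total_time += test_suite.total_time  # now counted unconditionally
        if test_suite.total_time >= args.class_print_threshold:
            test_suite.print_report(args.longest_of_class)  # printing is still thresholded

# total_time is then what gets uploaded as the top-level total_seconds:
obj = assemble_s3_object(reports_by_file, total_seconds=total_time)
```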
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# argparse and sys are imported at module level because positive_integer
# and positive_float below reference argparse.ArgumentTypeError.
import argparse
import bz2
import datetime
import json
import math
import os
import re
import statistics
import subprocess
import sys
import time
from collections import defaultdict
from glob import glob
from pathlib import Path
from typing import (Any, DefaultDict, Dict, Iterable, Iterator, List, Optional,
                    Set, Tuple, cast)
from xml.dom import minidom

import requests
from typing_extensions import TypedDict
from tools.stats_utils.s3_stat_parser import (newify_case, get_S3_object_from_bucket, get_test_stats_summaries_for_job,
                                              Report, Status, Commit, HAVE_BOTO3, Version2Case, VersionedReport,
                                              Version1Report, Version2Report, ReportMetaMeta)


SimplerSuite = Dict[str, Version2Case]
SimplerFile = Dict[str, SimplerSuite]
SimplerReport = Dict[str, SimplerFile]


class Stat(TypedDict):
    center: float
    spread: Optional[float]


class CaseDiff(TypedDict):
    margin: str
    name: str
    was: Optional[Tuple[Stat, Status]]
    now: Optional[Version2Case]


class SuiteDiff(TypedDict):
    margin: str
    name: str
    was: Optional[Stat]
    now: Optional[float]
    cases: List[CaseDiff]


# TODO: consolidate this with the get_cases function from
# tools/test_history.py

# Here we translate to a three-layer format (file -> suite -> case)
# rather than a two-layer format (suite -> case) because as mentioned in
# a comment in the body of this function, if we consolidate suites that
# share a name, there will be test case name collisions, and once we
# have those, there's no clean way to deal with it in the diffing logic.
# It's not great to have to add a dummy empty string for the filename
# for version 1 reports, but it's better than either losing cases that
# share a name (for version 2 reports) or using a list of cases rather
# than a dict.
def simplify(report: Report) -> SimplerReport:
    if 'format_version' not in report:  # version 1 implicitly
        v1report = cast(Version1Report, report)
        return {
            # we just don't have test filename information sadly, so we
            # just make one fake filename that is the empty string
            '': {
                suite_name: {
                    # This clobbers some cases that have duplicate names
                    # because in version 1, we would merge together all
                    # the suites with a given name (even if they came
                    # from different files), so there were actually
                    # situations in which two cases in the same suite
                    # shared a name (because they actually originally
                    # came from two suites that were then merged). It
                    # would probably be better to warn about the cases
                    # that we're silently discarding here, but since
                    # we're only uploading in the new format (where
                    # everything is also keyed by filename) going
                    # forward, it shouldn't matter too much.
                    case['name']: newify_case(case)
                    for case in suite['cases']
                }
                for suite_name, suite in v1report['suites'].items()
            }
        }
    else:
        v_report = cast(VersionedReport, report)
        version = v_report['format_version']
        if version == 2:
            v2report = cast(Version2Report, v_report)
            return {
                filename: {
                    suite_name: suite['cases']
                    for suite_name, suite in file_data['suites'].items()
                }
                for filename, file_data in v2report['files'].items()
            }
        else:
            raise RuntimeError(f'Unknown format version: {version}')
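
# Shape sketch (hypothetical data): a version 2 report such as
#
#     {'format_version': 2, 'total_seconds': 0.1, 'files': {
#         'test_foo': {'total_seconds': 0.1, 'suites': {
#             'TestFoo': {'total_seconds': 0.1, 'cases': {
#                 'test_bar': {'seconds': 0.1, 'status': None}}}}}}}
#
# simplifies to the three-layer file -> suite -> case mapping
#
#     {'test_foo': {'TestFoo': {'test_bar': {'seconds': 0.1, 'status': None}}}}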


def plural(n: int) -> str:
    return '' if n == 1 else 's'


def display_stat(
    x: Stat,
    format: Tuple[Tuple[int, int], Tuple[int, int]],
) -> str:
    spread_len = format[1][0] + 1 + format[1][1]
    spread = x['spread']
    if spread is not None:
        spread_str = f' ± {spread:{spread_len}.{format[1][1]}f}s'
    else:
        spread_str = ' ' * (3 + spread_len + 1)
    mean_len = format[0][0] + 1 + format[0][1]
    return f'{x["center"]:{mean_len}.{format[0][1]}f}s{spread_str}'
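
# Formatting sketch: each (w, p) pair means "w digits before the decimal
# point, p after", so ((5, 2), (4, 2)) renders the center in a width of
# 5 + 1 + 2 = 8 characters. For example (hypothetical numbers):
#
#     display_stat({'center': 12.3456, 'spread': 1.2}, ((5, 2), (4, 2)))
#     == '   12.35s ±    1.20s'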


def list_stat(l: List[float]) -> Stat:
    return {
        'center': statistics.mean(l),
        'spread': statistics.stdev(l) if len(l) > 1 else None
    }


def zero_stat() -> Stat:
    return {'center': 0, 'spread': None}


def recenter(was: Stat, now: float) -> Stat:
    return {'center': now - was['center'], 'spread': was['spread']}


def sum_normals(stats: Iterable[Stat]) -> Stat:
    """
    Returns a stat corresponding to the sum of the given stats.

    Assumes that the center and spread for each of the given stats are
    mean and stdev, respectively.
    """
    l = list(stats)
    spread: Optional[float]
    if any(stat['spread'] is not None for stat in l):
        spread = math.sqrt(sum((stat['spread'] or 0)**2 for stat in l))
    else:
        spread = None
    return {
        'center': sum(stat['center'] for stat in l),
        'spread': spread,
    }
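
# Worked example (hypothetical numbers): for independent normals, means
# add and variances add, so
#
#     sum_normals([{'center': 3.0, 'spread': 1.0},
#                  {'center': 5.0, 'spread': 2.0}])
#
# gives center 8.0 and spread sqrt(1.0**2 + 2.0**2) ≈ 2.24.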


def format_seconds(seconds: List[float]) -> str:
    if len(seconds) > 0:
        x = list_stat(seconds)
        return f'total time {display_stat(x, ((5, 2), (4, 2)))}'.strip()
    return ''


def show_ancestors(num_commits: int) -> str:
    return f'    | : ({num_commits} commit{plural(num_commits)})'


def unlines(lines: List[str]) -> str:
    return ''.join(f'{line}\n' for line in lines)


def matching_test_times(
    *,
    base_reports: Dict[Commit, List[SimplerReport]],
    filename: str,
    suite_name: str,
    case_name: str,
    status: Status,
) -> List[float]:
    times: List[float] = []
    for reports in base_reports.values():
        for report in reports:
            file_data = report.get(filename)
            if file_data:
                suite = file_data.get(suite_name)
                if suite:
                    case = suite.get(case_name)
                    if case:
                        t = case['seconds']
                        s = case['status']
                        if s == status:
                            times.append(t)
    return times


def analyze(
    *,
    head_report: SimplerReport,
    base_reports: Dict[Commit, List[SimplerReport]],
) -> List[SuiteDiff]:
    nonempty_shas = [sha for sha, reports in base_reports.items() if reports]
    # most recent master ancestor with at least one S3 report,
    # or empty list if there are none (will show all tests as added)
    base_report = base_reports[nonempty_shas[0]] if nonempty_shas else []

    # find all relevant suites (those in either base or head or both)
    all_reports = [head_report] + base_report
    all_suites: Set[Tuple[str, str]] = {
        (filename, suite_name)
        for r in all_reports
        for filename, file_data in r.items()
        for suite_name in file_data.keys()
    }

    removed_suites: List[SuiteDiff] = []
    modified_suites: List[SuiteDiff] = []
    added_suites: List[SuiteDiff] = []

    for filename, suite_name in sorted(all_suites):
        case_diffs: List[CaseDiff] = []
        head_suite = head_report.get(filename, {}).get(suite_name)
        base_cases: Dict[str, Status] = dict(sorted(set.intersection(*[
            {
                (n, case['status'])
                for n, case
                in report.get(filename, {}).get(suite_name, {}).items()
            }
            for report in base_report
        ] or [set()])))
        case_stats: Dict[str, Stat] = {}
        if head_suite:
            now = sum(case['seconds'] for case in head_suite.values())
            if any(
                filename in report and suite_name in report[filename]
                for report in base_report
            ):
                removed_cases: List[CaseDiff] = []
                for case_name, case_status in base_cases.items():
                    case_stats[case_name] = list_stat(matching_test_times(
                        base_reports=base_reports,
                        filename=filename,
                        suite_name=suite_name,
                        case_name=case_name,
                        status=case_status,
                    ))
                    if case_name not in head_suite:
                        removed_cases.append({
                            'margin': '-',
                            'name': case_name,
                            'was': (case_stats[case_name], case_status),
                            'now': None,
                        })
                modified_cases: List[CaseDiff] = []
                added_cases: List[CaseDiff] = []
                for head_case_name in sorted(head_suite):
                    head_case = head_suite[head_case_name]
                    if head_case_name in base_cases:
                        stat = case_stats[head_case_name]
                        base_status = base_cases[head_case_name]
                        if head_case['status'] != base_status:
                            modified_cases.append({
                                'margin': '!',
                                'name': head_case_name,
                                'was': (stat, base_status),
                                'now': head_case,
                            })
                    else:
                        added_cases.append({
                            'margin': '+',
                            'name': head_case_name,
                            'was': None,
                            'now': head_case,
                        })
                # there might be a bug calculating this stdev, not sure
                was = sum_normals(case_stats.values())
                case_diffs = removed_cases + modified_cases + added_cases
                if case_diffs:
                    modified_suites.append({
                        'margin': ' ',
                        'name': suite_name,
                        'was': was,
                        'now': now,
                        'cases': case_diffs,
                    })
            else:
                for head_case_name in sorted(head_suite):
                    head_case = head_suite[head_case_name]
                    case_diffs.append({
                        'margin': ' ',
                        'name': head_case_name,
                        'was': None,
                        'now': head_case,
                    })
                added_suites.append({
                    'margin': '+',
                    'name': suite_name,
                    'was': None,
                    'now': now,
                    'cases': case_diffs,
                })
        else:
            for case_name, case_status in base_cases.items():
                case_stats[case_name] = list_stat(matching_test_times(
                    base_reports=base_reports,
                    filename=filename,
                    suite_name=suite_name,
                    case_name=case_name,
                    status=case_status,
                ))
                case_diffs.append({
                    'margin': ' ',
                    'name': case_name,
                    'was': (case_stats[case_name], case_status),
                    'now': None,
                })
            removed_suites.append({
                'margin': '-',
                'name': suite_name,
                # there might be a bug calculating this stdev, not sure
                'was': sum_normals(case_stats.values()),
                'now': None,
                'cases': case_diffs,
            })

    return removed_suites + modified_suites + added_suites
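
# Result sketch (hypothetical values): removed suites come first, then
# modified, then added; each entry is a SuiteDiff such as
#
#     {'margin': '-', 'name': 'TestFoo',
#      'was': {'center': 1.0, 'spread': None}, 'now': None,
#      'cases': [...]}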


def case_diff_lines(diff: CaseDiff) -> List[str]:
    lines = [f'def {diff["name"]}: ...']

    case_fmt = ((3, 3), (2, 3))

    was = diff['was']
    if was:
        was_line = f'    # was {display_stat(was[0], case_fmt)}'
        was_status = was[1]
        if was_status:
            was_line += f' ({was_status})'
        lines.append(was_line)

    now = diff['now']
    if now:
        now_stat: Stat = {'center': now['seconds'], 'spread': None}
        now_line = f'    # now {display_stat(now_stat, case_fmt)}'
        now_status = now['status']
        if now_status:
            now_line += f' ({now_status})'
        lines.append(now_line)

    return [''] + [f'{diff["margin"]} {l}' for l in lines]


def display_suite_diff(diff: SuiteDiff) -> str:
    lines = [f'class {diff["name"]}:']

    suite_fmt = ((4, 2), (3, 2))

    was = diff['was']
    if was:
        lines.append(f'    # was {display_stat(was, suite_fmt)}')

    now = diff['now']
    if now is not None:
        now_stat: Stat = {'center': now, 'spread': None}
        lines.append(f'    # now {display_stat(now_stat, suite_fmt)}')

    for case_diff in diff['cases']:
        lines.extend([f'    {l}' for l in case_diff_lines(case_diff)])

    return unlines([''] + [f'{diff["margin"]} {l}'.rstrip() for l in lines] + [''])
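
# Rendered sketch (spacing approximate): each suite prints as a pseudo-
# Python block whose left margin marks the kind of change ('-' removed,
# '+' added, '!' modified case), roughly
#
#     - class TestFoo:
#     -     # was   40.00s
#     -
#     -     - def test_bar: ...
#     -     -     # was   0.500s          (skipped)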


def anomalies(diffs: List[SuiteDiff]) -> str:
    return ''.join(map(display_suite_diff, diffs))


def graph(
    *,
    head_sha: Commit,
    head_seconds: float,
    base_seconds: Dict[Commit, List[float]],
    on_master: bool,
    ancestry_path: int = 0,
    other_ancestors: int = 0,
) -> str:
    lines = [
        'Commit graph (base is most recent master ancestor with at least one S3 report):',
        '',
        '    : (master)',
        '    |',
    ]

    head_time_str = f'           {format_seconds([head_seconds])}'
    if on_master:
        lines.append(f'    * {head_sha[:10]} (HEAD) {head_time_str}')
    else:
        lines.append(f'    | * {head_sha[:10]} (HEAD) {head_time_str}')

        if ancestry_path > 0:
            lines += [
                '    | |',
                show_ancestors(ancestry_path),
            ]

        if other_ancestors > 0:
            lines += [
                '    |/|',
                show_ancestors(other_ancestors),
                '    |',
            ]
        else:
            lines.append('    |/')

    is_first = True
    for sha, seconds in base_seconds.items():
        num_runs = len(seconds)
        prefix = str(num_runs).rjust(3)
        base = '(base)' if is_first and num_runs > 0 else '      '
        if num_runs > 0:
            is_first = False
        t = format_seconds(seconds)
        p = plural(num_runs)
        if t:
            p = f'{p}, '.ljust(3)
        lines.append(f'    * {sha[:10]} {base} {prefix} report{p}{t}')

    lines.extend(['    |', '    :'])

    return unlines(lines)
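
# Output sketch (hypothetical shas, spacing approximate):
#
#     Commit graph (base is most recent master ancestor with at least one S3 report):
#
#         : (master)
#         |
#         | * 0123456789 (HEAD)            total time   60.00s
#         | |
#         | : (2 commits)
#         |/
#         * abcdef0123 (base)   1 report,  total time   58.50s
#         |
#         :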


def case_delta(case: CaseDiff) -> Stat:
    was = case['was']
    now = case['now']
    return recenter(
        was[0] if was else zero_stat(),
        now['seconds'] if now else 0,
    )


def display_final_stat(stat: Stat) -> str:
    center = stat['center']
    spread = stat['spread']
    displayed = display_stat(
        {'center': abs(center), 'spread': spread},
        ((4, 2), (3, 2)),
    )
    if center < 0:
        sign = '-'
    elif center > 0:
        sign = '+'
    else:
        sign = ' '
    return f'{sign}{displayed}'.rstrip()


def summary_line(message: str, d: DefaultDict[str, List[CaseDiff]]) -> str:
    all_cases = [c for cs in d.values() for c in cs]
    tests = len(all_cases)
    suites = len(d)
    sp = f'{plural(suites)})'.ljust(2)
    tp = f'{plural(tests)},'.ljust(2)
    # there might be a bug calculating this stdev, not sure
    stat = sum_normals(case_delta(c) for c in all_cases)
    return ''.join([
        f'{message} (across {suites:>4} suite{sp}',
        f'{tests:>6} test{tp}',
        f' totaling {display_final_stat(stat)}',
    ])


def summary(analysis: List[SuiteDiff]) -> str:
    removed_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list)
    modified_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list)
    added_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list)

    for diff in analysis:
        # the use of 'margin' here is not the most elegant
        name = diff['name']
        margin = diff['margin']
        cases = diff['cases']
        if margin == '-':
            removed_tests[name] += cases
        elif margin == '+':
            added_tests[name] += cases
        else:
            removed = list(filter(lambda c: c['margin'] == '-', cases))
            added = list(filter(lambda c: c['margin'] == '+', cases))
            modified = list(filter(lambda c: c['margin'] == '!', cases))
            if removed:
                removed_tests[name] += removed
            if added:
                added_tests[name] += added
            if modified:
                modified_tests[name] += modified

    return unlines([
        summary_line('Removed ', removed_tests),
        summary_line('Modified', modified_tests),
        summary_line('Added   ', added_tests),
    ])


def regression_info(
    *,
    head_sha: Commit,
    head_report: Report,
    base_reports: Dict[Commit, List[Report]],
    job_name: str,
    on_master: bool,
    ancestry_path: int,
    other_ancestors: int,
) -> str:
    """
    Return a human-readable report describing any test time regressions.

    The head_sha and head_report args give info about the current commit
    and its test times. Since Python dicts maintain insertion order
    (guaranteed as part of the language spec since 3.7), the
    base_reports argument must list the head's several most recent
    master commits, from newest to oldest (so the merge-base is
    list(base_reports)[0]).
    """
    simpler_head = simplify(head_report)
    simpler_base: Dict[Commit, List[SimplerReport]] = {}
    for commit, reports in base_reports.items():
        simpler_base[commit] = [simplify(r) for r in reports]
    analysis = analyze(
        head_report=simpler_head,
        base_reports=simpler_base,
    )

    return '\n'.join([
        unlines([
            '----- Historic stats comparison result ------',
            '',
            f'    job: {job_name}',
            f'    commit: {head_sha}',
        ]),

        # don't print anomalies, because sometimes due to sharding, the
        # output from this would be very long and obscure better signal

        # anomalies(analysis),

        graph(
            head_sha=head_sha,
            head_seconds=head_report['total_seconds'],
            base_seconds={
                c: [r['total_seconds'] for r in rs]
                for c, rs in base_reports.items()
            },
            on_master=on_master,
            ancestry_path=ancestry_path,
            other_ancestors=other_ancestors,
        ),
        summary(analysis),
    ])
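
# Ordering sketch (hypothetical shas): base_reports must be keyed from
# newest to oldest, e.g.
#
#     {'aaaaaaaaaa': [report1, report2],  # merge-base (newest)
#      'bbbbbbbbbb': [report3],
#      'cccccccccc': []}                  # oldest ancestor, no S3 reports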


class TestCase:
    def __init__(self, dom: Any) -> None:
        self.class_name = str(dom.attributes['classname'].value)
        self.name = str(dom.attributes['name'].value)
        self.time = float(dom.attributes['time'].value)
        self.errored = len(dom.getElementsByTagName('error')) > 0
        self.failed = len(dom.getElementsByTagName('failure')) > 0
        self.skipped = len(dom.getElementsByTagName('skipped')) > 0


class TestSuite:
    def __init__(self, name: str) -> None:
        self.name = name
        self.test_cases: Dict[str, TestCase] = dict()
        self.failed_count = 0
        self.skipped_count = 0
        self.errored_count = 0
        self.total_time = 0.0

    def __repr__(self) -> str:
        rc = f'{self.name} run_time: {self.total_time:.2f} tests: {len(self.test_cases)}'
        if self.skipped_count > 0:
            rc += f' skipped: {self.skipped_count}'
        return f'TestSuite({rc})'

    def append(self, test_case: TestCase) -> None:
        self.test_cases[test_case.name] = test_case
        self.total_time += test_case.time
        self.failed_count += 1 if test_case.failed else 0
        self.skipped_count += 1 if test_case.skipped else 0
        self.errored_count += 1 if test_case.errored else 0

    def replace(self, test_case: TestCase) -> float:
        name = test_case.name
        assert name in self.test_cases, f'Error: attempting to replace nonexistent test case {name}'
        old_time = self.test_cases[name].time
        # We don't replace anything if the old test case was not shorter.
        if old_time >= test_case.time:
            return 0.0
        self.total_time = self.total_time + test_case.time - old_time
        self.test_cases[name] = test_case
        return test_case.time - old_time

    def print_report(self, num_longest: int = 3) -> None:
        sorted_tests = sorted(self.test_cases.values(), key=lambda x: x.time)
        test_count = len(sorted_tests)
        print(f"class {self.name}:")
        print(f"    tests: {test_count} failed: {self.failed_count} skipped: {self.skipped_count} errored: {self.errored_count}")
        print(f"    run_time: {self.total_time:.2f} seconds")
        print(f"    avg_time: {self.total_time/test_count:.2f} seconds")
        if test_count >= 2:
            print(f"    median_time: {statistics.median(x.time for x in sorted_tests):.2f} seconds")
        sorted_tests = sorted_tests[-num_longest:]
        print(f"    {len(sorted_tests)} longest tests:")
        for test in reversed(sorted_tests):
            print(f"        {test.name} time: {test.time:.2f} seconds")
        print("")


class TestFile:
    def __init__(self, name: str) -> None:
        self.name = name
        self.total_time = 0.0
        self.test_suites: Dict[str, TestSuite] = dict()

    def append(self, test_case: TestCase) -> None:
        suite_name = test_case.class_name
        if suite_name not in self.test_suites:
            self.test_suites[suite_name] = TestSuite(suite_name)
        if test_case.name in self.test_suites[suite_name].test_cases:
            # We expect duplicate tests for test_cpp_extensions_aot, distributed/test_distributed_fork,
            # and distributed/test_distributed_spawn. In these cases, we store the test case that took the longest,
            # as in these jobs, the duplicate tests are run in parallel.
            # For other unexpected cases, we should raise a warning.
            if self.name == 'test_cpp_extensions_aot' or \
                    self.name == 'distributed/test_distributed_fork' or \
                    self.name == 'distributed/test_distributed_spawn' or \
                    self.name == 'cpp':  # The caffe2 cpp tests spawn duplicate test cases as well.
                time_difference = self.test_suites[suite_name].replace(test_case)
                self.total_time += time_difference
            else:
                raise RuntimeWarning(f'Duplicate test case {test_case.name} in suite {suite_name} called from {self.name}')
        else:
            self.test_suites[suite_name].append(test_case)
            self.total_time += test_case.time


def parse_report(path: str) -> Iterator[TestCase]:
    dom = minidom.parse(path)
    for test_case in dom.getElementsByTagName('testcase'):
        yield TestCase(test_case)


def parse_reports(folder: str) -> Dict[str, TestFile]:
    reports = glob(os.path.join(folder, '**', '*.xml'), recursive=True)
    tests_by_file = dict()
    for report in reports:
        test_filename = re.sub(r'\.', '/', os.path.basename(os.path.dirname(report)))
        if test_filename not in tests_by_file:
            tests_by_file[test_filename] = TestFile(test_filename)
        for test_case in parse_report(report):
            tests_by_file[test_filename].append(test_case)
    return tests_by_file
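
# Layout sketch (hypothetical paths): the directory containing each XML
# report names the test module, with dots standing in for path
# separators, so
#
#     reports/distributed.test_distributed_fork/TEST-TestDistBackend.xml
#
# is filed under the test file 'distributed/test_distributed_fork'.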


def build_info() -> ReportMetaMeta:
    return {
        "build_pr": os.environ.get("CIRCLE_PR_NUMBER", ""),
        "build_tag": os.environ.get("CIRCLE_TAG", ""),
        "build_sha1": os.environ.get("CIRCLE_SHA1", ""),
        "build_branch": os.environ.get("CIRCLE_BRANCH", ""),
        "build_job": os.environ.get("CIRCLE_JOB", ""),
        "build_workflow_id": os.environ.get("CIRCLE_WORKFLOW_ID", ""),
    }


def build_message(test_case: TestCase) -> Dict[str, Dict[str, Any]]:
    return {
        "normal": {
            **build_info(),
            "test_suite_name": test_case.class_name,
            "test_case_name": test_case.name,
        },
        "int": {
            "time": int(time.time()),
            "test_total_count": 1,
            "test_total_time": int(test_case.time * 1000),
            "test_failed_count": 1 if test_case.failed else 0,
            "test_skipped_count": 1 if test_case.skipped else 0,
            "test_errored_count": 1 if test_case.errored else 0,
        },
    }
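
# Message sketch (hypothetical values): each test case becomes one scribe
# record, e.g.
#
#     {"normal": {"build_job": "pytorch_linux_test", ...,
#                 "test_suite_name": "TestFoo", "test_case_name": "test_bar"},
#      "int": {"time": 1618900000, "test_total_count": 1,
#              "test_total_time": 108, "test_failed_count": 0,
#              "test_skipped_count": 0, "test_errored_count": 0}}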


def send_report_to_scribe(reports: Dict[str, TestFile]) -> None:
    access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN")

    if not access_token:
        print("No scribe access token provided, skip sending report!")
        return
    print("Scribe access token provided, sending report...")
    url = "https://graph.facebook.com/scribe_logs"
    r = requests.post(
        url,
        data={
            "access_token": access_token,
            "logs": json.dumps(
                [
                    {
                        "category": "perfpipe_pytorch_test_times",
                        "message": json.dumps(build_message(test_case)),
                        "line_escape": False,
                    }
                    for test_file in reports.values()
                    for test_suite in test_file.test_suites.values()
                    for test_case in test_suite.test_cases.values()
                ]
            ),
        },
    )
    r.raise_for_status()

def assemble_s3_object(
    reports: Dict[str, TestFile],
    *,
    total_seconds: float,
) -> Version2Report:
    return {
        **build_info(),  # type: ignore[misc]
        'total_seconds': total_seconds,
        'format_version': 2,
        'files': {
            name: {
                'total_seconds': test_file.total_time,
                'suites': {
                    name: {
                        'total_seconds': suite.total_time,
                        'cases': {
                            name: {
                                'seconds': case.time,
                                'status': 'skipped' if case.skipped else
                                          'errored' if case.errored else
                                          'failed' if case.failed else None
                            }
                            for name, case in suite.test_cases.items()
                        },
                    }
                    for name, suite in test_file.test_suites.items()
                }
            }
            for name, test_file in reports.items()
        }
    }
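
# Object sketch (hypothetical values, echoing the commit message's test
# plan; the file key depends on the report directory layout):
#
#     {'build_pr': '', ..., 'total_seconds': 0.108, 'format_version': 2,
#      'files': {'test_quick_maths': {'total_seconds': 0.108, 'suites': {
#          'TestQuickMaths': {'total_seconds': 0.108, 'cases': {
#              'test_two_plus_two': {'seconds': 0.108, 'status': None}}}}}}}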


def send_report_to_s3(head_report: Version2Report) -> None:
    job = os.environ.get('CIRCLE_JOB')
    sha1 = os.environ.get('CIRCLE_SHA1')
    branch = os.environ.get('CIRCLE_BRANCH', '')
    if branch not in ['master', 'nightly'] and not branch.startswith("release/"):
        print("S3 upload only enabled on master, nightly and release branches.")
        print(f"skipping test report on branch: {branch}")
        return
    now = datetime.datetime.utcnow().isoformat()
    key = f'test_time/{sha1}/{job}/{now}Z.json.bz2'  # Z meaning UTC
    obj = get_S3_object_from_bucket('ossci-metrics', key)
    # use bz2 because the results are smaller than gzip, and the
    # compression time penalty we pay is only about half a second for
    # input files of a few megabytes in size like these JSON files, and
    # because for some reason zlib doesn't seem to play nice with the
    # gunzip command whereas Python's bz2 does work with bzip2
    obj.put(Body=bz2.compress(json.dumps(head_report).encode()))
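
# Read-back sketch (assuming obj is a boto3 S3 Object): to inspect an
# uploaded report,
#
#     import bz2, json
#     body = obj.get()['Body'].read()
#     report = json.loads(bz2.decompress(body))
#     print(report['total_seconds'])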


def print_regressions(head_report: Report, *, num_prev_commits: int) -> None:
    sha1 = os.environ.get("CIRCLE_SHA1", "HEAD")

    base = subprocess.check_output(
        ["git", "merge-base", sha1, "origin/master"],
        encoding="ascii",
    ).strip()

    count_spec = f"{base}..{sha1}"
    intermediate_commits = int(subprocess.check_output(
        ["git", "rev-list", "--count", count_spec],
        encoding="ascii"
    ))
    ancestry_path = int(subprocess.check_output(
        ["git", "rev-list", "--ancestry-path", "--count", count_spec],
        encoding="ascii",
    ))

    # if current commit is already on master, we need to exclude it from
    # this history; otherwise we include the merge-base
    commits = subprocess.check_output(
        ["git", "rev-list", f"--max-count={num_prev_commits+1}", base],
        encoding="ascii",
    ).splitlines()
    on_master = False
    if base == sha1:
        on_master = True
        commits = commits[1:]
    else:
        commits = commits[:-1]

    job = os.environ.get("CIRCLE_JOB", "")
    objects: Dict[Commit, List[Report]] = defaultdict(list)

    for commit in commits:
        # touch the defaultdict so the commit has an entry even if no
        # reports are found for it
        objects[commit]
        summaries = get_test_stats_summaries_for_job(sha=commit, job_prefix=job)
        for _, summary in summaries.items():
            objects[commit].extend(summary)

    print()
    print(regression_info(
        head_sha=sha1,
        head_report=head_report,
        base_reports=objects,
        job_name=job,
        on_master=on_master,
        ancestry_path=ancestry_path - 1,
        other_ancestors=intermediate_commits - ancestry_path,
    ), end="")


def positive_integer(value: str) -> int:
    parsed = int(value)
    if parsed < 1:
        raise argparse.ArgumentTypeError(f"{value} is not a natural number")
    return parsed


def positive_float(value: str) -> float:
    parsed = float(value)
    if parsed <= 0.0:
        raise argparse.ArgumentTypeError(f"{value} is not a positive rational number")
    return parsed


def reports_has_no_tests(reports: Dict[str, TestFile]) -> bool:
    for test_file in reports.values():
        for test_suite in test_file.test_suites.values():
            if len(test_suite.test_cases) > 0:
                return False
    return True


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Print statistics from test XML output.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--longest-of-class",
        type=positive_integer,
        default=3,
        metavar="N",
        help="how many longest tests to show for each class",
    )
    parser.add_argument(
        "--class-print-threshold",
        type=positive_float,
        default=1.0,
        metavar="N",
        help="Minimal total time to warrant class report",
    )
    parser.add_argument(
        "--longest-of-run",
        type=positive_integer,
        default=10,
        metavar="N",
        help="how many longest tests to show from the entire run",
    )
    if HAVE_BOTO3:
        parser.add_argument(
            "--upload-to-s3",
            action="store_true",
            help="upload test time to S3 bucket",
        )
        parser.add_argument(
            "--compare-with-s3",
            action="store_true",
            help="download test times for base commits and compare",
        )
    else:
        # without boto3 the two flags above are never registered, so give
        # them defaults to avoid an AttributeError when they are read below
        parser.set_defaults(upload_to_s3=False, compare_with_s3=False)
    parser.add_argument(
        "--num-prev-commits",
        type=positive_integer,
        default=10,
        metavar="N",
        help="how many previous commits to compare test times with",
    )
    parser.add_argument(
        "--use-json",
        metavar="FILE.json",
        help="compare S3 with JSON file, instead of the test report folder",
    )
    parser.add_argument(
        "folder",
        help="test report folder",
    )
    args = parser.parse_args()

    reports_by_file = parse_reports(args.folder)
    if reports_has_no_tests(reports_by_file):
        print(f"No tests in reports found in {args.folder}")
        sys.exit(0)

    try:
        send_report_to_scribe(reports_by_file)
    except Exception as e:
        print(f"error encountered when uploading to scribe: {e}")

    # longest_tests can contain duplicates as the same tests can be spawned from different files
    longest_tests: List[TestCase] = []
    total_time = 0.0
    for filename, test_filename in reports_by_file.items():
        for suite_name, test_suite in test_filename.test_suites.items():
            total_time += test_suite.total_time
            if test_suite.total_time >= args.class_print_threshold:
                test_suite.print_report(args.longest_of_class)
                longest_tests.extend(test_suite.test_cases.values())
    longest_tests = sorted(longest_tests, key=lambda x: x.time)[-args.longest_of_run:]

    obj = assemble_s3_object(reports_by_file, total_seconds=total_time)

    if args.upload_to_s3:
        try:
            send_report_to_s3(obj)
        except Exception as e:
            print(f"error encountered when uploading to s3: {e}")

    print(f"Total runtime is {datetime.timedelta(seconds=total_time)}")
    print(
        f"{len(longest_tests)} longest tests of entire run"
        f" (ignoring suites totaling less than {args.class_print_threshold} seconds):"
    )
    for test_case in reversed(longest_tests):
        print(f"    {test_case.class_name}.{test_case.name} time: {test_case.time:.2f} seconds")

    if args.compare_with_s3:
        head_json = obj
        if args.use_json:
            head_json = json.loads(Path(args.use_json).read_text())
        print_regressions(head_json, num_prev_commits=args.num_prev_commits)