pytorch/tools/code_coverage/package/oss/init.py
Sam Estep 21ef248fb8 [reland] Report test time regressions (#50171)
Summary:
This is a followup to https://github.com/pytorch/pytorch/issues/49190. Broadly, the goal is to make it easy to identify test time regressions introduced by PRs. The eventual hope is to use this information to edit Dr CI comments, but this PR only does the analysis and prints it to stdout; a followup PR would be needed to edit the actual comments on GitHub.

**Important:** for uninteresting reasons, this PR moves the `print_test_stats.py` file.

- *Before:* `test/print_test_stats.py`
- *After:* `torch/testing/_internal/print_test_stats.py`

Notes on the approach:

- Just getting the mean and stdev for the total job time of the last _N_ commits isn't sufficient, because e.g. if `master` was broken 5 commits ago, then a lot of those job times will be much shorter, breaking the statistics.
- We use the commit history to make better estimates of the mean and stdev of individual test (and suite) times, counting a historical commit only when the test is present there and its status matches that of the base commit (see the sketch after this list).
- We list all the tests that were removed or added, or whose status changed (e.g. skipped to not skipped, or vice versa), along with time (estimate) info for that test case and its containing suite.
- We don't list tests whose time changed a lot if their status didn't change, because there's a lot of noise and it's unclear how to do that well without too many false positives.
- We show a human-readable commit graph that indicates exactly how many commits are in the pool of commits that could be causing regressions (e.g. if a PR has multiple commits in it, or if the base commit on `master` doesn't have a report in S3).
- We don't show an overall estimate of whether the PR increased or decreased the total test job time, because it's noisy and it's a bit tricky to aggregate stdevs up from individual tests to the whole job level. This might change in a followup PR.
- Instead, we simply show a summary at the bottom which says how many tests were removed/added/modified (where "modified" means that the status changed), and our best estimates of the mean times (and stdevs) of those changes.
- Importantly, the summary at the bottom is only for the test cases that were already shown in the more verbose diff report, and does not include any information about tests whose status didn't change but whose running time got much longer.
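
As a concrete illustration of the status-matched estimate above, here is a minimal sketch; the report shape and field names are assumptions for illustration, not the actual schema of the S3 reports:

```
import statistics
from typing import Dict, List, Optional, Tuple

# Hypothetical report shape: test name -> (status, seconds). The real reports
# are *.json.bz2 files under s3://ossci-metrics/test_time/; their exact schema
# is assumed here.
Report = Dict[str, Tuple[str, float]]

def estimate(
    test_name: str, base: Report, history: List[Report]
) -> Optional[Tuple[float, float]]:
    """Estimate (mean, stdev) of one test's time from historical commits,
    counting a historical run only when the test is present there and its
    status (e.g. skipped vs. not skipped) matches the base commit."""
    if test_name not in base:
        return None
    base_status, _ = base[test_name]
    times = []
    for report in history:
        if test_name in report:
            status, seconds = report[test_name]
            if status == base_status:
                times.append(seconds)
    if len(times) < 2:  # need at least two samples for a stdev
        return None
    return statistics.mean(times), statistics.stdev(times)
```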

Pull Request resolved: https://github.com/pytorch/pytorch/pull/50171

Test Plan:
To run the unit tests:
```
$ python test/test_testing.py
$ python test/print_test_stats.py
```

To verify that this works, check the [CircleCI logs](https://app.circleci.com/pipelines/github/pytorch/pytorch/258628/workflows/9cfadc34-e042-485e-b3b3-dc251f160307) for a test job run on this PR; for example:
- pytorch_linux_bionic_py3_6_clang9_test

To test locally, use the following steps.

First run an arbitrary test suite (you need to have some XML reports so that `test/print_test_stats.py` runs, but we'll be ignoring them here via the `--use-json` CLI option):
```
$ DATA_DIR=/tmp
$ ARBITRARY_TEST=testing
$ python test/test_$ARBITRARY_TEST.py --save-xml=$DATA_DIR/test/test_$ARBITRARY_TEST
```
Now choose a commit and a test job (it has to be on `master` since we're going to grab the test time data from S3, and [we only upload test times to S3 on the `master`, `nightly`, and `release` branches](https://github.com/pytorch/pytorch/pull/49645)):
```
$ export CIRCLE_SHA1=c39fb9771d89632c5c3a163d3c00af3bef1bd489
$ export CIRCLE_JOB=pytorch_linux_bionic_py3_6_clang9_test
```
Download the `*.json.bz2` file(s) for that commit/job pair:
```
$ aws s3 cp s3://ossci-metrics/test_time/$CIRCLE_SHA1/$CIRCLE_JOB/ $DATA_DIR/ossci-metrics/test_time/$CIRCLE_SHA1/$CIRCLE_JOB --recursive
```
And feed everything into `test/print_test_stats.py`:
```
$ bzip2 -kdc $DATA_DIR/ossci-metrics/test_time/$CIRCLE_SHA1/$CIRCLE_JOB/*Z.json.bz2 | torch/testing/_internal/print_test_stats.py --compare-with-s3 --use-json=/dev/stdin $DATA_DIR/test/test_$ARBITRARY_TEST
```
The first part of the output should be the same as before this PR; here is the new part, at the end of the output:

- https://pastebin.com/Jj1svhAn

Reviewed By: malfet, izdeby

Differential Revision: D26317769

Pulled By: samestep

fbshipit-source-id: 1ba06cec0fafac77f9e7341d57079543052d73db
2021-02-08 15:35:21 -08:00

import argparse
import os
from typing import List, Optional, Tuple

from ..util.setting import (
    JSON_FOLDER_BASE_DIR,
    LOG_DIR,
    CompilerType,
    Option,
    Test,
    TestList,
    TestType,
)
from ..util.utils import (
    clean_up,
    create_folder,
    print_log,
    raise_no_test_found_exception,
    remove_file,
    remove_folder,
)
from ..util.utils_init import add_arguments_utils, create_folders, get_options
from .utils import (
    clean_up_gcda,
    detect_compiler_type,
    get_llvm_tool_path,
    get_oss_binary_folder,
    get_pytorch_folder,
)

# Python tests skipped by default under clang because their coverage output is
# too large for llvm to merge (see get_python_run_only below).
BLOCKED_PYTHON_TESTS = {
    "run_test.py",
    "test_dataloader.py",
    "test_multiprocessing.py",
    "test_multiprocessing_spawn.py",
    "test_utils.py",
}


def initialization() -> Tuple[Option, TestList, List[str]]:
    # create folders if they don't exist
    create_folders()
    # add arguments
    parser = argparse.ArgumentParser()
    parser = add_arguments_utils(parser)
    parser = add_arguments_oss(parser)
    # parse arguments
    (options, args_interested_folder, args_run_only, arg_clean) = parse_arguments(
        parser
    )
    # clean up
    if arg_clean:
        clean_up_gcda()
        clean_up()
    # get test lists
    test_list = get_test_list(args_run_only)
    # get interested folders -- the final report will only cover these folders
    interested_folders = empty_list_if_none(args_interested_folder)
    # print initialization information
    print_init_info()
    # remove the previous run's log
    remove_file(os.path.join(LOG_DIR, "log.txt"))
    return (options, test_list, interested_folders)


def add_arguments_oss(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
    parser.add_argument(
        "--run-only",
        help="only run certain test(s), for example: atest test_nn.py.",
        nargs="*",
        default=None,
    )
    return parser


def parse_arguments(
    parser: argparse.ArgumentParser,
) -> Tuple[Option, Optional[List[str]], Optional[List[str]], Optional[bool]]:
    # parse args
    args = parser.parse_args()
    # get option
    options = get_options(args)
    return (options, args.interest_only, args.run_only, args.clean)


def get_test_list_by_type(
    run_only: Optional[List[str]], test_type: TestType
) -> TestList:
    test_list: TestList = []
    binary_folder = get_oss_binary_folder(test_type)
    g = os.walk(binary_folder)
    for _, _, file_list in g:
        for file_name in file_list:
            if run_only is not None and file_name not in run_only:
                continue
            # in oss, target_pattern is only used when printing the report --
            # to show which tests we have run
            test: Test = Test(
                name=file_name,
                target_pattern=file_name,
                test_set="",
                test_type=test_type,
            )
            test_list.append(test)
    return test_list


def get_test_list(run_only: Optional[List[str]]) -> TestList:
    test_list: TestList = []
    # add c++ test list
    test_list.extend(get_test_list_by_type(run_only, TestType.CPP))
    # add python test list
    py_run_only = get_python_run_only(run_only)
    test_list.extend(get_test_list_by_type(py_run_only, TestType.PY))
    # didn't find any test to run
    if not test_list:
        raise_no_test_found_exception(
            get_oss_binary_folder(TestType.CPP), get_oss_binary_folder(TestType.PY)
        )
    return test_list


def empty_list_if_none(arg_interested_folder: Optional[List[str]]) -> List[str]:
    if arg_interested_folder is None:
        return []
    # if this argument is specified, just return it
    return arg_interested_folder


def gcc_export_init() -> None:
    remove_folder(JSON_FOLDER_BASE_DIR)
    create_folder(JSON_FOLDER_BASE_DIR)


def get_python_run_only(args_run_only: Optional[List[str]]) -> List[str]:
    # if the user specifies the run-only option, use it
    if args_run_only:
        return args_run_only
    # otherwise use the default setting, which differs between gcc and clang
    if detect_compiler_type() == CompilerType.GCC:
        return ["run_test.py"]
    else:
        # for clang, some tests produce intermediate files too large for llvm
        # to merge, so we need to skip them
        run_only: List[str] = []
        binary_folder = get_oss_binary_folder(TestType.PY)
        g = os.walk(binary_folder)
        for _, _, file_list in g:
            for file_name in file_list:
                if file_name in BLOCKED_PYTHON_TESTS or not file_name.endswith(".py"):
                    continue
                run_only.append(file_name)
            # only run tests in the first-level folder of test/
            break
        return run_only


def print_init_info() -> None:
    print_log("pytorch folder: ", get_pytorch_folder())
    print_log("cpp test binaries folder: ", get_oss_binary_folder(TestType.CPP))
    print_log("python test scripts folder: ", get_oss_binary_folder(TestType.PY))
    print_log("compiler type: ", detect_compiler_type().value)
    print_log(
        "llvm tool folder (only for clang; if you are using gcov, please ignore it): ",
        get_llvm_tool_path(),
    )
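
For orientation, here is a minimal sketch of how a driver script might consume `initialization()`. The import path and the `main()` wrapper are assumptions for illustration; the actual entry point in tools/code_coverage may differ:

```
# Hypothetical driver, assuming it is run from tools/code_coverage/ so that
# the `package` directory is importable; the real entry point may differ.
from package.oss.init import initialization

def main() -> None:
    # options: the parsed coverage options (an Option object)
    # test_list: the C++ binaries and Python test scripts to run
    # interested_folders: folders the final report will cover
    (options, test_list, interested_folders) = initialization()
    print(f"collected {len(test_list)} tests")

if __name__ == "__main__":
    main()
```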