mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-07 12:21:27 +01:00
Summary: expecttest and test_expecttest are the implementation and tests for this functionality. I wired it up to the --accept flag, but there's also a new environment variable EXPECTTEST_ACCEPT which may be more convenient to trigger. Haven't tested if this works in fbcode. There may be a few expect tests which will benefit from inline treatment, but I just did one to show it works.

Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/12825
Reviewed By: teng-li
Differential Revision: D10448630
Pulled By: ezyang
fbshipit-source-id: 3d339f82e2d00891309620a60e13039fa1ed8b46
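
For context, a minimal sketch of the inline expect-test workflow the summary describes. This is an illustration only: the class and method names follow the expecttest module this PR adds, but treat the exact API as an assumption rather than a spec.

# Hypothetical example, not part of run_test.py below. With
# EXPECTTEST_ACCEPT=1 in the environment (or, per the summary, the --accept
# flag), a mismatch does not fail the test: the expected string literal is
# rewritten in place in this source file.
import unittest

import expecttest  # the test/expecttest.py module introduced by this PR


class TestRepr(expecttest.TestCase, unittest.TestCase):
    def test_list_repr(self):
        self.assertExpectedInline(repr([1, 2, 3]), """[1, 2, 3]""")


if __name__ == '__main__':
    unittest.main()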
395 lines · 13 KiB · Python
#!/usr/bin/env python

from __future__ import print_function

import argparse
import os
import shlex
import shutil
import signal
import subprocess
import sys
import tempfile

import torch
from torch.utils import cpp_extension
from common_utils import TEST_WITH_ROCM
import torch.distributed as dist

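# run_test.py is the top-level driver for the PyTorch test suite: it shells
# out to the individual test_<name>.py files listed in TESTS below.
# Illustrative invocations (flags are defined in parse_args below):
#   python run_test.py                    # run every test module
#   python run_test.py -i autograd nn     # include only these modules
#   python run_test.py -x distributed     # run everything except distributed
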
TESTS = [
    'autograd',
    'cpp_extensions',
    'c10d',
    'cuda',
    'cuda_primary_ctx',
    'dataloader',
    'distributed',
    'distributions',
    'expecttest',
    'indexing',
    'jit',
    'multiprocessing',
    'nccl',
    'nn',
    'numba_integration',
    'optim',
    'sparse',
    'thd_distributed',
    'torch',
    'type_info',
    'utils',
]

WINDOWS_BLACKLIST = [
    'distributed',
    'thd_distributed',
]

ROCM_BLACKLIST = [
    'c10d',
    'cpp_extensions',
    'distributed',
    'distributions',
    'multiprocessing',
    'nccl',
    'thd_distributed',
    'utils',
]

DISTRIBUTED_TESTS_CONFIG = {
    'gloo': {
        'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
    },
}


if dist.is_available():
    if dist.is_mpi_available():
        DISTRIBUTED_TESTS_CONFIG['mpi'] = {
            'WORLD_SIZE': '3'
        }
    if dist.is_nccl_available():
        DISTRIBUTED_TESTS_CONFIG['nccl'] = {
            'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
        }


THD_DISTRIBUTED_TESTS_CONFIG = {
    'tcp': {
        'WORLD_SIZE': '3'
    },
    'gloo': {
        'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
    },
    # THD NCCL and MPI tests are known to be flaky in CI
}

# https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python
SIGNALS_TO_NAMES_DICT = dict((getattr(signal, n), n) for n in dir(signal)
                             if n.startswith('SIG') and '_' not in n)
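# e.g. on Linux, SIGNALS_TO_NAMES_DICT[11] == 'SIGSEGV' and
# SIGNALS_TO_NAMES_DICT[9] == 'SIGKILL' (signal numbers are platform-dependent).

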
def print_to_stderr(message):
    print(message, file=sys.stderr)


def shell(command, cwd=None):
    # cwd defaults to None so bare calls like shell('coverage erase') work.
    sys.stdout.flush()
    sys.stderr.flush()
    return subprocess.call(
        shlex.split(command), universal_newlines=True, cwd=cwd)


def get_shell_output(command):
    return subprocess.check_output(shlex.split(command)).decode().strip()


def run_test(python, test_module, test_directory, options):
    unittest_args = options.additional_unittest_args
    if options.verbose:
        unittest_args.append('--verbose')
    unittest_args = ' '.join(unittest_args)
    # Can't call `python -m unittest test_*` here because it doesn't run code
    # in `if __name__ == '__main__': `. So call `python test_*.py` instead.
    return shell('{} {}.py {}'.format(python, test_module, unittest_args),
                 test_directory)
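# e.g. run_test('python', 'test_nn', '/path/to/test', options) ends up
# executing `python test_nn.py --verbose` from /path/to/test when -v is set.

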
def test_cpp_extensions(python, test_module, test_directory, options):
    try:
        cpp_extension.verify_ninja_availability()
    except RuntimeError:
        print(
            'Ninja is not available. Skipping C++ extensions test. '
            "Install ninja with 'pip install ninja' or 'conda install ninja'.")
        return 0
    return_code = shell('{} setup.py install --root ./install'.format(python),
                        os.path.join(test_directory, 'cpp_extensions'))
    if return_code != 0:
        return return_code

    python_path = os.environ.get('PYTHONPATH', '')
    try:
        cpp_extensions = os.path.join(test_directory, 'cpp_extensions')
        install_directory = ''
        # install directory is the one that is named site-packages
        for root, directories, _ in os.walk(os.path.join(cpp_extensions, 'install')):
            for directory in directories:
                if '-packages' in directory:
                    install_directory = os.path.join(root, directory)

        assert install_directory, 'install_directory must not be empty'
        os.environ['PYTHONPATH'] = os.pathsep.join([install_directory, python_path])
        return run_test(python, test_module, test_directory, options)
    finally:
        os.environ['PYTHONPATH'] = python_path


def test_distributed(python, test_module, test_directory, options):
    mpi_available = subprocess.call('command -v mpiexec', shell=True) == 0
    if options.verbose and not mpi_available:
        print_to_stderr(
            'MPI not available -- MPI backend tests will be skipped')
    config = DISTRIBUTED_TESTS_CONFIG
    if test_module == "test_thd_distributed":
        config = THD_DISTRIBUTED_TESTS_CONFIG
    for backend, env_vars in config.items():
        if backend == 'mpi' and not mpi_available:
            continue
        for with_init_file in {True, False}:
            tmp_dir = tempfile.mkdtemp()
            if options.verbose:
                with_init = ' with file init_method' if with_init_file else ''
                print_to_stderr(
                    'Running distributed tests for the {} backend{}'.format(
                        backend, with_init))
            os.environ['TEMP_DIR'] = tmp_dir
            os.environ['BACKEND'] = backend
            os.environ['INIT_METHOD'] = 'env://'
            os.environ.update(env_vars)
            if with_init_file:
                if test_module == "test_distributed":
                    init_method = 'file://{}/'.format(tmp_dir)
                else:
                    init_method = 'file://{}/shared_init_file'.format(tmp_dir)
                os.environ['INIT_METHOD'] = init_method
            try:
                os.mkdir(os.path.join(tmp_dir, 'barrier'))
                os.mkdir(os.path.join(tmp_dir, 'test_dir'))
                if backend == 'mpi':
                    # test mpiexec for --noprefix option
                    devnull = open(os.devnull, 'w')
                    noprefix_opt = '--noprefix' if subprocess.call(
                        'mpiexec -n 1 --noprefix bash -c ""', shell=True,
                        stdout=devnull, stderr=subprocess.STDOUT) == 0 else ''

                    mpiexec = 'mpiexec -n 3 {} {}'.format(noprefix_opt, python)

                    return_code = run_test(mpiexec, test_module,
                                           test_directory, options)
                else:
                    return_code = run_test(python, test_module, test_directory,
                                           options)
                if return_code != 0:
                    return return_code
            finally:
                shutil.rmtree(tmp_dir)
    return 0
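# For example, with the gloo backend on a machine where WORLD_SIZE resolves
# to '3', each pass above effectively runs:
#   BACKEND=gloo WORLD_SIZE=3 INIT_METHOD=env:// TEMP_DIR=<mkdtemp dir>
#   python test_distributed.py
# once with env:// and once with a file:// init_method rooted in TEMP_DIR.

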
CUSTOM_HANDLERS = {
    'cpp_extensions': test_cpp_extensions,
    'distributed': test_distributed,
    'thd_distributed': test_distributed,
}
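# Each handler shares run_test's (python, test_module, test_directory, options)
# signature, so main() can dispatch uniformly via CUSTOM_HANDLERS.get(...).

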
def parse_test_module(test):
    return test.split('.')[0]


class TestChoices(list):
    def __init__(self, *args, **kwargs):
        super(TestChoices, self).__init__(args[0])

    def __contains__(self, item):
        return list.__contains__(self, parse_test_module(item))


def parse_args():
    parser = argparse.ArgumentParser(
        description='Run the PyTorch unit test suite',
        epilog='where TESTS is any of: {}'.format(', '.join(TESTS)))
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='print verbose information and test-by-test results')
    parser.add_argument(
        '-p', '--python', help='the python interpreter to execute tests with')
    parser.add_argument(
        '-c', '--coverage', action='store_true', help='enable coverage')
    parser.add_argument(
        '-i',
        '--include',
        nargs='+',
        choices=TestChoices(TESTS),
        default=TESTS,
        metavar='TESTS',
        help='select a set of tests to include (defaults to ALL tests).'
             ' tests can be specified with module name, module.TestClass'
             ' or module.TestClass.test_method')
    parser.add_argument(
        '-x',
        '--exclude',
        nargs='+',
        choices=TESTS,
        metavar='TESTS',
        default=[],
        help='select a set of tests to exclude')
    parser.add_argument(
        '-f',
        '--first',
        choices=TESTS,
        metavar='TESTS',
        help='select the test to start from (excludes previous tests)')
    parser.add_argument(
        '-l',
        '--last',
        choices=TESTS,
        metavar='TESTS',
        help='select the last test to run (excludes following tests)')
    parser.add_argument(
        '--ignore-win-blacklist',
        action='store_true',
        help='always run blacklisted windows tests')
    parser.add_argument(
        'additional_unittest_args',
        nargs='*',
        help='additional arguments passed through to unittest, e.g., '
             'python run_test.py -i sparse -- TestSparse.test_factory_size_check')
    return parser.parse_args()


def get_python_command(options):
    if options.coverage:
        return 'coverage run --parallel-mode --source torch'
    elif options.python:
        return options.python
    else:
        return os.environ.get('PYCMD', 'python')


def find_test_index(test, selected_tests, find_last_index=False):
    """Find the index of the first or last occurrence of a given test/test module in the list of selected tests.

    This function is used to determine the indexes when slicing the list of selected tests when
    ``options.first`` (:attr:`find_last_index`=False) and/or ``options.last`` (:attr:`find_last_index`=True) are used.

    :attr:`selected_tests` can be a list that contains multiple consecutive occurrences of tests
    as part of the same test module, e.g.:

    ```
    selected_tests = ['autograd', 'cuda', **'torch.TestTorch.test_acos',
                      'torch.TestTorch.test_tan', 'torch.TestTorch.test_add'**, 'utils']
    ```

    If :attr:`test`='torch' and :attr:`find_last_index`=False, the result should be **2**.
    If :attr:`test`='torch' and :attr:`find_last_index`=True, the result should be **4**.

    Arguments:
        test (str): Name of test to lookup
        selected_tests (list): List of tests
        find_last_index (bool, optional): should we lookup the index of first or last
            occurrence (first is default)

    Returns:
        index of the first or last occurrence of the given test
    """
    idx = 0
    found_idx = -1
    for t in selected_tests:
        if t.startswith(test):
            found_idx = idx
            if not find_last_index:
                break
        idx += 1
    return found_idx


def exclude_tests(exclude_list, selected_tests, exclude_message=None):
    tests_copy = selected_tests[:]
    for exclude_test in exclude_list:
        for test in tests_copy:
            if test.startswith(exclude_test):
                if exclude_message is not None:
                    print_to_stderr('Excluding {} {}'.format(test, exclude_message))
                selected_tests.remove(test)
    return selected_tests
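# e.g. exclude_tests(['distributed'], ['autograd', 'distributed', 'nn'])
# returns ['autograd', 'nn']; matching is by prefix, so an include such as
# 'distributed.TestDistBackend' would be dropped as well.

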
def get_selected_tests(options):
    selected_tests = options.include

    if options.first:
        first_index = find_test_index(options.first, selected_tests)
        selected_tests = selected_tests[first_index:]

    if options.last:
        last_index = find_test_index(options.last, selected_tests, find_last_index=True)
        selected_tests = selected_tests[:last_index + 1]

    selected_tests = exclude_tests(options.exclude, selected_tests)

    if sys.platform == 'win32' and not options.ignore_win_blacklist:
        ostype = os.environ.get('MSYSTEM')
        target_arch = os.environ.get('VSCMD_ARG_TGT_ARCH')
        if ostype != 'MINGW64' or target_arch != 'x64':
            WINDOWS_BLACKLIST.append('cpp_extensions')

        selected_tests = exclude_tests(WINDOWS_BLACKLIST, selected_tests, 'on Windows')

    elif TEST_WITH_ROCM:
        selected_tests = exclude_tests(ROCM_BLACKLIST, selected_tests, 'on ROCm')

    return selected_tests


def main():
    options = parse_args()
    python = get_python_command(options)
    test_directory = os.path.dirname(os.path.abspath(__file__))
    selected_tests = get_selected_tests(options)

    if options.verbose:
        print_to_stderr('Selected tests: {}'.format(', '.join(selected_tests)))

    if options.coverage:
        shell('coverage erase')

    for test in selected_tests:
        test_name = 'test_{}'.format(test)
        test_module = parse_test_module(test)

        print_to_stderr('Running {} ...'.format(test_name))
        handler = CUSTOM_HANDLERS.get(test_module, run_test)
        return_code = handler(python, test_name, test_directory, options)
        assert isinstance(return_code, int) and not isinstance(
            return_code, bool), 'Return code should be an integer'
        if return_code != 0:
            message = '{} failed!'.format(test_name)
            if return_code < 0:
                # subprocess.Popen returns the child process' exit signal as
                # return code -N, where N is the signal number.
                signal_name = SIGNALS_TO_NAMES_DICT[-return_code]
                message += ' Received signal: {}'.format(signal_name)
            raise RuntimeError(message)

    if options.coverage:
        shell('coverage combine')
        shell('coverage html')


if __name__ == '__main__':
    main()