Summary:
[Currently](faa4da49ff/.flake8 (L22)), our `.flake8` config file has the `exclude` pattern `scripts`. I'm guessing that this is just meant to exclude the top-level `scripts` dir from Flake8, but it also applies to the following (apparently erroneously):
- `.circleci/scripts`
- `.github/scripts`
- `test/scripts`
This PR corrects the problem by making all the `exclude` patterns (except for the wildcard `*.pyi` pattern) relative to the repository root. Since this PR already touches every `exclude` line, it also sorts them to help reduce merge conflicts when `.flake8` is edited in the future. The sorting happened to reveal that the `build` pattern was present twice, so it has now been deduplicated.
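For illustration only (a minimal sketch, not flake8's actual implementation): flake8-style exclusion compares each pattern against both a path's basename and the path itself, which is why an unanchored `scripts` pattern matches every directory named `scripts`, while a root-relative `./scripts` matches only the top-level one. The `excluded` helper and the paths below are hypothetical:
```
import fnmatch
import os.path

paths = ['./scripts', './.circleci/scripts', './.github/scripts', './test/scripts']

def excluded(path, pattern):
    # Assumed flake8-style matching: a pattern excludes a path if it matches
    # either the path's basename or the relative path itself.
    return (fnmatch.fnmatch(os.path.basename(path), pattern)
            or fnmatch.fnmatch(path, pattern))

for p in paths:
    print(p, excluded(p, 'scripts'), excluded(p, './scripts'))
# 'scripts' excludes all four directories; './scripts' excludes only the top-level one.
```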
Pull Request resolved: https://github.com/pytorch/pytorch/pull/55178
Test Plan:
Locally:
```
flake8
```
And also [in CI](https://github.com/pytorch/pytorch/pull/55178/checks?check_run_id=2249949511).
Reviewed By: janeyx99
Differential Revision: D27520412
Pulled By: samestep
fbshipit-source-id: 359275c10ca600ee4ce7906e3a7587ffaa4ae1ed
163 lines · 6.6 KiB · Python · Executable File
#!/usr/bin/env python
"""This script runs cuda-memcheck on the specified unit test. Each test case
is run in its own isolated process with a timeout so that:
1) different test cases won't influence each other, and
2) in case of hang, the script would still finish in a finite amount of time.
The output will be written to a log file, result.log.

Example usage:
    python run_cuda_memcheck.py ../test_torch.py 600

Note that running cuda-memcheck could be very slow.
"""

import asyncio
import torch
import multiprocessing
import argparse
import subprocess
import tqdm
import os
import sys
import cuda_memcheck_common as cmc

ALL_TESTS = []
GPUS = torch.cuda.device_count()

# parse arguments
parser = argparse.ArgumentParser(description="Run isolated cuda-memcheck on unit tests")
parser.add_argument('filename', help="the python file for a test, such as test_torch.py")
parser.add_argument('timeout', type=int, help='kill the test if it does not terminate within the given number of seconds')
parser.add_argument('--strict', action='store_true',
                    help='Whether to show cublas/cudnn errors. These errors are ignored by default '
                         'because cublas/cudnn does not run error-free under cuda-memcheck.')
parser.add_argument('--nproc', type=int, default=multiprocessing.cpu_count(),
                    help='Number of processes running tests, defaults to the number of cores in the system')
parser.add_argument('--gpus', default='all',
                    help='GPU assignments for each process; either "all", or a ":"-separated list like "1,2:3,4:5,6"')
parser.add_argument('--ci', action='store_true',
                    help='Whether this script is executed in CI. When executed inside a CI, this script fails when '
                         'an error is detected. Also, it will not show a tqdm progress bar, but directly print '
                         'the error to stdout instead.')
parser.add_argument('--nohang', action='store_true', help='Treat timeout as success')
parser.add_argument('--split', type=int, default=1, help='Split the job into pieces')
parser.add_argument('--rank', type=int, default=0, help='Which piece this process should pick')
args = parser.parse_args()

# Filter that ignores cublas/cudnn/cufft errors
# TODO (@zasdfgbnm): When can we remove this? Will cublas/cudnn run error-free under cuda-memcheck?
def is_ignored_only(output):
    try:
        report = cmc.parse(output)
    except cmc.ParseError:
        # in case the simple parser fails to parse the cuda-memcheck output,
        # this error is never ignored.
        return False
    count_ignored_errors = 0
    for e in report.errors:
        if 'libcublas' in ''.join(e.stack) or 'libcudnn' in ''.join(e.stack) or 'libcufft' in ''.join(e.stack):
            count_ignored_errors += 1
    return count_ignored_errors == report.num_errors

# Set environment PYTORCH_CUDA_MEMCHECK=1 to allow skipping some tests
os.environ['PYTORCH_CUDA_MEMCHECK'] = '1'

# Discover tests:
# To get a list of tests, run:
# pytest --setup-only test/test_torch.py
# and then parse the output
proc = subprocess.Popen(['pytest', '--setup-only', args.filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = proc.communicate()
lines = stdout.decode().strip().splitlines()
for line in lines:
    if '(fixtures used:' in line:
        line = line.strip().split()[0]
        line = line[line.find('::') + 2:]
        line = line.replace('::', '.')
        ALL_TESTS.append(line)

# Do a simple filtering:
# if 'cpu' or 'CPU' is in the name and 'cuda' or 'CUDA' is not in the name, then skip it
def is_cpu_only(name):
    name = name.lower()
    return ('cpu' in name) and not ('cuda' in name)

ALL_TESTS = [x for x in ALL_TESTS if not is_cpu_only(x)]

# Split all tests into chunks, and run only the selected chunk
ALL_TESTS.sort()
chunk_size = (len(ALL_TESTS) + args.split - 1) // args.split
start = chunk_size * args.rank
end = chunk_size * (args.rank + 1)
ALL_TESTS = ALL_TESTS[start:end]

# Run tests:
# Since running cuda-memcheck on PyTorch unit tests is very slow, these tests must be run in parallel.
# This is done by using the coroutine feature in new Python versions. A number of coroutines are created;
# they create subprocesses and wait for them to finish. The number of running subprocesses can be
# specified by the user, and by default it is the same as the number of CPUs in the machine.
# These subprocesses are balanced across the different GPUs on the system by assigning one device per process,
# or as specified by the user.
progress = 0
if not args.ci:
    logfile = open('result.log', 'w')
    progressbar = tqdm.tqdm(total=len(ALL_TESTS))
else:
    logfile = sys.stdout

    # create a fake progress bar that does not display anything
    class ProgressbarStub:
        def update(self, *args):
            return
    progressbar = ProgressbarStub()


async def run1(coroutine_id):
    global progress

    if args.gpus == 'all':
        gpuid = coroutine_id % GPUS
    else:
        gpu_assignments = args.gpus.split(':')
        assert args.nproc == len(gpu_assignments), 'Please specify a GPU assignment for each process, separated by :'
        gpuid = gpu_assignments[coroutine_id]

    while progress < len(ALL_TESTS):
        test = ALL_TESTS[progress]
        progress += 1
        cmd = f'CUDA_VISIBLE_DEVICES={gpuid} cuda-memcheck --error-exitcode 1 python {args.filename} {test}'
        proc = await asyncio.create_subprocess_shell(cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
        try:
            stdout, stderr = await asyncio.wait_for(proc.communicate(), args.timeout)
        except asyncio.TimeoutError:
            print('Timeout:', test, file=logfile)
            proc.kill()
            if args.ci and not args.nohang:
                sys.exit("Hang detected on cuda-memcheck")
        else:
            if proc.returncode == 0:
                print('Success:', test, file=logfile)
            else:
                stdout = stdout.decode()
                stderr = stderr.decode()
                should_display = args.strict or not is_ignored_only(stdout)
                if should_display:
                    print('Fail:', test, file=logfile)
                    print(stdout, file=logfile)
                    print(stderr, file=logfile)
                    if args.ci:
                        sys.exit("Failure detected on cuda-memcheck")
                else:
                    print('Ignored:', test, file=logfile)
        del proc
        progressbar.update(1)


async def main():
    tasks = [asyncio.ensure_future(run1(i)) for i in range(args.nproc)]
    for t in tasks:
        await t


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())