diff --git a/.lintrunner.toml b/.lintrunner.toml
index 1f79f1eb971..4511520c546 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -964,7 +964,6 @@ exclude_patterns = [
     'test/jit/**', # should be run through test/test_jit.py
     'test/ao/sparsity/**', # should be run through test/test_ao_sparsity.py
     'test/fx/**', # should be run through test/test_fx.py
-    'test/bottleneck_test/**', # excluded by test/run_test.py
     'test/package/**', # excluded by test/run_test.py
     'test/distributed/argparse_util_test.py',
     'test/distributed/bin/test_script.py',
@@ -1410,8 +1409,6 @@ exclude_patterns = [
     'torch/utils/benchmark/utils/timer.py',
     'torch/utils/benchmark/utils/valgrind_wrapper/__init__.py',
     'torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py',
-    'torch/utils/bottleneck/__init__.py',
-    'torch/utils/bottleneck/__main__.py',
     'torch/utils/bundled_inputs.py',
     'torch/utils/checkpoint.py',
     'torch/utils/collect_env.py',
diff --git a/docs/source/bottleneck.rst b/docs/source/bottleneck.rst
deleted file mode 100644
index ed5caf3fff5..00000000000
--- a/docs/source/bottleneck.rst
+++ /dev/null
@@ -1,62 +0,0 @@
-torch.utils.bottleneck
-======================
-
-.. automodule:: torch.utils.bottleneck
-.. currentmodule:: torch.utils.bottleneck
-
-`torch.utils.bottleneck` is a tool that can be used as an initial step for
-debugging bottlenecks in your program. It summarizes runs of your script with
-the Python profiler and PyTorch's autograd profiler.
-
-Run it on the command line with
-
-::
-
-    python -m torch.utils.bottleneck /path/to/source/script.py [args]
-
-where [args] are any number of arguments to `script.py`, or run
-``python -m torch.utils.bottleneck -h`` for more usage instructions.
-
-.. warning::
-    Because your script will be profiled, please ensure that it exits in a
-    finite amount of time.
-
-.. warning::
-    Due to the asynchronous nature of CUDA kernels, when running against
-    CUDA code, the cProfile output and CPU-mode autograd profilers may
-    not show correct timings: the reported CPU time reports the amount of time
-    used to launch the kernels but does not include the time the kernel
-    spent executing on a GPU unless the operation does a synchronize.
-    Ops that do synchronize appear to be extremely expensive under regular
-    CPU-mode profilers.
-    In these cases where timings are incorrect, the CUDA-mode autograd profiler
-    may be helpful.
-
-.. note::
-    To decide which (CPU-only-mode or CUDA-mode) autograd profiler output to
-    look at, you should first check if your script is CPU-bound
-    ("CPU total time is much greater than CUDA total time").
-    If it is CPU-bound, looking at the results of the CPU-mode autograd
-    profiler will help. If on the other hand your script spends most of its
-    time executing on the GPU, then it makes sense to start
-    looking for responsible CUDA operators in the output of the CUDA-mode
-    autograd profiler.
-
-    Of course the reality is much more complicated and your script might not be
-    in one of those two extremes depending on the part of the model you're
-    evaluating. If the profiler outputs don't help, you could try looking at
-    the result of :func:`torch.autograd.profiler.emit_nvtx()` with ``nvprof``.
-    However, please take into account that the NVTX overhead is very high and
-    often gives a heavily skewed timeline. Similarly, ``Intel® VTune™ Profiler``
-    helps to analyze performance on Intel platforms further with
-    :func:`torch.autograd.profiler.emit_itt()`.
-
-.. warning::
-    If you are profiling CUDA code, the first profiler that ``bottleneck`` runs
-    (cProfile) will include the CUDA startup time (CUDA buffer allocation cost)
-    in its time reporting. This should not matter if your bottlenecks are in
-    code much slower than the CUDA startup time.
-
-For more complicated uses of the profilers (like in a multi-GPU case),
-please see https://docs.python.org/3/library/profile.html
-or :func:`torch.autograd.profiler.profile()` for more information.
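
With the module and its docs removed, the same first-pass triage can be done
directly with torch.profiler. A minimal sketch, assuming a current PyTorch
build (the sort key and row limit mirror bottleneck's old top-15 defaults; the
workload is taken from the deleted test/bottleneck_test/test.py):

    import torch
    from torch.profiler import ProfilerActivity, profile

    # Workload borrowed from the deleted test/bottleneck_test/test.py.
    x = torch.ones((3, 3), requires_grad=True)

    # Record CPU activity, and CUDA activity too when a GPU is present.
    activities = [ProfilerActivity.CPU]
    if torch.cuda.is_available():
        activities.append(ProfilerActivity.CUDA)

    with profile(activities=activities) as prof:
        (3 * x).sum().backward()

    # Top 15 operators, analogous to bottleneck's autograd profiler section.
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=15))
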
diff --git a/docs/source/pytorch-api.md b/docs/source/pytorch-api.md
index 2e858079d23..3b3f0f627bd 100644
--- a/docs/source/pytorch-api.md
+++ b/docs/source/pytorch-api.md
@@ -76,7 +76,6 @@ storage
 torch.testing
 torch.utils
 torch.utils.benchmark
-torch.utils.bottleneck
 torch.utils.checkpoint
 torch.utils.cpp_extension
 torch.utils.data
diff --git a/test/bottleneck_test/test.py b/test/bottleneck_test/test.py
deleted file mode 100644
index 0549a6372ab..00000000000
--- a/test/bottleneck_test/test.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Owner(s): ["module: unknown"]
-
-import torch
-
-
-x = torch.ones((3, 3), requires_grad=True)
-(3 * x).sum().backward()
diff --git a/test/bottleneck_test/test_args.py b/test/bottleneck_test/test_args.py
deleted file mode 100644
index 38fc03701bf..00000000000
--- a/test/bottleneck_test/test_args.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Owner(s): ["module: unknown"]
-
-import argparse
-
-import torch
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-
-    # Required args. Raises error if they aren't passed.
-    parser.add_argument("--foo", help="foo", required=True)
-    parser.add_argument("--bar", help="bar", required=True)
-    _ = parser.parse_args()
-
-    x = torch.ones((3, 3), requires_grad=True)
-    (3 * x).sum().backward()
diff --git a/test/bottleneck_test/test_cuda.py b/test/bottleneck_test/test_cuda.py
deleted file mode 100644
index d9f9b0b8274..00000000000
--- a/test/bottleneck_test/test_cuda.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Owner(s): ["module: unknown"]
-
-import torch
-import torch.nn as nn
-
-
-class Model(nn.Module):
-    def __init__(self) -> None:
-        super().__init__()
-        self.linear = nn.Linear(20, 20)
-
-    def forward(self, input):
-        out = self.linear(input[:, 10:30])
-        return out.sum()
-
-
-def main():
-    data = torch.randn(10, 50).cuda()
-    model = Model().cuda()
-    optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
-    for _ in range(10):
-        optimizer.zero_grad()
-        loss = model(data)
-        loss.backward()
-        optimizer.step()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/test/test_utils.py b/test/test_utils.py
index 0314da6e320..7c9e4c1d334 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -3,7 +3,6 @@
 
 import os
 import random
-import re
 import shutil
 import subprocess
 import sys
@@ -633,151 +632,6 @@ class TestDataLoaderUtils(TestCase):
 test_dir = os.path.abspath(os.path.dirname(str(__file__)))
 
 
-@unittest.skipIf(
-    "SKIP_TEST_BOTTLENECK" in os.environ.keys(), "SKIP_TEST_BOTTLENECK is set"
-)
-class TestBottleneck(TestCase):
-    def _run(self, command, timeout=30):
-        """Returns (return-code, stdout, stderr)"""
-        import subprocess
-
-        p = subprocess.Popen(
-            command,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            shell=True,
-        )
-        try:
-            output, err = p.communicate(timeout=timeout)
-        except subprocess.TimeoutExpired:
-            p.kill()
-            output, err = p.communicate()
-        rc = p.returncode
-        output_str = output.decode("ascii")
-        err_str = err.decode("ascii")
-        return (rc, output_str, err_str)
-
-    def _run_bottleneck(self, test_file, scriptargs=""):
-        curdir = os.path.dirname(os.path.abspath(__file__))
-        filepath = f"{curdir}/{test_file}"
-        if scriptargs != "":
-            scriptargs = f" {scriptargs}"
-        rc, out, err = self._run(
-            f"{sys.executable} -m torch.utils.bottleneck {filepath}{scriptargs}"
-        )
-        return rc, out, err
-
-    def _check_run_args(self):
-        # Check that this fails due to missing args
-        rc, out, err = self._run_bottleneck("bottleneck_test/test_args.py")
-        self.assertEqual(
-            rc,
-            2,
-            atol=0,
-            rtol=0,
-            msg=self._fail_msg("Missing args should error", out + err),
-        )
-
-        # This should succeed
-        rc, out, err = self._run_bottleneck(
-            "bottleneck_test/test_args.py", "--foo foo --bar bar"
-        )
-        self.assertEqual(
-            rc,
-            0,
-            atol=0,
-            rtol=0,
-            msg=self._fail_msg("Should pass args to script", out + err),
-        )
-
-    def _fail_msg(self, msg, output):
-        return f"{msg}, output was:\n{output}"
-
-    def _check_environment_summary(self, output):
-        results = re.search("Environment Summary", output)
-        self.assertIsNotNone(
-            results, self._fail_msg("Should have Environment Summary", output)
-        )
-
-        # Up to five lines away from the heading, there should be the version number
-        results = re.search(
-            r"Environment Summary.*(\n.*){,5}\nPyTorch \d+\.\d+", output
-        )
-        self.assertIsNotNone(
-            results, self._fail_msg("Should have PyTorch version", output)
-        )
-
-    def _check_cprof_summary(self, output):
-        results = re.search("cProfile output", output)
-        self.assertIsNotNone(
-            results, self._fail_msg("Should have cProfile output", output)
-        )
-
-        # This assumes that after the cProfile output section we have
-        # the autograd profiler output
-        results = re.search(
-            r"cProfile output.*(\n.*){6,50}\n.*autograd profiler output", output
-        )
-        self.assertIsNotNone(
-            results,
-            self._fail_msg(
-                "Distance between cProfile and autograd prof out not in [6, 50] lines",
-                output,
-            ),
-        )
-
-    def _check_autograd_summary(self, output):
-        results = re.search("autograd profiler output", output)
-        self.assertIsNotNone(
-            results, self._fail_msg("Should have autograd profiler output", output)
-        )
-
-        # This assumes that after the autograd profiler output is the end of the
-        # output.
-        results = re.search(r"autograd profiler output.*(\n.*){6,100}", output)
-        self.assertIsNotNone(
-            results,
-            self._fail_msg(
-                "Distance between autograd prof output and end of output not in [6, 100] lines",
-                output,
-            ),
-        )
-
-    def _check_cuda(self, output):
-        if HAS_CUDA:
-            results = re.search("CUDA mode", output)
-            self.assertIsNotNone(
-                results, self._fail_msg("Should tell users CUDA", output)
-            )
-        else:
-            results = re.search("CUDA mode", output)
-            self.assertIsNone(
-                results, self._fail_msg("Should not tell users about CUDA", output)
-            )
-
-    @unittest.skipIf(HAS_CUDA, "CPU-only test")
-    def test_bottleneck_cpu_only(self):
-        rc, out, err = self._run_bottleneck("bottleneck_test/test.py")
-        self.assertEqual(rc, 0, msg=f"Run failed with\n{err}")
-
-        self._check_run_args()
-        self._check_environment_summary(out)
-        self._check_autograd_summary(out)
-        self._check_cprof_summary(out)
-        self._check_cuda(out)
-
-    @unittest.skipIf(not HAS_CUDA, "No CUDA")
-    def test_bottleneck_cuda(self):
-        rc, out, err = self._run_bottleneck("bottleneck_test/test_cuda.py")
-        self.assertEqual(rc, 0, msg=f"Run failed with\n{err}")
-
-        self._check_run_args()
-        self._check_environment_summary(out)
-        self._check_autograd_summary(out)
-        self._check_cprof_summary(out)
-        self._check_cuda(out)
-
-
 from torch.utils.collect_env import get_pretty_env_info
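
The deleted _run helper predates subprocess.run; anyone porting these checks
elsewhere can collapse it to one call. A sketch under that assumption (run_cmd
is a hypothetical name, not part of the test suite, and unlike the original it
reports a sentinel return code of -1 on timeout rather than the killed
process's code):

    import subprocess

    def run_cmd(command: str, timeout: int = 30) -> tuple[int, str, str]:
        """Same contract as TestBottleneck._run: (return-code, stdout, stderr)."""
        try:
            proc = subprocess.run(
                command, shell=True, capture_output=True, timeout=timeout
            )
        except subprocess.TimeoutExpired as e:
            # subprocess.run kills the child and waits before re-raising, so
            # any partial output is available on the exception object.
            return (-1, (e.stdout or b"").decode("ascii"), (e.stderr or b"").decode("ascii"))
        return (proc.returncode, proc.stdout.decode("ascii"), proc.stderr.decode("ascii"))
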
- results = re.search(r"autograd profiler output.*(\n.*){6,100}", output) - self.assertIsNotNone( - results, - self._fail_msg( - "Distance between autograd prof output and end of output not in [6, 100] lines", - output, - ), - ) - - def _check_cuda(self, output): - if HAS_CUDA: - results = re.search("CUDA mode", output) - self.assertIsNotNone( - results, self._fail_msg("Should tell users CUDA", output) - ) - else: - results = re.search("CUDA mode", output) - self.assertIsNone( - results, self._fail_msg("Should not tell users about CUDA", output) - ) - - @unittest.skipIf(HAS_CUDA, "CPU-only test") - def test_bottleneck_cpu_only(self): - rc, out, err = self._run_bottleneck("bottleneck_test/test.py") - self.assertEqual(rc, 0, msg=f"Run failed with\n{err}") - - self._check_run_args() - self._check_environment_summary(out) - self._check_autograd_summary(out) - self._check_cprof_summary(out) - self._check_cuda(out) - - @unittest.skipIf(not HAS_CUDA, "No CUDA") - def test_bottleneck_cuda(self): - rc, out, err = self._run_bottleneck("bottleneck_test/test_cuda.py") - self.assertEqual(rc, 0, msg=f"Run failed with\n{err}") - - self._check_run_args() - self._check_environment_summary(out) - self._check_autograd_summary(out) - self._check_cprof_summary(out) - self._check_cuda(out) - - from torch.utils.collect_env import get_pretty_env_info diff --git a/tools/testing/discover_tests.py b/tools/testing/discover_tests.py index 25fcf07de93..13511b1ec12 100644 --- a/tools/testing/discover_tests.py +++ b/tools/testing/discover_tests.py @@ -73,7 +73,6 @@ TESTS = discover_tests( cpp_tests_dir=CPP_TESTS_DIR, blocklisted_patterns=[ "ao", - "bottleneck_test", "custom_backend", "custom_operator", "fx", # executed by test_fx.py diff --git a/torch/utils/bottleneck/__init__.py b/torch/utils/bottleneck/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/torch/utils/bottleneck/__main__.py b/torch/utils/bottleneck/__main__.py deleted file mode 100644 index d8bc43be0e2..00000000000 --- a/torch/utils/bottleneck/__main__.py +++ /dev/null @@ -1,229 +0,0 @@ -# mypy: allow-untyped-defs -import argparse -import cProfile -import pstats -import sys -import os - -import torch -from torch.autograd import profiler -from torch.utils.collect_env import get_env_info - - -def redirect_argv(new_argv): - sys.argv[:] = new_argv[:] - - -def compiled_with_cuda(sysinfo): - if sysinfo.cuda_compiled_version: - return f'compiled w/ CUDA {sysinfo.cuda_compiled_version}' - return 'not compiled w/ CUDA' - - -env_summary = """ --------------------------------------------------------------------------------- - Environment Summary --------------------------------------------------------------------------------- -PyTorch {pytorch_version}{debug_str} {cuda_compiled} -Running with Python {py_version} and {cuda_runtime} - -`{pip_version} list` truncated output: -{pip_list_output} -""".strip() - - -def run_env_analysis(): - print('Running environment analysis...') - info = get_env_info() - - result: dict[str, str] = {} - - debug_str = '' - if info.is_debug_build: - debug_str = ' DEBUG' - - cuda_avail = '' - if info.is_cuda_available: - cuda = info.cuda_runtime_version - if cuda is not None: - cuda_avail = 'CUDA ' + cuda - else: - cuda = 'CUDA unavailable' - - pip_version = info.pip_version - pip_list_output = info.pip_packages - if pip_list_output is None: - pip_list_output = 'Unable to fetch' - - result = { - 'debug_str': debug_str, - 'pytorch_version': info.torch_version, - 'cuda_compiled': compiled_with_cuda(info), - 
diff --git a/torch/utils/bottleneck/__main__.py b/torch/utils/bottleneck/__main__.py
deleted file mode 100644
index d8bc43be0e2..00000000000
--- a/torch/utils/bottleneck/__main__.py
+++ /dev/null
@@ -1,229 +0,0 @@
-# mypy: allow-untyped-defs
-import argparse
-import cProfile
-import pstats
-import sys
-import os
-
-import torch
-from torch.autograd import profiler
-from torch.utils.collect_env import get_env_info
-
-
-def redirect_argv(new_argv):
-    sys.argv[:] = new_argv[:]
-
-
-def compiled_with_cuda(sysinfo):
-    if sysinfo.cuda_compiled_version:
-        return f'compiled w/ CUDA {sysinfo.cuda_compiled_version}'
-    return 'not compiled w/ CUDA'
-
-
-env_summary = """
---------------------------------------------------------------------------------
-  Environment Summary
---------------------------------------------------------------------------------
-PyTorch {pytorch_version}{debug_str} {cuda_compiled}
-Running with Python {py_version} and {cuda_runtime}
-
-`{pip_version} list` truncated output:
-{pip_list_output}
-""".strip()
-
-
-def run_env_analysis():
-    print('Running environment analysis...')
-    info = get_env_info()
-
-    result: dict[str, str] = {}
-
-    debug_str = ''
-    if info.is_debug_build:
-        debug_str = ' DEBUG'
-
-    cuda_avail = ''
-    if info.is_cuda_available:
-        cuda = info.cuda_runtime_version
-        if cuda is not None:
-            cuda_avail = 'CUDA ' + cuda
-        else:
-            cuda = 'CUDA unavailable'
-
-    pip_version = info.pip_version
-    pip_list_output = info.pip_packages
-    if pip_list_output is None:
-        pip_list_output = 'Unable to fetch'
-
-    result = {
-        'debug_str': debug_str,
-        'pytorch_version': info.torch_version,
-        'cuda_compiled': compiled_with_cuda(info),
-        'py_version': f'{sys.version_info[0]}.{sys.version_info[1]}',
-        'cuda_runtime': cuda_avail,
-        'pip_version': pip_version,
-        'pip_list_output': pip_list_output,
-    }
-
-    return env_summary.format(**result)
-
-
-def run_cprofile(code, globs, launch_blocking=False):
-    print('Running your script with cProfile')
-    prof = cProfile.Profile()
-    prof.enable()
-    exec(code, globs, None)
-    prof.disable()
-    return prof
-
-
-cprof_summary = """
---------------------------------------------------------------------------------
-  cProfile output
---------------------------------------------------------------------------------
-""".strip()
-
-
-def print_cprofile_summary(prof, sortby='tottime', topk=15):
-    print(cprof_summary)
-    cprofile_stats = pstats.Stats(prof).sort_stats(sortby)
-    cprofile_stats.print_stats(topk)
-
-
-def run_autograd_prof(code, globs):
-    def run_prof(use_cuda=False):
-        with profiler.profile(use_cuda=use_cuda) as prof:
-            exec(code, globs, None)
-        return prof
-
-    print('Running your script with the autograd profiler...')
-    result = [run_prof(use_cuda=False)]
-    if torch.cuda.is_available():
-        result.append(run_prof(use_cuda=True))
-    else:
-        result.append(None)
-
-    return result
-
-
-autograd_prof_summary = """
---------------------------------------------------------------------------------
-  autograd profiler output ({mode} mode)
---------------------------------------------------------------------------------
-        {description}
-{cuda_warning}
-{output}
-""".strip()
-
-
-def print_autograd_prof_summary(prof, mode, sortby='cpu_time', topk=15):
-    valid_sortby = ['cpu_time', 'cuda_time', 'cpu_time_total', 'cuda_time_total', 'count']
-    if sortby not in valid_sortby:
-        warn = ('WARNING: invalid sorting option for autograd profiler results: {}\n'
-                'Expected `cpu_time`, `cpu_time_total`, or `count`. '
-                'Defaulting to `cpu_time`.')
-        print(warn.format(sortby))
-        sortby = 'cpu_time'
-
-    if mode == 'CUDA':
-        cuda_warning = ('\n\tBecause the autograd profiler uses the CUDA event API,\n'
-                        '\tthe CUDA time column reports approximately max(cuda_time, cpu_time).\n'
-                        '\tPlease ignore this output if your code does not use CUDA.\n')
-    else:
-        cuda_warning = ''
-
-    sorted_events = sorted(prof.function_events,
-                           key=lambda x: getattr(x, sortby), reverse=True)
-    topk_events = sorted_events[:topk]
-
-    result = {
-        'mode': mode,
-        'description': f'top {topk} events sorted by {sortby}',
-        'output': torch.autograd.profiler_util._build_table(topk_events),
-        'cuda_warning': cuda_warning
-    }
-
-    print(autograd_prof_summary.format(**result))
-
-
-descript = """
-`bottleneck` is a tool that can be used as an initial step for debugging
-bottlenecks in your program.
-
-It summarizes runs of your script with the Python profiler and PyTorch\'s
-autograd profiler. Because your script will be profiled, please ensure that it
-exits in a finite amount of time.
-
-For more complicated uses of the profilers, please see
-https://docs.python.org/3/library/profile.html and
-https://pytorch.org/docs/main/autograd.html#profiler for more information.
-""".strip()
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description=descript)
-    parser.add_argument('scriptfile', type=str,
-                        help='Path to the script to be run. '
-                        'Usually run with `python path/to/script`.')
-    parser.add_argument('args', type=str, nargs=argparse.REMAINDER,
-                        help='Command-line arguments to be passed to the script.')
-    return parser.parse_args()
-
-
-def cpu_time_total(autograd_prof):
-    return sum(event.cpu_time_total for event in autograd_prof.function_events)
-
-
-def main():
-    args = parse_args()
-
-    # Customizable constants.
-    scriptfile = args.scriptfile
-    scriptargs = [] if args.args is None else args.args
-    scriptargs.insert(0, scriptfile)
-    cprofile_sortby = 'tottime'
-    cprofile_topk = 15
-    autograd_prof_sortby = 'cpu_time_total'
-    autograd_prof_topk = 15
-
-    redirect_argv(scriptargs)
-
-    sys.path.insert(0, os.path.dirname(scriptfile))
-    with open(scriptfile, 'rb') as stream:
-        code = compile(stream.read(), scriptfile, 'exec')
-    globs = {
-        '__file__': scriptfile,
-        '__name__': '__main__',
-        '__package__': None,
-        '__cached__': None,
-    }
-
-    print(descript)
-
-    env_summary = run_env_analysis()
-
-    if torch.cuda.is_available():
-        torch.cuda.init()
-    cprofile_prof = run_cprofile(code, globs)
-    autograd_prof_cpu, autograd_prof_cuda = run_autograd_prof(code, globs)
-
-    print(env_summary)
-    print_cprofile_summary(cprofile_prof, cprofile_sortby, cprofile_topk)
-
-    if not torch.cuda.is_available():
-        print_autograd_prof_summary(autograd_prof_cpu, 'CPU', autograd_prof_sortby, autograd_prof_topk)
-        return
-
-    # Print both the result of the CPU-mode and CUDA-mode autograd profilers
-    # if their execution times are very different.
-    cuda_prof_exec_time = cpu_time_total(autograd_prof_cuda)
-    if len(autograd_prof_cpu.function_events) > 0:
-        cpu_prof_exec_time = cpu_time_total(autograd_prof_cpu)
-        pct_diff = (cuda_prof_exec_time - cpu_prof_exec_time) / cuda_prof_exec_time
-        if abs(pct_diff) > 0.05:
-            print_autograd_prof_summary(autograd_prof_cpu, 'CPU', autograd_prof_sortby, autograd_prof_topk)
-
-    print_autograd_prof_summary(autograd_prof_cuda, 'CUDA', autograd_prof_sortby, autograd_prof_topk)
-
-if __name__ == '__main__':
-    main()
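
The tail of main() above encodes bottleneck's one real heuristic: the CPU-mode
autograd summary is printed alongside the CUDA-mode one only when their totals
disagree by more than 5%. A standalone restatement, with made-up numbers for
illustration (totals are summed event.cpu_time_total values, in microseconds):

    def should_print_cpu_summary(cpu_total_us: float, cuda_total_us: float) -> bool:
        # Relative difference against the CUDA-mode total, as in the deleted code.
        pct_diff = (cuda_total_us - cpu_total_us) / cuda_total_us
        return abs(pct_diff) > 0.05

    assert should_print_cpu_summary(800.0, 1000.0)      # 20% apart -> print both
    assert not should_print_cpu_summary(980.0, 1000.0)  # 2% apart -> CUDA only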