pytorch/benchmarks/operator_benchmark/benchmark_pytorch.py
Sam Estep e3900d2ba5 Add lint for unqualified noqa (#56272)
Summary:
As this diff shows, currently there are a couple hundred instances of raw `noqa` in the codebase, which just ignore all errors on a given line. That isn't great, so this PR changes all existing instances of that antipattern to qualify the `noqa` with respect to a specific error code, and adds a lint to prevent more of this from happening in the future.

Interestingly, some of the examples the `noqa` lint catches are genuine attempts to qualify the `noqa` with a specific error code, such as these two:
```
test/jit/test_misc.py:27:            print(f"{hello + ' ' + test}, I'm a {test}") # noqa E999
test/jit/test_misc.py:28:            print(f"format blank") # noqa F541
```
However, those are still wrong because they are [missing a colon](https://flake8.pycqa.org/en/3.9.1/user/violations.html#in-line-ignoring-errors), which actually causes the error code to be completely ignored:

- If you change those error codes to anything else, the warnings are still suppressed, because everything after an unqualified `noqa` is ignored.
- If you add the necessary colons, it is revealed that `E261` was also being suppressed, unintentionally (corrected versions of both lines are shown after this list):
  ```
  test/jit/test_misc.py:27:57: E261 at least two spaces before inline comment
  test/jit/test_misc.py:28:35: E261 at least two spaces before inline comment
  ```
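
For reference, correctly qualified versions of those two lines would look like this (note the colon after `noqa` and the two spaces before the `#`, per E261):
```
test/jit/test_misc.py:27:            print(f"{hello + ' ' + test}, I'm a {test}")  # noqa: E999
test/jit/test_misc.py:28:            print(f"format blank")  # noqa: F541
```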

I did try using [flake8-noqa](https://pypi.org/project/flake8-noqa/) instead of a custom `git grep` lint, but it didn't seem to work. This PR is definitely missing some of the functionality that flake8-noqa is supposed to provide, though, so if someone can figure out how to use it, we should do that instead.
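
For illustration, here is a minimal sketch in Python of the kind of check such a lint performs (hypothetical code, not the PR's actual implementation, which is a `git grep` pattern run in CI):

```
import re
import sys

# Flag any `noqa` that is not immediately followed by ": <error code>".
UNQUALIFIED_NOQA = re.compile(r"#\s*noqa(?!:\s*[A-Z]+[0-9]+)", re.IGNORECASE)

exit_code = 0
for path in sys.argv[1:]:
    with open(path) as f:
        for lineno, line in enumerate(f, start=1):
            if UNQUALIFIED_NOQA.search(line):
                print(f"{path}:{lineno}: unqualified noqa")
                exit_code = 1
sys.exit(exit_code)
```

Invoked as, e.g., `python check_noqa.py $(git ls-files '*.py')`, it would print each offending line and exit nonzero.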

Pull Request resolved: https://github.com/pytorch/pytorch/pull/56272

Test Plan:
CI should pass on the tip of this PR, and we know that the lint works because the following CI run (before this PR was finished) failed:

- https://github.com/pytorch/pytorch/runs/2365189927

Reviewed By: janeyx99

Differential Revision: D27830127

Pulled By: samestep

fbshipit-source-id: d6dcf4f945ebd18cd76c46a07f3b408296864fcb
2021-04-19 13:16:18 -07:00


"""PyTorch performance microbenchmarks.

This module contains PyTorch-specific functionality for performance
microbenchmarks.
"""
import json
import time

import torch

import cpp_extension  # noqa: F401


class TorchBenchmarkBase(torch.nn.Module):
    """Base class used to create a PyTorch operator benchmark.

    module_name is the name of the operator being benchmarked.
    test_name is the name of a specific test, created by concatenating
    all of the inputs.
    """

    def __init__(self):
        super(TorchBenchmarkBase, self).__init__()
        self.user_given_name = None
        self._pass_count = 0
        self._num_inputs_require_grads = 0

    def _set_backward_test(self, is_backward):
        self._is_backward = is_backward

    def auto_set(self):
        """Automatically set requires_grad for inputs used in the backward path.

        It is implemented with two counters: one counts the number of times
        init has been called (_pass_count), and the other counts the number
        of times this function itself has been called (_auto_set_counter).
        The first time init is called, this function counts how many inputs
        require a gradient. In each of the following init calls, this
        function returns True for exactly one input. Here is an example:
            ...
            self.v1 = torch.rand(M, N, K, requires_grad=self.auto_set())
            self.v2 = torch.rand(M, N, K, requires_grad=self.auto_set())
            ...
        """
        if not self._is_backward:
            return False

        if self._pass_count == 0:
            self._num_inputs_require_grads += 1
            return True
        else:
            self._auto_set_counter += 1
            return (self._pass_count == self._auto_set_counter)
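
    # Note: _pass_count and _auto_set_counter are expected to be managed
    # (set and reset between init passes) by the benchmark harness;
    # _auto_set_counter is not initialized in this class itself.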

    def extract_inputs_tuple(self):
        self.inputs_tuple = tuple(self.inputs.values())

    @torch.jit.export
    def get_inputs(self):
        # Need to convert the inputs to a tuple outside of JIT so that
        # JIT can infer the size of the inputs.
        return self.inputs_tuple

    @torch.jit.export
    def forward_impl(self):
        # This supplies the inputs to the forward function, which will be
        # called in both the eager and JIT modes of local runs.
        return self.forward(*self.get_inputs())

    @torch.jit.export
    def forward_consume(self, iters: int):
        # _consume is used to avoid the dead-code-elimination optimization.
        for _ in range(iters):
            torch.ops.operator_benchmark._consume(self.forward_impl())
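        # Note: the _consume op is provided by the cpp_extension import at
        # the top of this file, which is why that otherwise-unused import
        # is kept.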

    def module_name(self):
        """Label of the operator being benchmarked."""
        if self.user_given_name:
            return self.user_given_name
        return self.__class__.__name__

    def set_module_name(self, name):
        self.user_given_name = name

    def test_name(self, **kargs):
        """Return a globally unique name which can be used to label a
        specific test.
        """
        # This is a list of attributes which will not be included
        # in the test name.
        skip_key_list = ['device']

        test_name_str = []
        for key in kargs:
            value = kargs[key]
            test_name_str.append(
                ('' if key in skip_key_list else key)
                + str(value if type(value) != bool else int(value)))
        name = (self.module_name() + '_' +
                '_'.join(test_name_str)).replace(" ", "")
        return name
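
    # For example (illustrative): an op whose module_name is 'add', called
    # with M=8, N=2, K=1, yields the test name 'add_M8_N2_K1'.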


class PyTorchOperatorTestCase(object):
    """This class includes all the information needed to benchmark an operator.

    op_bench: a user-defined class (child of TorchBenchmarkBase) which
        includes the inputs, the operator, etc.
    test_config: a namedtuple that includes test_name, input_shape, tag, and
        run_backward. When run_backward is False, the run_forward method will
        be executed; when run_backward is True, run_forward_eager and
        _output_mean will be executed to generate the output, and then
        run_backward will be executed.
    """
    def __init__(self, op_bench, test_config):
        self.test_config = test_config
        self.op_bench = op_bench
        self.place_holder_tensor = torch.ones(1)
        self.framework = "PyTorch"
        self.time_series = []
        self._jit_forward_graph = None

    def _generate_jit_forward_graph(self):
        """Generate a graph for the forward function via torch.jit.script."""
        scripted_op_bench = torch.jit.script(self.op_bench)
        return scripted_op_bench.forward_consume

    def run_jit_forward(self, num_runs, print_per_iter=False, cuda_sync=False):
        """Run the forward path of an op in JIT mode."""
        if self._jit_forward_graph is None:
            self._jit_forward_graph = self._generate_jit_forward_graph()
        self._jit_forward_graph(num_runs)

    def _print_per_iter(self):
        # Print the last 50 values.
        length = min(len(self.time_series), 50)
        for i in range(length):
            print("PyTorchObserver " + json.dumps(
                {
                    "type": self.test_config.test_name,
                    "metric": "latency",
                    "unit": "ms",
                    "value": str(self.time_series[length - i - 1]),
                }
            ))
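
    # Example of an emitted line (values illustrative):
    #   PyTorchObserver {"type": "add_M8_N2_K1", "metric": "latency", "unit": "ms", "value": "0.123"}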

    def run_forward(self, num_runs, print_per_iter, cuda_sync):
        """Run the forward path of an op in eager mode."""
        if print_per_iter:
            for _ in range(num_runs):
                start_time = time.time()
                self.output = self.op_bench.forward_impl()
                if cuda_sync:
                    torch.cuda.synchronize(torch.cuda.current_device())
                end_time = time.time()
                self.time_series.append((end_time - start_time) * 1e3)
        else:
            for _ in range(num_runs):
                self.output = self.op_bench.forward_impl()
            if cuda_sync:
                torch.cuda.synchronize(torch.cuda.current_device())

    def _output_mean(self):
        """Dummy function for gradient calculation.

        TODO (mingzhe): it is not necessary to sum everything up by myself;
        torch.autograd.backward does take a gradient tensor. By default, it
        is the same shape as your output tensor, with all 1s. Mathematically,
        it is the same as if the output were summed together, so we should
        be able to get rid of this method.
        """
        self.mean = self.output.mean()

    def run_backward(self, num_runs, print_per_iter=False):
        """Run the backward path of an op over many iterations."""
        # TODO: can we use JIT here to reduce Python overhead?
        for _ in range(num_runs):
            self.mean.backward(retain_graph=True)


def create_pytorch_op_test_case(op_bench, test_config):
    """This method is used to generate a test. func_name is a globally
    unique string. For a PyTorch add operator with M=8, N=2, K=1, and
    tag=long, here are the values of the members of test_case:

    op.module_name: add
    framework: PyTorch
    test_config: TestConfig(test_name='add_M8_N2_K1', input_config='M: 8, N: 2, K: 1',
                            tag='long', run_backward=False)
    func_name: addPyTorchTestConfig(test_name='add_M8_N2_K1', input_config='M: 8, N: 2, K: 1',
                                    tag='long', run_backward=False)
    """
    test_case = PyTorchOperatorTestCase(op_bench, test_config)
    test_config = test_case.test_config
    op = test_case.op_bench
    func_name = "{}{}{}".format(op.module_name(), test_case.framework, str(test_config))
    return (func_name, test_case)
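

# Illustrative usage (a hypothetical sketch, not part of this module; in
# practice the operator_benchmark harness constructs the op, calls init,
# and builds the test cases):
#
#     class AddBenchmark(TorchBenchmarkBase):
#         def init(self, M, N, K, device):
#             self.inputs = {
#                 "v1": torch.rand(M, N, K, device=device,
#                                  requires_grad=self.auto_set()),
#                 "v2": torch.rand(M, N, K, device=device,
#                                  requires_grad=self.auto_set()),
#             }
#             self.set_module_name("add")
#
#         def forward(self, v1, v2):
#             return v1 + v2
#
#     op = AddBenchmark()
#     op._set_backward_test(is_backward=False)
#     op.init(M=8, N=2, K=1, device="cpu")
#     op.extract_inputs_tuple()
#     # test_config would be a TestConfig namedtuple from the harness:
#     func_name, test_case = create_pytorch_op_test_case(op, test_config)
#     test_case.run_forward(num_runs=100, print_per_iter=False, cuda_sync=False)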