Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
Bump black version to 23.1.0 (#96578)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/96578
Approved by: https://github.com/ezyang

This commit is contained in:
    parent a229e78544
    commit 60a68477a6
@@ -878,7 +878,7 @@ init_command = [
     'tools/linter/adapters/pip_init.py',
     '--dry-run={{DRYRUN}}',
     '--no-black-binary',
-    'black==22.3.0',
+    'black==23.1.0',
     'ufmt==1.3.3',
     'usort==1.0.2',
 ]
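
The hunk above is the only functional change in this commit; every hunk below is mechanical fallout from re-running the formatter under the new pin. A minimal sketch of the recurring patterns, assuming they come from black 23.1.0's updated stable style (hypothetical code, not taken from the diff):

    import itertools


    # Written in the new style seen throughout this diff:
    # - no blank line is left between a (multi-line) signature and the body,
    # - no parentheses around a for-loop target tuple,
    # - no parentheses around a single exception type in "except".
    def pairwise_sums(
        xs,
        ys,
    ):
        totals = []
        for x, y in itertools.product(xs, ys):  # was: for (x, y) in ...
            try:
                totals.append(x + y)
            except TypeError:  # was: except (TypeError):
                continue
        return totals


    print(pairwise_sums([1, 2], [3, 4]))  # [4, 5, 5, 6]
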
@ -4,6 +4,7 @@ import os
|
|||
|
||||
from typing import Set
|
||||
|
||||
|
||||
# Note - hf and timm have their own version of this, torchbench does not
|
||||
# TOOD(voz): Someday, consolidate all the files into one runner instead of a shim like this...
|
||||
def model_names(filename: str) -> Set[str]:
|
||||
|
|
|
|||
|
|
@ -11,12 +11,10 @@ def get_field(csv, model_name: str, field: str, typ=float):
|
|||
|
||||
|
||||
def check_graph_breaks(actual_csv, expected_csv, expected_filename):
|
||||
|
||||
failed = []
|
||||
improved = []
|
||||
|
||||
for model in actual_csv["name"]:
|
||||
|
||||
graph_breaks = get_field(actual_csv, model, "graph_breaks", typ=int)
|
||||
expected_graph_breaks = get_field(expected_csv, model, "graph_breaks", typ=int)
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@ ARTIFACTS_QUERY_URL = "https://api.usw2a1.rockset.com/v1/public/shared_lambdas/c
|
|||
|
||||
|
||||
def query_job_sha(repo, sha):
|
||||
|
||||
params = {
|
||||
"parameters": [
|
||||
{"name": "sha", "type": "string", "value": sha},
|
||||
|
|
@ -108,7 +107,6 @@ def write_filtered_csvs(root_path, dataframes):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
|
||||
)
|
||||
|
|
|
|||
|
|
@ -373,7 +373,6 @@ class HuggingfaceRunner(BenchmarkRunner):
|
|||
model_name,
|
||||
batch_size=None,
|
||||
):
|
||||
|
||||
is_training = self.args.training
|
||||
use_eval_mode = self.args.use_eval_mode
|
||||
dtype = torch.float32
|
||||
|
|
@ -513,7 +512,6 @@ def refresh_model_names_and_batch_sizes():
|
|||
lm_seen = set()
|
||||
family_seen = set()
|
||||
for cls_name in hf_fx._SUPPORTED_MODELS:
|
||||
|
||||
if "For" not in cls_name:
|
||||
continue
|
||||
|
||||
|
|
|
|||
|
|
@ -73,7 +73,6 @@ def bench_op(
|
|||
warmup=25,
|
||||
rep=75,
|
||||
):
|
||||
|
||||
skip = False
|
||||
# allocate inputs, nchw
|
||||
x = torch.randn((BATCH, IN_C, IN_H, IN_W), dtype=dtype, device="cuda")
|
||||
|
|
|
|||
|
|
@ -70,7 +70,6 @@ def bench_op(
|
|||
warmup=25,
|
||||
rep=75,
|
||||
):
|
||||
|
||||
# allocate inputs, nchw
|
||||
x = torch.randn((BATCH, IN_C, IN_H, IN_W), dtype=dtype, device="cuda")
|
||||
w = torch.randn(
|
||||
|
|
|
|||
|
|
@ -66,7 +66,6 @@ def bench_op(
|
|||
warmup=25,
|
||||
rep=75,
|
||||
):
|
||||
|
||||
# allocate inputs, nchw
|
||||
x = torch.randn((BATCH, IN_C, IN_H, IN_W), dtype=dtype, device="cuda")
|
||||
w = torch.randn(
|
||||
|
|
|
|||
|
|
@ -236,7 +236,6 @@ def bench(layer_params, layer_id, p, fusion_types=[""]):
|
|||
|
||||
row = [layer_id]
|
||||
for fusion_type in fusion_types:
|
||||
|
||||
if fusion_type == "":
|
||||
conv_torchinductor = getattr(Func, "conv_torchinductor")
|
||||
conv = getattr(Func, "conv")
|
||||
|
|
|
|||
|
|
@ -56,7 +56,6 @@ def bench(shape, layer_id, p, fusion_types=[""]):
|
|||
|
||||
row = [layer_id]
|
||||
for fusion_type in fusion_types:
|
||||
|
||||
if fusion_type == "":
|
||||
fn_mm = getattr(Func, "mm")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ def profile_op(
|
|||
warmup=25,
|
||||
rep=50,
|
||||
):
|
||||
|
||||
# allocate inputs, nchw
|
||||
x = torch.randn((BATCH, IN_C, IN_H, IN_W), dtype=dtype, device="cuda")
|
||||
w = torch.randn(
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ out = csv.DictWriter(
|
|||
out.writeheader()
|
||||
out.writerow({"explain": gist_url})
|
||||
|
||||
|
||||
# Sometimes backtraces will be in third party code, which results
|
||||
# in very long file names. Delete the absolute path in this case.
|
||||
def normalize_file(f):
|
||||
|
|
|
|||
|
|
@ -182,7 +182,6 @@ class TimmRunnner(BenchmarkRunner):
|
|||
model_name,
|
||||
batch_size=None,
|
||||
):
|
||||
|
||||
is_training = self.args.training
|
||||
use_eval_mode = self.args.use_eval_mode
|
||||
|
||||
|
|
|
|||
|
|
@ -242,7 +242,6 @@ class TorchBenchmarkRunner(BenchmarkRunner):
|
|||
batch_size=None,
|
||||
part=None,
|
||||
):
|
||||
|
||||
is_training = self.args.training
|
||||
use_eval_mode = self.args.use_eval_mode
|
||||
dynamic_shapes = self.args.dynamic_shapes
|
||||
|
|
|
|||
|
|
@@ -120,7 +120,7 @@ class TestInitialization(FSDPTest):
         composable_handles = traversal_utils._get_fsdp_handles(composable_module)
         fsdp_wrapped_handles = traversal_utils._get_fsdp_handles(fsdp_wrapped_model)
         self.assertEqual(len(composable_handles), len(fsdp_wrapped_handles))
-        for (composable_handle, fsdp_wrapped_handle) in zip(
+        for composable_handle, fsdp_wrapped_handle in zip(
             composable_handles, fsdp_wrapped_handles
         ):
             self.assertEqual(
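
The change above is purely cosmetic: parentheses around a for-loop target tuple do not affect how the names are bound. A tiny illustration with made-up values:

    # Both loops iterate over the same pairs and bind the same names; the
    # parentheses around the target are optional, so black can drop them.
    pairs = list(zip(["h0", "h1"], ["w0", "w1"]))

    for (left, right) in pairs:
        print(left, right)

    for left, right in pairs:
        print(left, right)
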
@ -179,7 +179,7 @@ class TestInitialization(FSDPTest):
|
|||
policy=policy,
|
||||
sync_module_states=True,
|
||||
)
|
||||
for (composable_param, fsdp_wrapped_param) in zip(
|
||||
for composable_param, fsdp_wrapped_param in zip(
|
||||
composable_module.parameters(),
|
||||
fsdp_wrapped_model.parameters(),
|
||||
):
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ class TestFSDPCheckpoint(FSDPTest):
|
|||
assert outputs
|
||||
assert models
|
||||
|
||||
for (l, o) in zip(losses[1:], outputs[1:]):
|
||||
for l, o in zip(losses[1:], outputs[1:]):
|
||||
self.assertEqual(losses[0], l)
|
||||
self.assertEqual(outputs[0], o)
|
||||
|
||||
|
|
@ -324,7 +324,6 @@ class TestModel(nn.Module):
|
|||
|
||||
|
||||
class TestFSDPCheckpointSubmodule(FSDPTest):
|
||||
|
||||
# TODO: grad value checks occasionally fails when use_reentrant = True
|
||||
@skip_if_lt_x_gpu(2)
|
||||
@parametrize("use_reentrant", [False])
|
||||
|
|
|
|||
|
|
@ -70,7 +70,6 @@ class Net(nn.Module):
|
|||
|
||||
|
||||
class DummyState:
|
||||
|
||||
__slots__ = ["process_group", "noise"]
|
||||
|
||||
def __init__(self, process_group: dist.ProcessGroup, noise: int):
|
||||
|
|
@ -157,7 +156,6 @@ class TestCommunicationHooks(FSDPTest):
|
|||
self.assertEqual(entry._communication_hook, default_hook)
|
||||
|
||||
for _ in range(4):
|
||||
|
||||
# Clear gradients
|
||||
net_default_hook.zero_grad()
|
||||
loss = net_default_hook(inpt).sum()
|
||||
|
|
@ -183,7 +181,6 @@ class TestCommunicationHooks(FSDPTest):
|
|||
]
|
||||
|
||||
def _init_model(self, core, sharding_strategy, mixed_precision=None):
|
||||
|
||||
device = torch.device("cuda")
|
||||
return FSDP(
|
||||
core,
|
||||
|
|
@ -424,7 +421,6 @@ class TestCommunicationHooks(FSDPTest):
|
|||
def test_fp16_hook(
|
||||
self, has_wrapping: bool, sharding_strategy: Optional[ShardingStrategy]
|
||||
):
|
||||
|
||||
state = default_hooks.LowPrecisionState(process_group=_get_default_group())
|
||||
hook = default_hooks.fp16_compress_hook
|
||||
|
||||
|
|
@ -452,7 +448,6 @@ class TestCommunicationHooks(FSDPTest):
|
|||
def test_bf16_hook(
|
||||
self, has_wrapping: bool, sharding_strategy: Optional[ShardingStrategy]
|
||||
):
|
||||
|
||||
state = default_hooks.LowPrecisionState(process_group=_get_default_group())
|
||||
hook = default_hooks.bf16_compress_hook
|
||||
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ class TestGradAcc(FSDPTest):
|
|||
num_iters_to_acc = sum(config.num_iters for config in configs)
|
||||
for _ in range(num_iters_to_acc - 1):
|
||||
batches.append(tuple(permute_tensor(t) for t in batch))
|
||||
for (batch1, batch2) in itertools.combinations(batches, r=2):
|
||||
for batch1, batch2 in itertools.combinations(batches, r=2):
|
||||
for t1, t2 in zip(batch1, batch2):
|
||||
assert not torch.all(
|
||||
t1 == t2
|
||||
|
|
|
|||
|
|
@ -1338,7 +1338,6 @@ class TestFSDPOptimState(FSDPTest):
|
|||
use_multiple_param_groups: bool,
|
||||
use_optim_input: bool,
|
||||
):
|
||||
|
||||
NUM_ITERS = 3
|
||||
# Run a wrapped model for a few iterations
|
||||
model1, optim1, optim_input1 = self._init_nested_model(
|
||||
|
|
|
|||
|
|
@ -937,14 +937,14 @@ class TestFSDPStateDict(FSDPTest):
|
|||
# Check that it can be loaded into FSDP.
|
||||
new_fsdp, _ = _create_module()
|
||||
_zero_model(new_fsdp)
|
||||
for (p1, p2) in zip(fsdp.parameters(), new_fsdp.parameters()):
|
||||
for p1, p2 in zip(fsdp.parameters(), new_fsdp.parameters()):
|
||||
self.assertNotEqual(p1, p2)
|
||||
with FSDP.state_dict_type(new_fsdp, STATE_DICT_MAPPING[state_dict_type]):
|
||||
if state_dict_type != "local_state_dict":
|
||||
# FlatParameter has not supported deepcopy yet.
|
||||
state_dict = deepcopy(state_dict)
|
||||
new_fsdp.load_state_dict(state_dict, strict=True)
|
||||
for (p1, p2) in zip(fsdp.parameters(), new_fsdp.parameters()):
|
||||
for p1, p2 in zip(fsdp.parameters(), new_fsdp.parameters()):
|
||||
self.assertEqual(p1, p2)
|
||||
|
||||
# Test that the checkpoint can be loaded into a local model.
|
||||
|
|
@ -954,7 +954,7 @@ class TestFSDPStateDict(FSDPTest):
|
|||
param.zero_()
|
||||
|
||||
with fsdp.summon_full_params(fsdp):
|
||||
for (p1, p2) in zip(fsdp.parameters(), local.parameters()):
|
||||
for p1, p2 in zip(fsdp.parameters(), local.parameters()):
|
||||
self.assertNotEqual(p1, p2)
|
||||
|
||||
if state_dict_type == "local_state_dict":
|
||||
|
|
@ -963,7 +963,7 @@ class TestFSDPStateDict(FSDPTest):
|
|||
with fsdp.summon_full_params(fsdp):
|
||||
if self.rank == 0:
|
||||
local.load_state_dict(state_dict, strict=True)
|
||||
for (p1, p2) in zip(fsdp.parameters(), local.parameters()):
|
||||
for p1, p2 in zip(fsdp.parameters(), local.parameters()):
|
||||
self.assertEqual(p1, p2)
|
||||
|
||||
@skip_if_lt_x_gpu(2)
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@ class TestShardUtils(TestCase):
|
|||
out_offsets,
|
||||
in_split_sizes,
|
||||
):
|
||||
|
||||
for my_rank in range(world_size):
|
||||
_in_split_sizes = in_split_sizes[my_rank]
|
||||
_out_split_sizes = [
|
||||
|
|
|
|||
|
|
@ -847,7 +847,6 @@ class NNModuleTests(torch._dynamo.test_case.TestCase):
|
|||
torch._dynamo.config.traceable_tensor_subclasses.add(TensorProxy)
|
||||
|
||||
try:
|
||||
|
||||
x = torch.randn(1).as_subclass(TensorProxy)
|
||||
cnt = torch._dynamo.testing.CompileCounter()
|
||||
out1 = foo(x)
|
||||
|
|
@ -862,7 +861,6 @@ class NNModuleTests(torch._dynamo.test_case.TestCase):
|
|||
|
||||
def test_torch_function_with_closure(self):
|
||||
def run():
|
||||
|
||||
counter = 0
|
||||
|
||||
def foo(x):
|
||||
|
|
@ -1097,7 +1095,7 @@ class OptimizedModuleTest(torch._dynamo.test_case.TestCase):
|
|||
opt_mod = torch._dynamo.optimize("eager")(mod)
|
||||
|
||||
# Check parameteres and buffers
|
||||
for (p1, p2) in zip(mod.parameters(), opt_mod.parameters()):
|
||||
for p1, p2 in zip(mod.parameters(), opt_mod.parameters()):
|
||||
self.assertTrue(id(p1) == id(p2))
|
||||
|
||||
def test_recursion(self):
|
||||
|
|
|
|||
|
|
@ -1572,7 +1572,6 @@ class ReproTests(torch._dynamo.test_case.TestCase):
|
|||
self.assertEqual(y, 10)
|
||||
|
||||
def test_sort_out(self):
|
||||
|
||||
dtype = torch.float32
|
||||
device = "cpu"
|
||||
|
||||
|
|
@ -1607,7 +1606,6 @@ class ReproTests(torch._dynamo.test_case.TestCase):
|
|||
self.assertTrue(same(ref, res))
|
||||
|
||||
def test_sigmoid_out(self):
|
||||
|
||||
dtype = torch.float32
|
||||
device = "cpu"
|
||||
|
||||
|
|
|
|||
|
|
@ -178,7 +178,6 @@ class TestInductorConfig(TestCase):
|
|||
a(torch.randn(10))
|
||||
|
||||
def test_api_options(self):
|
||||
|
||||
reduce_overhead_opts = torch._inductor.list_mode_options("reduce-overhead")
|
||||
self.assertEqual(reduce_overhead_opts["triton.cudagraphs"], True)
|
||||
|
||||
|
|
|
|||
|
|
@ -79,7 +79,6 @@ if HAS_CUDA and not TEST_WITH_ASAN:
|
|||
|
||||
|
||||
class TestInductorDynamic(TestCase):
|
||||
|
||||
compile_fn = partial(torch.compile, dynamic=True)
|
||||
|
||||
def setUp(self):
|
||||
|
|
|
|||
|
|
@ -597,7 +597,6 @@ class TestInductorOpInfo(TestCase):
|
|||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
if test_expect is ExpectedTestResult.XFAILURE:
|
||||
raise e
|
||||
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ skipIfNoBFloat16Cuda = _skipper(
|
|||
lambda: not torch.cuda.is_bf16_supported(), "BFloat16 CUDA is not available"
|
||||
)
|
||||
|
||||
|
||||
# skips tests for all versions below min_opset_version.
|
||||
# if exporting the op is only supported after a specific version,
|
||||
# add this wrapper to prevent running the test for opset_versions
|
||||
|
|
|
|||
|
|
@ -494,7 +494,6 @@ class TestONNXOpset(pytorch_test_common.ExportTestCase):
|
|||
("zeros", "border", "reflection"),
|
||||
(True, False),
|
||||
):
|
||||
|
||||
args = (
|
||||
torch.randn(n, c, h_in, w_in), # x
|
||||
torch.randn(n, h_out, w_out, 2), # grid,
|
||||
|
|
|
|||
|
|
@ -13,14 +13,12 @@ from torch.testing._internal import common_utils
|
|||
|
||||
|
||||
class TestONNXScriptExport(common_utils.TestCase):
|
||||
|
||||
# opset version is
|
||||
# 1. local function is supported after opset 15
|
||||
# 2. onnx-script requires users to determine opset in local function
|
||||
opset_version = 15
|
||||
|
||||
def test_onnxscript_registration_with_multiple_models(self):
|
||||
|
||||
from onnxscript.onnx_opset import opset15 as op
|
||||
|
||||
# 1. Register Selu onnxscript function as custom Op
|
||||
|
|
|
|||
|
|
@ -12,14 +12,12 @@ from torch.testing._internal import common_utils
|
|||
|
||||
|
||||
class TestONNXScriptRuntime(onnx_test_common._TestONNXRuntime):
|
||||
|
||||
# opset version is
|
||||
# 1. local function is supported after opset 15
|
||||
# 2. onnx-script requires users to determine opset in local function
|
||||
opset_version = 15
|
||||
|
||||
def test_selu_from_onnxscript_example(self):
|
||||
|
||||
x = torch.randn(1, 2, 3, 4, requires_grad=True)
|
||||
model = torch.nn.SELU()
|
||||
|
||||
|
|
@ -52,7 +50,6 @@ class TestONNXScriptRuntime(onnx_test_common._TestONNXRuntime):
|
|||
self.run_test(model, x)
|
||||
|
||||
def test_layer_norm(self):
|
||||
|
||||
x = torch.randn(2, 3)
|
||||
y = torch.randn(2, 3)
|
||||
z = torch.randn(2, 3)
|
||||
|
|
|
|||
|
|
@ -30,9 +30,7 @@ def export_to_onnx(
|
|||
model: Union[torch.nn.Module, torch.jit.ScriptFunction],
|
||||
input: Union[torch.Tensor, Tuple[torch.Tensor]],
|
||||
custom_ops: Optional[
|
||||
Iterable[
|
||||
Union[contextlib.AbstractContextManager, contextlib.ContextDecorator],
|
||||
]
|
||||
Iterable[Union[contextlib.AbstractContextManager, contextlib.ContextDecorator]]
|
||||
] = None,
|
||||
mocks: Optional[Iterable] = None,
|
||||
operator_export_type: torch.onnx.OperatorExportTypes = torch.onnx.OperatorExportTypes.ONNX,
|
||||
|
|
@ -765,7 +763,6 @@ class TestONNXExport(pytorch_test_common.ExportTestCase):
|
|||
)
|
||||
|
||||
def test_dropout_script(self):
|
||||
|
||||
eg = torch.zeros(1, 2, 3, requires_grad=True)
|
||||
|
||||
@jit_utils._trace(eg)
|
||||
|
|
|
|||
|
|
@ -8600,7 +8600,6 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime):
|
|||
|
||||
@skipIfUnsupportedMinOpsetVersion(9)
|
||||
def test_kldiv_loss(self):
|
||||
|
||||
x = torch.rand(5).log()
|
||||
y = torch.rand(5)
|
||||
self._kldiv_loss(x, y)
|
||||
|
|
@ -12832,7 +12831,6 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime):
|
|||
name_fn=lambda align_corners: str(align_corners),
|
||||
)
|
||||
def test_grid_sample(self, mode, padding_mode, align_corners):
|
||||
|
||||
n, c, h_in, w_in, h_out, w_out = 1, 1, 3, 2, 2, 4
|
||||
|
||||
class GridSampleModule(torch.nn.Module):
|
||||
|
|
|
|||
|
|
@ -328,7 +328,6 @@ class TestONNXCustomOpShapeInference(pytorch_test_common.ExportTestCase):
|
|||
self.opset_version = _constants.ONNX_MAX_OPSET
|
||||
|
||||
def test_setType_maintains_output_shape_for_single_custom_op(self):
|
||||
|
||||
self.addCleanup(torch.onnx.unregister_custom_op_symbolic, "::linalg_inv", 9)
|
||||
|
||||
class CustomInverse(torch.nn.Module):
|
||||
|
|
@ -363,7 +362,6 @@ class TestONNXCustomOpShapeInference(pytorch_test_common.ExportTestCase):
|
|||
self.assertEqual(dim.dim_value, rank)
|
||||
|
||||
def test_no_setType_for_single_custom_op(self):
|
||||
|
||||
self.addCleanup(torch.onnx.unregister_custom_op_symbolic, "::linalg_inv", 9)
|
||||
|
||||
class CustomInverse(torch.nn.Module):
|
||||
|
|
@ -398,7 +396,6 @@ class TestONNXCustomOpShapeInference(pytorch_test_common.ExportTestCase):
|
|||
def test_setType_maintains_output_shape_for_single_custom_op_with_dynamic_axes(
|
||||
self,
|
||||
):
|
||||
|
||||
self.addCleanup(torch.onnx.unregister_custom_op_symbolic, "::linalg_inv", 9)
|
||||
|
||||
class CustomInverse(torch.nn.Module):
|
||||
|
|
@ -438,7 +435,6 @@ class TestONNXCustomOpShapeInference(pytorch_test_common.ExportTestCase):
|
|||
self.assertEqual(dims[i].dim_value, x.size()[i])
|
||||
|
||||
def test_setType_maintains_output_shape_for_single_custom_op_with_onnx_ops(self):
|
||||
|
||||
self.addCleanup(torch.onnx.unregister_custom_op_symbolic, "::linalg_inv", 9)
|
||||
|
||||
class CustomInverse(torch.nn.Module):
|
||||
|
|
|
|||
|
|
@ -133,6 +133,7 @@ ignores = [
|
|||
|
||||
ignores = [os.path.join(proj_dir, ignore) for ignore in ignores]
|
||||
|
||||
|
||||
# Check if the compiler is hip-clang.
|
||||
def is_hip_clang() -> bool:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from torchgen.api.autograd import NativeFunctionWithDifferentiabilityInfo as NFW
|
|||
from torchgen.context import native_function_manager
|
||||
from torchgen.utils import T
|
||||
|
||||
|
||||
# Like tools.api.context.with_native_function, but for
|
||||
# NativeFunctionWithDifferentiabilityInfo.
|
||||
def with_native_function_with_differentiability_info(
|
||||
|
|
|
|||
|
|
@ -420,7 +420,6 @@ UNTRACEABLE_FUNCTIONS = VIEW_FUNCTIONS
|
|||
def get_infos_with_derivatives_list(
|
||||
differentiability_infos: Dict[FunctionSchema, Dict[str, DifferentiabilityInfo]]
|
||||
) -> List[DifferentiabilityInfo]:
|
||||
|
||||
diff_info_list = [
|
||||
info
|
||||
for diffinfo_dict in differentiability_infos.values()
|
||||
|
|
@ -469,7 +468,6 @@ def gen_autograd_functions_python(
|
|||
differentiability_infos: Dict[FunctionSchema, Dict[str, DifferentiabilityInfo]],
|
||||
template_path: str,
|
||||
) -> None:
|
||||
|
||||
fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
|
||||
num_shards = 5
|
||||
fm.write(
|
||||
|
|
|
|||
|
|
@ -221,6 +221,7 @@ ${assign_return_values} ([&]() {
|
|||
|
||||
TMP_VAR = "_tmp"
|
||||
|
||||
|
||||
# FIXME: Ideally these functions should be methods on Type class, but we have a
|
||||
# comment in codegen/model.py there saying these concepts are not well defined.
|
||||
# Thus we put a version that commonly used by autograd codegen here.
|
||||
|
|
@ -321,7 +322,8 @@ def emit_view_call(
|
|||
|
||||
def emit_view_lambda(f: NativeFunction, unpacked_bindings: List[Binding]) -> str:
|
||||
"""Generate an additional lambda function to recover views in backward when as_strided is not supported.
|
||||
See Note [View + Inplace update for base tensor] and [View + Inplace update for view tensor] for more details."""
|
||||
See Note [View + Inplace update for base tensor] and [View + Inplace update for view tensor] for more details.
|
||||
"""
|
||||
input_base = "input_base"
|
||||
replay_view_func = ""
|
||||
updated_unpacked_args: List[str] = []
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from torchgen.utils import FileManager, mapMaybe
|
|||
OPTIONAL_TYPE_PATTERN = re.compile(r"c10::optional<(.+)>")
|
||||
TYPE_PATTERN = re.compile(r"(?:const\s+)?([A-Z]\w+)")
|
||||
|
||||
|
||||
# Add 'at::' to types defined in ATen namespace, e.g. Tensor, TensorList, IntArrayRef and etc.
|
||||
# TODO: maybe update the cpp argument API to take optional namespace argument?
|
||||
def fully_qualified_type(argument_type: str) -> str:
|
||||
|
|
|
|||
|
|
@ -761,7 +761,6 @@ def gen_variable_type(
|
|||
template_path: str,
|
||||
used_keys: Set[str],
|
||||
) -> None:
|
||||
|
||||
"""VariableType.h and VariableType.cpp body
|
||||
|
||||
This is the at::Type subclass for differentiable tensors. The
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ _GLOBAL_LOAD_DERIVATIVE_CACHE = {}
|
|||
|
||||
_VALID_AUTOGRAD_KEYS = set(AUTOGRAD_KEYS)
|
||||
|
||||
|
||||
# This function directly adds per-dispatchkey derivative entries for {view}_copy variants of each view op.
|
||||
# Since every {view} and {view}_copy op shares the same derivative formula,
|
||||
# we generate them here instead of duplicating them in the yaml.
|
||||
|
|
@ -96,7 +97,6 @@ def load_derivatives(
|
|||
global _GLOBAL_LOAD_DERIVATIVE_CACHE
|
||||
key = (derivatives_yaml_path, native_yaml_path)
|
||||
if key not in _GLOBAL_LOAD_DERIVATIVE_CACHE:
|
||||
|
||||
with open(derivatives_yaml_path, "r") as f:
|
||||
definitions = yaml.load(f, Loader=YamlLoader)
|
||||
|
||||
|
|
|
|||
|
|
@ -183,7 +183,6 @@ def create_debug_info_from_selected_models(
|
|||
selected_models: List[dict],
|
||||
new_style_rule: bool,
|
||||
):
|
||||
|
||||
model_dict = {
|
||||
"asset_info": {}, # maps asset name -> dict of asset metadata like hashes
|
||||
"is_new_style_rule": new_style_rule,
|
||||
|
|
@ -465,13 +464,13 @@ def fill_output(output: Dict[str, object], options: object):
|
|||
# to True, since it indicates that this operator list came from something
|
||||
# other than a traced operator list.
|
||||
include_all_non_op_selectives = False
|
||||
for (op_name, op_info) in operators.items():
|
||||
for op_name, op_info in operators.items():
|
||||
include_all_non_op_selectives = (
|
||||
include_all_non_op_selectives or op_info.include_all_overloads
|
||||
)
|
||||
|
||||
operators_as_dict = {}
|
||||
for (k, v) in operators.items():
|
||||
for k, v in operators.items():
|
||||
operators_as_dict[k] = v.to_dict()
|
||||
|
||||
output["operators"] = operators_as_dict
|
||||
|
|
|
|||
|
|
@ -18,14 +18,14 @@ from torchgen.selective_build.selector import (
|
|||
|
||||
def extract_all_operators(selective_builder: SelectiveBuilder) -> Set[str]:
|
||||
ops = []
|
||||
for (op_name, op) in selective_builder.operators.items():
|
||||
for op_name, op in selective_builder.operators.items():
|
||||
ops.append(op_name)
|
||||
return set(ops)
|
||||
|
||||
|
||||
def extract_training_operators(selective_builder: SelectiveBuilder) -> Set[str]:
|
||||
ops = []
|
||||
for (op_name, op) in selective_builder.operators.items():
|
||||
for op_name, op in selective_builder.operators.items():
|
||||
if op.is_used_for_training:
|
||||
ops.append(op_name)
|
||||
return set(ops)
|
||||
|
|
@ -33,7 +33,7 @@ def extract_training_operators(selective_builder: SelectiveBuilder) -> Set[str]:
|
|||
|
||||
def throw_if_any_op_includes_overloads(selective_builder: SelectiveBuilder) -> None:
|
||||
ops = []
|
||||
for (op_name, op) in selective_builder.operators.items():
|
||||
for op_name, op in selective_builder.operators.items():
|
||||
if op.include_all_overloads:
|
||||
ops.append(op_name)
|
||||
if ops:
|
||||
|
|
@ -47,7 +47,6 @@ def throw_if_any_op_includes_overloads(selective_builder: SelectiveBuilder) -> N
|
|||
|
||||
|
||||
def gen_supported_mobile_models(model_dicts: List[Any], output_dir: str) -> None:
|
||||
|
||||
supported_mobile_models_source = """/*
|
||||
* Generated by gen_oplist.py
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -38,7 +38,6 @@ def is_this_type_of_tests(target_name: str, test_set_by_type: Set[str]) -> bool:
|
|||
def print_test_by_type(
|
||||
tests: TestList, test_set_by_type: Set[str], type_name: str, summary_file: IO[str]
|
||||
) -> None:
|
||||
|
||||
print("Tests " + type_name + " to collect coverage:", file=summary_file)
|
||||
for test in tests:
|
||||
if is_this_type_of_tests(test.name, test_set_by_type):
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ result = subprocess.run(
|
|||
PYTORCH_ROOT = result.stdout.decode("utf-8").strip()
|
||||
IS_WINDOWS: bool = os.name == "nt"
|
||||
|
||||
|
||||
# Returns '/usr/local/include/python<version number>'
|
||||
def get_python_include_dir() -> str:
|
||||
return gp()["include"]
|
||||
|
|
@@ -147,7 +148,7 @@ def check_file(
         proc = run_command(
             [binary, f"-p={build_dir}", *include_args, filename],
         )
-    except (OSError) as err:
+    except OSError as err:
         return [
             LintMessage(
                 path=filename,
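
The except-clause change above is equally cosmetic: parentheses are only needed to group several exception types, so they are redundant around a single one. A hypothetical example:

    try:
        raise OSError("boom")
    except OSError as err:  # single type: no parentheses needed
        print("caught", err)
    except (ValueError, TypeError):  # grouping several types still needs them
        pass
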
@ -47,7 +47,7 @@ selected_mobile_ops_preamble = """#pragma once
|
|||
|
||||
def extract_root_operators(selective_builder: SelectiveBuilder) -> Set[str]:
|
||||
ops = []
|
||||
for (op_name, op) in selective_builder.operators.items():
|
||||
for op_name, op in selective_builder.operators.items():
|
||||
if op.is_root_operator:
|
||||
ops.append(op_name)
|
||||
return set(ops)
|
||||
|
|
|
|||
|
|
@ -142,7 +142,6 @@ def _format_rule_for_cpp(rule: _RuleType) -> str:
|
|||
def gen_diagnostics_python(
|
||||
rules: Sequence[_RuleType], out_py_dir: str, template_dir: str
|
||||
) -> None:
|
||||
|
||||
rule_class_lines = [_format_rule_for_python_class(rule) for rule in rules]
|
||||
rule_field_lines = [_format_rule_for_python_field(rule) for rule in rules]
|
||||
|
||||
|
|
@ -165,7 +164,6 @@ def gen_diagnostics_python(
|
|||
def gen_diagnostics_cpp(
|
||||
rules: Sequence[_RuleType], out_cpp_dir: str, template_dir: str
|
||||
) -> None:
|
||||
|
||||
rule_lines = [_format_rule_for_cpp(rule) for rule in rules]
|
||||
rule_names = [f'"{_kebab_case_to_snake_case(rule["name"])}",' for rule in rules]
|
||||
|
||||
|
|
@ -206,7 +204,6 @@ def gen_diagnostics(
|
|||
out_cpp_dir: str,
|
||||
out_docs_dir: str,
|
||||
) -> None:
|
||||
|
||||
with open(rules_path, "r") as f:
|
||||
rules = yaml.load(f, Loader=torchgen_utils.YamlLoader)
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ def apply_replacements(replacements: Dict[str, str], text: str) -> str:
|
|||
Returns:
|
||||
Text with replacements applied, if any.
|
||||
"""
|
||||
for (before, after) in replacements.items():
|
||||
for before, after in replacements.items():
|
||||
text = text.replace(before, after)
|
||||
return text
|
||||
|
||||
|
|
|
|||
|
|
@ -54,7 +54,6 @@ def generate_code(
|
|||
operator_selector = SelectiveBuilder.get_nop_selector()
|
||||
|
||||
if subset == "libtorch" or not subset:
|
||||
|
||||
gen_autograd(
|
||||
native_functions_path or NATIVE_FUNCTIONS_PATH,
|
||||
tags_path or TAGS_PATH,
|
||||
|
|
|
|||
|
|
@ -134,7 +134,6 @@ def rocm_get_per_process_gpu_info() -> List[Dict[str, Any]]:
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
handle = None
|
||||
try:
|
||||
pynvml.nvmlInit()
|
||||
|
|
|
|||
|
|
@@ -132,7 +132,10 @@ def upload_to_s3(
         json.dump(doc, body)
         body.write("\n")

-    S3_RESOURCE.Object(f"{bucket_name}", f"{key}",).put(
+    S3_RESOURCE.Object(
+        f"{bucket_name}",
+        f"{key}",
+    ).put(
         Body=gzip.compress(body.getvalue().encode()),
         ContentEncoding="gzip",
         ContentType="application/json",
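
The Object(...) call above appears to be reflowed because of the trailing comma after its last argument: black's "magic trailing comma" keeps such calls at one argument per line, and black 23.1.0 also honours a trailing comma that sits inside a line's leading parentheses, which likewise explains the exploded parenthesized assignment target later in this diff. A hypothetical sketch:

    def make_request(url, timeout=None):
        # Stand-in for any call site; only the formatting is of interest.
        return url, timeout


    # A trailing comma after the last argument keeps the call expanded to
    # one argument per line:
    result = make_request(
        "https://example.com",
        timeout=10,
    )

    # Without the trailing comma, the call collapses onto a single line as
    # long as it fits within the configured line length:
    result = make_request("https://example.com", timeout=10)
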
@ -13,6 +13,7 @@ from torchgen.gen_backend_stubs import run
|
|||
path = os.path.dirname(os.path.realpath(__file__))
|
||||
gen_backend_stubs_path = os.path.join(path, "../torchgen/gen_backend_stubs.py")
|
||||
|
||||
|
||||
# gen_backend_stubs.py is an integration point that is called directly by external backends.
|
||||
# The tests here are to confirm that badly formed inputs result in reasonable error messages.
|
||||
class TestGenBackendStubs(expecttest.TestCase):
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ import os
|
|||
|
||||
|
||||
def main() -> None:
|
||||
|
||||
target = os.path.join("torch", "masked", "_docs.py")
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -169,6 +169,7 @@ def get_decompositions(
|
|||
import torch._decomp.decompositions
|
||||
import torch._refs
|
||||
|
||||
|
||||
# This list was copied from torch/_inductor/decomposition.py
|
||||
# excluding decompositions that results in prim ops
|
||||
# Resulting opset of decomposition is core aten ops
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ pw_cast_for_int_to_real = partial(
|
|||
type_casts, type_promotion=utils.ELEMENTWISE_TYPE_PROMOTION_KIND.INT_TO_FLOAT
|
||||
)
|
||||
|
||||
|
||||
# This expands x until x.dim() == dim. Might be useful as an operator
|
||||
def _unsqueeze_to_dim(x: Tensor, dim: int):
|
||||
for _ in range(dim - x.dim()):
|
||||
|
|
@ -619,7 +620,6 @@ def slice_forward(
|
|||
end: Optional[int] = None,
|
||||
step: int = 1,
|
||||
):
|
||||
|
||||
ndim = self.dim()
|
||||
if ndim == 0:
|
||||
raise RuntimeError("slice() cannot be applied to a 0-dim tensor.")
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ def _register_jit_decomposition_for_jvp(decomp, use_python=False):
|
|||
|
||||
# The only decompositions here are temporary or hacks for the purposes of jvp
|
||||
|
||||
|
||||
# TODO: do these also belong here?
|
||||
@maybe_register_decomposition(aten.trace.default)
|
||||
def trace(self: Tensor) -> Tensor:
|
||||
|
|
|
|||
|
|
@ -288,7 +288,6 @@ def _compile(
|
|||
hooks: Hooks,
|
||||
frame: Optional[types.FrameType] = None,
|
||||
) -> Optional[GuardedCode]:
|
||||
|
||||
output: Optional[OutputGraph] = None
|
||||
# This is shared across restarts
|
||||
mutated_closure_cell_contents: Set[str] = set()
|
||||
|
|
|
|||
|
|
@ -156,7 +156,6 @@ def filter_stack(stack):
|
|||
|
||||
|
||||
def format_error_msg(exc, code, record_filename=None, frame=None):
|
||||
|
||||
msg = os.linesep * 2
|
||||
|
||||
if config.verbose:
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ logging.addLevelName(logging.CODE, "CODE")
|
|||
# Disable progress bar by default, not in dynamo config because otherwise get a circular import
|
||||
disable_progress = True
|
||||
|
||||
|
||||
# Return all loggers that torchdynamo/torchinductor is responsible for
|
||||
def get_loggers():
|
||||
return [
|
||||
|
|
|
|||
|
|
@@ -355,7 +355,7 @@ class BuiltinVariable(VariableTracker):
             return None

         # Return first handler that matches the type checks
-        for ((type1, type2), handler) in handlers[op]:
+        for (type1, type2), handler in handlers[op]:
             if isinstance(a, type1) and isinstance(b, type2):
                 return handler

@ -641,7 +641,6 @@ class BuiltinVariable(VariableTracker):
|
|||
)
|
||||
for i in [a, b]
|
||||
):
|
||||
|
||||
if any([isinstance(val, FakeItemVariable) for val in [a, b]]):
|
||||
return variables.FakeItemVariable.from_tensor_variable(result)
|
||||
|
||||
|
|
@ -678,7 +677,6 @@ class BuiltinVariable(VariableTracker):
|
|||
)
|
||||
return SymNodeVariable.create(tx, proxy, None)
|
||||
else:
|
||||
|
||||
unimplemented(f"unsupported min / max over args {str(a)}, {str(b)}")
|
||||
|
||||
call_min = _call_min_max
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ constant_fold_functions = [
|
|||
if torch.distributed.is_available():
|
||||
constant_fold_functions.append(torch.distributed.is_initialized)
|
||||
|
||||
|
||||
# TODO(voz): perhaps a decorator? This is rather readable for now tho, and not a public API.
|
||||
def remap_as_fn___radd__(*args):
|
||||
return torch._C._TensorBase.__radd__(*args)
|
||||
|
|
|
|||
|
|
@@ -412,7 +412,8 @@ class KernelArgs:
 class CSEVariable:
     """A CSEVariable is just a name for an expression but it is useful to be able to annotate them on a backend dependent basis.
     The backends can inherit from this class and overload the "create_cse_var" Kernel to do that.
-    The "update_on_args" method gives you a hook for annotations, see example of TritonCSEVariable in triton.py."""
+    The "update_on_args" method gives you a hook for annotations, see example of TritonCSEVariable in triton.py.
+    """

     def __init__(self, name):
         self.name = name
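
A hedged illustration of the docstring change above, using a made-up function: when the last line of a multi-line docstring runs long, the closing quotes move onto their own line instead of staying attached to it.

    def frobnicate(x):
        """Do something mildly interesting to x.

        This closing line is long enough that keeping the terminating quotes on
        it would push the line over the length limit, so they get their own line.
        """
        return x


    print(frobnicate(41) + 1)
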
@ -1535,7 +1535,6 @@ class TritonScheduling:
|
|||
|
||||
@contextlib.contextmanager
|
||||
def end_current_reduction_loop():
|
||||
|
||||
if current_loop_writes:
|
||||
# flush out any other runnable nodes to reduce number of loops
|
||||
for other_node in nodes[index + 1 :]:
|
||||
|
|
|
|||
|
|
@ -183,7 +183,6 @@ class cpp:
|
|||
|
||||
# config specific to codegen/triton.py
|
||||
class triton:
|
||||
|
||||
# Use cudagraphs on output code
|
||||
cudagraphs = False
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import torch
|
||||
|
||||
|
||||
# Check the pattern: (nn.module, F.function/torch.Tensor.method) matched.
|
||||
# Works for length 2 patterns with 1 module and 1 function/method.
|
||||
def matches_module_function_pattern(pattern, node, modules):
|
||||
|
|
|
|||
|
|
@ -802,7 +802,6 @@ class Reduction(Loops):
|
|||
reduction_numel = V.graph.sizevars.simplify(sympy_product(reduction_ranges))
|
||||
|
||||
if reduction_numel == 0:
|
||||
|
||||
# N.B. This is a hack to generate the literal of the given type
|
||||
# Ideally, we should be fixing `def constant` in triton.py
|
||||
# but it breaks due to hardcoded dtypes in other places
|
||||
|
|
@ -1252,7 +1251,6 @@ class PermuteView(BaseView):
|
|||
class SqueezeView(BaseView):
|
||||
@classmethod
|
||||
def create(cls, x, *, dim=None):
|
||||
|
||||
if is_storage_and_layout(x):
|
||||
storage, old_layout = as_storage_and_layout(x)
|
||||
new_size = []
|
||||
|
|
@@ -3828,7 +3826,12 @@ class ConvolutionTransposeUnary(ExternKernelAlloc):
     ):
         kernel = "torch.ops.mkldnn._convolution_transpose_pointwise"
         transposed = True
-        (inputs, constant_args, kernel_layout, _,) = _prepare_convolution_fusion_create(
+        (
+            inputs,
+            constant_args,
+            kernel_layout,
+            _,
+        ) = _prepare_convolution_fusion_create(
             cls,
             x,
             weight,
@ -2144,7 +2144,6 @@ def scatter(x, dim: int, index, src, **kwargs):
|
|||
def scatter_fallback(
|
||||
fn, self, dim: int, index, src, *, reduce: str = None, include_self: bool = True
|
||||
):
|
||||
|
||||
if reduce not in {None, "sum"} or (
|
||||
reduce == "sum" and self.get_dtype() in {torch.bool, torch.int64}
|
||||
):
|
||||
|
|
@ -2158,7 +2157,6 @@ def scatter_fallback(
|
|||
|
||||
@register_lowering(aten.scatter_, type_promotion_kind=None)
|
||||
def scatter_(self, dim: int, index, src, *, reduce: str = None):
|
||||
|
||||
if reduce == "add":
|
||||
reduce = "sum"
|
||||
elif reduce == "multiply":
|
||||
|
|
@ -2674,7 +2672,6 @@ def constant_boundary_condition_2d(x, fill_value, padding):
|
|||
|
||||
|
||||
def pooling_size(x, i, kernel_size, stride, padding, ceil_mode):
|
||||
|
||||
x_out = ir.FloorDiv(
|
||||
x + 2 * padding[i] - (kernel_size[i] - 1) + (stride[i] - 1), stride[i]
|
||||
)
|
||||
|
|
@ -3212,7 +3209,6 @@ def avg_pool2d_backward(
|
|||
count_include_pad,
|
||||
divisor_override=None,
|
||||
):
|
||||
|
||||
assert not divisor_override
|
||||
if not stride:
|
||||
stride = kernel_size
|
||||
|
|
|
|||
|
|
@ -441,7 +441,6 @@ def shape_of_mm(a, b):
|
|||
CallFunction(aten.cat, ListOf(CallFunction(aten.mm, Arg(), Arg())), Arg()),
|
||||
)
|
||||
def cat_mm(match, inputs, dim):
|
||||
|
||||
return cat_tuned_op(match, inputs, dim, op=L[aten.mm], shape_of=shape_of_mm)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -129,7 +129,6 @@ if has_triton():
|
|||
# allocate accumulator
|
||||
acc = tl.zeros((BLOCK_M, BLOCK_N), dtype=ACC_TYPE)
|
||||
for crs in range(0, CRS, BLOCK_K):
|
||||
|
||||
# ------ matrix multiplication ------
|
||||
acc += tl.dot(matrix_x, matrix_w)
|
||||
# ------ update ptrs ------
|
||||
|
|
@ -306,7 +305,6 @@ if has_triton():
|
|||
# allocate accumulator
|
||||
acc = tl.zeros((BLOCK_M, BLOCK_N), dtype=ACC_TYPE)
|
||||
for crs in range(0, CRS, BLOCK_K):
|
||||
|
||||
# ------ matrix multiplication ------
|
||||
acc += tl.dot(matrix_x, matrix_w)
|
||||
# ------ update ptrs ------
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ import torch
|
|||
from ..utils import has_triton
|
||||
|
||||
if has_triton():
|
||||
|
||||
import triton
|
||||
|
||||
class _conv1x1:
|
||||
|
|
|
|||
|
|
@ -273,7 +273,6 @@ class LOBPCGAutogradFunction(torch.autograd.Function):
|
|||
ortho_fparams: Optional[Dict[str, float]] = None,
|
||||
ortho_bparams: Optional[Dict[str, bool]] = None,
|
||||
) -> Tuple[Tensor, Tensor]:
|
||||
|
||||
# makes sure that input is contiguous for efficiency.
|
||||
# Note: autograd does not support dense gradients for sparse input yet.
|
||||
A = A.contiguous() if (not A.is_sparse) else A
|
||||
|
|
@ -360,7 +359,6 @@ def lobpcg(
|
|||
ortho_fparams: Optional[Dict[str, float]] = None,
|
||||
ortho_bparams: Optional[Dict[str, bool]] = None,
|
||||
) -> Tuple[Tensor, Tensor]:
|
||||
|
||||
"""Find the k largest (or smallest) eigenvalues and the corresponding
|
||||
eigenvectors of a symmetric positive definite generalized
|
||||
eigenvalue problem using matrix-free LOBPCG methods.
|
||||
|
|
@ -598,7 +596,6 @@ def _lobpcg(
|
|||
ortho_fparams: Optional[Dict[str, float]] = None,
|
||||
ortho_bparams: Optional[Dict[str, bool]] = None,
|
||||
) -> Tuple[Tensor, Tensor]:
|
||||
|
||||
# A must be square:
|
||||
assert A.shape[-2] == A.shape[-1], A.shape
|
||||
if B is not None:
|
||||
|
|
@ -707,7 +704,6 @@ class LOBPCG:
|
|||
method: str,
|
||||
tracker: None,
|
||||
) -> None:
|
||||
|
||||
# constant parameters
|
||||
self.A = A
|
||||
self.B = B
|
||||
|
|
@ -833,7 +829,6 @@ class LOBPCG:
|
|||
self.call_tracker()
|
||||
|
||||
while not self.stop_iteration():
|
||||
|
||||
self.update()
|
||||
|
||||
if not torch.jit.is_scripting() and self.tracker is not None:
|
||||
|
|
|
|||
|
|
@ -2486,7 +2486,6 @@ def _cudnn_rnn(
|
|||
batch_sizes,
|
||||
dropout_state,
|
||||
):
|
||||
|
||||
is_input_packed = len(batch_sizes) != 0
|
||||
if is_input_packed:
|
||||
seq_length = len(batch_sizes)
|
||||
|
|
@ -2773,7 +2772,6 @@ import torch._refs.special
|
|||
|
||||
|
||||
def activate_meta():
|
||||
|
||||
activate_meta_table = {}
|
||||
|
||||
# For a given op, we pick the most specific decomp function from
|
||||
|
|
|
|||
|
|
@ -135,6 +135,7 @@ is_included_in_alias = torch._C._dispatch_is_included_in_alias
|
|||
|
||||
DispatchKey = torch._C.DispatchKey
|
||||
|
||||
|
||||
# Equivalent to computeDispatchTableEntryWithDebug
|
||||
def resolve_key(op: OperatorBase, k: DispatchKey): # type: ignore[valid-type]
|
||||
# 1. (Direct) operator registration
|
||||
|
|
|
|||
|
|
@ -940,6 +940,7 @@ bitwise_xor = _make_elementwise_binary_prim(
|
|||
# doc="",
|
||||
# )
|
||||
|
||||
|
||||
# div prim performs truncation division on integer inputs
|
||||
# and true division for floating and complex inputs
|
||||
def _div_aten(a, b):
|
||||
|
|
@ -1151,6 +1152,7 @@ zeta = _make_elementwise_binary_prim(
|
|||
type_promotion=ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND.DEFAULT,
|
||||
)
|
||||
|
||||
|
||||
#
|
||||
# View operations
|
||||
def _as_strided_meta(
|
||||
|
|
@ -1701,6 +1703,7 @@ split_dim = _make_prim(
|
|||
doc=_split_dim_doc,
|
||||
)
|
||||
|
||||
|
||||
# Note: allows dimensions to be specified redundantly
|
||||
def _squeeze_meta(a: TensorLikeType, dimensions: Sequence) -> TensorLikeType:
|
||||
assert isinstance(a, TensorLike)
|
||||
|
|
@ -1980,7 +1983,6 @@ rev = _make_prim(
|
|||
def _where_meta(
|
||||
pred: TensorLikeType, a: TensorLikeType, b: TensorLikeType
|
||||
) -> TensorLikeType:
|
||||
|
||||
return _elementwise_meta(
|
||||
a,
|
||||
b,
|
||||
|
|
@ -2004,6 +2006,7 @@ where = _make_prim(
|
|||
doc=_where_doc,
|
||||
)
|
||||
|
||||
|
||||
#
|
||||
# Type conversions
|
||||
#
|
||||
|
|
@ -2022,7 +2025,6 @@ def _convert_element_type_meta(a: TensorLikeType, dtype: torch.dtype) -> TensorL
|
|||
|
||||
|
||||
def _convert_element_type_aten(a: Tensor, dtype: torch.dtype) -> Tensor:
|
||||
|
||||
# Propagates requires grad when possible
|
||||
if not utils.is_grad_dtype(dtype):
|
||||
requires_grad = False
|
||||
|
|
@ -2078,6 +2080,7 @@ device_put = _make_prim(
|
|||
doc=_device_put_doc,
|
||||
)
|
||||
|
||||
|
||||
# NOTE: need to model meta scalars
|
||||
# See https://github.com/pytorch/pytorch/issues/78070
|
||||
def _item_meta(a: TensorLikeType) -> FakeTensor:
|
||||
|
|
@ -2100,6 +2103,7 @@ item = _make_prim(
|
|||
doc=_item_doc,
|
||||
)
|
||||
|
||||
|
||||
# NOTE: need to model meta scalars
|
||||
# See https://github.com/pytorch/pytorch/issues/78070
|
||||
def _maximum_value_meta(dtype: torch.dtype) -> FakeTensor:
|
||||
|
|
@ -2732,6 +2736,7 @@ svd = _make_prim(
|
|||
# Randomness Prims
|
||||
#
|
||||
|
||||
|
||||
# TODO: add generator support
|
||||
# NOTE: there is currently no way of acquiring the "default" torch generator
|
||||
def _normal_meta(
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ DEFAULT_NVFUSER_PYTHON_CONFIG = MappingProxyType(
|
|||
}
|
||||
)
|
||||
|
||||
|
||||
# nvFuserTensorTemplate and nvFuserScalarTemplate are helper objects
|
||||
# for cached construction of the nvFuser's Fusion
|
||||
# TODO: change what is stored in the cache for nvFuser's Tensor objects
|
||||
|
|
@ -258,7 +259,6 @@ def nvfuser_execute(gm: GraphModule, *args, executor_parameters=None):
|
|||
)
|
||||
for arg in flat_args
|
||||
):
|
||||
|
||||
# Construction of the fusion is expensive and cached based on the GraphModule
|
||||
# and symbolic nvFuser args.
|
||||
nv_template_args = to_nvfuser_template_args(flat_args)
|
||||
|
|
|
|||
|
|
@ -223,7 +223,6 @@ _nvfuser_impls["{fname}"] = _{fname}_nvfuser
|
|||
def _native_batch_norm_nvfuser(
|
||||
fd, input, weight, bias, running_mean, running_var, training, momentum, eps
|
||||
):
|
||||
|
||||
"""
|
||||
if weight is None:
|
||||
weight = fd.define_null_tensor()
|
||||
|
|
@ -565,7 +564,6 @@ def register_native_batch_norm():
|
|||
momentum: float,
|
||||
eps: float,
|
||||
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
||||
|
||||
if torch._prims_common.is_complex_dtype(input.dtype):
|
||||
raise NotImplementedError("Complex tensors are not supported")
|
||||
|
||||
|
|
|
|||
|
|
@ -379,6 +379,7 @@ from torch._decomp import register_decomposition
|
|||
|
||||
infer_aten_op = object()
|
||||
|
||||
|
||||
# TODO: add type promotion support
|
||||
def _make_elementwise_unary_reference(
|
||||
type_promotion_kind,
|
||||
|
|
@ -556,7 +557,6 @@ def exp2(a):
|
|||
type_promotion_kind=ELEMENTWISE_TYPE_PROMOTION_KIND.NO_OPMATH,
|
||||
)
|
||||
def fill(a: TensorLikeType, value: NumberType) -> TensorLikeType:
|
||||
|
||||
assert isinstance(a, TensorLike)
|
||||
assert isinstance(value, Number)
|
||||
|
||||
|
|
@ -1118,7 +1118,6 @@ def float_power(
|
|||
a: Union[TensorLikeType, NumberType],
|
||||
b: Union[TensorLikeType, NumberType],
|
||||
) -> Tensor:
|
||||
|
||||
if isinstance(a, Number) and isinstance(b, Number):
|
||||
raise ValueError(
|
||||
"Receive two Number inputs to an elementwise binary operation!"
|
||||
|
|
@ -1168,6 +1167,7 @@ def float_power(
|
|||
# For reference, see CPython's implementation:
|
||||
# https://github.com/python/cpython/blob/ace008c531dd685a30c1dd68f9b5ba35f20171cf/Objects/floatobject.c#L636
|
||||
|
||||
|
||||
# TODO: add docstring
|
||||
@_make_elementwise_binary_reference(
|
||||
type_promotion_kind=utils.ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
|
||||
|
|
@ -1801,6 +1801,7 @@ def clamp_max(
|
|||
# Conditional references
|
||||
#
|
||||
|
||||
|
||||
# https://pytorch.org/docs/stable/generated/torch.where.html
|
||||
# TODO: implement alternate where
|
||||
@register_decomposition(aten.where)
|
||||
|
|
@ -4092,7 +4093,6 @@ def new_empty(
|
|||
device: Optional[torch.device] = None,
|
||||
pin_memory: bool = False,
|
||||
) -> TensorLikeType:
|
||||
|
||||
dtype = a.dtype if dtype is None else dtype
|
||||
layout = a.layout if layout is None else layout
|
||||
device = a.device if device is None else device
|
||||
|
|
@ -4275,7 +4275,6 @@ def empty_like(
|
|||
requires_grad: bool = False,
|
||||
memory_format: torch.memory_format = torch.preserve_format,
|
||||
) -> TensorLikeType:
|
||||
|
||||
dtype = a.dtype if dtype is None else dtype
|
||||
layout = a.layout if layout is None else layout
|
||||
device = a.device if device is None else device
|
||||
|
|
|
|||
|
|
@ -82,7 +82,6 @@ def _dropout_helper(
|
|||
def alpha_dropout(
|
||||
self: TensorLikeType, p: float = 0.5, training: bool = False, inplace: bool = False
|
||||
) -> TensorLikeType:
|
||||
|
||||
if inplace:
|
||||
raise NotImplementedError
|
||||
|
||||
|
|
@ -178,7 +177,6 @@ def celu(
|
|||
def dropout(
|
||||
a: TensorLikeType, p: float = 0.5, training: bool = True, inplace: bool = False
|
||||
) -> TensorLikeType:
|
||||
|
||||
if inplace:
|
||||
raise NotImplementedError
|
||||
|
||||
|
|
|
|||
|
|
@ -295,7 +295,6 @@ class MetaConverter:
|
|||
torch._C.DispatchKey.ADInplaceOrView, False
|
||||
)
|
||||
try:
|
||||
|
||||
if base.dtype == t.dtype:
|
||||
pass
|
||||
elif is_c_of_r(base.dtype, t.dtype):
|
||||
|
|
|
|||
|
|
@ -180,6 +180,7 @@ def _rebuild_tensor_v2(
|
|||
|
||||
_sparse_tensors_to_validate: List["torch.Tensor"] = []
|
||||
|
||||
|
||||
# In _legacy_load() in serialization.py we unpickle storages after the sparse
|
||||
# tensors have been already unpickled. Those storages contain data necessary for
|
||||
# validating sparse tensors: indices and values. That's why sparse tensors are
|
||||
|
|
|
|||
|
|
@ -103,7 +103,6 @@ class DistributedDataParallel(Module):
|
|||
gradient_as_bucket_view=False,
|
||||
static_graph=False,
|
||||
):
|
||||
|
||||
super().__init__()
|
||||
self.logger: Optional[dist.Logger] = None
|
||||
if not any((p.requires_grad for p in module.parameters())):
|
||||
|
|
|
|||
|
|
@ -1849,10 +1849,10 @@ class FlatParamHandle:
|
|||
flat_param.grad = None
|
||||
|
||||
def _deregister_orig_params(self):
|
||||
for (param_name, module, _) in self.flat_param._param_infos:
|
||||
for param_name, module, _ in self.flat_param._param_infos:
|
||||
if hasattr(module, param_name):
|
||||
delattr(module, param_name)
|
||||
for (param_name, module, _, _, _, _) in self.flat_param._shared_param_infos:
|
||||
for param_name, module, _, _, _, _ in self.flat_param._shared_param_infos:
|
||||
if hasattr(module, param_name):
|
||||
delattr(module, param_name)
|
||||
|
||||
|
|
|
|||
|
|
@@ -123,9 +123,9 @@ _context = engine.background_context


 @contextlib.contextmanager
-def create_export_diagnostic_context() -> Generator[
-    infra.DiagnosticContext, None, None
-]:
+def create_export_diagnostic_context() -> (
+    Generator[infra.DiagnosticContext, None, None]
+):
     """Create a diagnostic context for export.

     This is a workaround for code robustness since diagnostic context is accessed by
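
A sketch of the same pattern with a hypothetical function: when a def line with a long return annotation does not fit, black 23 wraps the whole annotation in parentheses rather than splitting the subscripted type itself.

    from typing import Dict, List


    # Hypothetical example, not from the patch.
    def collect_per_benchmark_measurement_tables_by_run_identifier() -> (
        Dict[str, List[float]]
    ):
        return {}


    print(collect_per_benchmark_measurement_tables_by_run_identifier())
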
@ -30,7 +30,6 @@ def _export(
|
|||
args,
|
||||
**kwargs,
|
||||
) -> Union["onnx.ModelProto", bytes]:
|
||||
|
||||
export_options = options.ExportOptions()
|
||||
export_options.update(**kwargs)
|
||||
# Apply decomposition table to the input graph.
|
||||
|
|
|
|||
|
|
@ -149,9 +149,9 @@ _ATENLIB_FUNCTIONS = {
|
|||
}
|
||||
|
||||
|
||||
def _create_op_overload_to_exporter_key_table() -> Dict[
|
||||
Union[torch._ops.OpOverload, Callable], str
|
||||
]:
|
||||
def _create_op_overload_to_exporter_key_table() -> (
|
||||
Dict[Union[torch._ops.OpOverload, Callable], str]
|
||||
):
|
||||
# TODO(justinchuby): Improve how the table is constructed.
|
||||
table: Dict[Union[torch._ops.OpOverload, Callable], str] = {}
|
||||
|
||||
|
|
@ -189,9 +189,9 @@ _OP_OVERLOAD_TO_EXPORTER_KEY_TABLE = _create_op_overload_to_exporter_key_table()
|
|||
|
||||
|
||||
@_beartype.beartype
|
||||
def _create_onnx_friendly_decomposition_table() -> Dict[
|
||||
torch._ops.OpOverload, Callable
|
||||
]:
|
||||
def _create_onnx_friendly_decomposition_table() -> (
|
||||
Dict[torch._ops.OpOverload, Callable]
|
||||
):
|
||||
decomposition_table: Dict[torch._ops.OpOverload, Callable] = {}
|
||||
for op_overload, decomp_fn in torch._decomp.decomposition_table.items():
|
||||
# Skip decomposition into "prim::*" ops, because they are not generally supported by ONNX.
|
||||
|
|
|
|||
|
|
@ -337,7 +337,6 @@ def _export_fx_node_to_onnxscript(
|
|||
_validate_op_between_ort_torch(node, symbolic_fn, torch_args, torch_kwargs)
|
||||
fx_name_to_onnxscipt_value[node.name] = output
|
||||
elif node.op == "output":
|
||||
|
||||
if isinstance(node.args[0], torch.fx.Node):
|
||||
onnx_tensor_or_tensor_tuple = fx_name_to_onnxscipt_value[node.args[0].name]
|
||||
onnxscript_graph.register_outputs(onnx_tensor_or_tensor_tuple)
|
||||
|
|
@ -389,7 +388,6 @@ def _export_fx_node_to_onnxscript(
|
|||
def export_fx_to_onnxscript(
|
||||
fx_module_with_metadata: torch.fx.GraphModule, options: options.ExportOptions
|
||||
):
|
||||
|
||||
# Initialize the ONNX graph
|
||||
onnxscript_graph = graph_building.TorchScriptGraph()
|
||||
tracer = graph_building.TorchScriptTracingEvaluator(onnxscript_graph)
|
||||
|
|
|
|||
|
|
@ -253,7 +253,6 @@ def export_without_parameters_and_buffers(
|
|||
Tuple[Any, ...],
|
||||
Tuple[Any, ...],
|
||||
]:
|
||||
|
||||
graph_module, bound_args = _trace_into_fx_graph_via_fx_symbolic_trace(
|
||||
module, *args, **kwargs
|
||||
)
|
||||
|
|
|
|||
|
|
@ -292,7 +292,6 @@ def _find_onnxscript_op(
|
|||
|
||||
|
||||
def _convert_tensor_to_numpy(input: Any) -> Any:
|
||||
|
||||
try:
|
||||
import numpy as np
|
||||
except ImportError:
|
||||
|
|
|
|||
|
|
@ -68,7 +68,6 @@ def batch_norm(
|
|||
eps,
|
||||
cudnn_enabled,
|
||||
):
|
||||
|
||||
if (
|
||||
torch.is_autocast_enabled()
|
||||
and not symbolic_helper.args_have_same_dtype(
|
||||
|
|
|
|||
|
|
@ -1324,7 +1324,6 @@ def _op_with_optional_float_cast(g: jit_utils.GraphContext, op_name, *args, **kw
|
|||
|
||||
if require_cast:
|
||||
for input in inputs:
|
||||
|
||||
if input.isCompleteTensor():
|
||||
input_scalar_type = _type_utils.JitScalarType.from_value(input)
|
||||
if input_scalar_type != dtype_0:
|
||||
|
|
@ -4484,7 +4483,6 @@ def _generic_rnn(
|
|||
batch_first=None,
|
||||
batch_sizes=None,
|
||||
):
|
||||
|
||||
warnings.warn(
|
||||
"Exporting a model to ONNX with a batch_size other than 1, "
|
||||
+ "with a variable length with "
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from typing import cast
|
|||
import torch
|
||||
from torch.types import Storage
|
||||
|
||||
|
||||
# because get_storage_from_record returns a tensor!?
|
||||
class _HasStorage:
|
||||
def __init__(self, storage):
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ _zip_searchorder = (
|
|||
(".py", False),
|
||||
)
|
||||
|
||||
|
||||
# Replace any occurrences of '\r\n?' in the input string with '\n'.
|
||||
# This converts DOS and Mac line endings to Unix line endings.
|
||||
def _normalize_line_endings(source):
|
||||
|
|
|
|||
|
|
@ -916,7 +916,6 @@ class PackageExporter:
|
|||
|
||||
def _persistent_id(self, obj):
|
||||
if torch.is_storage(obj) or isinstance(obj, torch.storage.TypedStorage):
|
||||
|
||||
storage: Storage
|
||||
if isinstance(obj, torch.storage.TypedStorage):
|
||||
# TODO: Once we decide to break serialization FC, we can
|
||||
|
|
|
|||
|
|
@ -615,6 +615,7 @@ class AliasInfo:
|
|||
# the great majority of PyTorch's (public) operators.
|
||||
#
|
||||
|
||||
|
||||
# Classes and methods for the operator database
|
||||
@dataclass
|
||||
class OpInfo:
|
||||
|
|
@ -1549,6 +1550,7 @@ def make_error_inputs_elementwise_binary(error_inputs_func):
|
|||
|
||||
# The following functions and classes are for testing elementwise binary operators.
|
||||
|
||||
|
||||
# Returns a generator of pairs of contiguous tensors on the requested device
|
||||
# and with the requested dtype.
|
||||
#
|
||||
|
|
@ -1997,7 +1999,6 @@ class BinaryUfuncInfo(OpInfo):
|
|||
supports_two_python_scalars=False, # Whether the operator allows scalar x scalar inputs
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
self._original_binary_ufunc_args = locals().copy()
|
||||
|
||||
# Elementwise binary operations perform the equivalent of test_numpy_refs
|
||||
|
|
@ -2144,7 +2145,6 @@ def _filter_unary_elementwise_tensor(a, *, op):
|
|||
|
||||
|
||||
def generate_elementwise_unary_tensors(op, *, device, dtype, requires_grad, **kwargs):
|
||||
|
||||
# Special-cases bool
|
||||
if dtype is torch.bool:
|
||||
tensors = (
|
||||
|
|
@ -2491,7 +2491,6 @@ class SpectralFuncInfo(OpInfo):
|
|||
decorators=None,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
self._original_spectral_func_args = dict(locals()).copy()
|
||||
self._original_spectral_func_args.update(kwargs)
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ from torch.testing._internal.opinfo.core import (
|
|||
)
|
||||
from torch.testing._internal.opinfo.utils import prod_numpy, reference_reduction_numpy
|
||||
|
||||
|
||||
# Used for log_softmax, softmax, softmin
|
||||
def sample_inputs_softmax_variant(
|
||||
op_info,
|
||||
|
|
|
|||
|
|
@ -53,7 +53,6 @@ class SpectralFuncPythonRefInfo(SpectralFuncInfo):
|
|||
supports_nvfuser=True,
|
||||
**kwargs,
|
||||
): # additional kwargs override kwargs inherited from the torch opinfo
|
||||
|
||||
self.torch_opinfo_name = torch_opinfo_name
|
||||
self.torch_opinfo = _find_referenced_opinfo(
|
||||
torch_opinfo_name, torch_opinfo_variant, op_db=op_db
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ from torch.testing._internal.opinfo.utils import (
|
|||
if TEST_SCIPY:
|
||||
import scipy.special
|
||||
|
||||
|
||||
# TODO: Consolidate `i0e` with sample_inputs_unary when `make_tensor`,
|
||||
# supports `exclude` argument.
|
||||
# For more context: https://github.com/pytorch/pytorch/pull/56352#discussion_r633277617
|
||||
|
|
|
|||
|
|
@ -103,7 +103,6 @@ class PythonRefInfo(OpInfo):
|
|||
supports_nvfuser=True,
|
||||
**kwargs,
|
||||
): # additional kwargs override kwargs inherited from the torch opinfo
|
||||
|
||||
self.torch_opinfo_name = torch_opinfo_name
|
||||
self.torch_opinfo_variant_name = torch_opinfo_variant_name
|
||||
self.torch_opinfo = _find_referenced_opinfo(
|
||||
|
|
@ -134,7 +133,6 @@ class ReductionPythonRefInfo(ReductionOpInfo):
|
|||
supports_nvfuser=True,
|
||||
**kwargs,
|
||||
): # additional kwargs override kwargs inherited from the torch opinfo
|
||||
|
||||
self.torch_opinfo_name = torch_opinfo_name
|
||||
self.torch_opinfo_variant_name = torch_opinfo_variant_name
|
||||
self.torch_opinfo = _find_referenced_opinfo(
|
||||
|
|
@ -169,7 +167,6 @@ class ElementwiseUnaryPythonRefInfo(UnaryUfuncInfo):
|
|||
supports_nvfuser=True,
|
||||
**kwargs,
|
||||
): # additional kwargs override kwargs inherited from the torch opinfo
|
||||
|
||||
self.torch_opinfo_name = torch_opinfo_name
|
||||
self.torch_opinfo_variant_name = torch_opinfo_variant_name
|
||||
self.torch_opinfo = _find_referenced_opinfo(
|
||||
|
|
@ -201,7 +198,6 @@ class ElementwiseBinaryPythonRefInfo(BinaryUfuncInfo):
|
|||
supports_nvfuser=True,
|
||||
**kwargs,
|
||||
): # additional kwargs override kwargs inherited from the torch opinfo
|
||||
|
||||
self.torch_opinfo_name = torch_opinfo_name
|
||||
self.torch_opinfo_variant_name = torch_opinfo_variant_name
|
||||
self.torch_opinfo = _find_referenced_opinfo(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import sympy
|
||||
|
||||
|
||||
# The normal Python interpretation of the operators
|
||||
# NB: For magic methods this needs to use normal magic methods
|
||||
# so that test_magic_methods works
|
||||
|
|
|
|||
|
|
@ -313,6 +313,7 @@ JIT_TO_CPP_DEFAULT = {
|
|||
"long": "at::kLong",
|
||||
}
|
||||
|
||||
|
||||
# Convert a JIT default into C++ expression representing the default
|
||||
def default_expr(d: str, t: Type, *, symint: bool) -> str:
|
||||
if d == "None" and str(t) == "Tensor?":
|
||||
|
|
|
|||
|
|
@ -69,6 +69,7 @@ reapply_views_binding = Binding(
|
|||
default=None,
|
||||
)
|
||||
|
||||
|
||||
# The lambda capture itself doesn't have a name.
|
||||
# The name returned here corresponds to the name of the inner function called by the lambda.
|
||||
def name(
Some files were not shown because too many files have changed in this diff.