From a0ac63cbd9e32705e67a4f90df5de8363d0c56ff Mon Sep 17 00:00:00 2001 From: Aaron Gokaslan Date: Tue, 18 Mar 2025 00:46:07 +0000 Subject: [PATCH] [BE]: Apply ruff PERF403 to use dict comprehensions more often (#149257) Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/149257 Approved by: https://github.com/jansel --- .github/scripts/trymerge.py | 7 +++---- benchmarks/operator_benchmark/benchmark_core.py | 3 +-- pyproject.toml | 1 - test/inductor/test_custom_post_grad_passes.py | 4 +--- test/test_fx.py | 4 +--- test/test_jit.py | 3 +-- test/test_linalg.py | 4 +--- torch/_export/serde/serialize.py | 4 +--- torch/_functorch/partitioners.py | 4 +--- torch/_functorch/top_operators_github_usage.py | 7 ++----- torch/_inductor/autotune_process.py | 5 +---- torch/_inductor/memory.py | 6 +++--- torch/_inductor/runtime/triton_heuristics.py | 3 +-- torch/_inductor/utils.py | 7 ++----- torch/ao/ns/_numeric_suite.py | 4 +--- torch/ao/quantization/_correct_bias.py | 9 +++++---- .../ao/quantization/fx/qconfig_mapping_utils.py | 6 ++---- torch/distributed/checkpoint/state_dict.py | 3 +-- torch/distributed/fsdp/_optim_utils.py | 17 ++++++++--------- torch/distributed/tensor/_ops/_common_rules.py | 7 ++----- torch/onnx/_internal/_exporter_legacy.py | 15 ++++++++------- torch/testing/_internal/common_cuda.py | 3 +-- torch/testing/_internal/common_mkldnn.py | 3 +-- 23 files changed, 48 insertions(+), 81 deletions(-) diff --git a/.github/scripts/trymerge.py b/.github/scripts/trymerge.py index ed799c4321b..38ca8fda317 100755 --- a/.github/scripts/trymerge.py +++ b/.github/scripts/trymerge.py @@ -819,10 +819,9 @@ class GitHubPR: cursor=info["reviews"]["pageInfo"]["startCursor"], ) info = rc["data"]["repository"]["pullRequest"] - reviews = {} - for author, state in self._reviews: - if state != "COMMENTED": - reviews[author] = state + reviews = { + author: state for author, state in self._reviews if state != "COMMENTED" + } return list(reviews.items()) def get_approved_by(self) -> list[str]: diff --git a/benchmarks/operator_benchmark/benchmark_core.py b/benchmarks/operator_benchmark/benchmark_core.py index 8d91f4bf475..045a52a5076 100644 --- a/benchmarks/operator_benchmark/benchmark_core.py +++ b/benchmarks/operator_benchmark/benchmark_core.py @@ -296,8 +296,7 @@ class BenchmarkRunner: (key.strip(), value.strip()) for key, value in map(lambda str: str.split(":"), key_vals) # noqa: C417 ] # ['M: (32, 16)', 'ZPB: 2'] -> [('M', '(32, 16)'), ('ZPB', '2')] - for key, value in key_vals: - out[key] = value + out.update(key_vals) return out diff --git a/pyproject.toml b/pyproject.toml index 8274273157e..a09aead31b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,6 @@ ignore = [ # these ignores are from ruff PERF; please fix! "PERF203", "PERF401", - "PERF403", # these ignores are from PYI; please fix! 
"PYI024", "PYI036", diff --git a/test/inductor/test_custom_post_grad_passes.py b/test/inductor/test_custom_post_grad_passes.py index 457bbcdb82e..2994b4109e6 100644 --- a/test/inductor/test_custom_post_grad_passes.py +++ b/test/inductor/test_custom_post_grad_passes.py @@ -219,9 +219,7 @@ class TestPostGradCustomPrePostPass(TestCustomPassBase): for m in matmuls: rhs_vals[m.args[1]].add(m) - order = {} - for idx, n in enumerate(graph.nodes): - order[n] = idx + order = {n: idx for idx, n in enumerate(graph.nodes)} for rhs, matmuls in rhs_vals.items(): if len(matmuls) == 1: diff --git a/test/test_fx.py b/test/test_fx.py index 07401118c42..5b54025d8d3 100644 --- a/test/test_fx.py +++ b/test/test_fx.py @@ -2324,9 +2324,7 @@ class TestFX(JitTestCase): copied_graph = copy.deepcopy(g) - val_map = {} - for orig_node, new_node in zip(g.nodes, copied_graph.nodes): - val_map[orig_node] = new_node + val_map = dict(zip(g.nodes, copied_graph.nodes)) for orig_node, new_node in zip(g.nodes, copied_graph.nodes): orig_users = set(orig_node.users.keys()) diff --git a/test/test_jit.py b/test/test_jit.py index 0a49ab36871..3af3521f4fc 100644 --- a/test/test_jit.py +++ b/test/test_jit.py @@ -1761,8 +1761,7 @@ graph(%Ra, %Rb): for node in g.nodes(): n_ = g2.createClone(node, lambda x: g_to_g2[x]) g2.appendNode(n_) - for o, no in zip(node.outputs(), n_.outputs()): - g_to_g2[o] = no + g_to_g2.update(zip(node.outputs(), n_.outputs())) for node in g.outputs(): g2.registerOutput(g_to_g2[node]) diff --git a/test/test_linalg.py b/test/test_linalg.py index bbd7a092671..0cece1a1e1a 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -91,9 +91,7 @@ def tunableop_matmul(device, dtype): def get_tunableop_validators(): assert len(torch.cuda.tunable.get_validators()) > 0 - validators = {} - for key, value in torch.cuda.tunable.get_validators(): - validators[key] = value + validators = dict(torch.cuda.tunable.get_validators()) return validators class TestLinalg(TestCase): diff --git a/torch/_export/serde/serialize.py b/torch/_export/serde/serialize.py index 0506f6ab26f..14cc7d2731b 100644 --- a/torch/_export/serde/serialize.py +++ b/torch/_export/serde/serialize.py @@ -1564,9 +1564,7 @@ class ExportedProgramSerializer(metaclass=Final): # TODO: Directly serialize exported_program.constants once # CustomClassHolders get stored in the ExportedProgram rather than in # the graph - constants: dict[str, Any] = {} - for n, c in gm_serializer.custom_objs.items(): - constants[n] = c + constants: dict[str, Any] = gm_serializer.custom_objs.copy() for n, t in exported_program.constants.items(): assert n not in constants constants[n] = t diff --git a/torch/_functorch/partitioners.py b/torch/_functorch/partitioners.py index 790ddebf4d9..58374ec70f4 100644 --- a/torch/_functorch/partitioners.py +++ b/torch/_functorch/partitioners.py @@ -580,9 +580,7 @@ def reordering_to_mimic_autograd_engine(gm: fx.GraphModule) -> fx.GraphModule: for node in gm.graph.find_nodes(op="placeholder"): env[node] = new_graph.node_copy(node, lambda x: env[x]) - order = {} - for idx, node in enumerate(gm.graph.nodes): - order[node] = idx + order = {node: idx for idx, node in enumerate(gm.graph.nodes)} def insert_node_in_graph(node): cur_nodes = [node] diff --git a/torch/_functorch/top_operators_github_usage.py b/torch/_functorch/top_operators_github_usage.py index 1fcdbe0b41a..6290a155500 100644 --- a/torch/_functorch/top_operators_github_usage.py +++ b/torch/_functorch/top_operators_github_usage.py @@ -625,8 +625,5 @@ def get_nn_functional_top_list(): return 
top_nn_functional_ -usage_count = {} -for k, v in get_nn_functional_top_list(): - usage_count[k] = v -for k, v in top_torch: - usage_count[k] = v +usage_count = dict(get_nn_functional_top_list()) +usage_count.update(top_torch) diff --git a/torch/_inductor/autotune_process.py b/torch/_inductor/autotune_process.py index 0faca77e4b0..5b0369ab98e 100644 --- a/torch/_inductor/autotune_process.py +++ b/torch/_inductor/autotune_process.py @@ -398,12 +398,9 @@ class TuningProcessPool: assert self.processes is not None, "Tuning process pool is not initialized" assert self.executor is not None - results = {} - # Use a ThreadExecutorPool to spread the work across the subprocesses and # to grab subprocesses as soon as they're free. - for choice, result in zip(choices, self.executor.map(self.target, choices)): - results[choice] = result + results = dict(zip(choices, self.executor.map(self.target, choices))) return results diff --git a/torch/_inductor/memory.py b/torch/_inductor/memory.py index 83a927e8c5f..fb3bd2abd85 100644 --- a/torch/_inductor/memory.py +++ b/torch/_inductor/memory.py @@ -267,9 +267,9 @@ def estimate_peak_memory( # get the execution step of each node, this will be used to determine # the end_step of buffers - node_to_step: dict[BaseSchedulerNode, int] = dict() - for step, node in enumerate(nodes): - node_to_step[node] = step + node_to_step: dict[BaseSchedulerNode, int] = { + node: step for step, node in enumerate(nodes) + } # get buffers' size and liveliness information buf_info_list: list[BufferInfo] = [] diff --git a/torch/_inductor/runtime/triton_heuristics.py b/torch/_inductor/runtime/triton_heuristics.py index ced8b95ca2b..a5242dda345 100644 --- a/torch/_inductor/runtime/triton_heuristics.py +++ b/torch/_inductor/runtime/triton_heuristics.py @@ -154,8 +154,7 @@ def _dump_launch_params(args, kwargs, launcher, kernel_name, grid): else: call_kwargs[k] = v if not triton_version_uses_attrs_dict(): - for k, v in launcher.config.kwargs.items(): - call_kwargs[k] = v + call_kwargs.update(launcher.config.kwargs) call_kwargs["num_warps"] = launcher.config.num_warps call_kwargs["num_stages"] = launcher.config.num_stages args_str = [*call_args] diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index 54c4c1ecf0b..4133ac9b295 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -340,8 +340,7 @@ def _type_of(key: Optional[torch.dtype]) -> str: "uint64": "u64", } # reinterpret can create triton type - for v in list(tys.values()): - tys[v] = v + tys.update({v: v for v in list(tys.values())}) return key if isinstance(key, str) else f"*{tys[dtype_str]}" @@ -635,9 +634,7 @@ def get_kernel_metadata( single_graph = inductor_nodes[0].graph # create a map of idx -> node and cache it if not hasattr(single_graph, "_inductor_kernel_metadata_node_to_idx_map"): - node_to_idx_map = {} - for idx, n in enumerate(single_graph.nodes): - node_to_idx_map[n] = idx + node_to_idx_map = {n: idx for idx, n in enumerate(single_graph.nodes)} single_graph._inductor_kernel_metadata_node_to_idx_map = node_to_idx_map # type: ignore[attr-defined] inductor_nodes.sort( key=lambda n: single_graph._inductor_kernel_metadata_node_to_idx_map[n] # type: ignore[attr-defined] diff --git a/torch/ao/ns/_numeric_suite.py b/torch/ao/ns/_numeric_suite.py index 49d51fdcd4e..71ce96e4a38 100644 --- a/torch/ao/ns/_numeric_suite.py +++ b/torch/ao/ns/_numeric_suite.py @@ -368,9 +368,7 @@ def prepare_model_with_stubs( "quantization_api._numeric_suite.prepare_model_with_stubs" ) - float_module_children = {} - for 
name, mod in float_module.named_children(): - float_module_children[name] = mod + float_module_children = dict(float_module.named_children()) reassign = {} for name, mod in q_module.named_children(): diff --git a/torch/ao/quantization/_correct_bias.py b/torch/ao/quantization/_correct_bias.py index e1623ae8ee5..3f480486893 100644 --- a/torch/ao/quantization/_correct_bias.py +++ b/torch/ao/quantization/_correct_bias.py @@ -119,10 +119,11 @@ def bias_correction( float_model, quantized_model, _supported_modules, MeanShadowLogger ) - uncorrected_modules = {} - for name, submodule in quantized_model.named_modules(): - if type(submodule) in target_modules: - uncorrected_modules[name] = submodule + uncorrected_modules = { + name: submodule + for name, submodule in quantized_model.named_modules() + if type(submodule) in target_modules + } for uncorrected_module in uncorrected_modules: quantized_submodule = get_module(quantized_model, uncorrected_module) diff --git a/torch/ao/quantization/fx/qconfig_mapping_utils.py b/torch/ao/quantization/fx/qconfig_mapping_utils.py index 47d30e42466..ff45c15946d 100644 --- a/torch/ao/quantization/fx/qconfig_mapping_utils.py +++ b/torch/ao/quantization/fx/qconfig_mapping_utils.py @@ -376,10 +376,8 @@ def _get_flattened_qconfig_dict( flattened: dict[Union[Callable, str], QConfigAny] = { "": qconfig_mapping.global_qconfig } - for obj, qconfig in qconfig_mapping.object_type_qconfigs.items(): - flattened[obj] = qconfig - for obj, qconfig in qconfig_mapping.module_name_qconfigs.items(): - flattened[obj] = qconfig + flattened.update(qconfig_mapping.object_type_qconfigs) + flattened.update(qconfig_mapping.module_name_qconfigs) # type: ignore[arg-type] return flattened diff --git a/torch/distributed/checkpoint/state_dict.py b/torch/distributed/checkpoint/state_dict.py index 033528093c8..8b4827371da 100644 --- a/torch/distributed/checkpoint/state_dict.py +++ b/torch/distributed/checkpoint/state_dict.py @@ -596,8 +596,7 @@ def _load_model_state_dict( ) elif info.full_state_dict: _distribute_state_dict(state_dict, local_state_dict, device=devices.pop()) - for fqn, local_state in local_state_dict.items(): - state_dict[fqn] = local_state + state_dict.update(local_state_dict) with info.fsdp_context(): return cast( diff --git a/torch/distributed/fsdp/_optim_utils.py b/torch/distributed/fsdp/_optim_utils.py index de33ed8ef3f..64d56a33916 100644 --- a/torch/distributed/fsdp/_optim_utils.py +++ b/torch/distributed/fsdp/_optim_utils.py @@ -314,11 +314,9 @@ def _unflatten_communicated_optim_state( unflat_state_param[state_name] = optim_state # Add zero-dimension tensor state: take the target rank's value - for state_name, zero_dim_tensor in sorted_items(zero_dim_tensor_state): - unflat_state_param[state_name] = zero_dim_tensor + unflat_state_param.update(sorted_items(zero_dim_tensor_state)) # Add non-tensor state: take the target rank's value - for state_name, non_tensor in sorted_items(non_tensor_state): - unflat_state_param[state_name] = non_tensor + unflat_state_param.update(sorted_items(non_tensor_state)) unflat_param_state.append(unflat_state_param) return unflat_param_state @@ -1827,11 +1825,12 @@ def _convert_state_with_flat_params( ) if to_save: assert len(unflat_state) == len(optim_state_key.unflat_param_names) - for unflat_param_name, unflat_param_state in zip( - optim_state_key.unflat_param_names, - unflat_state, - ): - fsdp_osd_state[unflat_param_name] = unflat_param_state + fsdp_osd_state.update( + zip( + optim_state_key.unflat_param_names, + unflat_state, + ) + ) elif 
to_save: assert len(optim_state_key.unflat_param_names) == 1 unflat_param_name = optim_state_key.unflat_param_names[0] diff --git a/torch/distributed/tensor/_ops/_common_rules.py b/torch/distributed/tensor/_ops/_common_rules.py index 6a5b472685e..7a2f500fca7 100644 --- a/torch/distributed/tensor/_ops/_common_rules.py +++ b/torch/distributed/tensor/_ops/_common_rules.py @@ -265,7 +265,6 @@ def pointwise_rule(op_schema: OpSchema, linearity: bool = False) -> OutputShardi # check if we replace the all inputs dim char with singleton dimension, # if we replace all inputs, we also need to replace the output dimension. for output_dim_idx in range(len(out_dimchars)): - out_dimchar = out_dimchars[output_dim_idx] if singleton_counter[output_dim_idx] == len(input_specs): out_dimchars = _replace_char_in_str(out_dimchars, "1", output_dim_idx) @@ -274,12 +273,10 @@ def pointwise_rule(op_schema: OpSchema, linearity: bool = False) -> OutputShardi enforce_sharding: dict[str, int] = {} if _is_inplace_op(op_schema.op): # inplace op should keep the input sharding it writes to - for out_dimchar, mesh_dim in zip(out_dimchars, input_specs[0].dim_map): - enforce_sharding[out_dimchar] = mesh_dim + enforce_sharding.update(zip(out_dimchars, input_specs[0].dim_map)) elif _is_out_variant_op(op_schema.op): out_spec = cast(DTensorSpec, op_schema.kwargs_schema["out"]) - for out_dimchar, mesh_dim in zip(out_dimchars, out_spec.dim_map): - enforce_sharding[out_dimchar] = mesh_dim + enforce_sharding.update(zip(out_dimchars, out_spec.dim_map)) return einop_rule( fmt, diff --git a/torch/onnx/_internal/_exporter_legacy.py b/torch/onnx/_internal/_exporter_legacy.py index 86e91d7b974..3051d2da55a 100644 --- a/torch/onnx/_internal/_exporter_legacy.py +++ b/torch/onnx/_internal/_exporter_legacy.py @@ -596,13 +596,14 @@ class Exporter: # not valid. # Concrete data is expected to be filled for those initializers later during `ONNXProgram.save`. if self.options.fake_context is not None: - initializers_with_real_tensors: dict[str, torch.Tensor] = {} - for ( - initializer_name, - initializer, - ) in onnxscript_graph.initializers.items(): - if not isinstance(initializer, torch._subclasses.FakeTensor): - initializers_with_real_tensors[initializer_name] = initializer + initializers_with_real_tensors: dict[str, torch.Tensor] = { + initializer_name: initializer + for ( + initializer_name, + initializer, + ) in onnxscript_graph.initializers.items() + if not isinstance(initializer, torch._subclasses.FakeTensor) + } onnxscript_graph.initializers = initializers_with_real_tensors # Export TorchScript graph to ONNX ModelProto. 
diff --git a/torch/testing/_internal/common_cuda.py b/torch/testing/_internal/common_cuda.py index ffa4a61ebf4..0963a45464b 100644 --- a/torch/testing/_internal/common_cuda.py +++ b/torch/testing/_internal/common_cuda.py @@ -217,8 +217,7 @@ def tf32_on_and_off(tf32_precision=1e-5): @functools.wraps(f) def wrapped(*args, **kwargs): - for k, v in zip(arg_names, args): - kwargs[k] = v + kwargs.update(zip(arg_names, args)) cond = torch.cuda.is_tf32_supported() if 'device' in kwargs: cond = cond and (torch.device(kwargs['device']).type == 'cuda') diff --git a/torch/testing/_internal/common_mkldnn.py b/torch/testing/_internal/common_mkldnn.py index 4a9d01cf9cd..f9a05cf807a 100644 --- a/torch/testing/_internal/common_mkldnn.py +++ b/torch/testing/_internal/common_mkldnn.py @@ -60,8 +60,7 @@ def bf32_on_and_off(bf32_precision=1e-5): @functools.wraps(f) def wrapped(*args, **kwargs): - for k, v in zip(arg_names, args): - kwargs[k] = v + kwargs.update(zip(arg_names, args)) cond = bf32_is_not_fp32() if "device" in kwargs: cond = cond and (torch.device(kwargs["device"]).type == "cpu")
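
For reference, a minimal sketch (not part of the patch) of the pattern ruff's PERF403 rule flags and the three rewrites this change applies: a dict comprehension when filtering is needed, dict()/dict(zip(...)) when the pairs are taken as-is, and dict.update() when merging into an existing mapping. The names pairs, reviews, all_states, and merged below are hypothetical placeholders, not identifiers from this patch.

# Before: building a dict with an explicit loop (what PERF403 flags).
pairs = [("alice", "APPROVED"), ("bob", "COMMENTED")]
reviews = {}
for author, state in pairs:
    if state != "COMMENTED":
        reviews[author] = state

# After: a dict comprehension when a condition is involved...
reviews = {author: state for author, state in pairs if state != "COMMENTED"}

# ...dict() (or dict(zip(...))) when the pairs are copied unchanged...
all_states = dict(pairs)

# ...and dict.update() when folding pairs into an existing mapping.
merged: dict[str, str] = {}
merged.update(pairs)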