diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py index 070d26a4699..e23e049e3bb 100644 --- a/torch/_dynamo/eval_frame.py +++ b/torch/_dynamo/eval_frame.py @@ -1791,7 +1791,7 @@ def rewrite_signature( for i, val in enumerate(sources): dict_of_source_vals[id(val)] = i - for i, val in enumerate(candidates): + for val in candidates: if isinstance(val, tuple(common_constant_types)): matched_elements_positions.append(None) elif id(val) not in dict_of_source_vals: diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py index 0c6a6f31991..e2e86b03e56 100644 --- a/torch/_dynamo/guards.py +++ b/torch/_dynamo/guards.py @@ -319,7 +319,7 @@ class GuardManagerWrapper: is_diff_guard_node = ( node.get_source() in self.diff_guard_sources or node.fail_count() > 0 ) - for idx, (key_mgr, val_mgr) in sorted( + for _idx, (key_mgr, val_mgr) in sorted( node.get_key_value_managers().items() ): is_diff_guard_node |= visit(key_mgr) | visit(val_mgr) @@ -442,7 +442,7 @@ class GuardManagerWrapper: is_subtree_tag_safe = True # Recurse to get the tag safe roots from subtree. - for idx, (key_mgr, val_mgr) in sorted( + for _idx, (key_mgr, val_mgr) in sorted( node.get_key_value_managers().items() ): if key_mgr is not None: @@ -450,9 +450,7 @@ class GuardManagerWrapper: if val_mgr is not None: tag_safe_roots.extend(visit(val_mgr)) - for idx, (key_mgr, val_mgr) in sorted( - node.get_key_value_managers().items() - ): + for key_mgr, val_mgr in node.get_key_value_managers().values(): if key_mgr: is_subtree_tag_safe &= key_mgr.is_tag_safe() diff --git a/torch/_dynamo/variables/optimizer.py b/torch/_dynamo/variables/optimizer.py index 776f7f34d9c..18f75833551 100644 --- a/torch/_dynamo/variables/optimizer.py +++ b/torch/_dynamo/variables/optimizer.py @@ -289,9 +289,7 @@ class OptimizerVariable(UserDefinedObjectVariable): params_vt = group_vt.getitem_const(tx, ConstantVariable.create("params")) all_static = True non_static_grads = [] - for p_ind, (p, p_vt) in enumerate( - zip(group["params"], params_vt.unpack_var_sequence(tx)) - ): + for p, p_vt in zip(group["params"], params_vt.unpack_var_sequence(tx)): param_source = p_vt.source self.tensor_to_source[p] = param_source grad_source = GradSource( @@ -322,12 +320,12 @@ class OptimizerVariable(UserDefinedObjectVariable): # We have to again iterate over the state dict to collect the # tensor_to_source dict. This is used for the finalizer. 
- for idx, (p, value) in enumerate(self.value.state.items()): + for idx, value in enumerate(self.value.state.values()): p_state_source = DictGetItemSource( state_source, ConstDictKeySource(state_source, idx) ) tx.output.guard_on_key_order.add(p_state_source) - for inner_idx, (k, v) in enumerate(value.items()): + for inner_idx, v in enumerate(value.values()): if ( isinstance(v, torch.Tensor) and v not in self.grad_to_source diff --git a/torch/_functorch/_aot_autograd/collect_metadata_analysis.py b/torch/_functorch/_aot_autograd/collect_metadata_analysis.py index 6f0a76d5d6f..11cef0f9205 100644 --- a/torch/_functorch/_aot_autograd/collect_metadata_analysis.py +++ b/torch/_functorch/_aot_autograd/collect_metadata_analysis.py @@ -240,7 +240,7 @@ def run_functionalized_fw_and_collect_metadata( # Inspect the state of the input tensor functional wrapper to detect input mutation info # If inp[i] has a metadata-only mutation, then maybe_inputs_with_mutated_metadata[i] contains the updated version - for i, (arg, f_arg) in enumerate(zip(flat_args, flat_f_args)): + for arg, f_arg in zip(flat_args, flat_f_args): # NB: Mutation of non-contiguous tensor subclass input can result in a mismatch in # strides between the functionalized arg inner tensors and non-functionalized arg inner # tensors. This is a problem as the inner tensor stride change may not be reflected diff --git a/torch/_functorch/_aot_autograd/runtime_wrappers.py b/torch/_functorch/_aot_autograd/runtime_wrappers.py index 33aea13c336..4846f1ca74e 100644 --- a/torch/_functorch/_aot_autograd/runtime_wrappers.py +++ b/torch/_functorch/_aot_autograd/runtime_wrappers.py @@ -2041,7 +2041,7 @@ To fix this, your tensor subclass must implement the dunder method __force_to_sa assert len(meta.attrs) == len(runtime_subclass_keys) leaves = [] - for i, (attr, attr_meta) in enumerate(meta.attrs.items()): + for attr, attr_meta in meta.attrs.items(): elem = getattr(x, attr) new_elem, elem_leaves = AOTDispatchAutograd.process_runtime_tangent( elem, attr_meta diff --git a/torch/_functorch/_aot_autograd/subclass_parametrization.py b/torch/_functorch/_aot_autograd/subclass_parametrization.py index 3b7f80114bb..0ea6635a62e 100644 --- a/torch/_functorch/_aot_autograd/subclass_parametrization.py +++ b/torch/_functorch/_aot_autograd/subclass_parametrization.py @@ -98,7 +98,7 @@ def unwrap_tensor_subclass_parameters(module: torch.nn.Module) -> torch.nn.Modul module, name, UnwrapTensorSubclass() ) - for name, child in module.named_children(): + for child in module.children(): unwrap_tensor_subclass_parameters(child) return module diff --git a/torch/_functorch/partitioners.py b/torch/_functorch/partitioners.py index 8e194a0f0ce..d7b59ad6075 100644 --- a/torch/_functorch/partitioners.py +++ b/torch/_functorch/partitioners.py @@ -1481,9 +1481,7 @@ def functionalize_rng_ops( ) ) - for rng_count, (base_node, node_pair) in enumerate( - recomputable_rng_ops_map.items() - ): + for rng_count, node_pair in enumerate(recomputable_rng_ops_map.values()): # Step 2 - Modify the fwd pass such that fw_node = node_pair["fwd"] bw_node = node_pair["bwd"] @@ -2714,9 +2712,7 @@ def thread_graphsafe_rng_from_hops(module, is_backward): subgraph = getattr(module, hop_node.args[0].target) if isinstance(subgraph, fx.GraphModule): new_rng_inputs = [] - for idx, placeholder_node in enumerate( - subgraph.graph.find_nodes(op="placeholder") - ): + for placeholder_node in subgraph.graph.find_nodes(op="placeholder"): if rng_string in placeholder_node.name: # Found a rng state placeholder in the hop 
graph, lets add # the corresponding node in the outer graph diff --git a/torch/_functorch/pyfunctorch.py b/torch/_functorch/pyfunctorch.py index 0a811ed86c2..b76cd191c3c 100644 --- a/torch/_functorch/pyfunctorch.py +++ b/torch/_functorch/pyfunctorch.py @@ -116,7 +116,7 @@ def temporarily_restore_interpreter_stack(stack): pushed.append(s) yield finally: - for s in reversed(pushed): + for _ in reversed(pushed): # TODO: would be nice to assert that the layers are the same, but # Python object identity is not preserved pop_dynamic_layer_stack() diff --git a/torch/_higher_order_ops/utils.py b/torch/_higher_order_ops/utils.py index 9028964baa9..160e149fd76 100644 --- a/torch/_higher_order_ops/utils.py +++ b/torch/_higher_order_ops/utils.py @@ -907,7 +907,7 @@ def diff_tensor_meta( try: if val1 != val2: pair_diffs.append(f"'{meta_name}: {val1} vs {val2}'") - except GuardOnDataDependentSymNode as _: + except GuardOnDataDependentSymNode: pair_diffs.append(f"'{meta_name}: {val1} vs {val2}'") continue return pair_diffs @@ -1197,7 +1197,7 @@ def materialize_callable_in_args(op: HopInstance, args, kwargs): # call_op preserves ordering of proxies via schema materialized_args = [] - for i, (proxy, arg) in enumerate(zip(arg_proxies, schema.arguments)): + for i, proxy in enumerate(arg_proxies): if ( isinstance(proxy, torch.fx.Node) and proxy.op == "get_attr" diff --git a/torch/_higher_order_ops/while_loop.py b/torch/_higher_order_ops/while_loop.py index 1fe172433e5..148f4c516bb 100644 --- a/torch/_higher_order_ops/while_loop.py +++ b/torch/_higher_order_ops/while_loop.py @@ -316,7 +316,7 @@ def while_loop_dense( if stack_output: outs: list[torch.Tensor] = [] - for i, out in enumerate(outputs): + for out in outputs: outs.append(torch.stack(out, dim=0)) return tuple(outs) diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py index 512efeb6336..b6ef9006f8d 100644 --- a/torch/_inductor/codecache.py +++ b/torch/_inductor/codecache.py @@ -2606,7 +2606,7 @@ def custom_op_wrapper(op: str, *args: Any) -> list[c_void_p] | c_void_p | None: if isinstance(result, (list, tuple)): # unsafe_alloc_void_ptrs_from_tensors expects result contains tensor only result = [torch.tensor([]) if r is None else r for r in result] - for i, r in enumerate(result): + for r in result: assert isinstance(r, torch.Tensor), op + " returns a list of non-tensors" return torch._C._aoti.unsafe_alloc_void_ptrs_from_tensors(result) # type: ignore[arg-type] diff --git a/torch/_inductor/codegen/mps.py b/torch/_inductor/codegen/mps.py index 268d044db6b..d01533382df 100644 --- a/torch/_inductor/codegen/mps.py +++ b/torch/_inductor/codegen/mps.py @@ -895,7 +895,7 @@ class MetalKernel(SIMDKernel): else: dtype_str = self.dtype_to_str(dtype) code.writeline(f"constant {dtype_str}* {inner},") - for outer, inner in self.args.sizevars.items(): + for inner in self.args.sizevars.values(): code.writeline(f"constant long& {inner},") # Write dynamic values as inputs diff --git a/torch/_inductor/codegen/multi_kernel.py b/torch/_inductor/codegen/multi_kernel.py index 9bd0d780f82..57990416d84 100644 --- a/torch/_inductor/codegen/multi_kernel.py +++ b/torch/_inductor/codegen/multi_kernel.py @@ -218,7 +218,7 @@ class MultiKernel: # the multi call kernel. 
multi_call_args = call_args multi_call_arg_types = arg_types - for i, kernel in enumerate(self.kernels): + for kernel in self.kernels: additional_call_args, additional_arg_types = ( kernel.additional_call_args_and_types() ) diff --git a/torch/_inductor/codegen/triton_combo_kernel.py b/torch/_inductor/codegen/triton_combo_kernel.py index 7778498237c..498492aa92e 100644 --- a/torch/_inductor/codegen/triton_combo_kernel.py +++ b/torch/_inductor/codegen/triton_combo_kernel.py @@ -717,7 +717,7 @@ class ComboKernel(Kernel): self, name: str, call_args: list[Any], arg_types: list[Any] ) -> None: for num, sub_kernel in enumerate(self.sub_kernels): - for i, tree in enumerate(sub_kernel.range_trees): + for tree in sub_kernel.range_trees: numel_name = f"{tree.prefix}numel_{num}" if numel_name not in self.dynamic_shape_args: continue @@ -735,7 +735,7 @@ class ComboKernel(Kernel): def kernel_benchmark_extra_args(self) -> list[str]: extra_args = [] for num, sub_kernel in enumerate(self.sub_kernels): - for i, tree in enumerate(sub_kernel.range_trees): + for tree in sub_kernel.range_trees: numel_name = f"{tree.prefix}numel_{num}" if numel_name not in self.dynamic_shape_args: continue @@ -1018,7 +1018,7 @@ class ComboKernel(Kernel): for num, sub_kernel in enumerate(self.sub_kernels): meta[f"no_x_dim_{num}"] = sub_kernel.no_x_dim - for i, tree in enumerate(sub_kernel.range_trees): + for tree in sub_kernel.range_trees: # pyrefly: ignore [missing-argument] if not tree.is_reduction: numel_name = f"{tree.prefix}numel_{num}" diff --git a/torch/_inductor/codegen/wrapper.py b/torch/_inductor/codegen/wrapper.py index afc782386e3..829f3ac974d 100644 --- a/torch/_inductor/codegen/wrapper.py +++ b/torch/_inductor/codegen/wrapper.py @@ -3604,16 +3604,12 @@ class PythonWrapperCodegen(CodeGen): self.writeline("if not should_loop:") if stack_output: # Handle the case when loop never executes - for i, (carried_input, carried_buf) in enumerate( - zip(outer_carried_inputs, while_loop.carried_inputs) - ): + for i, carried_input in enumerate(outer_carried_inputs): self.writeline(EnterSubgraphLine(self, while_loop.body_subgraph.graph)) self.writeline(f"{name}[{i}] = {carried_input}.unsqueeze(0).clone()") self.writeline(ExitSubgraphLine(self)) else: - for i, (carried_input, carried_buf) in enumerate( - zip(outer_carried_inputs, while_loop.carried_inputs) - ): + for i, carried_input in enumerate(outer_carried_inputs): self.writeline(EnterSubgraphLine(self, while_loop.body_subgraph.graph)) self.writeline(f"{name}[{i}] = {carried_input}.clone()") self.writeline(ExitSubgraphLine(self)) diff --git a/torch/_inductor/comms.py b/torch/_inductor/comms.py index f063d911b2a..5a1e39bf710 100644 --- a/torch/_inductor/comms.py +++ b/torch/_inductor/comms.py @@ -424,10 +424,7 @@ def _reorder_communication_preserving_peak_memory_internal( return # Candidate becomes last use of some bufs - for ( - gn, - bufs, - ) in group_n_to_bufs_after_swap_dealloc_by_candidate.items(): + for bufs in group_n_to_bufs_after_swap_dealloc_by_candidate.values(): for buf in bufs: buf_to_snode_last_use[buf] = candidate @@ -840,7 +837,7 @@ def _schedule_for_comm( else: schedule(snode) - for snode, deps in unmet_deps.items(): + for deps in unmet_deps.values(): assert len(deps) == 0, ( f"Detected unscheduled nodes. 
Nodes with unmet dependencies: {unmet_deps}" ) @@ -1552,11 +1549,8 @@ Graph: {graph} node.args = new_args # Delete `fsdp.copy_(unsharded_param, Y)` nodes - for ( - unsharded_param, - fsdp_copy_node_idxes, - ) in unsharded_param_to_fsdp_copy_node_idxes.items(): - for i, fsdp_copy_node_idx in enumerate(fsdp_copy_node_idxes): + for fsdp_copy_node_idxes in unsharded_param_to_fsdp_copy_node_idxes.values(): + for fsdp_copy_node_idx in fsdp_copy_node_idxes: fsdp_copy_node = node_list[fsdp_copy_node_idx] graph.erase_node(fsdp_copy_node) diff --git a/torch/_inductor/comms_debug.py b/torch/_inductor/comms_debug.py index b6012828b87..20c9779a4ef 100644 --- a/torch/_inductor/comms_debug.py +++ b/torch/_inductor/comms_debug.py @@ -46,7 +46,7 @@ def _debug_iterative_memory_recompute( if iter_cm != new_cm: log = "ITERATIVE CURR MEMORY CANDIDATE DOES NOT MATCH" iterative_recompute_error = True - for i, gn in enumerate(gns): + for gn in gns: iter_gnm = iter_curr_memory[gn] new_gnm = est_curr_memory[gn] if iter_gnm != new_gnm: @@ -65,7 +65,7 @@ def _debug_iterative_memory_recompute( f"\nCANDIDATE_NEW_ALLOCFREE:{snodes_allocfree[candidate]}" ) peak_log = "" - for i, (pre, post) in enumerate(snodes_curr_memory): + for i, (pre, _post) in enumerate(snodes_curr_memory): if est_peak_memory == pre: n = snodes[i] peak_log = ( diff --git a/torch/_inductor/fx_passes/post_grad.py b/torch/_inductor/fx_passes/post_grad.py index 330fb34e8bb..7d5079c922f 100644 --- a/torch/_inductor/fx_passes/post_grad.py +++ b/torch/_inductor/fx_passes/post_grad.py @@ -454,7 +454,7 @@ def decompose_map_to_while_loop(gm: torch.fx.GraphModule): graph_pass.apply(gm) - for node in gm.graph.find_nodes( + for _node in gm.graph.find_nodes( op="call_function", target=torch.ops.higher_order.map_impl ): raise AssertionError("map is not lowered to while_loop") @@ -666,7 +666,7 @@ def decompose_scan_to_while_loop(gm: torch.fx.GraphModule): graph_pass.apply(gm) - for node in gm.graph.find_nodes( + for _node in gm.graph.find_nodes( op="call_function", target=torch.ops.higher_order.scan ): raise AssertionError("scan is not lowered to while_loop") @@ -1265,7 +1265,7 @@ def decompose_triton_kernel_wrapper_functional(graph): graph_pass.apply(graph) - for node in graph.find_nodes( + for _ in graph.find_nodes( op="call_function", target=torch.ops.higher_order.triton_kernel_wrapper_functional, ): diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index a5da990e4ba..55f582c5d78 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -8770,9 +8770,7 @@ class WhileLoop(ExternKernel): seen_buffers: OrderedSet[int] = OrderedSet() result: list[Union[IRNode, TensorBox, ShapeAsConstantBuffer]] = [] - for i, (original_input, unwrapped_buffer) in enumerate( - zip(carried_inputs, unwrapped_buffers) - ): + for original_input, unwrapped_buffer in zip(carried_inputs, unwrapped_buffers): if id(unwrapped_buffer) in seen_buffers: result.append(ExternKernel.copy_input(original_input)) else: diff --git a/torch/_inductor/pattern_matcher.py b/torch/_inductor/pattern_matcher.py index c457a4a863f..d1604672950 100644 --- a/torch/_inductor/pattern_matcher.py +++ b/torch/_inductor/pattern_matcher.py @@ -743,7 +743,7 @@ class _TargetArgsExpr(_TargetExpr): assert len(node_items) == len(self_items) m = Match(ctx, self) - for i, pattern, child_node in zip(itertools.count(), self_items, node_items): + for pattern, child_node in zip(self_items, node_items): if isinstance(pattern, PatternExpr): child_match = ctx.match(pattern, child_node) if not is_match(child_match): diff 
--git a/torch/_inductor/scheduler.py b/torch/_inductor/scheduler.py index bce826ef479..a5b3f6b6832 100644 --- a/torch/_inductor/scheduler.py +++ b/torch/_inductor/scheduler.py @@ -2850,7 +2850,7 @@ class Scheduler: # NB: None means that the dependency is on an input. Don't actually # generate a dependency because if we do, Inductor will start trying # to free the unbacked int but that's pointless - for name, val in V.graph.graph_inputs.items(): + for val in V.graph.graph_inputs.values(): if isinstance(val, sympy.Expr): for fs in val.free_symbols: unbacked_symbol_to_origin_node[fs] = None @@ -3550,9 +3550,7 @@ class Scheduler: future_choices: list[tuple[Any, Optional[LambdaFuture], ModuleType]] = [] for hint_override in config.multi_kernel_hints: choice_timings = multi_node.choice_timings(hint_override) - for choice, unfused_time in sorted( - choice_timings.items(), key=lambda x: x[1] - ): + for choice, _ in sorted(choice_timings.items(), key=lambda x: x[1]): if not isinstance( choice, torch._inductor.select_algorithm.TritonTemplateCaller ): diff --git a/torch/_inductor/tiling_utils.py b/torch/_inductor/tiling_utils.py index 4327637a872..8b4bab3bca9 100644 --- a/torch/_inductor/tiling_utils.py +++ b/torch/_inductor/tiling_utils.py @@ -425,7 +425,7 @@ def apply_var_mapping( new_ranges, norm_pw_vars + norm_red_vars, strict=True ): range_vars = [] - for i in range(len(new_range)): + for _ in range(len(new_range)): range_vars.append(flat_vars[count]) count += 1 diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index ab1914deac8..3162e002a75 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -348,7 +348,7 @@ def _do_bench_using_profiling( ] ) as p: # Benchmark - for i in range(n_repeat): + for _ in range(n_repeat): # we clear the L2 cache before each run cache.zero_() # record time of `fn` diff --git a/torch/_subclasses/fake_tensor.py b/torch/_subclasses/fake_tensor.py index bf1ed1ff2b1..5fb1213c6d2 100644 --- a/torch/_subclasses/fake_tensor.py +++ b/torch/_subclasses/fake_tensor.py @@ -3118,7 +3118,7 @@ def _validate_symbolic_output_for_caching( if is_tracing: # Check for SymNode types in PROXY mode - this should bypass caching # regardless of whether symbols are known or not - for node in _iterate_nodes(output): + for _ in _iterate_nodes(output): raise _BypassDispatchCache("Proxy mode with SymNode output") else: # Check for unrepresented symbols in tensor expressions diff --git a/torch/ao/ns/_numeric_suite.py b/torch/ao/ns/_numeric_suite.py index 1c8e751b1eb..a4b873cb7d1 100644 --- a/torch/ao/ns/_numeric_suite.py +++ b/torch/ao/ns/_numeric_suite.py @@ -137,7 +137,7 @@ def _get_logger_dict_helper( def get_prefix(prefix): return prefix if prefix == "" else prefix + "." 
- for name, child in mod.named_children(): + for child in mod.children(): if isinstance(child, Logger): target_dict[get_prefix(prefix) + "stats"] = child.stats break diff --git a/torch/ao/ns/fx/graph_passes.py b/torch/ao/ns/fx/graph_passes.py index 9a93e9ad582..1b172649944 100644 --- a/torch/ao/ns/fx/graph_passes.py +++ b/torch/ao/ns/fx/graph_passes.py @@ -909,8 +909,7 @@ def create_a_shadows_b( # is added prev_node_c_list = [env_c[arg.name] for arg in prev_node_b] - for arg_idx, arg in enumerate(prev_node_b): - prev_node_c = prev_node_c_list[arg_idx] + for arg_idx, prev_node_c in enumerate(prev_node_c_list): env_c[prev_node_c.name] = _insert_logger_after_node( prev_node_c, gm_b, diff --git a/torch/ao/quantization/_correct_bias.py b/torch/ao/quantization/_correct_bias.py index 3f480486893..4309e4530cb 100644 --- a/torch/ao/quantization/_correct_bias.py +++ b/torch/ao/quantization/_correct_bias.py @@ -151,6 +151,6 @@ def bias_correction( bias.data = updated_bias # Resets the data contained in the loggers - for name, submodule in quantized_model.named_modules(): + for submodule in quantized_model.modules(): if isinstance(submodule, MeanShadowLogger): submodule.clear() diff --git a/torch/autograd/gradcheck.py b/torch/autograd/gradcheck.py index 53c8c28af97..053be3450d6 100644 --- a/torch/autograd/gradcheck.py +++ b/torch/autograd/gradcheck.py @@ -297,7 +297,7 @@ def _get_numerical_jacobian( inp_indices = [ i for i, a in enumerate(target) if is_tensor_like(a) and a.requires_grad ] - for i, (inp, inp_idx) in enumerate(zip(_iter_tensors(target, True), inp_indices)): + for inp, inp_idx in zip(_iter_tensors(target, True), inp_indices): jacobians += [ get_numerical_jacobian_wrt_specific_input( fn, @@ -549,7 +549,7 @@ def _get_analytical_jacobian_forward_ad( with fwAD.dual_level(): fw_grads = [] dual_inputs = [] - for i, inp in enumerate(inputs): + for inp in inputs: if is_tensor_like(inp) and inp.requires_grad: if inp.layout == torch._mkldnn: # type: ignore[attr-defined] raise ValueError( @@ -1275,7 +1275,7 @@ def _test_undefined_forward_mode(func, outputs, inputs): tensor_indices.add(i) dual_inputs.append(inp) - for i, (fw_grad, u) in enumerate(zip(fw_grads, all_u)): + for fw_grad, u in zip(fw_grads, all_u): fw_grad.copy_(u.view_as(fw_grad)) for idx, inp in enumerate(inputs): diff --git a/torch/distributed/_serialization.py b/torch/distributed/_serialization.py index d9c3bfe6b8d..c13ba46ba57 100644 --- a/torch/distributed/_serialization.py +++ b/torch/distributed/_serialization.py @@ -41,7 +41,7 @@ class _PseudoZipFile: pickle.dump(entries, f, protocol=DEFAULT_PROTOCOL) - for key, (data, length) in self.records.items(): + for data, _ in self.records.values(): if isinstance(data, bytes): f.write(data) elif isinstance(data, str): diff --git a/torch/distributed/checkpoint/state_dict.py b/torch/distributed/checkpoint/state_dict.py index 479027a2ea9..16d988a7910 100644 --- a/torch/distributed/checkpoint/state_dict.py +++ b/torch/distributed/checkpoint/state_dict.py @@ -578,7 +578,7 @@ def _load_model_state_dict( assign = False if info.broadcast_from_rank0 or info.full_state_dict: devices = set() - for key, value in local_state_dict.items(): + for value in local_state_dict.values(): if torch.is_tensor(value) and value.dim() > 0: devices.add(value.device) # In lora state_dict, there could be multiple devices, with meta device inside. 
diff --git a/torch/distributed/fsdp/_flat_param.py b/torch/distributed/fsdp/_flat_param.py index 8adde16de6b..85e4c23d509 100644 --- a/torch/distributed/fsdp/_flat_param.py +++ b/torch/distributed/fsdp/_flat_param.py @@ -2087,14 +2087,14 @@ class FlatParamHandle: param.grad.data = view else: param.grad = view - for i, ( + for ( param_name, module, module_name, prim_param_name, prim_module, _, - ) in enumerate(self.flat_param._shared_param_infos): + ) in self.flat_param._shared_param_infos: _p_assert( hasattr(module, param_name), f"{module_name + '.' + param_name if module_name else param_name} is missing", @@ -2171,11 +2171,8 @@ class FlatParamHandle: param.data = flat_param[offset : offset + numel_in_shard] if self.flat_param._shared_params is None: raise AssertionError("Expected _shared_params to be not None") - for i, ( - param, - (param_name, module, _, prim_param_name, prim_module, _), - ) in enumerate( - zip(self.flat_param._shared_params, self.flat_param._shared_param_infos) + for param, (param_name, module, _, prim_param_name, prim_module, _) in zip( + self.flat_param._shared_params, self.flat_param._shared_param_infos ): self._setattr_param(module, param_name, param) prim_param = getattr(prim_module, prim_param_name) @@ -2388,14 +2385,14 @@ class FlatParamHandle: # TODO: If we want to handle shared parameters, we need to re-generate # the shared parameter data structures in case sharedness changed. - for i, ( + for ( param_name, module, _, prim_param_name, prim_module, _, - ) in enumerate(flat_param._shared_param_infos): + ) in flat_param._shared_param_infos: if getattr(module, param_name) is not getattr(prim_module, prim_param_name): raise NotImplementedError( "Changing shared parameters is not supported yet" diff --git a/torch/distributed/pipelining/_IR.py b/torch/distributed/pipelining/_IR.py index 52e56dd3f95..16ef54ede01 100644 --- a/torch/distributed/pipelining/_IR.py +++ b/torch/distributed/pipelining/_IR.py @@ -924,7 +924,7 @@ class Pipe(torch.nn.Module): pass # This is done by (1) `_sink_params` at each submodule; - for name, submod in split.named_children(): + for submod in split.children(): if isinstance(submod, fx.GraphModule): _sink_params(submod, inputs_to_state, []) submod.graph.lint() diff --git a/torch/distributed/tensor/_api.py b/torch/distributed/tensor/_api.py index 865de11dacc..05abf41ed49 100644 --- a/torch/distributed/tensor/_api.py +++ b/torch/distributed/tensor/_api.py @@ -967,7 +967,7 @@ def distribute_module( if partition_fn is None: # if partition_fn not specified, we by default replicate # all module params/buffers - for name, submod in module.named_modules(): + for submod in module.modules(): replicate_module_params_buffers(submod, device_mesh) else: # apply partition_fun to submodules diff --git a/torch/distributed/tensor/_ops/_einsum_strategy.py b/torch/distributed/tensor/_ops/_einsum_strategy.py index 14dcb395b9b..9d46ede21f9 100644 --- a/torch/distributed/tensor/_ops/_einsum_strategy.py +++ b/torch/distributed/tensor/_ops/_einsum_strategy.py @@ -170,7 +170,7 @@ def gen_einsum_strategies( # linearity strategy if linearity: linearity_placement_list: list[Placement] = [Partial()] - for input_dim in input_dims: + for _ in input_dims: linearity_placement_list.append(Partial()) strategies_over_one_mesh_dim.append(linearity_placement_list) diff --git a/torch/export/dynamic_shapes.py b/torch/export/dynamic_shapes.py index e362e833424..24964c27472 100644 --- a/torch/export/dynamic_shapes.py +++ b/torch/export/dynamic_shapes.py @@ -1332,7 +1332,7 @@ def 
refine_dynamic_shapes_from_suggested_fixes( roots.add(c.root.__name__) # type: ignore[attr-defined] # check keys are existing dims or new roots - for k, c in shape_fixes.items(): + for k in shape_fixes.keys(): assert k in name_to_dim or k in roots # cache so we don't produce multiple derived dim objects diff --git a/torch/fx/experimental/graph_gradual_typechecker.py b/torch/fx/experimental/graph_gradual_typechecker.py index d1ca9bc0c88..a6e9a270578 100644 --- a/torch/fx/experimental/graph_gradual_typechecker.py +++ b/torch/fx/experimental/graph_gradual_typechecker.py @@ -101,11 +101,11 @@ def broadcast_types(t1, t2): # We make the types the same length which is the first requirement # for consistency if s1 > s2: - for i in range(s1 - s2): + for _ in range(s1 - s2): new_t2.insert(0, 1) elif s2 > s1: - for i in range(s2 - s1): + for _ in range(s2 - s1): new_t1.insert(0, 1) # we replace occurrences of "1" with each tensor with diff --git a/torch/fx/experimental/sym_node.py b/torch/fx/experimental/sym_node.py index a617d4fe558..d07d235e513 100644 --- a/torch/fx/experimental/sym_node.py +++ b/torch/fx/experimental/sym_node.py @@ -1871,7 +1871,7 @@ def _make_user_magic(method, user_type): setattrs(user_type, f"__r{method_name}__", rbinary_magic_impl) -for method, func in magic_methods.items(): # type: ignore[assignment] +for method in magic_methods.keys(): # type: ignore[assignment] if method in only_bool_magic_methods: _make_user_magic(method, SymBool) continue diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index 010539d9ff7..fb55839fd4e 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -3342,7 +3342,7 @@ class DimConstraints: # alter derivations that depend on old root, to unify to new root # e.g. 
dx=3*_dx+1, dy=dx+1 -> dy=3*_dx+2 for old_root in introduced_roots.values(): - for k, c in list(results.items()): + for c in results.values(): if ( "eq" in c and isinstance(c["eq"], sympy.Expr) diff --git a/torch/jit/_script.py b/torch/jit/_script.py index 8b2ecf566a3..0a09884c952 100644 --- a/torch/jit/_script.py +++ b/torch/jit/_script.py @@ -1066,7 +1066,7 @@ def call_prepare_scriptable_func_impl(obj, memo): else: new_obj_dict[name] = sub_module - for k, v in new_obj_dict.items(): + for v in new_obj_dict.values(): obj.__dict__[name] = v return obj diff --git a/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py b/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py index 72c5074eb38..e1b34469fbf 100644 --- a/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py +++ b/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py @@ -6099,7 +6099,7 @@ def index_add(g: jit_utils.GraphContext, self, dim, index, other, alpha=None): if other_dim_rank != self_dim_rank: delta = self_dim_rank - other_dim_rank - for i in range(delta): + for _ in range(delta): other = symbolic_helper._unsqueeze_helper( g, other, [symbolic_helper._get_tensor_rank(other)] ) @@ -6126,10 +6126,10 @@ def index_add(g: jit_utils.GraphContext, self, dim, index, other, alpha=None): ) other = expand_as(g, other, new_shape) - for i in range(dim): + for _ in range(dim): index = symbolic_helper._unsqueeze_helper(g, index, [0]) - for i in range(self_dim_rank - dim - 1): + for _ in range(self_dim_rank - dim - 1): index = symbolic_helper._unsqueeze_helper( g, index, [symbolic_helper._get_tensor_rank(index)] ) diff --git a/torch/onnx/ops/_symbolic_impl.py b/torch/onnx/ops/_symbolic_impl.py index 4876612ad97..aafe9c00828 100644 --- a/torch/onnx/ops/_symbolic_impl.py +++ b/torch/onnx/ops/_symbolic_impl.py @@ -78,7 +78,7 @@ class EncodedAttrs: attr_floats=[], attr_strs=[], ) - for i, (k, v) in enumerate(attrs.items()): + for k, v in attrs.items(): encoded.attr_keys.append(k) if isinstance(v, int): start_pos = len(encoded.attr_ints) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 0cecc762bce..47517e8ff9b 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -445,11 +445,9 @@ def sample_inputs_batch_norm(op_info, device, dtype, requires_grad, **kwargs): ) # Checking for permutations of weights and biases as `None` - weights = [channels, None, None] - biases = [None, channels, None] is_training = [True, False, False] - for weight, bias, training in zip(weights, biases, is_training, strict=True): + for training in is_training: yield SampleInput( make_arg(input_shape), args=( diff --git a/torch/testing/_internal/distributed/ddp_under_dist_autograd_test.py b/torch/testing/_internal/distributed/ddp_under_dist_autograd_test.py index ca9bc297010..32498f6d149 100644 --- a/torch/testing/_internal/distributed/ddp_under_dist_autograd_test.py +++ b/torch/testing/_internal/distributed/ddp_under_dist_autograd_test.py @@ -465,7 +465,7 @@ class DdpUnderDistAutogradTest(RpcAgentTestFixture): ) # Destroy process groups - for idx, trainer_rref in enumerate(trainer_rrefs): + for trainer_rref in trainer_rrefs: _remote_method_async(Trainer.destroy_pg, trainer_rref).wait() # Send shutdown signals. 
diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py index 499341b0795..7ae7f9a5994 100644 --- a/torch/testing/_internal/distributed/distributed_test.py +++ b/torch/testing/_internal/distributed/distributed_test.py @@ -6094,7 +6094,7 @@ class DistributedTest: dim=1, ).cuda(rank) - for i in range(100): + for _ in range(100): y = model(input_var[rank].cuda(rank)) y.mean().backward() diff --git a/torch/testing/_internal/distributed/rpc/dist_autograd_test.py b/torch/testing/_internal/distributed/rpc/dist_autograd_test.py index 3c5c9101e43..1b371d3ee6e 100644 --- a/torch/testing/_internal/distributed/rpc/dist_autograd_test.py +++ b/torch/testing/_internal/distributed/rpc/dist_autograd_test.py @@ -1988,7 +1988,7 @@ class DistAutogradTest(CommonDistAutogradTest): self.assertEqual(self.world_size - 1, len(known_context_ids)) t1 = torch.rand((3, 3), requires_grad=True) - for i in range(100): + for _ in range(100): dst = self._next_rank() t1 = rpc.rpc_sync(worker_name(dst), torch.add, args=(t1, t1)) diff --git a/torch/testing/_internal/triton_utils.py b/torch/testing/_internal/triton_utils.py index 0964c68ebb2..a0fcbaee30f 100644 --- a/torch/testing/_internal/triton_utils.py +++ b/torch/testing/_internal/triton_utils.py @@ -823,7 +823,7 @@ if has_triton(): mask = offsets < n_elements x = tl.load(in_ptr0 + offsets, mask=mask) y = tl.load(in_ptr1 + offsets, mask=mask) - for i in range(2): + for _ in range(2): output = x + y tl.store(out_ptr + offsets, output, mask=mask) i = 2 diff --git a/torch/utils/data/dataframes_pipes.ipynb b/torch/utils/data/dataframes_pipes.ipynb index 2f995aab05a..bc4abeba15b 100644 --- a/torch/utils/data/dataframes_pipes.ipynb +++ b/torch/utils/data/dataframes_pipes.ipynb @@ -355,7 +355,7 @@ "dp = dp.shuffle()\n", "dp = dp.batch(2)\n", "print(\"Iterate over DataFrame batches\")\n", - "for i,v in enumerate(dp):\n", + "for v in dp:\n", " print(v)\n", "\n", "# this is similar to batching of regular DataPipe\n",
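
All of the hunks above apply one mechanical refactor: loop variables that the body never reads are either dropped (iterating `.values()` or `.keys()` instead of `.items()`, removing `enumerate()` when the index is unused, unpacking a `zip()` directly) or renamed to an underscore placeholder. A minimal standalone sketch of the pattern, using hypothetical names rather than objects from the changed files:

# Illustrative sketch only: `records` and `names` are made-up stand-ins,
# not objects from the files changed above.
records = {"a": (1, "x"), "b": (2, "y")}
names = ["p", "q", "r"]

# Before: the index and the key are bound but never read inside the body.
for i, (key, value) in enumerate(records.items()):
    print(value)

# After: iterate only over what the body actually uses.
for value in records.values():
    print(value)

# Before: enumerate() wraps a zip() whose index is never used.
for i, (name, record) in enumerate(zip(names, records)):
    print(name, record)

# After: drop enumerate() and keep the zip().
for name, record in zip(names, records):
    print(name, record)

# When a loop only needs to run N times, the throwaway variable becomes `_`.
for _ in range(len(names)):
    pass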