diff --git a/test/ao/sparsity/test_kernels.py b/test/ao/sparsity/test_kernels.py
index 7e4337ba431..1fb8d46adf9 100644
--- a/test/ao/sparsity/test_kernels.py
+++ b/test/ao/sparsity/test_kernels.py
@@ -147,7 +147,6 @@ def _sparse_layer_test_helper(
         W_zp = 0
         X_fp32 = torch.randn(batch_size, input_channels, dtype=torch.float32)
-        float_bias = torch.randn(output_channels, dtype=torch.float32)
 
         # generate a weight which we'll insert into the model
         W_fp32 = torch.randn(output_channels, input_channels, dtype=torch.float32)
diff --git a/test/ao/sparsity/test_qlinear_packed_params.py b/test/ao/sparsity/test_qlinear_packed_params.py
index eb186d4245f..1c4c58a9366 100644
--- a/test/ao/sparsity/test_qlinear_packed_params.py
+++ b/test/ao/sparsity/test_qlinear_packed_params.py
@@ -30,7 +30,6 @@ class TestQlinearPackedParams(TestCase):
         row_block_size = 1
         col_block_size = 4
         out_features = weight_fp32.shape[0]
-        in_features = weight_fp32.shape[1]
 
         scales = [2.0, 6.0, 12.0]
         zero_points = [
@@ -201,14 +200,11 @@ class TestQlinearPackedParams(TestCase):
         row_block_size = 1
         col_block_size = 4
         out_features = weight_fp32.shape[0]
-        in_features = weight_fp32.shape[1]
 
         scales = [2.0, 3.0, 7.0]
         zero_points = [0 for _ in range(out_features)]
         dtype = torch.qint8
 
-        x = torch.rand(size=(1, weight_fp32.shape[1]))
-
         def make_lin_get_state_weight_bias_and_save():
             weight = torch.quantize_per_tensor(
                 weight_fp32,
diff --git a/test/ao/sparsity/test_sparsifier.py b/test/ao/sparsity/test_sparsifier.py
index a39f97ad3d5..097d4890dc8 100644
--- a/test/ao/sparsity/test_sparsifier.py
+++ b/test/ao/sparsity/test_sparsifier.py
@@ -86,7 +86,7 @@ class TestBaseSparsifier(TestCase):
         sparsifier0.prepare(model0, [{"tensor_fqn": "linear1.weight"}])
         mask = model0.linear1.parametrizations["weight"][0].mask
         mask.data = torch.arange(mask.shape[0] * mask.shape[1]).reshape(mask.shape)
-        for step in range(step_count):
+        for _ in range(step_count):
             sparsifier0.step()
 
         state_dict = sparsifier0.state_dict()
diff --git a/test/ao/sparsity/test_sparsity_utils.py b/test/ao/sparsity/test_sparsity_utils.py
index 0dd7c9722c0..b29be49d571 100644
--- a/test/ao/sparsity/test_sparsity_utils.py
+++ b/test/ao/sparsity/test_sparsity_utils.py
@@ -124,7 +124,7 @@ class TestSparsityUtilFunctions(TestCase):
         list_of_modules = [m for _, m in model.named_modules()] + [model]
         for module in list_of_modules:
             module_fqn = module_to_fqn(model, module)
-            for tensor_name, tensor in module.named_parameters(recurse=False):
+            for tensor_name, _ in module.named_parameters(recurse=False):
                 tensor_fqn = (
                     module_fqn + ("." if module_fqn != "" else "") + tensor_name
                 )
diff --git a/test/ao/sparsity/test_structured_sparsifier.py b/test/ao/sparsity/test_structured_sparsifier.py
index ff4ffa4a308..00fdbed68af 100644
--- a/test/ao/sparsity/test_structured_sparsifier.py
+++ b/test/ao/sparsity/test_structured_sparsifier.py
@@ -269,7 +269,6 @@ class TestBaseStructuredSparsifier(TestCase):
     def _test_step_linear_on_device(self, model, device):
         model = model.to(device)
-        x = torch.ones(7, 7, device=device)
         pruner = SimplePruner(None)
         pruner.prepare(model, None)
         pruner.enable_mask_update = True
@@ -808,7 +807,7 @@ class TestBaseStructuredSparsifier(TestCase):
         pruned_model = fx_pruner.prune()
         pruned_model.eval()
         out_pruned, lstm_out_pruned = pruned_model(lstm_input)
-        r, c = lstm_out_expected.size()
+        _, c = lstm_out_expected.size()
 
         # We cannot check that y_expected == y_pruned as usual because
         # zeros vs. missing elements yield different numerical results.
@@ -891,7 +890,7 @@ class TestBaseStructuredSparsifier(TestCase):
         pruned_model = fx_pruner.prune()
         pruned_model.eval()
         out_pruned, lstm_out_pruned = pruned_model(lstm_input)
-        r, c = lstm_out_expected.size()
+        _, c = lstm_out_expected.size()
 
         # We cannot check that y_expected == y_pruned as usual because
         # zeros vs. missing elements yield different numerical results.
diff --git a/test/autograd/test_functional.py b/test/autograd/test_functional.py
index 6147ee1989d..5c086162bbc 100644
--- a/test/autograd/test_functional.py
+++ b/test/autograd/test_functional.py
@@ -670,7 +670,7 @@ class TestAutogradFunctional(TestCase):
         x = ctors.randn(3)
 
         with warnings.catch_warnings(record=True) as wa:
-            result = api(foo, x, vectorize=True)
+            api(foo, x, vectorize=True)
         self.assertEqual(len(wa), 0)
 
     @base_and_logging_tensor
@@ -762,7 +762,7 @@ class TestAutogradFunctional(TestCase):
         inp = ctors.rand(4)
 
         with self.assertRaisesRegex(RuntimeError, "not supported together"):
-            res = autogradF.jacobian(foo, inp, strict=True, vectorize=True)
+            autogradF.jacobian(foo, inp, strict=True, vectorize=True)
 
     @base_and_logging_tensor
     def test_jacobian_no_grad(self, ctors):
@@ -1122,7 +1122,7 @@ class TestAutogradFunctional(TestCase):
         inp = ctors.rand(4)
 
         with self.assertRaisesRegex(RuntimeError, "not supported together"):
-            res = autogradF.hessian(foo, inp, strict=True, vectorize=True)
+            autogradF.hessian(foo, inp, strict=True, vectorize=True)
 
     @base_and_logging_tensor
     def test_hessian_no_grad(self, ctors):
diff --git a/test/bottleneck_test/test_cuda.py b/test/bottleneck_test/test_cuda.py
index 5a28fe87a17..d9f9b0b8274 100644
--- a/test/bottleneck_test/test_cuda.py
+++ b/test/bottleneck_test/test_cuda.py
@@ -18,7 +18,7 @@ def main():
     data = torch.randn(10, 50).cuda()
     model = Model().cuda()
     optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
-    for i in range(10):
+    for _ in range(10):
         optimizer.zero_grad()
         loss = model(data)
         loss.backward()
diff --git a/test/custom_operator/test_custom_ops.py b/test/custom_operator/test_custom_ops.py
index 83bb0572266..38c7349f139 100644
--- a/test/custom_operator/test_custom_ops.py
+++ b/test/custom_operator/test_custom_ops.py
@@ -78,9 +78,9 @@ def forward(self, arg0_1):
         x = torch.randn(3, device="meta")
         self.assertNotIn("my_custom_ops2", sys.modules.keys())
         with self.assertRaisesRegex(NotImplementedError, r"'my_custom_ops2'"):
-            y = torch.ops.custom.sin.default(x)
+            torch.ops.custom.sin.default(x)
         torch.ops.import_module("my_custom_ops2")
-        y = torch.ops.custom.sin.default(x)
+        torch.ops.custom.sin.default(x)
 
     def test_calling_custom_op_string(self):
         output = ops.custom.op2("abc", "def")
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py b/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py
index 319e72baafa..4029bdd1af6 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py
@@ -35,7 +35,7 @@ class _TestClipGradNormBase(FSDPTest):
         vector_norm_fn = functools.partial(torch.linalg.vector_norm, ord=norm_type)
         dp_mesh = dp_mesh or init_device_mesh("cuda", (self.world_size,))
         torch.manual_seed(42 + dp_mesh.get_local_rank() + 1)
-        for iter_idx in range(10):
+        for _ in range(10):
             ref_optim.zero_grad()
             ref_model(inp).sum().backward()
             optim.zero_grad()
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_comm.py b/test/distributed/_composable/fsdp/test_fully_shard_comm.py
index 99be766cb43..4b5397c02d2 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_comm.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_comm.py
@@ -250,8 +250,8 @@ class TestFullyShardCollectiveOps(FSDPTestMultiThread):
         self.assertEqual(group.size(), self.world_size)
         all_reduce_stream = torch.cuda.Stream()
         (
-            reduce_scatter_input,
-            reduce_scatter_event,
+            _,
+            _,
             post_reduce_event,
             _,
             _,
@@ -406,7 +406,7 @@ class TestFullyShardCommunication(FSDPTest):
         torch.manual_seed(42 + self.rank)
         inp = torch.randint(0, model_args.vocab_size, (2, 16), device="cuda")
-        for iter_idx in range(10):
+        for _ in range(10):
             ref_loss = ref_model(inp).sum()
             ref_loss.backward()
             for param in ref_model.parameters():
@@ -501,7 +501,7 @@ class TestFullyShardPrefetch(FSDPTest):
         self, reshard_after_forward: Union[bool, int], checkpoint_impl: Optional[str]
     ):
         n_layers = 3
-        model, optim, inp = self._init_transformer(
+        model, _, inp = self._init_transformer(
             n_layers, reshard_after_forward, checkpoint_impl
         )
         events: List[EventType] = []
@@ -843,7 +843,7 @@ class TestFullyShardPrefetch(FSDPTest):
         with patch_unshard(unshard_with_record), patch_post_backward(
             post_backward_with_record
         ):
-            for iter_idx in range(3):
+            for _ in range(3):
                 loss = model(inp)
                 expected_events = [
                     (
@@ -922,7 +922,7 @@ class TestFullyShardPrefetch(FSDPTest):
         with patch_unshard(unshard_with_record), patch_post_backward(
             post_backward_with_record
         ):
-            for iter_idx in range(3):
+            for _ in range(3):
                 loss = model(inp)
                 expected_events = [
                     ("unshard", "", TrainingState.FORWARD),
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_compile.py b/test/distributed/_composable/fsdp/test_fully_shard_compile.py
index 2780bd75a38..381a696a70a 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_compile.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_compile.py
@@ -662,7 +662,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
         def __init__(self, n_layers):
             super().__init__()
             self.layers = torch.nn.ModuleList()
-            for layer_id in range(n_layers):
+            for _ in range(n_layers):
                 self.layers.append(TestSubmodule(hidden_dim))
 
         def forward(self, x):
@@ -684,7 +684,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
         fsdp_config = {}
         mesh = init_device_mesh("cuda", (self.world_size,))
         model = TestModule(n_layers=3)
-        for layer_id, mod in enumerate(model.layers):
+        for mod in model.layers:
             fully_shard(mod, mesh=mesh, reshard_after_forward=True, **fsdp_config)
         model = fully_shard(
             model, mesh=mesh, reshard_after_forward=True, **fsdp_config
@@ -871,7 +871,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
             else:
                 v.requires_grad_(False)
         assert requires_grad_param_count == n_layers * len(requires_grad_params)
-        for layer_id, mod in enumerate(model.layers):
+        for _, mod in enumerate(model.layers):
             fully_shard(mod, mesh=mesh, reshard_after_forward=True, **fsdp_config)
         model = fully_shard(
             model, mesh=mesh, reshard_after_forward=True, **fsdp_config
@@ -1087,7 +1087,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
                 setattr(m.encoder, name, new_child)
     m = FSDP(m, sharding_strategy=ShardingStrategy.FULL_SHARD, use_orig_params=True)
     inp = torch.randn(32, 784, device="cuda")
-    out = m(inp)
+    m(inp)
 
 
 if __name__ == "__main__":
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_extensions.py b/test/distributed/_composable/fsdp/test_fully_shard_extensions.py
index 522466a7881..7fc1679468b 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_extensions.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_extensions.py
@@ -241,7 +241,7 @@ class TestFullyShardAllGatherExtensionsMultiProcess(
             losses.append(_model(inp).sum())
             losses[-1].backward()
             if _model is ref_model:
-                for param_name, param in _model.named_parameters():
+                for _, param in _model.named_parameters():
                     dist.all_reduce(param.grad)
                     param.grad.detach().div_(self.world_size)
         self.assertEqual(losses[0], losses[1])
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_init.py b/test/distributed/_composable/fsdp/test_fully_shard_init.py
index 83bf6a245a0..49bd3848a85 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_init.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_init.py
@@ -904,7 +904,7 @@ class TestFullyShardProcessGroupInit(FSDPTestMultiThread):
         )
         self.assertEqual(mesh.mesh, ref_mesh.mesh)
         self.assertEqual(mesh._coordinate_on_dim, ref_mesh._coordinate_on_dim)
-        for (tag, ranks, group_name), (ref_tag, ref_ranks, ref_group_name) in zip(
+        for (_, ranks, _), (_, ref_ranks, _) in zip(
             mesh._dim_group_infos, ref_mesh._dim_group_infos
         ):
             # Since we manually constructed new subgroups, the test and ref
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_logging.py b/test/distributed/_composable/fsdp/test_fully_shard_logging.py
index ba77ce82218..94e57b2fc36 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_logging.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_logging.py
@@ -26,7 +26,7 @@ class LoggingTests(LoggingTestCase):
         env["WORLD_SIZE"] = "1"
         env["MASTER_PORT"] = "34715"
         env["MASTER_ADDR"] = "localhost"
-        stdout, stderr = self.run_process_no_exception(
+        _, stderr = self.run_process_no_exception(
             """\
 import logging
 import torch
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_training.py b/test/distributed/_composable/fsdp/test_fully_shard_training.py
index 550c0633e3f..575a7d6059c 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_training.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_training.py
@@ -590,7 +590,7 @@ class TestFullyShard1DTrainingCore(FSDPTest):
         torch.manual_seed(42 + self.rank)
         inp = torch.randint(0, model_args.vocab_size, (2, 8), device="cuda")
-        for iter_idx in range(10):
+        for _ in range(10):
             losses: List[torch.Tensor] = []
             for _model, _optim in ((ref_model, ref_optim), (model, optim)):
                 _optim.zero_grad()
@@ -624,12 +624,12 @@ class TestFullyShard1DTrainingCore(FSDPTest):
         # sync point after each iteration
         ref_losses: List[torch.Tensor] = []
         losses: List[torch.Tensor] = []
-        for iter_idx in range(10):
+        for _ in range(10):
             ref_optim.zero_grad()
             ref_losses.append(ref_model(inp).sum())
             ref_losses[-1].backward()
             ref_optim.step()
-        for iter_idx in range(10):
+        for _ in range(10):
             optim.zero_grad()
             losses.append(model(inp).sum())
             losses[-1].backward()
@@ -1185,7 +1185,7 @@ class TestFullyShardNDTraining(FSDPTest):
         foreach: bool,
     ):
         global_mesh = self.init_global_mesh()
-        pp_mesh, dp_mesh, tp_mesh = (
+        _, dp_mesh, tp_mesh = (
             global_mesh["pp"],
             global_mesh["dp"],
             global_mesh["tp"],
@@ -1217,7 +1217,7 @@ class TestFullyShardNDTraining(FSDPTest):
             _optim.step()
         self.assertEqual(losses[0], losses[1])
 
-        for n, p in model.named_parameters():
+        for _, p in model.named_parameters():
             self.assertIsInstance(p, DTensor)
             self.assertEqual(p.device_mesh.ndim, 2)
             self.assertEqual(len(p.placements), 2)
@@ -1288,7 +1288,7 @@ class TestFullyShardHSDP3DTraining(FSDPTest):
             _optim.step()
         self.assertEqual(losses[0], losses[1])
 
-        for n, p in model.named_parameters():
+        for _, p in model.named_parameters():
             self.assertIsInstance(p, DTensor)
             self.assertEqual(p.device_mesh.ndim, 3)
             self.assertEqual(len(p.placements), 3)
diff --git a/test/distributed/_composable/test_checkpoint.py b/test/distributed/_composable/test_checkpoint.py
index dd04011d0ff..566694931d8 100644
--- a/test/distributed/_composable/test_checkpoint.py
+++ b/test/distributed/_composable/test_checkpoint.py
@@ -119,7 +119,6 @@ class TestCheckpoint(TestCase):
         # no checkpoint
         with MemoryDelta(x.device) as mem1:
             loss1 = net1(x1).sum()
-        graph_size1 = self._get_graph_size(loss1)
         loss1.backward()
 
         # with checkpoint
diff --git a/test/distributed/_composable/test_composability/test_2d_composability.py b/test/distributed/_composable/test_composability/test_2d_composability.py
index 57f7f32f164..c596139c364 100644
--- a/test/distributed/_composable/test_composability/test_2d_composability.py
+++ b/test/distributed/_composable/test_composability/test_2d_composability.py
@@ -244,7 +244,6 @@ class TestFullyShard2DTraining(FSDPTest):
             ref_model.parameters(), model.named_parameters()
         ):
             full_grad = param.grad.full_tensor()
-            ref_grad = ref_param.grad
             self.assertEqual(ref_param.grad, full_grad)
         ref_optim.step()
@@ -285,7 +284,7 @@ class TestFullyShard2DTraining(FSDPTest):
         # called, but they will just be no-ops without issuing any kernels.
         # We prefer to keep the no-op check at the c10d level, not in FSDP.
         inp = torch.randn((4, mlp_dim), device="cuda")  # same on all ranks
-        for iter_idx in range(10):
+        for _ in range(10):
             ref_optim.zero_grad()
             optim.zero_grad()
@@ -583,9 +582,7 @@ class TestNew2dParallelTraining(DTensorTestBase):
             "net1": ColwiseParallel(),
             "net2": RowwiseParallel(),
         }
-        model_2d = parallelize_module(
-            SimpleModel().cuda(), mesh_2d["tp"], parallelize_plan
-        )
+        parallelize_module(SimpleModel().cuda(), mesh_2d["tp"], parallelize_plan)
 
     @with_comms
     @skip_if_lt_x_gpu(4)
@@ -833,7 +830,6 @@ class TestNew2dParallelStateDict(DTensorTestBase):
         # Create a model without wrapper
         torch.manual_seed(0)
         no_wrap_model = simple_model().cuda(self.rank)
-        no_wrap_state_dict = no_wrap_model.state_dict()
        no_wrap_optim = torch.optim.Adam(no_wrap_model.parameters(), lr=0.01)
         no_wrap_model(no_wrap_model.get_input().cuda(self.rank)).sum().backward()
         no_wrap_optim.step()
@@ -890,8 +886,6 @@ class TestNew2dParallelStateDict(DTensorTestBase):
         set_optimizer_state_dict(
             model_2d, optimizers=optim_2d, optim_state_dict=ref_optim_2d_osd
         )
-        new_optim_2d_osd = get_optimizer_state_dict(model_2d, optimizers=optim_2d)
-
         ref_optim_2d_osd_states = ref_optim_2d_osd["state"]
         new_optim_2d_osd_states = optim_2d_osd["state"]
diff --git a/test/distributed/_composable/test_composability/test_pp_composability.py b/test/distributed/_composable/test_composability/test_pp_composability.py
index aa9265cf94c..c82b08bb21f 100644
--- a/test/distributed/_composable/test_composability/test_pp_composability.py
+++ b/test/distributed/_composable/test_composability/test_pp_composability.py
@@ -119,7 +119,7 @@ class ComposabilityTest(MultiProcessTestCase):
     )
     @parametrize("use_new_runtime", [False, True])
     def test_manual_with_data_parallel(self, dp_type, ScheduleClass, use_new_runtime):
-        device = torch.device("cuda", self.device)
+        _device_raii = torch.device("cuda", self.device)
         torch.cuda.set_device(self.device)
         store = torch.distributed.FileStore(self.file_name, self.world_size)
         torch.distributed.init_process_group(
@@ -398,7 +398,7 @@ class ComposabilityTest(MultiProcessTestCase):
         ],
     )
     def test_3d_with_tp_dp_pp(self, ScheduleClass, MixedPrecisionParam):
-        device = torch.device("cuda", self.device)
+        _device_raii = torch.device("cuda", self.device)
         torch.cuda.set_device(self.device)
         store = torch.distributed.FileStore(self.file_name, self.world_size)
         torch.distributed.init_process_group(
diff --git a/test/distributed/_composable/test_replicate_with_compiler.py b/test/distributed/_composable/test_replicate_with_compiler.py
index 8690bef6cc2..91c3ecb4798 100644
--- a/test/distributed/_composable/test_replicate_with_compiler.py
+++ b/test/distributed/_composable/test_replicate_with_compiler.py
@@ -329,11 +329,11 @@ class ReplicateTest(MultiProcessInductorTestCase):
         code = self._test_bucketing()
         self.assertEqual(counters["inductor"]["ddp_buckets"], 3)
         fc = FileCheck()
-        for i in range(3):
+        for _ in range(3):
             fc.check("cpp_fused_").check(
                 "torch.ops._c10d_functional.all_reduce_coalesced_.default("
             )
-        for i in range(3):
+        for _ in range(3):
             fc.check("torch.ops._c10d_functional.wait_tensor.default")
         fc.run(code)
@@ -342,11 +342,11 @@ class ReplicateTest(MultiProcessInductorTestCase):
         code = self._test_bucketing(init_process_group=False, loop=2)
         self.assertEqual(counters["inductor"]["ddp_buckets"], 3)
         fc = FileCheck()
-        for i in range(3):
+        for _ in range(3):
             fc.check("cpp_fused_").check(
                 "torch.ops._c10d_functional.all_reduce_coalesced_.default("
             )
-        for i in range(3):
+        for _ in range(3):
             fc.check("torch.ops._c10d_functional.wait_tensor.default")
         fc.run(code)
@@ -371,11 +371,11 @@ class ReplicateTest(MultiProcessInductorTestCase):
         code = self._test_bucketing()
         self.assertEqual(counters["inductor"]["ddp_buckets"], 3)
         fc = FileCheck()
-        for i in range(3):
+        for _ in range(3):
             fc.check("aten.flatten.using_ints(").check("cpp_fused_").check(
                 "torch.ops._c10d_functional.all_reduce_.default("
             )
-        for i in range(3):
+        for _ in range(3):
             fc.check("torch.ops._c10d_functional.wait_tensor.default")
         fc.run(code)
@@ -383,11 +383,11 @@ class ReplicateTest(MultiProcessInductorTestCase):
         code = self._test_bucketing(init_process_group=False, loop=2)
         self.assertEqual(counters["inductor"]["ddp_buckets"], 3)
         fc = FileCheck()
-        for i in range(3):
+        for _ in range(3):
             fc.check("aten.flatten.using_ints(").check("cpp_fused_").check(
                 "torch.ops._c10d_functional.all_reduce_.default("
             )
-        for i in range(3):
+        for _ in range(3):
             fc.check("torch.ops._c10d_functional.wait_tensor.default")
         fc.run(code)
diff --git a/test/distributed/_shard/sharded_tensor/ops/test_binary_cmp.py b/test/distributed/_shard/sharded_tensor/ops/test_binary_cmp.py
index bacb36e47f9..094bc0f53d9 100644
--- a/test/distributed/_shard/sharded_tensor/ops/test_binary_cmp.py
+++ b/test/distributed/_shard/sharded_tensor/ops/test_binary_cmp.py
@@ -129,7 +129,7 @@ class TestShardedTensorBinaryOps(ShardedTensorTestBase):
     def test_torch_equal(self):
         """Test torch.equal(ShardedTensor, ShardedTensor)"""
-        spec, alt_spec = self.get_gpu_specs()
+        spec, _ = self.get_gpu_specs()
         st1, st2 = self.get_random_tensors(spec, spec, 10, 10)
         self.assertTrue(torch.equal(st1, st2))
@@ -145,7 +145,7 @@ class TestShardedTensorBinaryOps(ShardedTensorTestBase):
     def test_torch_allclose(self):
         """Test torch.allclose(ShardedTensor, ShardedTensor)"""
-        spec, alt_spec = self.get_gpu_specs()
+        spec, _ = self.get_gpu_specs()
         st1, st2 = self.get_random_tensors(spec, spec, 10, 10)
         self.assertTrue(torch.allclose(st1, st2))
diff --git a/test/distributed/_shard/sharded_tensor/ops/test_init.py b/test/distributed/_shard/sharded_tensor/ops/test_init.py
index bf4cbd924c8..c33136f33ee 100644
--- a/test/distributed/_shard/sharded_tensor/ops/test_init.py
+++ b/test/distributed/_shard/sharded_tensor/ops/test_init.py
@@ -40,8 +40,6 @@ class TestShardedTensorNNInit(ShardedTensorTestBase):
             ],
         )
         h, w = 8, 2
-        expected_h = 2
-        expected_device = torch.device(f"cuda:{self.rank}")
         a, b = 10, 20
         seed = 1234
@@ -75,8 +73,6 @@ class TestShardedTensorNNInit(ShardedTensorTestBase):
             ],
         )
         h, w = 8, 2
-        expected_h = 2
-        expected_device = torch.device(f"cuda:{self.rank}")
         mean, std = 10, 5
         seed = 1234
@@ -110,8 +106,6 @@ class TestShardedTensorNNInit(ShardedTensorTestBase):
             ],
         )
         h, w = 8, 2
-        expected_h = 2
-        expected_device = torch.device(f"cuda:{self.rank}")
         a, mode, nonlinearity = 0, "fan_in", "leaky_relu"
         seed = 1234
diff --git a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
index 730b2c2c0ac..fb8b9778073 100644
--- a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
+++ b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
@@ -456,7 +456,7 @@ class TestLocalTensor(ShardedTensorTestBase):
         with self.assertRaisesRegex(
             NotImplementedError, "Only single local shard is supported."
         ):
-            local_shard = st.local_tensor()
+            st.local_tensor()
 
 
 class TestShardedTensorChunked(ShardedTensorTestBase):
@@ -981,7 +980,6 @@ class TestShardedTensorChunked(ShardedTensorTestBase):
         # Validate remote shards.
         remote_shards = st.remote_shards()
         self.assertEqual(3, len(remote_shards))
-        owners = {}
         for rpc_rank, shards in remote_shards.items():
             self.assertEqual(2, len(shards))
             for remote_shard in shards:
@@ -1364,14 +1363,14 @@ class TestShardedTensorChunked(ShardedTensorTestBase):
             with self.assertRaisesRegex(RuntimeError, "Local rank at save time was"):
                 with load_with_process_group(pg):
                     # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load
-                    state_dict_deser = torch.load(buffer, weights_only=False)
+                    torch.load(buffer, weights_only=False)
         else:
             with self.assertRaisesRegex(
                 RuntimeError, "Local world size at save time was"
             ):
                 with load_with_process_group(pg):
                     # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load
-                    state_dict_deser = torch.load(buffer, weights_only=False)
+                    torch.load(buffer, weights_only=False)
 
         dist.destroy_process_group()
         buffer.seek(0)
@@ -1379,7 +1378,7 @@ class TestShardedTensorChunked(ShardedTensorTestBase):
             RuntimeError, "Need to initialize default process group"
         ):
             # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load
-            state_dict_deser = torch.load(buffer, weights_only=False)
+            torch.load(buffer, weights_only=False)
         rpc.shutdown()
 
     @with_comms
@@ -1396,8 +1395,8 @@ class TestShardedTensorChunked(ShardedTensorTestBase):
                     "rank:3/cuda:3",
                 ],
             )
-            st1 = sharded_tensor.empty(spec, 10, 20, init_rrefs=True)
-            st2 = sharded_tensor.empty(spec, 10, 20)
+            sharded_tensor.empty(spec, 10, 20, init_rrefs=True)
+            sharded_tensor.empty(spec, 10, 20)
 
         create_tensors()
         self.assertEqual(0, len(sharded_tensor.api._sharded_tensor_map))
@@ -2204,7 +2203,6 @@ class TestShardedTensorEnumerable(ShardedTensorTestBase):
         else:
             self.assertEqual(2, len(remote_shards))
 
-        owners = {}
         for rpc_rank, shards in remote_shards.items():
             self.assertEqual(2, len(shards))
             for remote_shard in shards:
@@ -2418,10 +2416,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             placement=f"rank:{self.rank}/cuda:{self.rank}",
         )
         with self.assertRaisesRegex(ValueError, "Shard tensor size does not match"):
-            local_shard_from_wrong_meta = sharded_tensor.Shard(
-                local_tensor,
-                metadata=wrong_local_shard_metadata,
-            )
+            sharded_tensor.Shard(local_tensor, metadata=wrong_local_shard_metadata)
 
     @with_comms
     @skip_if_lt_x_gpu(4)
@@ -2696,7 +2691,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
         empty_local_shards = []
         with self.assertRaisesRegex(ValueError, "have no local shards on all ranks"):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 empty_local_shards, [10, 10], init_rrefs=True
             )
@@ -2706,7 +2701,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
         with self.assertRaisesRegex(
             ValueError, "Only torch.strided layout is currently supported"
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_layout_shards, [10, 10], init_rrefs=True
             )
@@ -2719,23 +2714,19 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "Only torch.contiguous_format memory_format is currently supported",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_memory_format_shards, [10, 10], init_rrefs=True
             )
 
         with self.assertRaisesRegex(ValueError, "Shard tensor size does not match"):
-            wrong_size_shards = [
-                sharded_tensor.Shard(
-                    torch.randn(2, 3, device=f"cuda:{self.rank}"), local_shard_metadata
-                )
-            ]
+            sharded_tensor.Shard(
+                torch.randn(2, 3, device=f"cuda:{self.rank}"), local_shard_metadata
+            )
 
         with self.assertRaisesRegex(
             ValueError, "Local shard tensor device does not match"
         ):
-            wrong_device_shards = [
-                sharded_tensor.Shard(torch.randn(5, 5), local_shard_metadata)
-            ]
+            sharded_tensor.Shard(torch.randn(5, 5), local_shard_metadata)
@@ -2756,7 +2747,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "ShardedTensor global_size property does not match from different ranks!",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_dtype_shards, tensor_overall_size, init_rrefs=True
             )
@@ -2771,7 +2762,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "ShardedTensor dtype property does not match from different ranks!",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_dtype_shards, [10, 10], init_rrefs=True
             )
@@ -2788,7 +2779,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "ShardedTensor requires_grad property does not match from different ranks!",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_requires_grad_shards, [10, 10], init_rrefs=True
             )
@@ -2818,7 +2809,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
         with self.assertRaisesRegex(
             ValueError, "Local shards' tensor pin_memory property need to be the same"
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_pin_memory_local_shards, [10, 10], init_rrefs=True
             )
@@ -2832,7 +2823,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "ShardedTensor pin_memory property does not match from different ranks!",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_pin_memory_shards_cross_ranks, [10, 10], init_rrefs=True
             )
@@ -2945,19 +2936,15 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
         with self.assertRaisesRegex(
             ValueError, "Shard tensor size does not match with metadata.shard_lengths"
         ):
-            wrong_size_shards = [
-                sharded_tensor.Shard(
-                    torch.randn(2, 3, device=f"cuda:{self.rank}"), local_shard_metadata
-                )
-            ]
+            sharded_tensor.Shard(
+                torch.randn(2, 3, device=f"cuda:{self.rank}"), local_shard_metadata
+            )
 
         with self.assertRaisesRegex(
             ValueError,
             "Local shard tensor device does not match with local Shard's placement",
         ):
-            wrong_device_shards = [
-                sharded_tensor.Shard(torch.randn(5, 5), local_shard_metadata)
-            ]
+            sharded_tensor.Shard(torch.randn(5, 5), local_shard_metadata)
 
         wrong_dtype_shards = [
             sharded_tensor.Shard(
diff --git a/test/distributed/_shard/sharding_plan/test_sharding_plan.py b/test/distributed/_shard/sharding_plan/test_sharding_plan.py
index 4625bebf41d..7310c43bb4a 100644
--- a/test/distributed/_shard/sharding_plan/test_sharding_plan.py
+++ b/test/distributed/_shard/sharding_plan/test_sharding_plan.py
@@ -42,7 +42,7 @@ class ChunkAllShardingPlanner(ShardingPlanner):
     def build_plan(self, module: nn.Module) -> ShardingPlan:
         named_params = module.named_parameters()
         plan = {}
-        for name, param in named_params:
+        for name, _ in named_params:
             plan[name] = ChunkShardingSpec(self.dim, placements=self.devices)
 
         return ShardingPlan(plan=plan)
diff --git a/test/distributed/_tensor/debug/test_comm_mode.py b/test/distributed/_tensor/debug/test_comm_mode.py
index 3428bca2c83..fb194f46197 100644
--- a/test/distributed/_tensor/debug/test_comm_mode.py
+++ b/test/distributed/_tensor/debug/test_comm_mode.py
@@ -92,7 +92,6 @@ class TestCommMode(TestCase):
         self.assertEqual(comm_counts[c10d_functional.reduce_scatter_tensor], 1)
 
     def test_comm_mode_with_dtensor(self):
-        world_pg = self.world_pg
         mesh = DeviceMesh(self.device_type, list(range(self.world_size)))
 
         def f(x, y):
@@ -118,8 +117,6 @@ class TestCommMode(TestCase):
         if not torch.cuda.is_available():
             return
 
-        world_pg = self.world_pg
-
         inp = torch.rand(2, 8, 16).cuda()
         all_gather_out = inp.new_empty(self.world_size * 2, 8, 16)
@@ -202,7 +199,7 @@ class TestCommMode(TestCase):
         self.checksAssert(comm_mode, c10d_ops.reduce_scatter_, 1, 1)
 
         # tests c10d reduce_scatter_tensor_coalesced
-        with comm_mode as A, dist._coalescing_manager() as B:
+        with comm_mode, dist._coalescing_manager():
             dist.reduce_scatter_tensor(all_gather_out, inp)
 
         self.checksAssert(comm_mode, c10d_ops.reduce_scatter_tensor_coalesced_, 1, 1)
diff --git a/test/distributed/_tensor/debug/test_comm_mode_features.py b/test/distributed/_tensor/debug/test_comm_mode_features.py
index fc19cddb58f..aa54491f4ce 100644
--- a/test/distributed/_tensor/debug/test_comm_mode_features.py
+++ b/test/distributed/_tensor/debug/test_comm_mode_features.py
@@ -251,7 +251,7 @@ class TestCommModeFeatures(DTensorTestBase):
             comm_mode.comm_module_counts,
             {"Global": {"forward": {}, "backward": {}}},
         )
-        output_tp = model(inp)
+        model(inp)
 
         model_args = ModelArgs(dropout_p=0.0)
         model2 = Transformer(model_args).to(device=self.device_type)
@@ -264,7 +264,7 @@ class TestCommModeFeatures(DTensorTestBase):
         comm_mode = CommDebugMode()
         with comm_mode:
-            output = model2(inp)
+            model2(inp)
 
         # checks to see if all collectives were correctly traced at the module-level
         self.assertEqual(
diff --git a/test/distributed/_tensor/test_dtensor.py b/test/distributed/_tensor/test_dtensor.py
index bf2613f4e67..0e62bbf2ee8 100644
--- a/test/distributed/_tensor/test_dtensor.py
+++ b/test/distributed/_tensor/test_dtensor.py
@@ -155,14 +155,12 @@ class DTensorTest(DTensorTestBase):
         device_mesh = DeviceMesh(self.device_type, list(range(self.world_size)))
         shard0_spec = [Shard(0)]
         local_tensor = torch.randn(4, 8)
-        global_shape = torch.Size([self.world_size * 4, 8])
         dist_tensor = DTensor.from_local(local_tensor, device_mesh, shard0_spec)
         # won't affect stride
         self.assertEqual(dist_tensor.stride(), (8, 1))
 
         shard1_spec = [Shard(1)]
         local_tensor = torch.randn(8, 4)
-        global_shape = torch.Size([8, self.world_size * 4])
         dist_tensor = DTensor.from_local(local_tensor, device_mesh, shard1_spec)
         # will affect stride after DT initialized
         self.assertEqual(dist_tensor.stride(), (4 * self.world_size, 1))
@@ -170,7 +168,6 @@ class DTensorTest(DTensorTestBase):
         # if initialized from a transposed mat
         local_tensor = torch.randn(8, 4, 8)
         local_tensor_t = local_tensor.permute(1, 2, 0)
-        global_shape = torch.Size([4, self.world_size * 8, 8])
         self.assertEqual(local_tensor_t.stride(), (8, 1, 32))
         dist_tensor = DTensor.from_local(local_tensor_t, device_mesh, shard1_spec)
         global_stride = (8 * self.world_size, 1, 32 * self.world_size)
@@ -257,7 +254,7 @@ class DTensorTest(DTensorTestBase):
         with self.assertRaisesRegex(
             RuntimeError, "Please pass both shape and stride at the same time."
         ):
-            dtensor = DTensor.from_local(
+            DTensor.from_local(
                 tensor_list[self.rank],
                 device_mesh,
                 (Shard(0),),
@@ -267,7 +264,7 @@ class DTensorTest(DTensorTestBase):
         with self.assertRaisesRegex(
             RuntimeError, "Please pass both shape and stride at the same time."
         ):
-            dtensor = DTensor.from_local(
+            DTensor.from_local(
                 tensor_list[self.rank],
                 device_mesh,
                 (Shard(0),),
@@ -1043,7 +1040,7 @@ class DTensorLogTest(LoggingTestCase):
         env["MASTER_PORT"] = "12345"
         env["MASTER_ADDR"] = "localhost"
 
-        stdout, stderr = self.run_process_no_exception(
+        _, stderr = self.run_process_no_exception(
             """\
 import logging
 import torch
diff --git a/test/distributed/_tensor/test_dtensor_compile.py b/test/distributed/_tensor/test_dtensor_compile.py
index 17939ba4785..356f5ff2c42 100644
--- a/test/distributed/_tensor/test_dtensor_compile.py
+++ b/test/distributed/_tensor/test_dtensor_compile.py
@@ -234,8 +234,8 @@ class TestDTensorCompile(torch._dynamo.test_case.TestCase):
                 requires_grad=x.requires_grad,
             )
 
-        out = fn(x)
-        out2 = torch.compile(fn, backend="eager")(x)
+        fn(x)
+        torch.compile(fn, backend="eager")(x)
 
     def test_dtensor_constructor_w_dynamo_disable(self):
         mesh = DeviceMesh(self.device_type, torch.arange(self.world_size))
@@ -599,7 +599,7 @@ class TestDTensorCompile(torch._dynamo.test_case.TestCase):
         @torch.compile(backend=cnt)
         def fn(x):
-            dt = DTensor.from_local(x, mesh, [placement], run_check=False)
+            DTensor.from_local(x, mesh, [placement], run_check=False)
 
         x = torch.ones(4, 4, requires_grad=True)
@@ -659,7 +659,7 @@ class TestDTensorCompile(torch._dynamo.test_case.TestCase):
         x2 = x_dt.redistribute(mesh, [Replicate()], async_op=True)
         x2 = x2.to_local()
         self.assertTrue(isinstance(x2, AsyncCollectiveTensor))
-        out = opt_fn(x2)
+        opt_fn(x2)
         # The important part: we get a wait_tensor() in the graph.
         # At runtime, the input to the graph is an AsyncCollectiveTensor,
         # and inside the graph we need to issue a wait() to synchronize.
@@ -880,8 +878,6 @@ class TestDTensorCompileE2E(DTensorTestBase):
             mesh_dim_names=["dp", "tp"],
         )
 
-        fsdp_pg = twod_mesh.get_group(mesh_dim=0)
-
         inp = torch.rand(20, 10, device=self.device_type)
         parallelize_plan = {
             "mlp_0.net1": ColwiseParallel(),
diff --git a/test/distributed/_tensor/test_random_ops.py b/test/distributed/_tensor/test_random_ops.py
index d5aa7efb8fa..7b918b76993 100644
--- a/test/distributed/_tensor/test_random_ops.py
+++ b/test/distributed/_tensor/test_random_ops.py
@@ -249,7 +249,7 @@ class DistTensorRandomOpTest(DTensorTestBase):
         device_mesh = DeviceMesh(self.device_type, torch.arange(self.world_size))
 
         # seed synchronization happens after the first `distribute_tensor` call
-        dtensor = distribute_tensor(
+        distribute_tensor(
             torch.empty([self.world_size], device="cuda"), device_mesh, [Shard(0)]
         )
         self.assertEqual(seed_from_rank_0, random._rng_tracker.get_seed("parallel-rng"))
diff --git a/test/distributed/_tensor/test_redistribute.py b/test/distributed/_tensor/test_redistribute.py
index 7b7531692fa..34ae3514ae1 100644
--- a/test/distributed/_tensor/test_redistribute.py
+++ b/test/distributed/_tensor/test_redistribute.py
@@ -309,7 +309,7 @@ class RedistributeTest(DTensorTestBase):
         shard_tensor = distribute_tensor(local_tensor, device_mesh, shard_spec)
         self.assertEqual(shard_tensor.placements[0].dim, 1)
         reshard_tensor = shard_tensor.redistribute(device_mesh, shard_minus_spec)
-        self.assertEqual(shard_tensor.placements[0].dim, 1)
+        self.assertEqual(reshard_tensor.placements[0].dim, 1)
 
     @with_comms
     def test_redistribute_uneven_sharding(self):
diff --git a/test/distributed/_tensor/test_tensor_ops.py b/test/distributed/_tensor/test_tensor_ops.py
index f9153c126bc..b389c682456 100644
--- a/test/distributed/_tensor/test_tensor_ops.py
+++ b/test/distributed/_tensor/test_tensor_ops.py
@@ -622,7 +622,7 @@ class DistTensorOpsTest(DTensorTestBase):
         self.assertEqual(misses, 2)
 
         # convert to fp32 again and see if there's cache hit
-        fp32_sharded_dtensor1 = bf16_sharded_dtensor1.float()
+        bf16_sharded_dtensor1.float()
         hits, misses, _, _ = _get_sharding_prop_cache_info()
         # by now we should have cache hit
         self.assertEqual(hits, 1)
diff --git a/test/distributed/_tensor/test_utils.py b/test/distributed/_tensor/test_utils.py
index f9ebf57d1dc..a9798f9d434 100644
--- a/test/distributed/_tensor/test_utils.py
+++ b/test/distributed/_tensor/test_utils.py
@@ -133,7 +133,6 @@ class UtilTest(DTensorTestBase):
             global_tensor_shape, global_mesh, placements
         )
         assert global_mesh.get_coordinate is not None
-        dp_replic_rank = global_mesh.get_local_rank("dp_replic")
         dp_shard_rank = global_mesh.get_local_rank("dp_shard")
         tp_rank = global_mesh.get_local_rank("tp")
         shard_idx_on_dim_0 = tp_rank * dp_shard_size + dp_shard_rank
diff --git a/test/distributed/_tensor/test_xla_integration.py b/test/distributed/_tensor/test_xla_integration.py
index 970b1a195df..2c11dd16ab9 100644
--- a/test/distributed/_tensor/test_xla_integration.py
+++ b/test/distributed/_tensor/test_xla_integration.py
@@ -150,7 +150,7 @@ class DTensorXLAIntegrationTest(TestCase):
             shard_spec = [Shard(0)]
             # annoate fc1 and fc2
             if isinstance(mod, nn.Linear):
-                for name, param in mod.named_parameters():
+                for _, param in mod.named_parameters():
                     # annotate the parameter tensors directly
                     distribute_tensor(param, mesh, shard_spec)
diff --git a/test/distributed/algorithms/quantization/test_quantization.py b/test/distributed/algorithms/quantization/test_quantization.py
index 94a1c763474..6713d4cc3f5 100644
--- a/test/distributed/algorithms/quantization/test_quantization.py
+++ b/test/distributed/algorithms/quantization/test_quantization.py
@@ -1,4 +1,5 @@
 # Owner(s): ["oncall: distributed"]
+# ruff: noqa: F841
 
 import os
 import sys
diff --git a/test/distributed/checkpoint/e2e/test_e2e_save_and_load.py b/test/distributed/checkpoint/e2e/test_e2e_save_and_load.py
index 4b0f3d6e045..a9a67f8b304 100644
--- a/test/distributed/checkpoint/e2e/test_e2e_save_and_load.py
+++ b/test/distributed/checkpoint/e2e/test_e2e_save_and_load.py
@@ -277,7 +277,7 @@ class TestE2ESaveAndLoad(DTensorTestBase, VerifyStateDictMixin):
             self.assertEqual(loss, dist_loss)
 
         dist_msd, dist_osd = get_state_dict(dist_model, optimizers=dist_optim)
-        model_sd, optim_sd = get_state_dict(model, optimizers=optim)
+        model_sd, _ = get_state_dict(model, optimizers=optim)
 
         self._verify_msd(model_sd, dist_msd)
         self._verify_osd_by_load(model, optim, self._optim(model), dist_osd)
diff --git a/test/distributed/checkpoint/e2e/test_fine_tuning.py b/test/distributed/checkpoint/e2e/test_fine_tuning.py
index b91b48e6f4c..799d304ab54 100644
--- a/test/distributed/checkpoint/e2e/test_fine_tuning.py
+++ b/test/distributed/checkpoint/e2e/test_fine_tuning.py
@@ -96,7 +96,7 @@ class TestFineTuning(DTensorTestBase):
         optim = torch.optim.Adam(model.parameters(), lr=1e-3)
 
         # Training
-        for i in range(3):
+        for _ in range(3):
             batch = torch.rand(32, DIM, device="cuda")
             loss = model(batch).sum()
             loss.backward()
@@ -161,7 +161,7 @@ class TestFineTuning(DTensorTestBase):
             self.assertEqual(i, 0)
 
         # Training
-        for j in range(3):
+        for _ in range(3):
             batch = torch.rand(32, DIM, device="cuda")
             loss = model(batch).sum()
             loss.backward()
diff --git a/test/distributed/checkpoint/test_checkpoint.py b/test/distributed/checkpoint/test_checkpoint.py
index 050f7df25da..7c6923800fd 100644
--- a/test/distributed/checkpoint/test_checkpoint.py
+++ b/test/distributed/checkpoint/test_checkpoint.py
@@ -85,11 +85,9 @@ class TestDistributedCheckpointing(ShardedTensorTestBase):
         )
 
         st = sharded_tensor.zeros(spec, 4, 4, dtype=torch.float64)
-        mapping = {}
-
         md = _create_default_local_metadata({"st": st})
-        st_md = md.state_dict_metadata["st"]
 
+        st_md = md.state_dict_metadata["st"]
         self.assertEqual(1, len(st_md.chunks))
 
     @with_comms(init_rpc=False)
diff --git a/test/distributed/checkpoint/test_fsdp_tp_checkpoint_conversion.py b/test/distributed/checkpoint/test_fsdp_tp_checkpoint_conversion.py
index 5f98aa82191..8e49edf1472 100644
--- a/test/distributed/checkpoint/test_fsdp_tp_checkpoint_conversion.py
+++ b/test/distributed/checkpoint/test_fsdp_tp_checkpoint_conversion.py
@@ -86,7 +86,6 @@ class TestFsdpTpCheckpointConversion(DTensorTestBase):
         tp_model.load_state_dict(tp_state_dict)
 
         # Check parameters are equal after loading.
-        tp_state_dict_after_load = tp_model.state_dict()
         for fsdp_item, tp_item in zip(fsdp_state_dict.items(), tp_state_dict.items()):
             fsdp_k, fsdp_v = fsdp_item
             tp_k, tp_v = tp_item
diff --git a/test/distributed/checkpoint/test_hsdp_checkpoint.py b/test/distributed/checkpoint/test_hsdp_checkpoint.py
index 23ca7c9463b..dc9c8518962 100644
--- a/test/distributed/checkpoint/test_hsdp_checkpoint.py
+++ b/test/distributed/checkpoint/test_hsdp_checkpoint.py
@@ -120,7 +120,6 @@ class TestHSDPCheckpoint(DTensorTestBase):
         )
         model.load_state_dict(state_dict_to_save["model"])
-        state_dict_after_load = model.state_dict()
 
         # After loading, the current model state dict should be the same as state_dict_to_save.
         for (k1, v1), (k2, v2) in zip(
             state_dict_to_save["model"].items(), model.state_dict().items()
diff --git a/test/distributed/checkpoint/test_nested_dict.py b/test/distributed/checkpoint/test_nested_dict.py
index 4b873210f42..bf9a61fe114 100644
--- a/test/distributed/checkpoint/test_nested_dict.py
+++ b/test/distributed/checkpoint/test_nested_dict.py
@@ -43,7 +43,7 @@ class TestFlattening(TestCase):
             "k3": ["x", 99, [{"k3": "y"}]],
         }
 
-        flatten_dict, mapping = flatten_state_dict(state_dict)
+        _, mapping = flatten_state_dict(state_dict)
         """
         flatten_dict:
         {'k0': [1], 'k2.0': tensor([1]), 'k2.1': 99, 'k2.2.0.k3': tensor(1), 'k3': ['x', 99, [{'k3': 'y'}]]}
diff --git a/test/distributed/checkpoint/test_save_load_api.py b/test/distributed/checkpoint/test_save_load_api.py
index 5f215d35da8..862f59f00da 100644
--- a/test/distributed/checkpoint/test_save_load_api.py
+++ b/test/distributed/checkpoint/test_save_load_api.py
@@ -40,21 +40,19 @@ class TestSaveAndLoadAPI(DTensorTestBase):
         device_mesh = init_device_mesh(self.device_type, (self.world_size,))
         model = FSDP(model, device_mesh=device_mesh)
         dcp.save(model.state_dict(), checkpoint_id=os.path.join(self.temp_dir, "first"))
-        sd = dcp.load(
-            model.state_dict(), checkpoint_id=os.path.join(self.temp_dir, "first")
-        )
+        dcp.load(model.state_dict(), checkpoint_id=os.path.join(self.temp_dir, "first"))
 
         with patch.object(
             dcp.FileSystemReader, "validate_checkpoint_id", return_value=False
-        ) as m1:
+        ):
             with patch.object(
                 dcp.FileSystemWriter, "validate_checkpoint_id", return_value=False
-            ) as m2:
+            ):
                 dcp.save(
                     model.state_dict(),
                     checkpoint_id=os.path.join(self.temp_dir, "second"),
                 )
-                sd = dcp.load(
+                dcp.load(
                     model.state_dict(),
                     checkpoint_id=os.path.join(self.temp_dir, "second"),
                 )
@@ -62,7 +60,7 @@ class TestSaveAndLoadAPI(DTensorTestBase):
         with self.assertRaisesRegex(RuntimeError, "Cannot detect"):
             dcp.save(model.state_dict(), checkpoint_id="abc://abc.abc")
         with self.assertRaisesRegex(RuntimeError, "Cannot detect"):
-            sd = dcp.load(model.state_dict(), checkpoint_id="abc://abc.abc")
+            dcp.load(model.state_dict(), checkpoint_id="abc://abc.abc")
 
 
 if __name__ == "__main__":
diff --git a/test/distributed/checkpoint/test_state_dict.py b/test/distributed/checkpoint/test_state_dict.py
index b6f4b9bed3c..581968335fd 100644
--- a/test/distributed/checkpoint/test_state_dict.py
+++ b/test/distributed/checkpoint/test_state_dict.py
@@ -81,7 +81,7 @@ class TestStateDict(DTensorTestBase, VerifyStateDictMixin):
         # Train 10 steps.
         _dist_optim = [dist_optim] if not isinstance(dist_optim, list) else dist_optim
-        for i in range(10):
+        for _ in range(10):
             optim.zero_grad()
             for d_optim in _dist_optim:
                 d_optim.zero_grad()
diff --git a/test/distributed/checkpoint/test_state_dict_utils.py b/test/distributed/checkpoint/test_state_dict_utils.py
index 1bab6be151e..04ddd764418 100644
--- a/test/distributed/checkpoint/test_state_dict_utils.py
+++ b/test/distributed/checkpoint/test_state_dict_utils.py
@@ -104,7 +104,7 @@ class TestStateDictUtils(DTensorTestBase):
             return tensor, dist_tensor
 
         ltensor, ldtensor = [], []
-        for i in range(10):
+        for _ in range(10):
             tensor, dtensor = create_dtensor()
             ltensor.append(tensor)
             ltensor.append(torch.ones(10, device=torch.device("cuda")))
diff --git a/test/distributed/elastic/multiprocessing/api_test.py b/test/distributed/elastic/multiprocessing/api_test.py
index 98ff8f1a309..b41b45d7e41 100644
--- a/test/distributed/elastic/multiprocessing/api_test.py
+++ b/test/distributed/elastic/multiprocessing/api_test.py
@@ -259,7 +259,7 @@ class _StartProcessesTest(TestCase):
     ) -> None:
         mp_queue = mp.get_context("spawn").Queue()
         child_nproc = 2
-        ctx = mp.spawn(
+        mp.spawn(
            start_processes_zombie_test,
            nprocs=1,
            args=(entrypoint, mp_queue, self.log_dir(), child_nproc),
diff --git a/test/distributed/elastic/rendezvous/c10d_rendezvous_backend_test.py b/test/distributed/elastic/rendezvous/c10d_rendezvous_backend_test.py
index 89329c380f3..2685d0e2da0 100644
--- a/test/distributed/elastic/rendezvous/c10d_rendezvous_backend_test.py
+++ b/test/distributed/elastic/rendezvous/c10d_rendezvous_backend_test.py
@@ -165,7 +165,7 @@ class CreateBackendTest(TestCase):
     def test_create_backend_returns_backend_if_is_host_is_not_specified_and_store_already_exists(
         self,
     ) -> None:
-        store = TCPStore(  # type: ignore[call-arg] # noqa: F841
+        TCPStore(  # type: ignore[call-arg] # noqa: F841
             self._expected_endpoint_host, self._expected_endpoint_port, is_master=True
         )
diff --git a/test/distributed/elastic/rendezvous/dynamic_rendezvous_test.py b/test/distributed/elastic/rendezvous/dynamic_rendezvous_test.py
index a65a042a244..8eb54041c23 100644
--- a/test/distributed/elastic/rendezvous/dynamic_rendezvous_test.py
+++ b/test/distributed/elastic/rendezvous/dynamic_rendezvous_test.py
@@ -99,7 +99,7 @@ class RendezvousTimeoutTest(TestCase):
                 ValueError,
                 rf"^The join timeout \({join_timeout}\) must be positive.$",
             ):
-                timeout = RendezvousTimeout(join_timeout)
+                RendezvousTimeout(join_timeout)
 
 
 class NodeDescTest(TestCase):
@@ -1637,7 +1637,7 @@ class CreateHandlerTest(TestCase):
 def _ignore_exception(exception_type: Exception, fn: Callable):
     try:
         fn()
-    except exception_type as e:
+    except exception_type:
         pass
diff --git a/test/distributed/elastic/rendezvous/rendezvous_backend_test.py b/test/distributed/elastic/rendezvous/rendezvous_backend_test.py
index fef7545fcd2..107a3bbfbad 100644
--- a/test/distributed/elastic/rendezvous/rendezvous_backend_test.py
+++ b/test/distributed/elastic/rendezvous/rendezvous_backend_test.py
@@ -70,7 +70,7 @@ class RendezvousBackendTestMixin(ABC):
         self.assertTrue(has_set)
 
     def test_set_state_sets_backend_state_if_token_is_current(self) -> None:
-        state1, token1, has_set1 = self._set_state(b"x")
+        _, token1, has_set1 = self._set_state(b"x")
 
         state2, token2, has_set2 = self._set_state(b"y", token1)
@@ -80,7 +80,7 @@ class RendezvousBackendTestMixin(ABC):
     def test_set_state_returns_current_backend_state_if_token_is_old(self) -> None:
-        state1, token1, _ = self._set_state(b"x")
+        _, token1, _ = self._set_state(b"x")
 
         state2, token2, _ = self._set_state(b"y", token1)
diff --git a/test/distributed/elastic/timer/file_based_local_timer_test.py b/test/distributed/elastic/timer/file_based_local_timer_test.py
index c06f3520bac..39d215f9319 100644
--- a/test/distributed/elastic/timer/file_based_local_timer_test.py
+++ b/test/distributed/elastic/timer/file_based_local_timer_test.py
@@ -113,7 +113,7 @@ if not (IS_WINDOWS or IS_MACOS):
             num_clients = 10
             num_requests_per_client = 10
             processes = []
-            for i in range(num_clients):
+            for _ in range(num_clients):
                 p = mp.Process(
                     target=func, args=(num_requests_per_client, self.file_path)
                 )
@@ -190,7 +190,7 @@ if not (IS_WINDOWS or IS_MACOS):
             """
             client = timer.FileTimerClient(file_path)
             sem.release()
-            for i in range(0, n):
+            for _ in range(0, n):
                 client.acquire("test_scope", 0)
                 time.sleep(interval)
diff --git a/test/distributed/fsdp/test_checkpoint_wrapper.py b/test/distributed/fsdp/test_checkpoint_wrapper.py
index d1f189fed4d..06aa6900430 100644
--- a/test/distributed/fsdp/test_checkpoint_wrapper.py
+++ b/test/distributed/fsdp/test_checkpoint_wrapper.py
@@ -159,7 +159,7 @@ class CheckpointWrapperTest(TestCase):
                 if use_reentrant
                 else CheckpointImpl.NO_REENTRANT,
             )
-            for i in range(self.n):
+            for _ in range(self.n):
                 l = nn.Sequential(
                     nn.Linear(256, 256), nn.Linear(256, 256), nn.Linear(256, 256)
                 )
diff --git a/test/distributed/fsdp/test_fsdp_dtensor_state_dict.py b/test/distributed/fsdp/test_fsdp_dtensor_state_dict.py
index 97e7d56b97b..281f11d6d66 100644
--- a/test/distributed/fsdp/test_fsdp_dtensor_state_dict.py
+++ b/test/distributed/fsdp/test_fsdp_dtensor_state_dict.py
@@ -303,13 +303,13 @@ class TestFSDPWithDeviceMeshAndDTensor(DTensorTestBase):
         with self.assertRaisesRegex(
             RuntimeError, "DeviceMesh is not compatible with LOCAL_STATE_DICT."
         ):
             with FSDP.state_dict_type(model, StateDictType.LOCAL_STATE_DICT):
-                state_dict = model.state_dict()
+                model.state_dict()
 
         with self.assertRaisesRegex(
             RuntimeError, "DeviceMesh is not compatible with LOCAL_STATE_DICT."
         ):
             with FSDP.state_dict_type(model, StateDictType.LOCAL_STATE_DICT):
-                optim_state_dict = FSDP.optim_state_dict(model, optim)
+                FSDP.optim_state_dict(model, optim)
 
 
 instantiate_parametrized_tests(TestFSDPWithDeviceMeshAndDTensor)
diff --git a/test/distributed/fsdp/test_fsdp_fine_tune.py b/test/distributed/fsdp/test_fsdp_fine_tune.py
index 446d1be04f0..754c33bfdfd 100644
--- a/test/distributed/fsdp/test_fsdp_fine_tune.py
+++ b/test/distributed/fsdp/test_fsdp_fine_tune.py
@@ -364,9 +364,8 @@ class TestFSDPFineTune(FSDPTest):
         )
         torch.manual_seed(self.rank + 1)
         losses = []
-        for idx in range(6):
+        for _ in range(6):
             frozen_input = torch.randn((4, 4), device="cuda", requires_grad=False)
-            learnable_input = torch.randn((4, 4), device="cuda", requires_grad=True)
             for _model, _optim in ((model, model_optim), (ref_model, ref_model_optim)):
                 loss = _model(frozen_input, frozen_input).sum()
                 losses.append(loss)
diff --git a/test/distributed/fsdp/test_fsdp_freezing_weights.py b/test/distributed/fsdp/test_fsdp_freezing_weights.py
index 7d662cfcba3..0ffe6054bd3 100644
--- a/test/distributed/fsdp/test_fsdp_freezing_weights.py
+++ b/test/distributed/fsdp/test_fsdp_freezing_weights.py
@@ -182,7 +182,7 @@ class TestFreezingWeights(FSDPTest):
         criterion = nn.CrossEntropyLoss()
         optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
 
-        for iteration in range(3):
+        for _ in range(3):
             out = model(batch)
             fake_loss = criterion(out, target)
             optimizer.zero_grad()
diff --git a/test/distributed/fsdp/test_fsdp_memory.py b/test/distributed/fsdp/test_fsdp_memory.py
index ea200e9ae10..08ab6049ced 100644
--- a/test/distributed/fsdp/test_fsdp_memory.py
+++ b/test/distributed/fsdp/test_fsdp_memory.py
@@ -108,8 +108,6 @@ class TestFSDPMemory(FSDPTest):
     def _dist_train(self, with_checkpoint, expected, model_hidden_dim, iterations):
         gpu_id = self.rank
-        world_size = self.world_size
-
         batch = torch.randn(size=(2, 3, 224, 224)).cuda()
 
         model = create_model(
diff --git a/test/distributed/fsdp/test_fsdp_misc.py b/test/distributed/fsdp/test_fsdp_misc.py
index 2bd0d719a31..39b9b2d317d 100644
--- a/test/distributed/fsdp/test_fsdp_misc.py
+++ b/test/distributed/fsdp/test_fsdp_misc.py
@@ -278,9 +278,9 @@ class TestFSDPMiscMultiProcess(FSDPTest):
         )
         x = torch.randn(10, 10, device="cuda")
         y = torch.randn(10, 10, device="cuda")
-        for i in range(4):
+        for _ in range(4):
             if use_second_layer:
-                a, b = fsdp(x, y)
+                a, _ = fsdp(x, y)
             else:
                 a = fsdp(x, y)
             loss = a.sum()
@@ -509,7 +509,7 @@ class TestFSDPMiscMultiProcess(FSDPTest):
     def test_fsdp_cpu_training(self):
         """Tests FSDP training on CPU."""
         gloo_pg = dist.new_group(backend="gloo")
-        for ss in [
+        for ss in [  # noqa: F841
             ShardingStrategy.NO_SHARD,
             ShardingStrategy.FULL_SHARD,
             ShardingStrategy.SHARD_GRAD_OP,
@@ -857,13 +857,13 @@ class TestFSDPMiscMultiThread(FSDPTestMultiThread):
         torch.cuda.set_device(self.rank)
         # Test CPU
         no_params = nn.ReLU()
-        module = FSDP(no_params)
+        FSDP(no_params)
         # Test CUDA
         no_params = nn.ReLU().cuda()
-        module = FSDP(no_params)
+        FSDP(no_params)
         # Test CPU + device_id
         no_params = nn.ReLU()
-        module = FSDP(no_params, device_id=torch.cuda.current_device())
+        FSDP(no_params, device_id=torch.cuda.current_device())
         # For modules with no params, wrong device_id will raise error about
         # inconsistency between compute_device and device_id, since compute_device
         # is computed as torch.cuda.current_device when there are no params.
diff --git a/test/distributed/fsdp/test_fsdp_mixed_precision.py b/test/distributed/fsdp/test_fsdp_mixed_precision.py
index 30b628b6a3c..0e4b0d0497c 100644
--- a/test/distributed/fsdp/test_fsdp_mixed_precision.py
+++ b/test/distributed/fsdp/test_fsdp_mixed_precision.py
@@ -1139,7 +1139,6 @@ class TestFSDPDifferentSubmodulePrecision(FSDPTest):
         model = SaveForwardInputsModel(
             forward_inputs=forward_inputs, cast_forward_inputs=False
         ).cuda()
-        c1, c2 = model.c1, model.c2
         x = torch.zeros(2, 100, device="cuda")
 
         # float16 on one submodule and float32 on everything else
diff --git a/test/distributed/fsdp/test_fsdp_multiple_wrapping.py b/test/distributed/fsdp/test_fsdp_multiple_wrapping.py
index caa4c6cb98a..42ef25b9d6f 100644
--- a/test/distributed/fsdp/test_fsdp_multiple_wrapping.py
+++ b/test/distributed/fsdp/test_fsdp_multiple_wrapping.py
@@ -45,7 +45,7 @@ class TestMultipleWrapping(FSDPTest):
         model = FSDP(inner_model).cuda()
         optim = SGD(model.parameters(), lr=0.1)
 
-        for i in range(3):
+        for _ in range(3):
             input = torch.rand((1, 5), dtype=torch.float).cuda()
             input.requires_grad = True
             output = model(input)
diff --git a/test/distributed/fsdp/test_fsdp_optim_state.py b/test/distributed/fsdp/test_fsdp_optim_state.py
index 6926a486c8c..df8f5419c63 100644
--- a/test/distributed/fsdp/test_fsdp_optim_state.py
+++ b/test/distributed/fsdp/test_fsdp_optim_state.py
@@ -1510,7 +1510,7 @@ class TestFSDPOptimState(FSDPTest):
         ) = self._init_nested_model(wrap=False, use_multiple_param_groups=False)
         if should_check_method_fn("rekey_optim_state_dict"):
             with context_fn():
-                rekeyed_osd = FSDP.rekey_optim_state_dict(
+                FSDP.rekey_optim_state_dict(
                     fsdp_osd,  # from `full_optim_state_dict()`
                     OptimStateKeyType.PARAM_ID,
                     nonwrapped_model,
@@ -1650,7 +1650,7 @@ class TestFSDPOptimState(FSDPTest):
         )
 
         # Make optim1 has a different state.
-        for i in range(5):
+        for _ in range(5):
             batch = torch.rand(5, 8).cuda()
             loss = models[1](batch).sum()
             loss.backward()
@@ -1765,7 +1765,7 @@ class TestFSDPOptimState(FSDPTest):
         initializer = self._model_class[model_class]
 
         # First, run a wrapped model with full world size for a few iterations
-        model1, optim1, optim_input1 = initializer(
+        model1, optim1, _ = initializer(
             wrap=True,
             use_multiple_param_groups=use_multiple_param_groups,
         )
@@ -1788,7 +1788,7 @@ class TestFSDPOptimState(FSDPTest):
             new_group = dist.distributed_c10d._get_default_group()
         # Second, run a wrapped model with (possibly) halved world size and
         # (possibly) differing `optim_input` across ranks
-        model2, optim2, optim_input2 = initializer(
+        model2, optim2, _ = initializer(
             wrap=True,
             group=new_group,
             use_multiple_param_groups=use_multiple_param_groups,
@@ -1861,7 +1861,8 @@ class TestFSDPOptimState(FSDPTest):
             FSDP.optim_state_dict(model, optim), osd, check_same_param_keys=True
         )
         step()
-        osd_to_load = FSDP.optim_state_dict_to_load(
+
+        osd_to_load = FSDP.optim_state_dict_to_load(  # noqa: F841
             model, optim, osd, load_directly=True
         )
         self._check_same_state(
@@ -1994,7 +1995,7 @@ class TestFSDPOptimState(FSDPTest):
             loss.backward()
             fsdp_optim.step()
         orig_state_dict = deepcopy(fsdp_optim.state_dict())
-        optim_state_dict = FSDP.optim_state_dict(fsdp_model, fsdp_optim)
+        FSDP.optim_state_dict(fsdp_model, fsdp_optim)
         FSDP.optim_state_dict_to_load(
             fsdp_model,
             fsdp_optim,
diff --git a/test/distributed/fsdp/test_fsdp_state_dict.py b/test/distributed/fsdp/test_fsdp_state_dict.py
index a246375caba..0a8a4f57684 100644
--- a/test/distributed/fsdp/test_fsdp_state_dict.py
+++ b/test/distributed/fsdp/test_fsdp_state_dict.py
@@ -966,7 +966,7 @@ class TestFSDPStateDict(FSDPTest):
             setattr(module, LINEAR_SKIP, linear_skip)
             return fsdp, linear_skip_tensor_names
 
-        fsdp, linear_skip_tensor_names = _create_module()
+        fsdp, _ = _create_module()
         # Run a forward pass
         inp = torch.randn((1, 10), device=torch.cuda.current_device())
         loss = fsdp(inp)
diff --git a/test/distributed/fsdp/test_fsdp_unshard_params.py b/test/distributed/fsdp/test_fsdp_unshard_params.py
index fe8a00892e2..e2eea11ac2e 100644
--- a/test/distributed/fsdp/test_fsdp_unshard_params.py
+++ b/test/distributed/fsdp/test_fsdp_unshard_params.py
@@ -634,7 +634,7 @@ class TestUnshardParams(TestUnshardParamsBase):
         model = FSDP(model, auto_wrap_policy=ModuleWrapPolicy((nn.Sequential,)))
         with FSDP.summon_full_params(model[0]):
             # Check that the summoned module does not have its flat parameter
-            for param_name, param in model[0].named_parameters():
+            for param_name, _ in model[0].named_parameters():
                 self.assertFalse(FLAT_PARAM in param_name)
         self.assertGreater(len(list(model[0].parameters())), 1)
diff --git a/test/distributed/fsdp/test_fsdp_use_orig_params.py b/test/distributed/fsdp/test_fsdp_use_orig_params.py
index e477c043c4d..996f1840454 100644
--- a/test/distributed/fsdp/test_fsdp_use_orig_params.py
+++ b/test/distributed/fsdp/test_fsdp_use_orig_params.py
@@ -260,7 +260,7 @@ class TestFSDPUseOrigParamsMultipleParamGroups(FSDPTest):
         model = FSDP(copy.deepcopy(base_model), self.process_group, **fsdp_kwargs)
         model = torch.compile(model)
         optim = torch.optim.Adam(model.parameters(), lr=1e-2)
-        for i in range(10):
+        for _ in range(10):
             losses = []
             inp = ref_model.get_input(torch.device("cuda"))
             for _model, _optim in ((ref_model, ref_optim), (model, optim)):
diff --git a/test/distributed/fsdp/test_utils.py b/test/distributed/fsdp/test_utils.py
index adc338dcf9a..739100f1d3e 100644
--- a/test/distributed/fsdp/test_utils.py
+++ b/test/distributed/fsdp/test_utils.py
@@ -118,7 +118,7 @@ class TestUtils(TestCase):
             x.fill_(0)
 
         x = nn.utils.rnn.pack_padded_sequence(x, seq_length)
-        x, h = rnn(x)
+        x, _ = rnn(x)
         x = _apply_to_tensors(fill_fn, x)
         x, _ = nn.utils.rnn.pad_packed_sequence(x)
         self.assertEqual(torch.sum(x), 0)
diff --git a/test/distributed/launcher/launch_test.py b/test/distributed/launcher/launch_test.py
index b8312de37fa..1ef7fa7e284 100644
--- a/test/distributed/launcher/launch_test.py
+++ b/test/distributed/launcher/launch_test.py
@@ -41,7 +41,6 @@ class LaunchTest(unittest.TestCase):
     def test_launch_without_env(self):
         nnodes = 1
         nproc_per_node = 4
-        world_size = nnodes * nproc_per_node
         sock = get_socket_with_port()
         with closing(sock):
             master_port = sock.getsockname()[1]
diff --git a/test/distributed/pipelining/model_registry.py b/test/distributed/pipelining/model_registry.py
index da081451372..05d4e54176f 100644
--- a/test/distributed/pipelining/model_registry.py
+++ b/test/distributed/pipelining/model_registry.py
@@ -114,7 +114,7 @@ class CustomLinearDx(Function):
     @staticmethod
     def backward(ctx, grad_output):
-        input_val, weight, bias = ctx.saved_tensors
+        input_val, weight, _ = ctx.saved_tensors
         grad_input = grad_output.mm(weight)
         ctx.module.cached_context[ctx.layer_idx].append(grad_output.clone())
         ctx.module.cached_context[str(ctx.layer_idx) + "_input"].append(
@@ -131,7 +131,7 @@ class CustomLinearDxDw(Function):
     @staticmethod
     def backward(ctx, grad_output):
-        input_val, weight, bias = ctx.saved_tensors
+        input_val, weight, _ = ctx.saved_tensors
         grad_input = grad_output.mm(weight)
         grad_weight = grad_output.t().mm(input_val)
         grad_bias = grad_output.sum(0)
diff --git a/test/distributed/pipelining/test_backward.py b/test/distributed/pipelining/test_backward.py
index a19092d8a21..218da6b07ed 100644
--- a/test/distributed/pipelining/test_backward.py
+++ b/test/distributed/pipelining/test_backward.py
@@ -74,7 +74,7 @@ class StageBackwardTests(TestCase):
         # Forward, then backward of loss with respect to inputs
         out = mod(x)
         loss = loss_fn(out, target)
-        dinputs, param_groups = stage_backward_input(
+        dinputs, _param_groups = stage_backward_input(
             stage_outputs_or_loss=(loss,),
             output_grads=None,
             input_values=[x],
@@ -88,7 +88,7 @@ class StageBackwardTests(TestCase):
         torch.testing.assert_close(x.grad, ref_x.grad)
         torch.testing.assert_close(dinputs[0], ref_x.grad)
 
-        for name, p in mod.named_parameters():
+        for _, p in mod.named_parameters():
             # Check that the weight gradients were not updated
             self.assertEqual(p.grad, None)
@@ -109,7 +109,7 @@ class StageBackwardTests(TestCase):
         # Forward, then backward of loss with respect to inputs
         out = mod(x)
         loss = loss_fn(out, target)
-        dinputs, param_groups = stage_backward_input(
+        _dinputs, param_groups = stage_backward_input(
             stage_outputs_or_loss=(loss,),
             output_grads=None,
             input_values=[x],
@@ -157,7 +157,7 @@ class StageBackwardTests(TestCase):
         for x in inputs:
             out = mod(x)
             loss = loss_fn(out, target)
-            dinputs, param_groups = stage_backward_input(
+            _dinputs, param_groups = stage_backward_input(
                 stage_outputs_or_loss=(loss,),
                 output_grads=None,
                 input_values=[x],
diff --git a/test/distributed/pipelining/test_schedule.py b/test/distributed/pipelining/test_schedule.py
index d1025f786c6..639a0fc2d0b 100644
--- a/test/distributed/pipelining/test_schedule.py
+++ b/test/distributed/pipelining/test_schedule.py
@@ -264,7 +264,7 @@ class TestSchedulePlan(TestCase):
                 ]
                 schedule = ScheduleClass(stages, num_microbatches)
-                formatted_pipeline_order = _format_pipeline_order(
+                _formatted_pipeline_order = _format_pipeline_order(
                     schedule.pipeline_order
                 )
@@ -305,10 +305,7 @@ class TestSchedulePlan(TestCase):
                 for i in range(num_local_stages)
             ]
             schedule = ScheduleClass(stages, num_microbatches)
-            formatted_pipeline_order = _format_pipeline_order(
-                schedule.pipeline_order
-            )
-            # print(formatted_pipeline_order)
+            _format_pipeline_order(schedule.pipeline_order)
 
             def stage_to_rank(stage):
                 return stage % group_size
diff --git a/test/distributed/pipelining/test_schedule_multiproc.py b/test/distributed/pipelining/test_schedule_multiproc.py
index f41c06b6b31..b4e5ef3eaa3 100644
--- a/test/distributed/pipelining/test_schedule_multiproc.py
+++ b/test/distributed/pipelining/test_schedule_multiproc.py
@@ -151,7 +151,7 @@ class ScheduleTest(MultiProcContinousTest):
                 schedule.step(x)
             elif self.rank == self.world_size - 1:
                 losses = []
-                out = schedule.step(target=target, losses=losses)
+                schedule.step(target=target, losses=losses)
             else:
                 schedule.step()
@@ -412,7 +412,6 @@ class ScheduleTest(MultiProcContinousTest):
             if hasattr(ScheduleClass, "num_microbatches")
             else 8
         )
-        input_args = x.chunk(num_microbatches)[0]
         stages = [
             PipelineStage(
                 stage_module,
@@ -548,7 +547,6 @@ class ScheduleTest(MultiProcContinousTest):
         loss_fn = torch.nn.MSELoss(reduction="sum")
 
         # Create a pipeline stage to wrap that submodule
-        input_args = x.chunk(num_microbatches)[0]
         stage_indices = rank_stages[self.rank]
         print(f"Rank {self.rank} stages: {stage_indices}")
         submod_names = [f"layers.{i}" for i in stage_indices]
@@ -582,7 +580,7 @@ class ScheduleTest(MultiProcContinousTest):
                 schedule.step(x)
             elif self.rank == self.world_size - 1:
                 losses = []
-                out = schedule.step(target=target, losses=losses)
+                schedule.step(target=target, losses=losses)
             else:
                 schedule.step()
         self.assertEqual(
@@ -887,7 +885,6 @@ class ScheduleTest(MultiProcContinousTest):
         # Create a pipeline stage to wrap that submodule
         chunks = 2
-        input_args = x.chunk(chunks)[0]
         stages = [
             PipelineStage(
                 stage_module,
diff --git a/test/distributed/pipelining/test_stage.py b/test/distributed/pipelining/test_stage.py
index b02e7e25aff..450e719377f 100644
--- a/test/distributed/pipelining/test_stage.py
+++ b/test/distributed/pipelining/test_stage.py
@@ -310,9 +310,6 @@ class StageTest(MultiProcContinousTest):
         full_mod.to(self.device)
         stage_mod = full_mod.get_submodule(f"layers.{self.rank}")
 
-        x = torch.randn(batch_size, d_hid, device=self.device)
-        target = torch.randn(batch_size, d_hid, device=self.device)
-
         stage_with_dw_builder = PipelineStage(
             stage_mod,
             self.rank,
diff --git a/test/distributed/pipelining/test_unflatten.py b/test/distributed/pipelining/test_unflatten.py
index 9e63c3b8084..ba0b3c62a2f 100644
--- a/test/distributed/pipelining/test_unflatten.py
+++ b/test/distributed/pipelining/test_unflatten.py
@@ -58,7 +58,7 @@ class UnflattenTests(TestCase):
         # Check qualnames
         for stage_idx in range(pipe.num_stages):
             stage_mod = pipe.get_stage_module(stage_idx)
-            for param_name, param in stage_mod.named_parameters():
+            for param_name, _ in stage_mod.named_parameters():
                 assert (
                     param_name in orig_state_dict
                 ), f"{param_name} not in original state dict"
diff --git a/test/distributed/tensor/parallel/test_micro_pipeline_tp.py b/test/distributed/tensor/parallel/test_micro_pipeline_tp.py
index 5502116284a..abde0ca518d 100644
--- a/test/distributed/tensor/parallel/test_micro_pipeline_tp.py
+++ b/test/distributed/tensor/parallel/test_micro_pipeline_tp.py
@@ -87,7 +87,9 @@ class
MicroPipelineTPTest(TestCase): a = all_gather_tensor(inp, gather_dim=0, group=group.group_name) b = all_gather_tensor(inp, gather_dim=1, group=group.group_name) c = _fp8_all_gather(inp, gather_dim=0, group_name=group.group_name) - d = _fp8_all_gather(inp, gather_dim=1, group_name=group.group_name) + d = _fp8_all_gather( # noqa: F841 + inp, gather_dim=1, group_name=group.group_name + ) return a, b, c inp = torch.rand(64, 32, device="cuda") diff --git a/test/distributed/tensor/parallel/test_tp_examples.py b/test/distributed/tensor/parallel/test_tp_examples.py index 43662c4d6cf..e448953c676 100644 --- a/test/distributed/tensor/parallel/test_tp_examples.py +++ b/test/distributed/tensor/parallel/test_tp_examples.py @@ -311,7 +311,7 @@ class DistTensorParallelExampleTest(DTensorTestBase): torch.manual_seed(0) steps = 10 if type(model) is torch.float64 else 1 - for iter in range(steps): + for _ in range(steps): inp = torch.randint( model_args.vocab_size, inp_size, device=self.device_type ) diff --git a/test/distributed/tensor/parallel/test_tp_style.py b/test/distributed/tensor/parallel/test_tp_style.py index 28ff10bab09..6c965edf73d 100644 --- a/test/distributed/tensor/parallel/test_tp_style.py +++ b/test/distributed/tensor/parallel/test_tp_style.py @@ -223,7 +223,7 @@ class TensorParallelStyleTest(DTensorTestBase): AssertionError, "input_layouts and desired_input_layouts should have same length!", ): - prepare_inps_dimension_mismatch = PrepareModuleInput( + PrepareModuleInput( input_layouts=Shard(0), desired_input_layouts=(Replicate(), None) ) # Raise assertion error if module inputs and input_layouts do not have same length. diff --git a/test/distributed/test_c10d_common.py b/test/distributed/test_c10d_common.py index 87937b74d33..4fd0e43d706 100644 --- a/test/distributed/test_c10d_common.py +++ b/test/distributed/test_c10d_common.py @@ -182,7 +182,7 @@ class TimeoutTest(TestCase): threads.append(t) t.start() - for i, thread in enumerate(threads): + for _, thread in enumerate(threads): thread.join() # we expect the world_size-1 threads to have failed @@ -583,14 +583,14 @@ class CommonDistributedDataParallelTest: ) ) with err_ctx: - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointOnceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, find_unused_parameters=True, ) # test passes when static_graph is true - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointOnceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, @@ -615,7 +615,7 @@ class CommonDistributedDataParallelTest: ) ) with err_ctx: - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointTwiceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, @@ -623,7 +623,7 @@ class CommonDistributedDataParallelTest: ) with err_ctx: - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointTwiceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, @@ -641,7 +641,7 @@ class CommonDistributedDataParallelTest: process_group = self._get_process_group() for use_bucket_view in (True, False): # Test passes when static_graph=True. 
- model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointTwiceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, @@ -656,7 +656,7 @@ class CommonDistributedDataParallelTest: """ process_group = self._get_process_group() for use_bucket_view in (True, False): - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.DynamicCheckpointTwiceModule(use_reentrant=False), process_group=process_group, use_bucket_view=use_bucket_view, @@ -675,7 +675,7 @@ class CommonDistributedDataParallelTest: """ process_group = self._get_process_group() for use_bucket_view in (True, False): - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.DynamicCheckpointTwiceModuleWeightSharing(use_reentrant=False), process_group=process_group, use_bucket_view=use_bucket_view, @@ -719,7 +719,7 @@ class CommonDistributedDataParallelTest: process_group = self._get_process_group() torch.cuda.set_device(self.rank) for use_bucket_view in (True, False): - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointTwiceModuleWeightSharing(), process_group=process_group, use_bucket_view=use_bucket_view, @@ -737,7 +737,7 @@ class CommonDistributedDataParallelTest: "Expect `start_powerSGD_iter` > 1 if `use_error_feedback` or `warm_start` is enabled, " "because PowerSGD can only be applied after the first two iterations in DDP.", ): - state = powerSGD.PowerSGDState( + powerSGD.PowerSGDState( process_group=None, matrix_approximation_rank=1, start_powerSGD_iter=start_powerSGD_iter, diff --git a/test/distributed/test_c10d_functional_native.py b/test/distributed/test_c10d_functional_native.py index b1c99145311..4db81a0b21d 100644 --- a/test/distributed/test_c10d_functional_native.py +++ b/test/distributed/test_c10d_functional_native.py @@ -429,7 +429,7 @@ class TestWithNCCL(MultiProcessTestCase): input = torch.full((10, 10), float(self.rank), device=self.device) self.assertEqual(torch._C._distributed_c10d._get_work_registry_size(), 0) - output = torch.ops._c10d_functional.all_reduce( + torch.ops._c10d_functional.all_reduce( input, "avg", "default", @@ -550,7 +550,7 @@ class CompileTest(TestCase): assert "= torch.ops._c10d_functional.wait_tensor.default" not in code # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -596,7 +596,7 @@ class CompileTest(TestCase): assert "= torch.ops._c10d_functional.wait_tensor.default" not in code # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (args,)) + out = AOTIRunnerUtil.run("cuda", func, (args,)) # noqa: F841 torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -708,7 +708,7 @@ class CompileTest(TestCase): assert "= torch.ops._c10d_functional.wait_tensor.default" not in code # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -742,7 +742,7 @@ class CompileTest(TestCase): ) # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (args,)) + out = AOTIRunnerUtil.run("cuda", func, (args,)) # noqa: F841 torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "This is a GPU test!") @@ -764,7 +764,7 @@ class CompileTest(TestCase): ) # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + 
AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -790,7 +790,7 @@ class CompileTest(TestCase): ) # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -910,7 +910,7 @@ class CompileTest(TestCase): ) # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") diff --git a/test/distributed/test_c10d_gloo.py b/test/distributed/test_c10d_gloo.py index ee1e04a2be7..968238be5cf 100644 --- a/test/distributed/test_c10d_gloo.py +++ b/test/distributed/test_c10d_gloo.py @@ -1920,7 +1920,7 @@ class DistributedDataParallelTest( ddp_state_dict = torch.load(checkpoint_path, map_location=map_location) for model in [ddp_withload, model_withload]: - for p in ddp_withload.parameters(): + for p in model.parameters(): with torch.no_grad(): p.zero_() ddp_withload.load_state_dict(ddp_state_dict) @@ -1973,7 +1973,8 @@ class DistributedDataParallelTest( This unit test verifies whether the Future object is passed properly. The callback function creates a Future object and sets a value to it. """ - store = c10d.FileStore(self.file_name, self.world_size) + store = c10d.FileStore(self.file_name, self.world_size) # noqa: F841 + process_group = self._get_process_group() # Test on CPU diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index 35fb3217ce2..930f3811c4d 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -366,7 +366,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase): thread.start() # We would get stuck here due to d2h if we didn't abort. - t_cpu = t.cpu() + t.cpu() thread.join() @@ -741,7 +741,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase): # First allreduce to initialize default PG's communicator. pg.allreduce(t).wait() # PG1 is an PG without comms initialized, since we don't call collective on it - new_pg1 = c10d.new_group([0, 1]) + new_pg1 = c10d.new_group([0, 1]) # noqa: F841 new_pg2 = c10d.new_group([0, 1]) t2 = torch.rand(10, 10, device=device) @@ -807,7 +807,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase): # 'timeout' kwarg (or its kwdefault) taking precedence opts = dist.ProcessGroupNCCL.Options() opts._timeout = timedelta(seconds=123) - with warnings.catch_warnings(record=True) as w: + with warnings.catch_warnings(record=True): dist.init_process_group(**base_opts, pg_options=opts) # TODO(whc) i verified that we are indeed emitting this warning, and i can't figure out why i can't catch it. # self.assertEqual(len(w), 1) @@ -1266,30 +1266,26 @@ class DistributedDataParallelTest( "DistributedDataParallel device_ids and output_device arguments only work with " "single-device/multiple-device GPU modules or CPU modules", ): - ddp_model = DistributedDataParallel( + DistributedDataParallel( model, output_device=gpus[1], process_group=process_group ) with self.assertRaisesRegex( ValueError, "device_ids can only be None or contain a single element." 
): - ddp_model = DistributedDataParallel( - model, device_ids=gpus, process_group=process_group - ) + DistributedDataParallel(model, device_ids=gpus, process_group=process_group) with self.assertRaisesRegex( ValueError, "input module must be on the same type of devices" ): model.fc1 = model.fc1.cpu() - ddp_model = DistributedDataParallel(model, process_group=process_group) + DistributedDataParallel(model, process_group=process_group) model = model.cpu() with self.assertRaisesRegex( ValueError, "device_ids can only be None or contain a single element." ): - ddp_model = DistributedDataParallel( - model, device_ids=gpus, process_group=process_group - ) + DistributedDataParallel(model, device_ids=gpus, process_group=process_group) def _test_fp16(self, gradient_as_bucket_view=False): process_group = self._get_process_group() @@ -1940,11 +1936,9 @@ class DistributedDataParallelTest( ), named_msg, ) - for j, ((param_name, p), p_ddp) in enumerate( - zip( - m_child.named_parameters(), - m_ddp_child.parameters(), - ) + for (param_name, p), p_ddp in zip( + m_child.named_parameters(), + m_ddp_child.parameters(), ): named_msg = ( layer_name + "." + param_name + " " + iter_msg @@ -2010,15 +2004,13 @@ class DistributedDataParallelTest( m = ConvNet(layer_devs, layer_formats, layer_dtypes) if self.rank == 0: - m_ddp = DistributedDataParallel( - m, device_ids=[dev0], process_group=process_group - ) + DistributedDataParallel(m, device_ids=[dev0], process_group=process_group) else: with self.assertRaisesRegex( RuntimeError, ".* appears not to match strides of the same param in process 0", ): - m_ddp = DistributedDataParallel( + DistributedDataParallel( m, device_ids=[dev0], process_group=process_group ) @@ -2356,7 +2348,7 @@ class DistributedDataParallelTest( process_group=process_group, ) - for i in range(3): + for _ in range(3): m.zero_grad(set_to_none=try_set_to_none) m(1).sum().backward() @@ -2701,7 +2693,7 @@ class WorkHookTest(MultiProcessTestCase): pg._register_on_completion_hook(hook) tensor = torch.ones([2, 3]).cuda(self.rank) * self.rank work_count = 3 - for i in range(work_count): + for _ in range(work_count): work += 1 pg.broadcast([tensor]).wait() @@ -2806,7 +2798,7 @@ class NcclErrorHandlingTest(MultiProcessTestCase): # Run some GPU operations to make sure cuda has not gotten stuck. # It was observed cuda could get stuck if NCCL communicators were # not properly aborted before throwing RuntimeError. - a = torch.rand(10).cuda(self.rank) + torch.rand(10).cuda(self.rank) elif self.rank == 1: # Clean up structures (ex: files for FileStore before going down) del process_group @@ -2947,7 +2939,7 @@ class NcclErrorHandlingTest(MultiProcessTestCase): os.environ["TORCH_NCCL_BLOCKING_WAIT"] = val store = c10d.FileStore(self.file_name, self.world_size) with self.assertRaises(RuntimeError): - process_group = c10d.ProcessGroupNCCL(store, self.rank, self.world_size) + c10d.ProcessGroupNCCL(store, self.rank, self.world_size) @requires_nccl() @skip_if_lt_x_gpu(3) @@ -4223,7 +4215,7 @@ class NCCLTraceTestBase(MultiProcessTestCase): def _join_processes(self, fn): # We need to patch sys.exit() as skip_if will use sys.exit() and # the exit code from this process will not be caught.
- with mock.patch("sys.exit") as exit_mock: + with mock.patch("sys.exit"): fn() super()._join_processes(fn) @@ -4231,7 +4223,7 @@ class NCCLTraceTestBase(MultiProcessTestCase): proc = torch.multiprocessing.get_context("spawn").Process self.children_pipes = [] parent_pipes = [] - for i in range(self.world_size): + for _ in range(self.world_size): parent_conn, child_conn = torch.multiprocessing.Pipe() self.children_pipes.append(child_conn) parent_pipes.append(parent_conn) @@ -4346,7 +4338,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg._enable_collectives_timing() device = self.local_device a = torch.full((3, 4), float(self.rank), device=device) - for i in range(2): + for _ in range(2): f = pg.allreduce(a) f.wait() torch.cuda.synchronize(device=device) @@ -4372,7 +4364,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg._enable_collectives_timing() device = self.local_device a = torch.full((3, 4), float(self.rank), device=device) - for i in range(2): + for _ in range(2): f = pg.allreduce(a) f.wait() torch.cuda.synchronize(device=device) @@ -4420,7 +4412,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg = self._create_process_group_nccl() device = self.local_device a = torch.full((3, 4), float(self.rank), device=device) - for i in range(2): + for _ in range(2): f = pg.allreduce(a) f.wait() torch.cuda.synchronize(device=device) @@ -4436,7 +4428,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg = self._create_process_group_nccl() device = self.local_device a = torch.full((3, 4), float(self.rank), device=device) - for i in range(2): + for _ in range(2): # test some other primitives to make sure # their strings are valid xs = [torch.ones(3, 4, device=device)] @@ -4496,7 +4488,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg = self._create_process_group_nccl() device = self.local_device # send more works than the buffer size to overwrite the previous entry - for i in range(12): + for _ in range(12): a = [torch.ones(3, 4, device=device)] pg.broadcast(a).wait() torch.cuda.synchronize(device=device) @@ -4611,7 +4603,7 @@ class NCCLTraceTest(NCCLTraceTestBase): th.start() # fill the cuda buffer, at around 1024 events # this will stall - for i in range(2000): + for _ in range(2000): a = a + a th.join() else: @@ -4646,7 +4638,7 @@ class NCCLTraceTest(NCCLTraceTestBase): num_coalesced_ops = 20 ops_per_coalesce = len(op_sizes_per_coalesce) - for i in range(num_coalesced_ops): + for _ in range(num_coalesced_ops): ops = [] for input_sizes in op_sizes_per_coalesce: tensor = torch.zeros(input_sizes).to(self.local_device) @@ -4745,7 +4737,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg._enable_collectives_timing() num_repeats = 10 ops_per_repeat = len(op_sizes) - for i in range(num_repeats): + for _ in range(num_repeats): for input_sizes in op_sizes: tensor = torch.zeros(input_sizes).to(self.local_device) if self.rank == 0: @@ -5047,7 +5039,7 @@ class NcclErrorDumpTest(NCCLTraceTestBase): # Block the current stream on the NCCL stream work.wait() # Run some GPU operations - a = torch.rand(10).cuda(self.rank) + torch.rand(10).cuda(self.rank) elif self.rank == 1: # Clean up structures (ex: files for FileStore before going down) del process_group @@ -5108,7 +5100,6 @@ class ProcessGroupNCCLLargerScaleTest(MultiProcessTestCase): tensor = torch.full((1,), self.rank).cuda(device) ng1 = c10d.split_group(pg, [[0, 1], [2, 3, 4, 5, 6, 7]]) - backend1 = ng1._get_backend(torch.device(device)) # comm split happens eagerly since device_id is passed to init_process_group. 
self.assertEqual(backend.comm_split_count(), 1) diff --git a/test/distributed/test_c10d_ops_nccl.py b/test/distributed/test_c10d_ops_nccl.py index f0249877c63..73bad39956c 100644 --- a/test/distributed/test_c10d_ops_nccl.py +++ b/test/distributed/test_c10d_ops_nccl.py @@ -162,7 +162,6 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): @requires_nccl() @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_allreduce_ops(self): - device_count = torch.cuda.device_count() pg = self.pg local_device_id = self.rank_to_GPU[self.rank][0] @@ -303,9 +302,8 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): pg = self.pg rank = self.rank_to_GPU[self.rank][0] with torch.cuda.device(rank): - for i in range(10): + for _ in range(10): xs = [torch.FloatTensor([1]).cuda(rank)] - ys = [torch.FloatTensor([4]).cuda(rank)] for _ in range(30): pg.allreduce(xs[0]).wait() @@ -410,7 +408,7 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): output_tensors.append([t.cuda(device=gpu) for t in output_per_gpu]) expected_output.append([t.cuda(device=gpu) for t in expected_per_gpu]) - result = allgather(output_tensors, tensors) + allgather(output_tensors, tensors) # Verification self.assertEqual(output_tensors, expected_output) @@ -558,7 +556,7 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): # init output output_ts = [] - for rank in range(self.world_size): + for _ in range(self.world_size): output_ts.append(torch.tensor([-1]).cuda(device_id)) with self.assertRaisesRegex(ValueError, "invalid root rank"): @@ -914,7 +912,6 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): @requires_nccl() @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_send_recv(self): - pg = self.pg device = self.rank_to_GPU[self.rank][0] # Generate the same random tensor @@ -930,7 +927,6 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): @requires_nccl() @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_send_recv_complex(self): - pg = self.pg device = self.rank_to_GPU[self.rank][0] # Generate the same random tensor diff --git a/test/distributed/test_c10d_ucc.py b/test/distributed/test_c10d_ucc.py index b7f778656d6..de61a4a8739 100644 --- a/test/distributed/test_c10d_ucc.py +++ b/test/distributed/test_c10d_ucc.py @@ -755,7 +755,7 @@ class DistributedDataParallelTest( ddp_state_dict = torch.load(checkpoint_path, map_location=map_location) for model in [ddp_withload, model_withload]: - for p in ddp_withload.parameters(): + for p in model.parameters(): with torch.no_grad(): p.zero_() ddp_withload.load_state_dict(ddp_state_dict) diff --git a/test/distributed/test_collective_utils.py b/test/distributed/test_collective_utils.py index 727850680aa..ee93d56efb8 100644 --- a/test/distributed/test_collective_utils.py +++ b/test/distributed/test_collective_utils.py @@ -57,7 +57,7 @@ class TestCollectiveUtils(MultiProcessTestCase): Ensure broadcast has no dependency on torch.distributed when run in single process. """ func = mock.MagicMock() - res = broadcast(data_or_fn=func, rank=0) + broadcast(data_or_fn=func, rank=0) func.assert_called_once() def test_broadcast_result_raises_exceptions_from_func( @@ -98,7 +98,7 @@ class TestCollectiveUtils(MultiProcessTestCase): Ensure all_gather has no dependency on torch.distributed when run in single process. 
""" func = mock.MagicMock() - res = all_gather(data_or_fn=func) + all_gather(data_or_fn=func) func.assert_called_once() def test_all_gather_result_raises_exceptions_from_func( diff --git a/test/distributed/test_data_parallel.py b/test/distributed/test_data_parallel.py index 9ef576ec1df..26f64df90d9 100644 --- a/test/distributed/test_data_parallel.py +++ b/test/distributed/test_data_parallel.py @@ -791,8 +791,8 @@ class TestDataParallel(TestCase): ), named_msg, ) - for j, ((param_name, p), p_dp) in enumerate( - zip(m_child.named_parameters(), m_dp_child.parameters()) + for (param_name, p), p_dp in zip( + m_child.named_parameters(), m_dp_child.parameters() ): named_msg = ( layer_name + "." + param_name + " " + iter_msg diff --git a/test/distributed/test_device_mesh.py b/test/distributed/test_device_mesh.py index 54665934e52..b39ffd375f2 100644 --- a/test/distributed/test_device_mesh.py +++ b/test/distributed/test_device_mesh.py @@ -88,7 +88,7 @@ class DeviceMeshTest(DTensorTestBase): def test_assert_invalid_mesh_tensor(self): mesh = torch.arange(self.world_size).to(self.rank) with self.assertRaises(ValueError): - device_mesh = DeviceMesh(self.device_type, mesh) + DeviceMesh(self.device_type, mesh) @with_comms() def test_2d_mesh_non_eager_init_subgroup(self): @@ -144,7 +144,7 @@ class DeviceMeshTest(DTensorTestBase): RuntimeError, "Optional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.", ): - local_rank = mesh_2d.get_local_rank() + mesh_2d.get_local_rank() @with_comms def test_get_local_rank(self): @@ -258,7 +258,7 @@ class DeviceMeshTest(DTensorTestBase): ): # test init_device_mesh with an invalid device type that contains a GPU index mesh_shape = (2, self.world_size // 2) - mesh_2d = init_device_mesh( + init_device_mesh( "cuda:0", mesh_shape=mesh_shape, mesh_dim_names=("dp", "tp") ) @@ -453,7 +453,7 @@ class InitDeviceMeshTest(DTensorTestBase): RuntimeError, "Each mesh_dim_name must be unique.", ): - mesh = init_device_mesh( + init_device_mesh( self.device_type, (2, 4), mesh_dim_names=["dp", "dp"], @@ -465,7 +465,7 @@ class InitDeviceMeshTest(DTensorTestBase): RuntimeError, "mesh_shape and mesh_dim_names should have same length!", ): - mesh = init_device_mesh( + init_device_mesh( self.device_type, (8,), mesh_dim_names=["dp", "tp"], @@ -483,7 +483,7 @@ class TestDeviceMeshGetItem(DTensorTestBase): RuntimeError, "Cannot slice a DeviceMesh without mesh_dim_names!" ): mesh = init_device_mesh(self.device_type, (2, 4)) - child_mesh = mesh["DP"] + mesh["DP"] @with_comms def test_raises_invalid_mesh_dim_name(self): @@ -493,7 +493,7 @@ class TestDeviceMeshGetItem(DTensorTestBase): mesh = init_device_mesh( self.device_type, (2, 4), mesh_dim_names=mesh_dim_names ) - child_mesh = mesh[child_mesh_dim_name] + mesh[child_mesh_dim_name] @with_comms def test_get_item_2d(self): @@ -514,7 +514,6 @@ class TestDeviceMeshGetItem(DTensorTestBase): tp_group_idx = self.rank // 4 self.assertEqual(tp_mesh.mesh, pg_ranks_by_dim_name["TP"][tp_group_idx]) - dp_mesh = mesh_2d["DP"] dp_group_idx = self.rank % 4 self.assertEqual(mesh_2d["DP"].mesh, pg_ranks_by_dim_name["DP"][dp_group_idx]) @@ -564,17 +563,15 @@ class TestDeviceMeshGetItem(DTensorTestBase): def test_cache_and_reuse_submesh_slice_result(self): mesh = init_device_mesh(self.device_type, (2, 4), mesh_dim_names=("dp", "tp")) - dp_mesh = mesh["dp"] ref_pg_count = _world.group_count # When we call the "dp" slice second time, it should not create any new pg. # As we are just using the cached result so the pg count should be the same. 
- dp_mesh_2 = mesh["dp"] self.assertEqual(ref_pg_count, _world.group_count) # When we call the "tp" slice, it should not create a new pg, as the "tp" slice would # just reuse the parent mesh pg. - tp_mesh = mesh["tp"] + mesh["tp"] self.assertEqual(_world.group_count, ref_pg_count) @with_comms @@ -603,7 +600,7 @@ class TestDeviceMeshGetItem(DTensorTestBase): KeyError, "Invalid mesh_dim_names", ): - cp_dp_mesh = mesh_3d["cp", "dp"] + mesh_3d["cp", "dp"] @with_comms def test_flatten_mesh_3d(self): @@ -767,9 +764,9 @@ class TestMeshEnv(DTensorTestBase): ) with FakeTensorMode(): - dp_mesh = mesh_2d["DP"] - tp_mesh = mesh_2d["TP"] - dp_tp_mesh = mesh_2d["DP", "TP"] + mesh_2d["DP"] + mesh_2d["TP"] + mesh_2d["DP", "TP"] class DeviceMeshCollectiveTest(DTensorTestBase): diff --git a/test/distributed/test_dynamo_distributed.py b/test/distributed/test_dynamo_distributed.py index cdf834cb69e..df132dae265 100644 --- a/test/distributed/test_dynamo_distributed.py +++ b/test/distributed/test_dynamo_distributed.py @@ -421,7 +421,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): self.weight2 = nn.Parameter(torch.randn(512, 512)) def forward(self, x, y): - u0, u1 = y.tolist() + u0, _ = y.tolist() x = torch.cat([x, x]) y = x @ self.weight1 z = (x + y @ self.weight2) * u0 @@ -442,7 +442,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): self.weight2 = nn.Parameter(torch.randn(512, 512)) def forward(self, x, y): - u0, u1 = y.tolist() + u0, _ = y.tolist() a = torch.ones(u0) x = torch.cat([x, x]) y = x @ self.weight1 @@ -466,7 +466,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): def forward(self, x, y): # partition one (contains the u0 def) - u0, u1 = y.tolist() + u0, _ = y.tolist() x = torch.cat([x, x]) y1 = x @ self.weight1 # partition two (contains the variable) @@ -511,7 +511,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): ): super().__init__() layers = [] - for l in range(2): + for _ in range(2): layer = nn.ModuleList( [ nn.LayerNorm(96), @@ -529,7 +529,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): for m in self.layers: x = x.reshape(B * F, T, H) x = m[0](x) - x, attn = m[1].forward(x, x, x) + x, _ = m[1].forward(x, x, x) x = x.reshape(B, F, T, H) return x @@ -937,8 +937,8 @@ class TestMultiProc(DynamoDistributedMultiProcTestCase): @torch.compile() def f(x, y): - zx = x.shape - zy = y.shape + zx = x.shape # noqa: F841 + zy = y.shape # noqa: F841 return x.sum() + y.sum() if self.rank == 0: @@ -967,10 +967,10 @@ class TestMultiProc(DynamoDistributedMultiProcTestCase): @torch.compile() def f(x, y): - z = y + z = y # noqa: F841 print("woof") - zx = x.shape - zy = y.shape + zx = x.shape # noqa: F841 + zy = y.shape # noqa: F841 return x.sum() + y.sum() if self.rank == 0: @@ -999,8 +999,8 @@ class TestMultiProc(DynamoDistributedMultiProcTestCase): @torch.compile() def f(x, y): - zx = x.shape - zy = y.shape + zx = x.shape # noqa: F841 + zy = y.shape # noqa: F841 return x.sum() + y.sum() if self.rank == 0: @@ -1405,7 +1405,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase): model = DDP(model, device_ids=self.device_ids) hidden_states = torch.randn(B, S, H * D).to(device) - attention_scores = model(hidden_states) + model(hidden_states) torch.cuda.synchronize() @patch.object(config, "optimize_ddp", True) @@ -1461,7 +1461,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase): model = DDP(model, device_ids=self.device_ids) hidden_states = torch.randn(B, S, H * 
D).to(device) - attention_scores = model(hidden_states) + model(hidden_states) torch.cuda.synchronize() @patch.object(config, "optimize_ddp", True) @@ -1723,7 +1723,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase): def test_fsdp_orig_params_assert(self): # Test with basic FSDP wrapping (outer wrap around whole model) - m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") + m, inputs, _ = get_model(f"cuda:{self.rank}") fsdp_m = FSDP(m, use_orig_params=False) fsdp_m = torch.compile(fsdp_m) self.assertRaisesRegex( diff --git a/test/distributed/test_functional_api.py b/test/distributed/test_functional_api.py index e401076de7a..b31fdeb94e6 100644 --- a/test/distributed/test_functional_api.py +++ b/test/distributed/test_functional_api.py @@ -130,7 +130,7 @@ class TestExpand(MultiThreadedTestCase): tag, rankset, group_size = ft_c._expand_group(dist.group.WORLD, "bla") self.assertEqual("bla", tag) - my_pg, others = new_subgroups(group_size=2) + my_pg, _ = new_subgroups(group_size=2) tag, rankset, group_size = ft_c._expand_group(my_pg) self.assertEqual(c10d._get_group_tag(my_pg), tag) self.assertEqual(dist.get_process_group_ranks(my_pg), rankset) @@ -588,7 +588,7 @@ class TestCollectivesWithDistributedBackend(DistributedTestBase): def allreduce(t, pg): return ft_c.all_reduce(t, "sum", pg) - compiled_allreduce = torch.compile(allreduce, fullgraph=True) + compiled_allreduce = torch.compile(allreduce, fullgraph=True) # noqa: F841 dist.init_process_group( backend="fake", rank=0, @@ -615,9 +615,7 @@ class TestCollectivesWithDistributedBackend(DistributedTestBase): return batch * 5 compiled_func = torch.compile(func) - ret = compiled_func( - torch.ones((100,), device=device), self.process_group, self.rank - ) + compiled_func(torch.ones((100,), device=device), self.process_group, self.rank) dist.barrier() @@ -715,7 +713,7 @@ class TestFunctionalAutograd(MultiThreadedTestCase): out = compiled(t, self.world_size) out.backward() - res, codes = run_and_get_code(run_with_backward) + _, codes = run_and_get_code(run_with_backward) for code in codes: FileCheck().check_count( "_c10d_functional.all_to_all_single.default", 1, exactly=True diff --git a/test/distributed/test_inductor_collectives.py b/test/distributed/test_inductor_collectives.py index 92a2fd6ee2c..31d65b1c592 100644 --- a/test/distributed/test_inductor_collectives.py +++ b/test/distributed/test_inductor_collectives.py @@ -411,7 +411,7 @@ class TestCollectivesMultiProc(DynamoDistributedMultiProcTestCase): y = self.emb(x) last_dim = y.dim() - 1 y = y.transpose_(0, last_dim).contiguous() - res = _functional_collectives.all_gather_tensor(y, 0, ranks, tag) + _functional_collectives.all_gather_tensor(y, 0, ranks, tag) out = y.transpose_(0, last_dim).contiguous() return out diff --git a/test/distributed/test_launcher.py b/test/distributed/test_launcher.py index e2bd1a510d1..decae9d1c7c 100644 --- a/test/distributed/test_launcher.py +++ b/test/distributed/test_launcher.py @@ -35,7 +35,6 @@ class TestDistributedLaunch(TestCase): def test_launch_user_script(self): nnodes = 1 nproc_per_node = 4 - world_size = nnodes * nproc_per_node sock = get_socket_with_port() with closing(sock): master_port = sock.getsockname()[1] diff --git a/test/distributed/test_store.py b/test/distributed/test_store.py index b2976abd087..bbd075e93a6 100644 --- a/test/distributed/test_store.py +++ b/test/distributed/test_store.py @@ -553,7 +553,7 @@ class LibUvTCPStoreTest(TCPStoreTest): ) with self.assertRaisesRegex(NotImplementedError, err_msg_reg): - store = 
dist.TCPStore( + dist.TCPStore( addr, port, 1, @@ -748,7 +748,7 @@ class RendezvousTCPTest(TestCase): url = self.create_tcp_url() test_store_timeout = timedelta(seconds=0.1) gen0 = dist.rendezvous(url + "&rank=0", timeout=timedelta(seconds=10)) - store0, rank0, size0 = next(gen0) + store0, _, _ = next(gen0) store0.set_timeout(test_store_timeout) # this should time out in 0.1s. If the timeout passed into rendezvous was # not respected, it will take much longer to time out. @@ -766,7 +766,7 @@ class RendezvousTCPTest(TestCase): url = self.create_tcp_url() test_store_timeout = timedelta(seconds=0.1) gen0 = dist.rendezvous(url + "&rank=0", timeout=timedelta(seconds=10)) - store0, rank0, size0 = next(gen0) + store0, _, _ = next(gen0) store0.set_timeout(test_store_timeout) # this should time out in 10s. If the timeout passed into rendezvous was # not respected, it will take much longer to time out. @@ -787,7 +787,7 @@ class RendezvousTCPTest(TestCase): def test_tcp_store_url_with_libuv(self): url = self.create_tcp_url() gen0 = dist.rendezvous(url + "&rank=0&use_libuv=1") - store0, rank0, size0 = next(gen0) + store0, _, _ = next(gen0) self.assertTrue(store0.libuvBackend) @@ -1078,7 +1078,7 @@ class TestClientProtocol(TestCase): thread = threading.Thread(target=listen) thread.start() - store = dist.TCPStore( + dist.TCPStore( host_name="localhost", port=port, world_size=2, diff --git a/test/distributed/test_symmetric_memory.py b/test/distributed/test_symmetric_memory.py index 72d53b94cc1..3c33567c795 100644 --- a/test/distributed/test_symmetric_memory.py +++ b/test/distributed/test_symmetric_memory.py @@ -332,7 +332,6 @@ class SymmetricMemoryTest(MultiProcessTestCase): K = 32 group = dist.group.WORLD rank = self.rank - world_size = self.world_size torch.manual_seed(42 + rank) A_shard = torch.rand(BATCH, M // self.world_size, K, device="cuda") @@ -428,7 +427,6 @@ class SymmetricMemoryTest(MultiProcessTestCase): K = 32 group = dist.group.WORLD rank = self.rank - world_size = self.world_size if gather_dim == 0: leading_dims = (BATCH // self.world_size, M) @@ -513,7 +511,6 @@ class SymmetricMemoryTest(MultiProcessTestCase): K = 32 group = dist.group.WORLD rank = self.rank - world_size = self.world_size torch.manual_seed(42 + rank) A = torch.rand(BATCH, M, K, device="cuda") @@ -546,7 +543,6 @@ class SymmetricMemoryTest(MultiProcessTestCase): K = 32 group = dist.group.WORLD rank = self.rank - world_size = self.world_size torch.manual_seed(42 + rank) A = torch.rand(BATCH, M, K, device="cuda").to(torch.float8_e4m3fn) diff --git a/test/distributions/test_distributions.py b/test/distributions/test_distributions.py index 8de1c1dce87..1756bf5afa8 100644 --- a/test/distributions/test_distributions.py +++ b/test/distributions/test_distributions.py @@ -1314,7 +1314,7 @@ class TestDistributions(DistributionsTestCase): if not msk.all(): counts = np.concatenate([counts[msk], np.sum(counts[~msk], keepdims=True)]) pmf = np.concatenate([pmf[msk], np.sum(pmf[~msk], keepdims=True)]) - chisq, p = scipy.stats.chisquare(counts, pmf * num_samples) + _, p = scipy.stats.chisquare(counts, pmf * num_samples) self.assertGreater(p, failure_rate, message) def _check_enumerate_support(self, dist, examples): @@ -1912,9 +1912,7 @@ class TestDistributions(DistributionsTestCase): @set_default_dtype(torch.double) def test_one_hot_categorical_2d(self): probabilities = [[0.1, 0.2, 0.3], [0.5, 0.3, 0.2]] - probabilities_1 = [[1.0, 0.0], [0.0, 1.0]] p = torch.tensor(probabilities, requires_grad=True) - s = torch.tensor(probabilities_1,
requires_grad=True) self.assertEqual(OneHotCategorical(p).sample().size(), (2, 3)) self.assertEqual( OneHotCategorical(p).sample(sample_shape=(3, 4)).size(), (3, 4, 2, 3) @@ -2074,13 +2072,11 @@ class TestDistributions(DistributionsTestCase): @set_default_dtype(torch.double) def test_relaxed_one_hot_categorical_2d(self): probabilities = [[0.1, 0.2, 0.3], [0.5, 0.3, 0.2]] - probabilities_1 = [[1.0, 0.0], [0.0, 1.0]] temp = torch.tensor([3.0], requires_grad=True) # The lower the temperature, the more unstable the log_prob gradcheck is # w.r.t. the sample. Values below 0.25 empirically fail the default tol. temp_2 = torch.tensor([0.25], requires_grad=True) p = torch.tensor(probabilities, requires_grad=True) - s = torch.tensor(probabilities_1, requires_grad=True) self.assertEqual(RelaxedOneHotCategorical(temp, p).sample().size(), (2, 3)) self.assertEqual( RelaxedOneHotCategorical(temp, p).sample(sample_shape=(3, 4)).size(), @@ -3939,7 +3935,7 @@ class TestDistributions(DistributionsTestCase): for dim in range(2, 5): log_probs = [] lkj = LKJCholesky(dim, concentration=1.0, validate_args=True) - for i in range(2): + for _ in range(2): sample = lkj.sample() sample_tril = tril_matrix_to_vec(sample, diag=-1) log_prob = lkj.log_prob(sample) @@ -6241,7 +6237,7 @@ class TestLazyLogitsInitialization(DistributionsTestCase): except NotImplementedError: pass self.assertNotIn("probs", dist.__dict__, msg=message) - batch_shape, event_shape = dist.batch_shape, dist.event_shape + dist.batch_shape, dist.event_shape self.assertNotIn("probs", dist.__dict__, msg=message) def test_lazy_probs_initialization(self): @@ -6258,7 +6254,7 @@ class TestLazyLogitsInitialization(DistributionsTestCase): except NotImplementedError: pass self.assertNotIn("logits", dist.__dict__, msg=message) - batch_shape, event_shape = dist.batch_shape, dist.event_shape + dist.batch_shape, dist.event_shape self.assertNotIn("logits", dist.__dict__, msg=message) @@ -6565,6 +6561,7 @@ class TestFunctors(DistributionsTestCase): expected_jac = sum( [t1.log_abs_det_jacobian(x1, y1), t2.log_abs_det_jacobian(x2, y2)] ) + self.assertEqual(actual_jac, expected_jac) def test_stack_transform(self): x1 = -1 * torch.arange(1, 101, dtype=torch.float) @@ -6628,18 +6625,18 @@ class TestValidation(DistributionsTestCase): for v in torch.tensor([-2.0, -1.0, 0.0, 1.0, 2.0]): # samples with incorrect shape must throw ValueError only try: - log_prob = d_val.log_prob(v) + d_val.log_prob(v) except ValueError: pass # get sample of correct shape val = torch.full(d_val.batch_shape + d_val.event_shape, v) # check samples with incorrect support try: - log_prob = d_val.log_prob(val) + d_val.log_prob(val) except ValueError as e: if e.args and "must be within the support" in e.args[0]: try: - log_prob = d_nonval.log_prob(val) + d_nonval.log_prob(val) except RuntimeError: pass diff --git a/test/dynamo/test_activation_checkpointing.py b/test/dynamo/test_activation_checkpointing.py index d02140c6e2a..b41bc285a2e 100644 --- a/test/dynamo/test_activation_checkpointing.py +++ b/test/dynamo/test_activation_checkpointing.py @@ -1260,7 +1260,7 @@ Non-primal fwd outputs from model w/o backward hook: {mod_no_hook_fwd_outputs_no super().__init__() def forward(self, x, ys): - a = torch.sin(x) + a = torch.sin(x) # noqa: F841 b = torch.cos(ys[0]) c = torch.cos(ys[1]) return (x, [b, c]) diff --git a/test/dynamo/test_aot_autograd.py b/test/dynamo/test_aot_autograd.py index f8cc7e16da2..9f76483f2d5 100644 --- a/test/dynamo/test_aot_autograd.py +++ b/test/dynamo/test_aot_autograd.py @@ -453,7 
+453,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): a = torch.randn(3, 3, requires_grad=True) b = torch.randn(3, 3, requires_grad=True) a1, a2 = a.clone(), a.clone() - b1, b2 = b.clone(), b.clone() + _, b2 = b.clone(), b.clone() failure_reason = None @@ -481,7 +481,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): c = torch.randn(3, 3, requires_grad=True) d = torch.randn(3, 3, requires_grad=True) c3, c4 = c.clone(), c.clone() - d3, d4 = d.clone(), d.clone() + _, d4 = d.clone(), d.clone() f = torch._dynamo.optimize(cc, guard_fail_fn=guard_fail_fn)(F()) f(c3, c3, 3, 3) @@ -507,7 +507,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): b = torch.randn(3, 3, requires_grad=True) z = a a1, a2 = a.clone(), a.clone() - b1, b2 = b.clone(), b.clone() + _, b2 = b.clone(), b.clone() failure_reason = None @@ -543,7 +543,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): a = torch.randn(3, 3, requires_grad=True) b = torch.randn(3, 3, requires_grad=True) a1, a2 = a.clone(), a.clone() - b1, b2 = b.clone(), b.clone() + _, b2 = b.clone(), b.clone() failure_reason = None @@ -571,7 +571,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): c = torch.randn(3, 3, requires_grad=True) d = torch.randn(3, 3, requires_grad=True) c3, c4 = c.clone(), c.clone() - d3, d4 = d.clone(), d.clone() + _, d4 = d.clone(), d.clone() f = torch._dynamo.optimize(cc, guard_fail_fn=guard_fail_fn)(F()) f([3, 2, 1], [4, 5, 6], c3, c3) @@ -593,7 +593,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): a = torch.randn(3, 3, requires_grad=True) b = torch.randn(3, 3, requires_grad=True) a1, a2 = a.clone(), a.clone() - b1, b2 = b.clone(), b.clone() + _, b2 = b.clone(), b.clone() failure_reason = None @@ -621,7 +621,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): c = torch.randn(3, 3, requires_grad=True) d = torch.randn(3, 3, requires_grad=True) c3, c4 = c.clone(), c.clone() - d3, d4 = d.clone(), d.clone() + _, d4 = d.clone(), d.clone() f = torch._dynamo.optimize(cc, guard_fail_fn=guard_fail_fn)(F()) f(c3, c3) @@ -642,7 +642,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): a = torch.randn(3, 3, requires_grad=True) b = torch.randn(3, 3, requires_grad=True) a1, a2, a3, a4 = a.clone(), a.clone(), a.clone(), a.clone() - b1, b2, b3, b4 = b.clone(), b.clone(), b.clone(), b.clone() + _, b2, b3, b4 = b.clone(), b.clone(), b.clone(), b.clone() failure_reason = None @@ -670,7 +670,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): c = torch.randn(3, 3, requires_grad=True) d = torch.randn(3, 3, requires_grad=True) c3, c4 = c.clone(), c.clone() - d3, d4 = d.clone(), d.clone() + _, d4 = d.clone(), d.clone() f = torch._dynamo.optimize(cc, guard_fail_fn=guard_fail_fn)(F()) f(a3, b3, c3, c3) @@ -1017,7 +1017,7 @@ SeqNr|OrigAten|SrcFn|FwdSrcFn activities=[torch.profiler.ProfilerActivity.CPU], record_shapes=True, ) as kineto_prof: - res = model_instance(*args) + model_instance(*args) bwd_set = set() prof_str = "SeqNr|Thread|FwdThread|Name\n" for event in kineto_prof.events(): @@ -1191,7 +1191,7 @@ SeqNr|OrigAten|SrcFn|FwdSrcFn x = torch.randn(3, requires_grad=True) with self.assertRaisesRegex(RuntimeError, "Cannot access data pointer"): - y = torch.compile(f, backend="aot_eager", fullgraph=True)(x) + torch.compile(f, backend="aot_eager", fullgraph=True)(x) self.assertTrue(backward_called) # We don't know how to catch multiple mutations to the same memory 
location diff --git a/test/dynamo/test_aot_autograd_cache.py b/test/dynamo/test_aot_autograd_cache.py index 6dba4f0b9ee..228c45a4ff6 100644 --- a/test/dynamo/test_aot_autograd_cache.py +++ b/test/dynamo/test_aot_autograd_cache.py @@ -157,7 +157,7 @@ class AOTAutogradCacheTests(InductorTestCase): with torch.autograd._force_original_view_tracking(True): compiled_fn = torch.compile(fn) - out = compiled_fn(torch.rand(2, 3)) + compiled_fn(torch.rand(2, 3)) self.assertEqual(counters["aot_autograd"]["autograd_cache_miss"], 1) self.assertEqual(counters["aot_autograd"]["autograd_cache_bypass"], 1) @@ -654,7 +654,7 @@ class AOTAutogradCachePicklerTests(torch._dynamo.test_case.TestCase): def fn(x): return x.sin().cos() - def fn2(x): + def fn2(x): # noqa: F841 y = x.sin() z = y.cos() return z diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py index 3b2ee9ad8d6..5ba4c71b3ea 100644 --- a/test/dynamo/test_autograd_function.py +++ b/test/dynamo/test_autograd_function.py @@ -760,7 +760,7 @@ class GraphModule(torch.nn.Module): def backward(ctx, gO): return torch.tensor(float("nan")).expand(10, 10) - def run_fn(a): + def run_fn(a): # noqa: F841 out = MyFunc2.apply(a) return out.sum() @@ -837,11 +837,11 @@ class GraphModule(torch.nn.Module): x = torch.randn(5, 5, requires_grad=True) y = torch.randn(5, 5, requires_grad=True) - q, p = Identity.apply(x, y) + Identity.apply(x, y) a = torch.rand(1, 2) b = torch.rand(1, requires_grad=True) - view_a = MyFn.apply(a) + MyFn.apply(a) a = torch.ones(2, requires_grad=True) b = torch.ones(2, requires_grad=True) @@ -860,7 +860,7 @@ class GraphModule(torch.nn.Module): MyFn2.apply(c, d) base = torch.rand(10, requires_grad=True) - foo = MyFn3.apply(base, False) + MyFn3.apply(base, False) test() opt_test = torch.compile(test, backend="eager") diff --git a/test/dynamo/test_backends.py b/test/dynamo/test_backends.py index 3d4443978e5..84379aa599c 100644 --- a/test/dynamo/test_backends.py +++ b/test/dynamo/test_backends.py @@ -267,9 +267,8 @@ class TestCustomBackendAPI(torch._dynamo.test_case.TestCase): self.assertTrue(backend_run) def test_lookup_backend(self): - from torch._dynamo import list_backends, lookup_backend + from torch._dynamo import lookup_backend - backends = list_backends() backend_run = False def my_compiler(gm, example_inputs): diff --git a/test/dynamo/test_backward_higher_order_ops.py b/test/dynamo/test_backward_higher_order_ops.py index 2f48c41f7bb..14e3f2e044c 100644 --- a/test/dynamo/test_backward_higher_order_ops.py +++ b/test/dynamo/test_backward_higher_order_ops.py @@ -247,8 +247,6 @@ class GraphModule(torch.nn.Module): with compiled_autograd._enable(compiler_fn): out.backward(grad_out) - graph = None - if __name__ == "__main__": from torch._dynamo.test_case import run_tests diff --git a/test/dynamo/test_bytecode_utils.py b/test/dynamo/test_bytecode_utils.py index 0e8b74c6fdb..fa906a2ac16 100644 --- a/test/dynamo/test_bytecode_utils.py +++ b/test/dynamo/test_bytecode_utils.py @@ -518,7 +518,7 @@ def fn(): insts = bytecode_transformation.bytecode_from_template(fn, noprefix=False) self.assertEqual(insts[-1].opname, "NOP") insts_i = 0 - for i, inst in enumerate(dis_insts): + for inst in dis_insts: if inst.opname == "RETURN_CONST": self.assertEqual(insts[insts_i].opname, "LOAD_CONST") insts_i += 1 @@ -538,7 +538,7 @@ def fn(): x = x + 1 except NotImplementedError: x = x + 1 - except Exception as e: + except Exception: x = x + 1 return x diff --git a/test/dynamo/test_compiler_bisector.py 
b/test/dynamo/test_compiler_bisector.py index 70ef1c12d27..a5a350c0d1a 100644 --- a/test/dynamo/test_compiler_bisector.py +++ b/test/dynamo/test_compiler_bisector.py @@ -43,7 +43,7 @@ class TestCompilerBisector(TestCase): return lib def test_bad_decomp(self): - mod = import_module("torch._inductor.compile_fx") + import_module("torch._inductor.compile_fx") def bad_exp_decomp(self, rate=1, generator=None): assert generator is None @@ -86,7 +86,7 @@ class TestCompilerBisector(TestCase): vq_compiled = torch.compile(vq) x = torch.randn(4, 400, 256).cuda() with torch._dynamo.utils.preserve_rng_state(): - out = vq(x) + vq(x) out_compiled = vq_compiled(x) return not out_compiled.isnan().any() @@ -150,7 +150,6 @@ class TestCompilerBisector(TestCase): self.assertTrue("inductor_fallback_random" in out.debug_info) def test_crossref(self): - test_ns = "bisect_ops" with _scoped_library(self.test_ns, "FRAGMENT") as lib: lib.define("foo(Tensor x) -> Tensor") op = self.get_op("foo") diff --git a/test/dynamo/test_comptime.py b/test/dynamo/test_comptime.py index 17cf9ef13e7..15d78758fdb 100644 --- a/test/dynamo/test_comptime.py +++ b/test/dynamo/test_comptime.py @@ -117,7 +117,7 @@ def forward(self, L_x_ : torch.Tensor): return y + 3 - def munge_disas(s): + def munge_disas(s): # noqa: F841 re.sub( r"^(?: +\d+)?(?: +(-->)) \+\d+ ([A-Za-z0-9_]+)", "\1 \3", @@ -271,7 +271,7 @@ y = FakeTensor(..., size=(2,)) y = g(y) return y + 3 - def munge_filenames(s): + def munge_filenames(s): # noqa: F841 return re.sub(r'File "[^"]+", line \d+', 'File "X", line X', s) f(torch.randn(2)) @@ -389,7 +389,7 @@ y = FakeTensor(..., size=(2,)) @torch.compile(backend=cnt) def f(x): y = x * 2 - lit = 2 + lit = 2 # noqa: F841 @comptime def _(ctx): diff --git a/test/dynamo/test_ctx_manager.py b/test/dynamo/test_ctx_manager.py index e8076436c7d..a9d9606635e 100644 --- a/test/dynamo/test_ctx_manager.py +++ b/test/dynamo/test_ctx_manager.py @@ -268,15 +268,13 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): cur_stream.wait_stream(new_stream) x = torch.add(x, 4) - is_idle = cur_stream.query() + cur_stream.query() cur_stream.synchronize() with torch.cuda.stream(new_stream): x = torch.add(x, 5) new_stream.synchronize() - is_equal = cur_stream == new_stream - x = torch.relu(x) x = torch.cos(x) return x @@ -439,7 +437,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): x = torch.add(x, 3) event = cur_stream.record_event() - is_idle = event.query() + event.query() new_stream.wait_event(event) with torch.cuda.stream(new_stream): @@ -481,7 +479,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): x = torch.add(x, 3) event = cur_stream.record_event() - is_idle = event.query() + event.query() new_stream.wait_event(event) with torch.cuda.stream(new_stream): @@ -567,7 +565,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): real_device = real.device real_dtype = real.dtype - graph, guards = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) + graph, _ = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) exported = graph(torch.tensor([0.5])) self.assertEqual(exported.device, real_device) self.assertEqual(exported.dtype, real_dtype) @@ -676,7 +674,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): real_device = real.device real_dtype = real.dtype - graph, guards = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) + graph, _ = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) exported = graph(torch.tensor([0.5])) self.assertEqual(exported.device, 
real_device) self.assertEqual(exported.dtype, real_dtype) @@ -850,7 +848,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): real_device = real.device real_dtype = real.dtype - graph, guards = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) + graph, _ = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) exported = graph(torch.tensor([0.5])) self.assertEqual(exported.device, real_device) self.assertEqual(exported.dtype, real_dtype) @@ -876,7 +874,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): real_device = real.device real_dtype = real.dtype - graph, guards = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) + graph, _ = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) exported = graph(torch.tensor([0.5])) self.assertEqual(exported.device, real_device) self.assertEqual(exported.dtype, real_dtype) @@ -1297,7 +1295,7 @@ class GraphModule(torch.nn.Module): eager = EagerAndRecordGraphs() torch.compile(fn, backend=eager, fullgraph=False)(torch.randn(())) - def check_graph(actual, expected): + def check_graph(actual, expected): # noqa: F841 self.assertExpectedInline(actual, expected) graph = eager.graphs[0] @@ -1342,7 +1340,7 @@ class GraphModule(torch.nn.Module): for i in range(2): torch._dynamo.reset() - ctx_wrapper, mode = ctx_wrappers[i] + ctx_wrapper, _ = ctx_wrappers[i] ctx_wrapper_inverse, mode_inverse = ctx_wrappers[(i + 1) % 2] def fn(x): @@ -1373,7 +1371,7 @@ class GraphModule(torch.nn.Module): for i in range(2): torch._dynamo.reset() - ctx_wrapper, mode = ctx_wrappers[i] + ctx_wrapper, _ = ctx_wrappers[i] ctx_wrapper_inverse, mode_inverse = ctx_wrappers[(i + 1) % 2] def fn(x): diff --git a/test/dynamo/test_cudagraphs.py b/test/dynamo/test_cudagraphs.py index 58985655f72..ee34e421cbf 100644 --- a/test/dynamo/test_cudagraphs.py +++ b/test/dynamo/test_cudagraphs.py @@ -63,7 +63,7 @@ class TestAotCudagraphs(torch._dynamo.test_case.TestCase): @torch.compile(backend="cudagraphs") def fn(x, y): - for i in range(N_ITERS): + for _ in range(N_ITERS): loss = model(x, y).sum() loss.backward() @@ -80,7 +80,7 @@ class TestAotCudagraphs(torch._dynamo.test_case.TestCase): @torch.compile(backend="cudagraphs") def fn(x, y): - for i in range(N_ITERS): + for _ in range(N_ITERS): loss = model(x, y).sum() loss.backward() @@ -96,7 +96,7 @@ class TestAotCudagraphs(torch._dynamo.test_case.TestCase): @torch.compile(backend="cudagraphs") def fn(x, y): - for i in range(N_ITERS): + for _ in range(N_ITERS): loss = model(x, y).sum() loss.backward() diff --git a/test/dynamo/test_debug_utils.py b/test/dynamo/test_debug_utils.py index d4622c6e601..f86bdbb8584 100644 --- a/test/dynamo/test_debug_utils.py +++ b/test/dynamo/test_debug_utils.py @@ -45,7 +45,7 @@ def forward(self, x_1): """, # NOQA: B950 ) - fp64_model, fp64_examples = debug_utils.cast_to_fp64(fx, (x,)) + _, fp64_examples = debug_utils.cast_to_fp64(fx, (x,)) self.assertEqual(fp64_examples, (x.to(torch.float64),)) self.assertExpectedInline( @@ -79,7 +79,7 @@ def forward(self, x_1): _tensor_constant0 ) _tensor_constant0 = None - index: "f32[6144, 4190]" = torch.ops.aten.index.Tensor( + index: "f32[6144, 4190]" = torch.ops.aten.index.Tensor( # noqa: F841 primals_48, [None, lift_fresh_copy] ) lift_fresh_copy = None diff --git a/test/dynamo/test_decorators.py b/test/dynamo/test_decorators.py index bf24225f66a..bdf506416c0 100644 --- a/test/dynamo/test_decorators.py +++ b/test/dynamo/test_decorators.py @@ -83,7 +83,7 @@ class DecoratorTests(torch._dynamo.test_case.TestCase): # This 
behavior is not ideal, but supporting it would add overhead # to callsites of eval_frame.innermost_fn. A warning would also be very noisy. - w = torch._dynamo.disable(fn=wrapper, recursive=True) + torch._dynamo.disable(fn=wrapper, recursive=True) def test_disable_nn_modules_forward_hook(self): class SimpleLinear(torch.nn.Module): @@ -543,7 +543,7 @@ class DecoratorTests(torch._dynamo.test_case.TestCase): return v1, v2, v3, v4, v5, v6, v7, v8, v9 a, b, c = A(), B(), C() - v1, v2, v3, v4, v5, v6, v7, v8, v9 = fn(a, b, c) + v1, v2, v3, v4, v5, _, v7, v8, v9 = fn(a, b, c) self.assertEqual(v1, (A, 1)) self.assertEqual(v2, (A, 2)) diff --git a/test/dynamo/test_exc.py b/test/dynamo/test_exc.py index 6ae15a139e9..2a3eb5cea59 100644 --- a/test/dynamo/test_exc.py +++ b/test/dynamo/test_exc.py @@ -92,7 +92,7 @@ from user code: raise NotImplementedError # Ensure graph break is not possible - for i in range(3): + for _ in range(3): comptime(f) torch.compile(fn001, backend="eager")(torch.randn(1)) diff --git a/test/dynamo/test_exceptions.py b/test/dynamo/test_exceptions.py index d6613d84560..40f9a3b8395 100644 --- a/test/dynamo/test_exceptions.py +++ b/test/dynamo/test_exceptions.py @@ -32,7 +32,7 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): try: x = torch.sin(x) raise NotImplementedError - except (NotImplementedError, AttributeError) as e: + except (NotImplementedError, AttributeError): x = torch.sigmoid(x) return x @@ -89,7 +89,7 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): try: x = torch.sin(x) raise NotImplementedError("Not implemented") - except NotImplementedError as e: + except NotImplementedError: x = torch.sigmoid(x) try: x = torch.cos(x) @@ -131,7 +131,7 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): try: x = torch.cos(x) raise NotImplementedError("Not implemented") - except NotImplementedError as e: + except NotImplementedError: x = torch.sigmoid(x) raise @@ -144,10 +144,10 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): return x x = torch.randn(4) - ref = fn(x) + fn(x) # Cant use fullgraph=True because RERAISE is not supported opt_fn = torch.compile(fn, backend="eager") - res = opt_fn(x) + opt_fn(x) # TODO(anijain2305) - does not work with fullgraph=True def test_exception_with_ctx_manager(self): @@ -157,7 +157,7 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): with torch.no_grad(): x = torch.sin(x) raise NotImplementedError("Not implemented") - except NotImplementedError as e: + except NotImplementedError: x = torch.sigmoid(x) return x diff --git a/test/dynamo/test_export.py b/test/dynamo/test_export.py index 40c549bedb0..ef537692173 100644 --- a/test/dynamo/test_export.py +++ b/test/dynamo/test_export.py @@ -48,9 +48,9 @@ class ExportTests(torch._dynamo.test_case.TestCase): lc_key = state[0] lc_val = state[1] bar = [] - for i in range(0, 4): + for _ in range(0, 4): bar2 = [] - for j in range(0, 3): + for _ in range(0, 3): bar2.append( lc_key + lc_val + torch.tensor([0.1, 0.25, 0.4, 0.5, 0.1]) ) @@ -96,7 +96,7 @@ class ExportTests(torch._dynamo.test_case.TestCase): def func(x, y): return x - exported = torch._dynamo.export(func, same_signature=False)(*inps) + torch._dynamo.export(func, same_signature=False)(*inps) def test_no_tensor_computation(self): inp = [torch.randn(3)] @@ -645,9 +645,9 @@ def forward(self, x, y): lc_key = state[0] lc_val = state[1] bar = [] - for i in range(0, 4): + for _ in range(0, 4): bar2 = [] - for j in range(0, 3): + for _ in range(0, 3): bar2.append( lc_key + lc_val + torch.tensor([0.1, 0.25, 0.4, 0.5, 
0.1]) ) @@ -1394,7 +1394,7 @@ def forward(self, x, y): module = MyModule() real_result = module(torch.tensor([1.0, 1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) + graph, _ = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) # Tensor input can be almost anything here, and the result will capture what we # made constant at compile time. @@ -1418,7 +1418,7 @@ def forward(self, x, y): module = MyModule() real_result = module(torch.tensor([1.0, 1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) + graph, _ = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) # Tensor input can be almost anything here, and the result will capture what we # made constant at compile time. @@ -1442,7 +1442,7 @@ def forward(self, x, y): module = MyModule() real_result = module(torch.tensor([1.0, 1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) + graph, _ = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) # Tensor input can be almost anything here, and the result will capture what we # made constant at compile time. @@ -1464,7 +1464,7 @@ def forward(self, x, y): module = MyModule() real_result = module(torch.tensor([2.0, 2.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([2.0, 2.0])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2.0, 2.0])) # Tensor input can be almost anything here, and the result will capture what we # made constant at compile time. @@ -1493,7 +1493,7 @@ def forward(self, x, y): # X is negative, so .item() < 0, which means we return y self.assertEqual(real_result, torch.tensor([0.5])) - graph, guards = torch._dynamo.export(module)(torch.tensor([-1])) + graph, _ = torch._dynamo.export(module)(torch.tensor([-1])) result = graph(torch.tensor([2])) # X is positive, but we compiled helper_fn to return None, so it will still return y self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1520,7 +1520,7 @@ def forward(self, x, y): # X is positive, so .item() > 0, which means we return y * x self.assertEqual(real_result, torch.tensor([1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([2])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2])) result = graph(torch.tensor([-0.5])) # X is negative, but we compiled helper_fn to return x, so it will still return y * x self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1547,7 +1547,7 @@ def forward(self, x, y): # X is negative, so .item() < 0, which means we return y self.assertEqual(real_result, torch.tensor([0.5])) - graph, guards = torch._dynamo.export(module)(torch.tensor([-1])) + graph, _ = torch._dynamo.export(module)(torch.tensor([-1])) result = graph(torch.tensor([2])) # X is positive, but we compiled helper_fn to return None, so it will still return y self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1574,7 +1574,7 @@ def forward(self, x, y): # X is positive, so .item() > 0, which means we return y * x self.assertEqual(real_result, torch.tensor([1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([2])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2])) result = graph(torch.tensor([-0.5])) # X is negative, but we compiled helper_fn to return x, so it will still return y * x self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1601,7 +1601,7 @@ def forward(self, x, y): # X is positive, so .item() > 0, which means we return y * x self.assertEqual(real_result, torch.tensor([1.0])) - graph, guards = 
torch._dynamo.export(module)(torch.tensor([2])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2])) result = graph(torch.tensor([-0.5])) # X is negative, but we compiled helper_fn to return x, so it will still return y * x self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1622,7 +1622,7 @@ def forward(self, x, y): module = MyModule() module.val = "A" resA = module(torch.tensor([2])) - graph, guards = torch._dynamo.export(module)(torch.tensor([2])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2])) module.val = "B" resB = graph(torch.tensor([2])) self.assertTrue(torch._dynamo.utils.same(resA, resB)) @@ -1647,7 +1647,7 @@ def forward(self, x, y): model = Bob(0.5, 0.3) inp = torch.ones(3, 4) - graph, guards = torch._dynamo.export(model)(inp) + graph, _ = torch._dynamo.export(model)(inp) self.assertEqual(model(inp), graph(inp)) def test_export_with_constant_in_unspecialized_nn_module(self): @@ -1707,7 +1707,7 @@ def forward(self, x, y): return x.cos() with self.assertRaises(AssertionError): - graph, _ = torch._dynamo.export( + torch._dynamo.export( f, (torch.randn(5)), aten_graph=False, @@ -1889,7 +1889,7 @@ def forward(self, x, y): mods = [Module(), Module2()] for mod in mods: x = torch.randn(2, 2) - out_graph, guards = torch._dynamo.export(mod)(x) + out_graph, _ = torch._dynamo.export(mod)(x) self.assertExpectedInline( out_graph.code.strip(), """\ @@ -1978,7 +1978,7 @@ def forward(self, l_x_): torch._dynamo.exc.Unsupported, "zero-sized tensor", ): - out_graph, _ = torch._dynamo.export(mod)(xs) + torch._dynamo.export(mod)(xs) def test_export_meta_val(self): def f(x, y, z): @@ -2785,7 +2785,7 @@ def forward(self, x): return b y = torch.tensor([8, 8, 6]) - gm, _ = torch._dynamo.export( + torch._dynamo.export( f, aten_graph=True, tracing_mode="symbolic", @@ -3039,7 +3039,7 @@ def forward(self, x): return self.my_lin(x) mod, input_tensor = BasicModule(), torch.randn(2, 3) - gm, guard = torch._dynamo.export(mod, aten_graph=True)(input_tensor) + gm, _ = torch._dynamo.export(mod, aten_graph=True)(input_tensor) ref = mod(x=input_tensor) res = gm(x=input_tensor) self.assertTrue(torch._dynamo.utils.same(ref, res)) @@ -3058,9 +3058,7 @@ def forward(self, x): torch.randn(2, 3), torch.randn(2, 3), ) - gm, guard = torch._dynamo.export(mod, aten_graph=True)( - input_tensor, input_tensor2 - ) + gm, _ = torch._dynamo.export(mod, aten_graph=True)(input_tensor, input_tensor2) ref = mod(input_tensor, input_tensor2) res = gm(input_tensor, input_tensor2) self.assertTrue(torch._dynamo.utils.same(ref, res)) @@ -3086,7 +3084,7 @@ def forward(self, x): torch._dynamo.exc.UserError, "Dynamic control flow is not supported at the moment", ): - gm, _ = torch._dynamo.export(f, aten_graph=True)(torch.randn(5, 6)) + torch._dynamo.export(f, aten_graph=True)(torch.randn(5, 6)) @config.patch(assume_static_by_default=False) def test_export_persist_assert(self): @@ -3094,7 +3092,7 @@ def forward(self, x): assert x[0].sum() > 4, "Shape must be more than 4" return x.cos() + x.sin() - gm, guard = torch._dynamo.export(f, aten_graph=True, tracing_mode="symbolic")( + gm, _ = torch._dynamo.export(f, aten_graph=True, tracing_mode="symbolic")( torch.ones(5, 4, 6) ) @@ -3642,7 +3640,7 @@ G['macademia'], accessed at: arglebargle = torch.randn(3) def f(y): - x = arglebargle + x = arglebargle # noqa: F841 return y torch._dynamo.export(f)(torch.randn(3)) @@ -3701,7 +3699,7 @@ G['macademia'], accessed at: inputs = (torch.randn(10, 2, 2),) dynamic_shapes = ({0: torch.export.Dim("dim")},) for aten_graph in 
[True, False]: - gm = torch._dynamo.export( + torch._dynamo.export( model, dynamic_shapes=dynamic_shapes, aten_graph=aten_graph, @@ -4128,7 +4126,7 @@ def forward(self, a, b, l_x_, d_true_branch, c_false_branch): with self.assertRaises( torch._dynamo.exc.Unsupported, ): - out_graph, _ = torch._dynamo.export(mod, xs) + torch._dynamo.export(mod, xs) def test_param_buffer_safe_from_mutation_simple(self): class Module(torch.nn.Module): @@ -4170,7 +4168,7 @@ def forward(self, a, b, l_x_, d_true_branch, c_false_branch): return x.sum() + self.buffer1.sum() + self.child(x) gm, _ = torch._dynamo.export(Module(), torch.ones(5), aten_graph=False) - for name, buffer in gm.named_buffers(): + for _, buffer in gm.named_buffers(): self.assertTrue(torch.allclose(buffer, torch.zeros(5))) def test_predispatch_with_higher_order(self): diff --git a/test/dynamo/test_frame_init.py b/test/dynamo/test_frame_init.py index 97aac1870e9..f0dec020fbc 100644 --- a/test/dynamo/test_frame_init.py +++ b/test/dynamo/test_frame_init.py @@ -105,7 +105,7 @@ class FrameInitTests(torch._dynamo.test_case.TestCase): ) return None - for callback in [callback1, callback2]: + for _ in [callback1, callback2]: torch._dynamo.reset() expected_varargs_output = target_with_varargs( 1, 2, 3, 4, name1=1, name2=2, name3=3 diff --git a/test/dynamo/test_functions.py b/test/dynamo/test_functions.py index 1cedc01a2b2..b7dbe07fdea 100644 --- a/test/dynamo/test_functions.py +++ b/test/dynamo/test_functions.py @@ -587,7 +587,7 @@ class FunctionTests(torch._dynamo.test_case.TestCase): @make_test def test_range2(x, y): r = x + y - for i in range(x.size(0) + 2): + for _ in range(x.size(0) + 2): r = r / y return r @@ -1128,7 +1128,7 @@ class FunctionTests(torch._dynamo.test_case.TestCase): @make_test def test_module_constant(x, y): r = x + y - for i in range(torch._dynamo.testing.three): + for _ in range(torch._dynamo.testing.three): r = r / y return r @@ -2661,7 +2661,6 @@ class GraphModule(torch.nn.Module): dynamo_result = torch.compile(fn, backend=cnts)(udf_mul, udf_mul, x) eager_result = fn(udf_mul, udf_mul, x) - gm = backend.graphs[0] self.assertEqual(eager_result, dynamo_result) if torch._dynamo.config.assume_static_by_default: self.assertExpectedInline( @@ -2708,7 +2707,6 @@ class GraphModule(torch.nn.Module): dynamo_result = torch.compile(fn, backend=cnts)(udf_mul, udf_add, x) eager_result = fn(udf_mul, udf_add, x) - gm = backend.graphs[0] self.assertEqual(eager_result, dynamo_result) if torch._dynamo.config.assume_static_by_default: self.assertExpectedInline( @@ -2759,7 +2757,6 @@ class GraphModule(torch.nn.Module): dynamo_result = torch.compile(fn, backend=cnts)(udf_mul, x) eager_result = fn(udf_mul, x) - gm = backend.graphs[0] self.assertEqual(eager_result, dynamo_result) if torch._dynamo.config.assume_static_by_default: self.assertExpectedInline( @@ -2807,7 +2804,6 @@ class GraphModule(torch.nn.Module): dynamo_result = torch.compile(fn, backend=cnts)(udf_mul2, x) eager_result = fn(udf_mul2, x) - gm = backend.graphs[0] self.assertEqual(eager_result, dynamo_result) if torch._dynamo.config.assume_static_by_default: self.assertExpectedInline( @@ -2853,7 +2849,7 @@ class GraphModule(torch.nn.Module): x = torch.randn(2, 2) fn = torch.compile(fn, backend=cnts, fullgraph=True) - dynamo_result = fn(lambda0, lambda1, x) + fn(lambda0, lambda1, x) self.assertEqual(cnts.frame_count, 1) fn(lambda1, lambda0, x) @@ -2880,7 +2876,7 @@ class GraphModule(torch.nn.Module): x = torch.randn(2, 2) fn2 = torch.compile(fn2, backend=cnts, fullgraph=True) - 
dynamo_result = fn2(lambda0, lambda1, [x]) + fn2(lambda0, lambda1, [x]) self.assertEqual(cnts.frame_count, 1) # start over lambda4 = functools.partial(multiply, y=3, x=torch.randn(3, 3)) @@ -3047,7 +3043,7 @@ class GraphModule(torch.nn.Module): opt_fn_dtype = torch.compile(func_dtype, backend=cnts_1) a = torch.zeros(3, dtype=typ) for arg in dt_args: - r = opt_fn_dtype(a, arg) + opt_fn_dtype(a, arg) # each should produce an identical arg self.assertEqual(cnts_1.frame_count, 1) @@ -3055,7 +3051,7 @@ class GraphModule(torch.nn.Module): opt_fn_info = torch.compile(func_info, backend=cnts_2) info_args = [info_func(dt) for dt in dt_args] for arg in info_args: - r = opt_fn_info(a, arg) + opt_fn_info(a, arg) # each should produce an identical arg self.assertEqual(cnts_2.frame_count, 1) @@ -3259,7 +3255,7 @@ class GraphModule(torch.nn.Module): test(10, 1, -3) # Fuzz testing - for i in range(100): + for _ in range(100): args = self.gen_random_range_args() print("testing :", args) test(*args) @@ -3285,7 +3281,7 @@ class GraphModule(torch.nn.Module): test(range(10, 20, 2), 1, expected=12) # Fuzz testing - for i in range(100): + for _ in range(100): range_args = self.gen_random_range_args() r = range(*range_args) @@ -3348,7 +3344,7 @@ class GraphModule(torch.nn.Module): return slice(r_item(), r_item(), r_item(False)) # Fuzz testing - for i in range(100): + for _ in range(100): range_args = self.gen_random_range_args() r = range(*range_args) # generate random slice @@ -3384,8 +3380,8 @@ class GraphModule(torch.nn.Module): idx_size = [10] idx_size[random.randint(0, 0)] = random.randint(1, 8) t = tuple(idx_size) - src_size = [random.randint(1, 5) + s for s in idx_size] - idx = torch.empty(t) + src_size = [random.randint(1, 5) + s for s in idx_size] # noqa: F841 + idx = torch.empty(t) # noqa: F841 fn() @@ -3412,7 +3408,7 @@ class GraphModule(torch.nn.Module): ) t1 = make_q_tensor() t2 = make_kv_tensor() - t3 = t1 + t2 + t3 = t1 + t2 # noqa: F841 func() @@ -3420,7 +3416,7 @@ class GraphModule(torch.nn.Module): @torch.compile(backend="eager") def fn(): t = torch.ones(2) - y = t.to("meta") + y = t.to("meta") # noqa: F841 fn() @@ -3581,7 +3577,7 @@ class GraphModule(torch.nn.Module): y += 1 return x - l = list(zip([a, b], map(f, [1, 2, 3, 4]))) + l = list(zip([a, b], map(f, [1, 2, 3, 4]))) # noqa: F841 return a + y @make_test @@ -4182,7 +4178,6 @@ class DefaultsTests(torch._dynamo.test_case.TestCase): disallowed(g) - f_opt = torch._dynamo opt_f = torch.compile(f, backend="eager") opt_f() f() diff --git a/test/dynamo/test_graph_deduplication.py b/test/dynamo/test_graph_deduplication.py index 544dea24021..cc95db79ff3 100644 --- a/test/dynamo/test_graph_deduplication.py +++ b/test/dynamo/test_graph_deduplication.py @@ -28,7 +28,7 @@ class GraphDededuplicationTests(TestCase): return z def fn(x, y): - o0 = inner_fn(x, y) + _o0 = inner_fn(x, y) o1 = torch.sin(y) o2 = inner_fn(x, o1) o3 = inner_fn(x, y) @@ -448,7 +448,7 @@ class GraphModule(torch.nn.Module): def fn(x, y): x0 = torch.sin(x) - y0 = torch.cos(y) + _y0 = torch.cos(y) # o0 = inner_fn(x0, y0) # o1 = inner_fn(x0, o0) o2 = inner_fn2(x0, y) diff --git a/test/dynamo/test_graph_region_tracker.py b/test/dynamo/test_graph_region_tracker.py index c701ede3d4c..584079f1b1d 100644 --- a/test/dynamo/test_graph_region_tracker.py +++ b/test/dynamo/test_graph_region_tracker.py @@ -56,7 +56,7 @@ class GraphRegionTrackerTests(TestCase): return z def fn(x, y): - o0 = inner_fn(x, y) + _o0 = inner_fn(x, y) o1 = torch.sin(y) o2 = inner_fn(x, o1) o3 = inner_fn(x, y) diff --git 
a/test/dynamo/test_higher_order_ops.py b/test/dynamo/test_higher_order_ops.py index ae3fd328b70..e2f12052145 100644 --- a/test/dynamo/test_higher_order_ops.py +++ b/test/dynamo/test_higher_order_ops.py @@ -425,7 +425,7 @@ class GraphModule(torch.nn.Module): def test_wrap_pytree_kwargs(self): def f(x, y, z): def fn(*, x, y, z): - z1, z2 = z + z1, _ = z return (x * 2) + y + z1 return wrap(fn, x=x, y=y, z=z) @@ -459,7 +459,6 @@ class GraphModule(torch.nn.Module): def test_capture_constants(self): x = torch.randn(3, 3) - y = 4.0 def fn(x, y, z): if z: @@ -1719,9 +1718,6 @@ class GraphModule(torch.nn.Module): self._test_wrap_simple(f, default_args_generator((x, y, 8)), arg_count) def test_map_subgraph_name_is_valid(self): - backend = EagerAndRecordGraphs() - cnt = CompileCounterWithBackend(backend) - xs = torch.randn(2, 3, 3) y = torch.randn(3) @@ -1760,8 +1756,6 @@ def forward(self, child : torch.Tensor, l_y_ : torch.Tensor): ) def test_map_multi_return(self): - cnt = CompileCounter() - def f(x): return control_flow.map(lambda x: (x.sin(), x.sin()), x) @@ -1790,8 +1784,6 @@ def forward(self, child : torch.Tensor): ) def test_map_pytree_return(self): - cnt = CompileCounter() - def _construct_pytree(a): return (a, [[[a]]], a, (a, (a,), a), {"a": a}) @@ -1840,9 +1832,6 @@ def forward(self, child : torch.Tensor): self.assertEqual(cnt.frame_count, 0) def test_map_symint_input(self): - backend = EagerAndRecordGraphs() - cnt = CompileCounterWithBackend(backend) - def fn(x, y): def inner(x, y): return torch.sin(x + y) @@ -1874,9 +1863,6 @@ def forward(self, child : torch.Tensor, const_unused : int): ) def test_map_lowers_to_graph(self): - backend = EagerAndRecordGraphs() - cnt = CompileCounterWithBackend(backend) - def fn(x, y): def inner(x, y): return torch.sin(x + y) @@ -1933,7 +1919,7 @@ def forward(self, child : torch.Tensor, const_unused : int): rand_44.reshape(2, 8), ] for x in inps: - compiled_ret = torch.compile( + compiled_ret = torch.compile( # noqa: F841 control_flow.map, backend=backend, fullgraph=True )(inner, x) eager_sin, eager_transpose, eager_view = map_dense(inner, (x,), ()) @@ -2920,7 +2906,7 @@ class GraphModule(torch.nn.Module): return control_flow.map(inner, xs, y).sin() - result = map_f(xs, y) + map_f(xs, y) gm = backend.graphs[0] actual_stack = self._get_source_fn_stack(gm, {"cos", "add", "sin"}) @@ -3095,7 +3081,6 @@ def forward(self, L_a_ : torch.SymInt, L_b_ : torch.SymInt, L_c_ : torch.SymInt, return torch.cond(pred, true_fn, false_fn, [pytree_in]) backend = EagerAndRecordGraphs() - cnt = CompileCounterWithBackend(backend) compiled_res = torch.compile(fn, backend=backend)(pred, inp) eager_res = fn(pred, inp) self.assertEqual(compiled_res, eager_res) @@ -3252,7 +3237,7 @@ class GraphModule(torch.nn.Module): msg = "hints_wrapper - key hints not provided" with self.assertRaisesRegex(RuntimeError, msg): - compiled_res = torch.compile(fn_with_hints, backend=cnt)(x, y) + torch.compile(fn_with_hints, backend=cnt)(x, y) def test_hints_wrapper_incorrect_type(self): def fn_with_hints(x, y): @@ -3271,7 +3256,7 @@ class GraphModule(torch.nn.Module): msg = r"hints must be a dict containing int, float, bool or str value," with self.assertRaisesRegex(RuntimeError, msg): - compiled_res = torch.compile(fn_with_hints, backend=cnt)(x, y) + torch.compile(fn_with_hints, backend=cnt)(x, y) def test_hints_wrapper_pytree_inputs(self): def fn_with_hints(x, y): @@ -3284,9 +3269,6 @@ class GraphModule(torch.nn.Module): ) return res - backend = EagerAndRecordGraphs() - cnt = 
CompileCounterWithBackend(backend) - x = torch.randn(2, 4) y = torch.ones(4) @@ -3515,10 +3497,10 @@ class HigherOrderOpVmapGuardTests(LoggingTestCase): return torch.vmap(lambda x: x.sin())(x) x = torch.zeros(3, 3, 4, 5) - y = torch.vmap(fn, randomness="same")(x) + torch.vmap(fn, randomness="same")(x) self.assertEqual(len(records), 0) # sanity check - y = torch.vmap(fn, randomness="different")(x) + torch.vmap(fn, randomness="different")(x) self.assertGreater(len(records), 0) record = self.getRecord(records, "pyfunctorch") self.assertIn( @@ -5891,9 +5873,9 @@ class GraphModule(torch.nn.Module): return torch.vmap(lambda x: x.sin())(x) x = torch.zeros(3, 3, 4, 5) - y = torch.vmap(fn)(x) + torch.vmap(fn)(x) # should not recompile on second call. See Pytorch issue #118493 - y = torch.vmap(fn)(x) + torch.vmap(fn)(x) @xfailIfTorchDynamo @config.patch(error_on_recompile=True) @@ -5903,7 +5885,7 @@ class GraphModule(torch.nn.Module): return torch.vmap(lambda x: x.sin())(x) x = torch.zeros(3, 3, 4, 5) - y = torch.vmap(fn)(x) + torch.vmap(fn)(x) with self.assertRaises(torch._dynamo.exc.RecompileError): fn(x) diff --git a/test/dynamo/test_hooks.py b/test/dynamo/test_hooks.py index 91fb5279419..29ff1ddf93f 100644 --- a/test/dynamo/test_hooks.py +++ b/test/dynamo/test_hooks.py @@ -102,7 +102,7 @@ class HooksTests(torch._dynamo.test_case.TestCase): def test_tensor_register_hook_repeated_handle_return(self): def fn(x, y, z): handle = x.register_hook(lambda grad: grad * 2) - h2 = handle + h2 = handle # noqa: F841 z = z * z return x, y * y, z, handle, handle diff --git a/test/dynamo/test_logging.py b/test/dynamo/test_logging.py index 83f7306de06..1ca1ed24592 100644 --- a/test/dynamo/test_logging.py +++ b/test/dynamo/test_logging.py @@ -487,7 +487,7 @@ LoweringException: AssertionError: def test_distributed_rank_logging(self): env = dict(os.environ) env["TORCH_LOGS"] = "dynamo" - stdout, stderr = self.run_process_no_exception( + _, stderr = self.run_process_no_exception( """\ import torch.distributed as dist import logging @@ -796,7 +796,7 @@ TRACE FX call mul from test_logging.py:N in fn (LoggingTests.test_trace_call_pre env = dict(os.environ) env["TORCH_LOGS"] = "dynamo" env["TORCH_LOGS_OUT"] = file_path - stdout, stderr = self.run_process_no_exception( + _, stderr = self.run_process_no_exception( """\ import torch @torch.compile(backend="eager") diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py index e00b7bb8abe..ae1fff3587c 100644 --- a/test/dynamo/test_misc.py +++ b/test/dynamo/test_misc.py @@ -1,4 +1,5 @@ # Owner(s): ["module: dynamo"] +# ruff: noqa: F841 import abc import collections import collections.abc diff --git a/test/dynamo/test_modes.py b/test/dynamo/test_modes.py index 303c7b748d0..d5b3b1ab6b3 100644 --- a/test/dynamo/test_modes.py +++ b/test/dynamo/test_modes.py @@ -620,7 +620,7 @@ class TorchFunctionModeTests(torch._dynamo.test_case.TestCase): return prefix_lengths[b] >= kv # This runs in fullgraph already - mask = create_block_mask(prefix_lm, 8, None, 512, 512, _compile=True) + create_block_mask(prefix_lm, 8, None, 512, 512, _compile=True) def test_register_hook(self): import functools @@ -641,7 +641,7 @@ class TorchFunctionModeTests(torch._dynamo.test_case.TestCase): x = torch.ones(4, requires_grad=True) with torch.device("cpu"): - out = torch.compile(mod, fullgraph=True)(x) + torch.compile(mod, fullgraph=True)(x) if __name__ == "__main__": diff --git a/test/dynamo/test_modules.py b/test/dynamo/test_modules.py index 1a8907efde9..1cc62d5851f 100644 --- 
a/test/dynamo/test_modules.py +++ b/test/dynamo/test_modules.py @@ -1,4 +1,5 @@ # Owner(s): ["module: dynamo"] +# ruff: noqa: F841 import collections import contextlib @@ -3051,7 +3052,7 @@ class OptimizedModuleTest(torch._dynamo.test_case.TestCase): self.x = res return self.Linear2(res) - N, D_in, H, D_out, inner = 2, 2, 2, 2, 4 + N, D_in, H, inner = 2, 2, 2, 4 model = ReplayMutation(D_in, H, inner) model2 = copy.deepcopy(model) input = torch.ones(N, D_in) diff --git a/test/dynamo/test_prim_hop_base.py b/test/dynamo/test_prim_hop_base.py index 9094a83cb55..ffd7bc03ab7 100644 --- a/test/dynamo/test_prim_hop_base.py +++ b/test/dynamo/test_prim_hop_base.py @@ -160,10 +160,10 @@ class GraphModule(torch.nn.Module): return invoke_quant_test(inner, (x, y), scheme="nf4") with self.assertRaisesRegex(RuntimeError, "aliases of the inputs"): - out = f(inner, x, y) + f(inner, x, y) with self.assertRaisesRegex(RuntimeError, "inputs are mutated"): - out = f(inner2, x, y) + f(inner2, x, y) def test_eager_call(self): def inner(x, y): diff --git a/test/dynamo/test_profiler.py b/test/dynamo/test_profiler.py index 8d18dcd7bb6..9a7a892d8b0 100644 --- a/test/dynamo/test_profiler.py +++ b/test/dynamo/test_profiler.py @@ -101,7 +101,7 @@ class DynamoProfilerTests(torch._dynamo.test_case.TestCase): with TemporaryFileName() as fname: et.register_callback(fname) et.start() - out = opt_fn(*inputs) + opt_fn(*inputs) et.stop() et.unregister_callback() diff --git a/test/dynamo/test_reorder_logs.py b/test/dynamo/test_reorder_logs.py index b67013079fa..0b22ca50c18 100644 --- a/test/dynamo/test_reorder_logs.py +++ b/test/dynamo/test_reorder_logs.py @@ -171,7 +171,7 @@ class ReorderLogsTests(torch._dynamo.test_case.TestCase): counters.clear() with torch._dynamo.config.patch(reorderable_logging_functions={custom_log}): opt_f = torch.compile(backend="eager")(f) - opt_out = opt_f(x) + opt_f(x) self.assertEqual(sum(counters["graph_break"].values()), 1) self.assertEqual(custom_logs[0], "moo") diff --git a/test/dynamo/test_repros.py b/test/dynamo/test_repros.py index 8a6550cead4..160084589b5 100644 --- a/test/dynamo/test_repros.py +++ b/test/dynamo/test_repros.py @@ -271,7 +271,7 @@ class _ReversibleFunction(torch.autograd.Function): # split duplicated tensor hidden_states, attn_output = torch.chunk(hidden_states, 2, dim=-1) - for layer_id, (layer, layer_head_mask) in enumerate(zip(layers, head_mask)): + for layer in layers: if output_hidden_states is True: all_hidden_states.append(hidden_states) @@ -650,7 +650,7 @@ class XSoftmax(torch.autograd.Function): @staticmethod def backward(self, grad_output): - (output, rmask) = self.saved_tensors + output, _ = self.saved_tensors inputGrad = softmax_backward_data(self, grad_output, output, self.dim, output) return inputGrad, None, None @@ -1342,6 +1342,8 @@ class ReproTests(torch._dynamo.test_case.TestCase): @torch._dynamo.config.patch(error_on_recompile=True) @torch.fx.experimental._config.patch(use_duck_shape=False) def test_dynamic_shape_disable_duck_size(self): + # noqa: F841 + class TestModel(nn.Module): def __init__( self, @@ -1357,11 +1359,11 @@ class ReproTests(torch._dynamo.test_case.TestCase): x1 = torch.rand(2, 5, 10, 10).to(memory_format=torch.channels_last) x2 = torch.rand(2, 5, 4, 8).to(memory_format=torch.channels_last) - o1_ref = main_model(x1, 4) - o1 = opt_model(x1, 4) + main_model(x1, 4) + opt_model(x1, 4) - o2_ref = main_model(x2, 20) - o2 = opt_model(x2, 20) + main_model(x2, 20) + opt_model(x2, 20) def test_chunk_reformer_ff(self): input = torch.randn([1, 4096, 
256]) @@ -1483,7 +1485,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): self.assertEqual(cnt.frame_count, 2) self.assertEqual(cnt.op_count, 2) # rand, rand try: - graph, _ = torch._dynamo.export(fn)() + _, _ = torch._dynamo.export(fn)() # See https://github.com/pytorch/pytorch/pull/87490 self.fail("unexpected export success") except torch._dynamo.exc.Unsupported: @@ -1713,7 +1715,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): ) x = torch.rand([111, 262], device=device) - y2 = forward_aot(x, 2) # previously failed + forward_aot(x, 2) # previously failed def test_issue175(self): n_heads = 2 @@ -2323,7 +2325,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): class Mod(torch.nn.Module): def forward(self, listy): x = listy[3:5] - for i in range(10): + for _ in range(10): z = torch.abs(torch.randn(10)) + 1 x[0] = z return x @@ -2619,7 +2621,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): def forward(self, inp): res = 0 - for name, buffer in self.named_buffers(): + for _, buffer in self.named_buffers(): res += buffer.sum() return inp.cos() + res @@ -2718,7 +2720,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): def forward(self, inp): res = torch.zeros(3, 3) - for mod in self.modules(): + for _ in self.modules(): res += self.fc(inp) return res @@ -2799,7 +2801,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): for (sh, st, dt, dev, rg) in args ] - opt_foo = torch.compile(foo, backend="aot_eager_decomp_partition") + torch.compile(foo, backend="aot_eager_decomp_partition") with torch.cuda.amp.autocast(enabled=True): ref = foo(*args)[0] res = foo(*args)[0] @@ -2974,7 +2976,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): # Repro of huggingface graph break inside loop in `get_parameter_dtype`. # Skip only the inner frame that has loop that contains graph break. 
def inner(x): - for i in range(3): + for _ in range(3): x += 1 torch._dynamo.graph_break() return x @@ -3647,7 +3649,6 @@ class ReproTests(torch._dynamo.test_case.TestCase): ref2 = fn(x, inp_list2) ref3 = fn(x, inp_list3) - cnt = torch._dynamo.testing.CompileCounter() opt_fn = torch.compile(fn, fullgraph=True) opt_ret1 = opt_fn(x, inp_list1) @@ -4143,7 +4144,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): return x.sin() counter = CompileCounter() - compiled_fn = torch.compile(fn, backend=counter)(torch.randn([2, 2]), []) + torch.compile(fn, backend=counter)(torch.randn([2, 2]), []) self.assertEqual(counter.frame_count, 1) def test_graph_break_on_jit_isinstance(self): @@ -4295,7 +4296,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): torch._dynamo.reset() torch._dynamo.utils.clear_compilation_metrics() - res = torch.compile(fn, backend="aot_eager")(x) + torch.compile(fn, backend="aot_eager")(x) all_metrics = torch._dynamo.utils.get_compilation_metrics() @@ -4399,7 +4400,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): compiled_fn = torch.compile(func, backend=cnt, fullgraph=True) requires_grad = func is not func1 - for i in range(0, 5): + for _ in range(0, 5): # Inputs eager_a = torch.ones([6], requires_grad=requires_grad) compiled_a = torch.ones([6], requires_grad=requires_grad) @@ -4487,7 +4488,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): pass def fn(x, y): - ucm = UserCtxManager() + ucm = UserCtxManager() # noqa: F841 return x * x cnt = torch._dynamo.testing.CompileCounter() @@ -4538,11 +4539,11 @@ class ReproTests(torch._dynamo.test_case.TestCase): e = base[:, 8:10] f = base[:, 10:12] f2 = base[:, 10:14] - out = fn(a, b, c, d, e, f) + fn(a, b, c, d, e, f) with self.assertRaisesRegex( AssertionError, "is being compiled with dynamic shapes" ): - out2 = fn(a, b, c, d, e, f2) + fn(a, b, c, d, e, f2) def test_user_ctor_ctx_manager_custom_init(self): class UserCtxManager: @@ -4556,7 +4557,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): pass def fn(x, y): - ucm = UserCtxManager(y) + ucm = UserCtxManager(y) # noqa: F841 return x * y[0] cnt = torch._dynamo.testing.CompileCounter() @@ -4580,7 +4581,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): def fn(x, counter): x = x * x - ucm = UserCtxManager(counter) + ucm = UserCtxManager(counter) # noqa: F841 return x * x cnt = torch._dynamo.testing.CompileCounter() @@ -4588,7 +4589,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): x = torch.rand([2, 2]) self.assertEqual(opt_fn(x, counter), fn(x, counter)) self.assertEqual(counter[0], 2) - for i in range(0, 10): + for _ in range(0, 10): opt_fn(x, counter) self.assertEqual(counter[0], 12) if torch._dynamo.config.assume_static_by_default: @@ -4719,7 +4720,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): def test_invalid_seq_unpack(self): def myfn(arg): - (a, b) = arg + (a, b) = arg # noqa: F841 def fn(): return myfn((1, 2, 3)) @@ -4804,13 +4805,13 @@ class ReproTests(torch._dynamo.test_case.TestCase): a = torch.randn(2, 4) a_ref = a.clone() - out_ref = foo(a_ref) + foo(a_ref) f_compiled = torch.compile(foo, backend="aot_eager") with self.assertRaisesRegex( RuntimeError, "encountered a mutation on a view chain of length 2, where view 1 was an as_strided", ): - out = f_compiled(a) + f_compiled(a) def test_dont_aggressively_write_assert(self): record_graph = torch._dynamo.testing.EagerAndRecordGraphs() @@ -5620,7 +5621,7 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): random_op = torch.compile(random_op) 
params = {"from": -10, "to": 10} tensor = torch.randn([2, 3]) - res = random_op(tensor, params) + random_op(tensor, params) # https://github.com/pytorch/pytorch/issues/131019 def test_tensor_uniform(self): @@ -5631,7 +5632,7 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): uniform_op = torch.compile(uniform_op) params = {"from": -10, "to": 10} tensor = torch.randn([2, 3]) - res = uniform_op(tensor, params) + uniform_op(tensor, params) def test_data_attr_mutation_after_saved_for_bw(self): def f(x): @@ -5771,7 +5772,7 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): @torch.compile(backend="aot_eager_decomp_partition") def f(x, l): - z = x.sin() + z = x.sin() # noqa: F841 y = x + 1 # graph input has its storage mutated torch.ops.fsdp.copy_.default(x, y) @@ -5860,14 +5861,14 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): opt_mod = torch.compile(mod, backend="eager") x = torch.randn(1, 1) - ref = mod(x) - res = opt_mod(x) + ref = mod(x) # noqa: F841 + res = opt_mod(x) # noqa: F841 mod.submod.multipliers = [3.3, 4.4] # Since guard_nn_modules is False, this will not recompile with torch._dynamo.config.patch(error_on_recompile=True): - ref = mod(x) - res = opt_mod(x) + ref = mod(x) # noqa: F841 + res = opt_mod(x) # noqa: F841 def test_optimized_module_training(self): mod = torch.nn.Linear(3, 3) @@ -6170,9 +6171,9 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): x = torch.randn(4) opt_fn = torch.compile(fn, backend="eager", fullgraph=True) - ref = fn(config, x) + fn(config, x) cloned_config = copy.deepcopy(config) - res = opt_fn(cloned_config, x) + opt_fn(cloned_config, x) self.assertEqual(fn(config, x), opt_fn(config, x)) self.assertEqual(cloned_config.baz, 4) @@ -6230,7 +6231,7 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): x = torch.ones(2) with torch.no_grad(): - y = model(x) + model(x) def test_typed_dict(self): class LlavaImagePixelInputs(TypedDict): diff --git a/test/dynamo/test_resume.py b/test/dynamo/test_resume.py index 057392ca655..42103a7878e 100644 --- a/test/dynamo/test_resume.py +++ b/test/dynamo/test_resume.py @@ -13,7 +13,7 @@ def fn_creator(): torch._dynamo.graph_break() x = x + var1 - def inner_fn(): + def inner_fn(): # noqa: F841 return var2 return x diff --git a/test/dynamo/test_structured_trace.py b/test/dynamo/test_structured_trace.py index 3801fd24cc1..9dd3f3038f9 100644 --- a/test/dynamo/test_structured_trace.py +++ b/test/dynamo/test_structured_trace.py @@ -824,7 +824,7 @@ def forward(self, x, y): trace_log.addHandler(payload_handler) def f(x): - y = x + 1 + y = x + 1 # noqa: F841 raise RuntimeError("boo") try: diff --git a/test/dynamo/test_subclasses.py b/test/dynamo/test_subclasses.py index d7ec7b6f5b6..f2bc6d3748d 100644 --- a/test/dynamo/test_subclasses.py +++ b/test/dynamo/test_subclasses.py @@ -126,7 +126,6 @@ def get_view_test_cases(): def mk_dense_subclass_dense_subclass(): values = torch.randn(10, 5) offsets = torch.tensor([0, 3, 6, 10]) - offsets2 = offsets.detach().clone() return nested_view_from_values_offsets( nested_view_from_values_offsets(values, offsets).values(), offsets ) @@ -136,7 +135,7 @@ def get_view_test_cases(): def mk_subclass_dense_subclass_dense(): x = get_jagged_tensor(((2, 3, 4), 3), None, requires_grad=True)[0].clone() offsets2 = x.offsets().detach().clone() - nt_view = nested_view_from_values_offsets(x.values(), offsets2).values() + nested_view_from_values_offsets(x.values(), 
offsets2).values() yield mk_subclass_dense_subclass_dense, "subclass_dense_subclass_dense" @@ -544,7 +543,7 @@ class SubclassTests(torch._dynamo.test_case.TestCase): input = torch.ones(2, 2) - res = fn(input) + fn(input) def test_torch_function_state_guards(self): cnt = torch._dynamo.testing.CompileCounter() @@ -556,9 +555,9 @@ class SubclassTests(torch._dynamo.test_case.TestCase): input = torch.ones(2, 2) with torch._C.DisableTorchFunctionSubclass(): - res = fn(input) + fn(input) - res = fn(input) + fn(input) self.assertEqual(cnt.frame_count, 2) @@ -1160,7 +1159,7 @@ class GraphModule(torch.nn.Module): ) ff = torch.func.functionalize(f) - ff_out = ff(t_clone) + ff_out = ff(t_clone) # noqa: F841 # frame count and op count are incremented due to re-compilation check_count_and_graph( 2, @@ -1187,7 +1186,7 @@ class GraphModule(torch.nn.Module): x = torch._to_functional_tensor(t_clone2) torch._mirror_autograd_meta_to(t_clone2, x) torch._enable_functionalization(reapply_views=False) - aot_f_out = f(x) + aot_f_out = f(x) # noqa: F841 finally: torch._disable_functionalization() @@ -1334,7 +1333,7 @@ class GraphModule(torch.nn.Module): x = DoubleSizeMaybeAddGeThreeTensor(inp) torch._dynamo.mark_dynamic(x, 0) - res = fn(x) + res = fn(x) # noqa: F841 # During fakeifying, we end up allocating a separate symint # for the outer and inner tensor (in this test, s0 is unused). expected_var_to_val = { @@ -3270,7 +3269,7 @@ Eq(s12, s10)""", x_inner = torch.ones(4) x = TwoTensor(x_inner, x_inner) x_view = x.view(2, 2) - out = f(x_view) + out = f(x_view) # noqa: F841 # NJT1 -> Dense -> NJT2 -> Dense view # During view replay, the Dense -> NJT2 part will construct an intermediate, diff --git a/test/dynamo/test_subgraphs.py b/test/dynamo/test_subgraphs.py index 7d1e7855c21..0cac9499b9d 100644 --- a/test/dynamo/test_subgraphs.py +++ b/test/dynamo/test_subgraphs.py @@ -342,7 +342,7 @@ class SubGraphTests(torch._dynamo.test_case.TestCase): tmp = [a + 1, b + 2, a + b] x = a x = unsupported(x, x) - for i in range(3): + for _ in range(3): x += tmp.pop(-1) return x @@ -369,7 +369,6 @@ class SubGraphTests(torch._dynamo.test_case.TestCase): opt_fn = torch.compile(fn, backend=cnt_dynamic, dynamic=True) start = 2 end = 12 - steps = end - start for i in range(start, end): opt_fn(torch.randn(i), torch.randn(i)) @@ -557,7 +556,7 @@ class SubGraphTests(torch._dynamo.test_case.TestCase): cnt = torch._dynamo.testing.CompileCounter() opt_fn = torch.compile(fn, backend=cnt) v3, it3 = opt_fn(v1) - v4, it4 = opt_fn(v1) + v4, _ = opt_fn(v1) self.assertEqual(v2.tolist(), v3.tolist()) self.assertEqual(v2.tolist(), v4.tolist()) self.assertEqual(list(it2), list(it3))
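
The recurring fix across these hunks is the pyflakes/Ruff F841 cleanup ("local variable is assigned to but never used"): delete the dead binding and keep only the call when the call's side effect is the point under test, rename an unused loop or unpacking target to _, or keep the binding and suppress the warning per line with # noqa: F841 (or file-wide with # ruff: noqa: F841, as the hunks for test_misc.py and test_modules.py do). A minimal sketch of the three forms follows; the function bodies here are hypothetical illustrations, not code taken from the patch:

    import torch

    def before(x):
        y = torch.sin(x)    # F841: y is bound but never read
        for i in range(3):  # loop index i is unused
            x = x + 1
        return x

    def after(x):
        torch.sin(x)        # dead binding dropped; the call itself stays
        for _ in range(3):  # unused loop variable renamed to _
            x = x + 1
        z = x * 2  # noqa: F841  (binding kept deliberately, lint silenced)
        return x

Dropping only the unused binding while keeping the call, as these hunks do for results of torch.compile and torch._dynamo.export, leaves test behavior unchanged while satisfying the linter.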