diff --git a/test/ao/sparsity/test_kernels.py b/test/ao/sparsity/test_kernels.py
index 7e4337ba431..1fb8d46adf9 100644
--- a/test/ao/sparsity/test_kernels.py
+++ b/test/ao/sparsity/test_kernels.py
@@ -147,7 +147,6 @@ def _sparse_layer_test_helper(
         W_zp = 0
         X_fp32 = torch.randn(batch_size, input_channels, dtype=torch.float32)
-        float_bias = torch.randn(output_channels, dtype=torch.float32)
 
         # generate a weight which we'll insert into the model
         W_fp32 = torch.randn(output_channels, input_channels, dtype=torch.float32)
diff --git a/test/ao/sparsity/test_qlinear_packed_params.py b/test/ao/sparsity/test_qlinear_packed_params.py
index eb186d4245f..1c4c58a9366 100644
--- a/test/ao/sparsity/test_qlinear_packed_params.py
+++ b/test/ao/sparsity/test_qlinear_packed_params.py
@@ -30,7 +30,6 @@ class TestQlinearPackedParams(TestCase):
         row_block_size = 1
         col_block_size = 4
         out_features = weight_fp32.shape[0]
-        in_features = weight_fp32.shape[1]
 
         scales = [2.0, 6.0, 12.0]
         zero_points = [
@@ -201,14 +200,11 @@ class TestQlinearPackedParams(TestCase):
         row_block_size = 1
         col_block_size = 4
         out_features = weight_fp32.shape[0]
-        in_features = weight_fp32.shape[1]
 
         scales = [2.0, 3.0, 7.0]
         zero_points = [0 for _ in range(out_features)]
         dtype = torch.qint8
 
-        x = torch.rand(size=(1, weight_fp32.shape[1]))
-
         def make_lin_get_state_weight_bias_and_save():
             weight = torch.quantize_per_tensor(
                 weight_fp32,
diff --git a/test/ao/sparsity/test_sparsifier.py b/test/ao/sparsity/test_sparsifier.py
index a39f97ad3d5..097d4890dc8 100644
--- a/test/ao/sparsity/test_sparsifier.py
+++ b/test/ao/sparsity/test_sparsifier.py
@@ -86,7 +86,7 @@ class TestBaseSparsifier(TestCase):
         sparsifier0.prepare(model0, [{"tensor_fqn": "linear1.weight"}])
         mask = model0.linear1.parametrizations["weight"][0].mask
         mask.data = torch.arange(mask.shape[0] * mask.shape[1]).reshape(mask.shape)
-        for step in range(step_count):
+        for _ in range(step_count):
             sparsifier0.step()
 
         state_dict = sparsifier0.state_dict()
diff --git a/test/ao/sparsity/test_sparsity_utils.py b/test/ao/sparsity/test_sparsity_utils.py
index 0dd7c9722c0..b29be49d571 100644
--- a/test/ao/sparsity/test_sparsity_utils.py
+++ b/test/ao/sparsity/test_sparsity_utils.py
@@ -124,7 +124,7 @@ class TestSparsityUtilFunctions(TestCase):
         list_of_modules = [m for _, m in model.named_modules()] + [model]
         for module in list_of_modules:
             module_fqn = module_to_fqn(model, module)
-            for tensor_name, tensor in module.named_parameters(recurse=False):
+            for tensor_name, _ in module.named_parameters(recurse=False):
                 tensor_fqn = (
                     module_fqn + ("." if module_fqn != "" else "") + tensor_name
                 )
diff --git a/test/ao/sparsity/test_structured_sparsifier.py b/test/ao/sparsity/test_structured_sparsifier.py
index ff4ffa4a308..00fdbed68af 100644
--- a/test/ao/sparsity/test_structured_sparsifier.py
+++ b/test/ao/sparsity/test_structured_sparsifier.py
@@ -269,7 +269,6 @@ class TestBaseStructuredSparsifier(TestCase):
     def _test_step_linear_on_device(self, model, device):
         model = model.to(device)
-        x = torch.ones(7, 7, device=device)
         pruner = SimplePruner(None)
         pruner.prepare(model, None)
         pruner.enable_mask_update = True
@@ -808,7 +807,7 @@ class TestBaseStructuredSparsifier(TestCase):
         pruned_model = fx_pruner.prune()
         pruned_model.eval()
         out_pruned, lstm_out_pruned = pruned_model(lstm_input)
-        r, c = lstm_out_expected.size()
+        _, c = lstm_out_expected.size()
 
         # We cannot check that y_expected == y_pruned as usual because
         # zeros vs. missing elements yield different numerical results.
@@ -891,7 +890,7 @@ class TestBaseStructuredSparsifier(TestCase):
         pruned_model = fx_pruner.prune()
         pruned_model.eval()
         out_pruned, lstm_out_pruned = pruned_model(lstm_input)
-        r, c = lstm_out_expected.size()
+        _, c = lstm_out_expected.size()
 
         # We cannot check that y_expected == y_pruned as usual because
         # zeros vs. missing elements yield different numerical results.
diff --git a/test/autograd/test_functional.py b/test/autograd/test_functional.py
index 6147ee1989d..5c086162bbc 100644
--- a/test/autograd/test_functional.py
+++ b/test/autograd/test_functional.py
@@ -670,7 +670,7 @@ class TestAutogradFunctional(TestCase):
         x = ctors.randn(3)
 
         with warnings.catch_warnings(record=True) as wa:
-            result = api(foo, x, vectorize=True)
+            api(foo, x, vectorize=True)
         self.assertEqual(len(wa), 0)
 
     @base_and_logging_tensor
@@ -762,7 +762,7 @@ class TestAutogradFunctional(TestCase):
         inp = ctors.rand(4)
 
         with self.assertRaisesRegex(RuntimeError, "not supported together"):
-            res = autogradF.jacobian(foo, inp, strict=True, vectorize=True)
+            autogradF.jacobian(foo, inp, strict=True, vectorize=True)
 
     @base_and_logging_tensor
     def test_jacobian_no_grad(self, ctors):
@@ -1122,7 +1122,7 @@ class TestAutogradFunctional(TestCase):
         inp = ctors.rand(4)
 
         with self.assertRaisesRegex(RuntimeError, "not supported together"):
-            res = autogradF.hessian(foo, inp, strict=True, vectorize=True)
+            autogradF.hessian(foo, inp, strict=True, vectorize=True)
 
     @base_and_logging_tensor
     def test_hessian_no_grad(self, ctors):
diff --git a/test/bottleneck_test/test_cuda.py b/test/bottleneck_test/test_cuda.py
index 5a28fe87a17..d9f9b0b8274 100644
--- a/test/bottleneck_test/test_cuda.py
+++ b/test/bottleneck_test/test_cuda.py
@@ -18,7 +18,7 @@ def main():
     data = torch.randn(10, 50).cuda()
     model = Model().cuda()
     optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
-    for i in range(10):
+    for _ in range(10):
         optimizer.zero_grad()
         loss = model(data)
         loss.backward()
diff --git a/test/custom_operator/test_custom_ops.py b/test/custom_operator/test_custom_ops.py
index 83bb0572266..38c7349f139 100644
--- a/test/custom_operator/test_custom_ops.py
+++ b/test/custom_operator/test_custom_ops.py
@@ -78,9 +78,9 @@ def forward(self, arg0_1):
         x = torch.randn(3, device="meta")
         self.assertNotIn("my_custom_ops2", sys.modules.keys())
         with self.assertRaisesRegex(NotImplementedError, r"'my_custom_ops2'"):
-            y = torch.ops.custom.sin.default(x)
+            torch.ops.custom.sin.default(x)
         torch.ops.import_module("my_custom_ops2")
-        y = torch.ops.custom.sin.default(x)
+        torch.ops.custom.sin.default(x)
 
     def test_calling_custom_op_string(self):
         output = ops.custom.op2("abc", "def")
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py b/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py
index 319e72baafa..4029bdd1af6 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py
@@ -35,7 +35,7 @@ class _TestClipGradNormBase(FSDPTest):
         vector_norm_fn = functools.partial(torch.linalg.vector_norm, ord=norm_type)
         dp_mesh = dp_mesh or init_device_mesh("cuda", (self.world_size,))
         torch.manual_seed(42 + dp_mesh.get_local_rank() + 1)
-        for iter_idx in range(10):
+        for _ in range(10):
             ref_optim.zero_grad()
             ref_model(inp).sum().backward()
             optim.zero_grad()
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_comm.py b/test/distributed/_composable/fsdp/test_fully_shard_comm.py
index 99be766cb43..4b5397c02d2 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_comm.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_comm.py
@@ -250,8 +250,8 @@ class TestFullyShardCollectiveOps(FSDPTestMultiThread):
         self.assertEqual(group.size(), self.world_size)
         all_reduce_stream = torch.cuda.Stream()
         (
-            reduce_scatter_input,
-            reduce_scatter_event,
+            _,
+            _,
             post_reduce_event,
             _,
             _,
@@ -406,7 +406,7 @@ class TestFullyShardCommunication(FSDPTest):
         torch.manual_seed(42 + self.rank)
         inp = torch.randint(0, model_args.vocab_size, (2, 16), device="cuda")
-        for iter_idx in range(10):
+        for _ in range(10):
             ref_loss = ref_model(inp).sum()
             ref_loss.backward()
             for param in ref_model.parameters():
@@ -501,7 +501,7 @@ class TestFullyShardPrefetch(FSDPTest):
         self, reshard_after_forward: Union[bool, int], checkpoint_impl: Optional[str]
     ):
         n_layers = 3
-        model, optim, inp = self._init_transformer(
+        model, _, inp = self._init_transformer(
             n_layers, reshard_after_forward, checkpoint_impl
         )
         events: List[EventType] = []
@@ -843,7 +843,7 @@ class TestFullyShardPrefetch(FSDPTest):
         with patch_unshard(unshard_with_record), patch_post_backward(
             post_backward_with_record
         ):
-            for iter_idx in range(3):
+            for _ in range(3):
                 loss = model(inp)
                 expected_events = [
                     (
@@ -922,7 +922,7 @@ class TestFullyShardPrefetch(FSDPTest):
         with patch_unshard(unshard_with_record), patch_post_backward(
             post_backward_with_record
         ):
-            for iter_idx in range(3):
+            for _ in range(3):
                 loss = model(inp)
                 expected_events = [
                     ("unshard", "", TrainingState.FORWARD),
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_compile.py b/test/distributed/_composable/fsdp/test_fully_shard_compile.py
index 2780bd75a38..381a696a70a 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_compile.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_compile.py
@@ -662,7 +662,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
         def __init__(self, n_layers):
             super().__init__()
             self.layers = torch.nn.ModuleList()
-            for layer_id in range(n_layers):
+            for _ in range(n_layers):
                 self.layers.append(TestSubmodule(hidden_dim))
 
         def forward(self, x):
@@ -684,7 +684,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
         fsdp_config = {}
         mesh = init_device_mesh("cuda", (self.world_size,))
         model = TestModule(n_layers=3)
-        for layer_id, mod in enumerate(model.layers):
+        for mod in model.layers:
             fully_shard(mod, mesh=mesh, reshard_after_forward=True, **fsdp_config)
         model = fully_shard(
             model, mesh=mesh, reshard_after_forward=True, **fsdp_config
@@ -871,7 +871,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
             else:
                 v.requires_grad_(False)
         assert requires_grad_param_count == n_layers * len(requires_grad_params)
-        for layer_id, mod in enumerate(model.layers):
+        for _, mod in enumerate(model.layers):
             fully_shard(mod, mesh=mesh, reshard_after_forward=True, **fsdp_config)
         model = fully_shard(
             model, mesh=mesh, reshard_after_forward=True, **fsdp_config
@@ -1087,7 +1087,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
                 setattr(m.encoder, name, new_child)
     m = FSDP(m, sharding_strategy=ShardingStrategy.FULL_SHARD, use_orig_params=True)
     inp = torch.randn(32, 784, device="cuda")
-    out = m(inp)
+    m(inp)
 
 
 if __name__ == "__main__":
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_extensions.py b/test/distributed/_composable/fsdp/test_fully_shard_extensions.py
index 522466a7881..7fc1679468b 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_extensions.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_extensions.py
@@ -241,7 +241,7 @@ class TestFullyShardAllGatherExtensionsMultiProcess(
             losses.append(_model(inp).sum())
             losses[-1].backward()
             if _model is ref_model:
-                for param_name, param in _model.named_parameters():
+                for _, param in _model.named_parameters():
                     dist.all_reduce(param.grad)
                     param.grad.detach().div_(self.world_size)
         self.assertEqual(losses[0], losses[1])
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_init.py b/test/distributed/_composable/fsdp/test_fully_shard_init.py
index 83bf6a245a0..49bd3848a85 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_init.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_init.py
@@ -904,7 +904,7 @@ class TestFullyShardProcessGroupInit(FSDPTestMultiThread):
         )
         self.assertEqual(mesh.mesh, ref_mesh.mesh)
         self.assertEqual(mesh._coordinate_on_dim, ref_mesh._coordinate_on_dim)
-        for (tag, ranks, group_name), (ref_tag, ref_ranks, ref_group_name) in zip(
+        for (_, ranks, _), (_, ref_ranks, _) in zip(
             mesh._dim_group_infos, ref_mesh._dim_group_infos
         ):
             # Since we manually constructed new subgroups, the test and ref
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_logging.py b/test/distributed/_composable/fsdp/test_fully_shard_logging.py
index ba77ce82218..94e57b2fc36 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_logging.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_logging.py
@@ -26,7 +26,7 @@ class LoggingTests(LoggingTestCase):
         env["WORLD_SIZE"] = "1"
         env["MASTER_PORT"] = "34715"
         env["MASTER_ADDR"] = "localhost"
-        stdout, stderr = self.run_process_no_exception(
+        _, stderr = self.run_process_no_exception(
             """\
 import logging
 import torch
diff --git a/test/distributed/_composable/fsdp/test_fully_shard_training.py b/test/distributed/_composable/fsdp/test_fully_shard_training.py
index 550c0633e3f..575a7d6059c 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_training.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_training.py
@@ -590,7 +590,7 @@ class TestFullyShard1DTrainingCore(FSDPTest):
         torch.manual_seed(42 + self.rank)
         inp = torch.randint(0, model_args.vocab_size, (2, 8), device="cuda")
-        for iter_idx in range(10):
+        for _ in range(10):
             losses: List[torch.Tensor] = []
             for _model, _optim in ((ref_model, ref_optim), (model, optim)):
                 _optim.zero_grad()
@@ -624,12 +624,12 @@ class TestFullyShard1DTrainingCore(FSDPTest):
         # sync point after each iteration
         ref_losses: List[torch.Tensor] = []
         losses: List[torch.Tensor] = []
-        for iter_idx in range(10):
+        for _ in range(10):
             ref_optim.zero_grad()
             ref_losses.append(ref_model(inp).sum())
             ref_losses[-1].backward()
             ref_optim.step()
-        for iter_idx in range(10):
+        for _ in range(10):
             optim.zero_grad()
             losses.append(model(inp).sum())
             losses[-1].backward()
@@ -1185,7 +1185,7 @@ class TestFullyShardNDTraining(FSDPTest):
         foreach: bool,
     ):
         global_mesh = self.init_global_mesh()
-        pp_mesh, dp_mesh, tp_mesh = (
+        _, dp_mesh, tp_mesh = (
             global_mesh["pp"],
             global_mesh["dp"],
             global_mesh["tp"],
@@ -1217,7 +1217,7 @@ class TestFullyShardNDTraining(FSDPTest):
             _optim.step()
         self.assertEqual(losses[0], losses[1])
 
-        for n, p in model.named_parameters():
+        for _, p in model.named_parameters():
             self.assertIsInstance(p, DTensor)
             self.assertEqual(p.device_mesh.ndim, 2)
             self.assertEqual(len(p.placements), 2)
@@ -1288,7 +1288,7 @@ class TestFullyShardHSDP3DTraining(FSDPTest):
             _optim.step()
         self.assertEqual(losses[0], losses[1])
 
-        for n, p in model.named_parameters():
+        for _, p in model.named_parameters():
             self.assertIsInstance(p, DTensor)
             self.assertEqual(p.device_mesh.ndim, 3)
             self.assertEqual(len(p.placements), 3)
diff --git a/test/distributed/_composable/test_checkpoint.py b/test/distributed/_composable/test_checkpoint.py
index dd04011d0ff..566694931d8 100644
--- a/test/distributed/_composable/test_checkpoint.py
+++ b/test/distributed/_composable/test_checkpoint.py
@@ -119,7 +119,6 @@ class TestCheckpoint(TestCase):
         # no checkpoint
         with MemoryDelta(x.device) as mem1:
             loss1 = net1(x1).sum()
-        graph_size1 = self._get_graph_size(loss1)
         loss1.backward()
 
         # with checkpoint
diff --git a/test/distributed/_composable/test_composability/test_2d_composability.py b/test/distributed/_composable/test_composability/test_2d_composability.py
index 57f7f32f164..c596139c364 100644
--- a/test/distributed/_composable/test_composability/test_2d_composability.py
+++ b/test/distributed/_composable/test_composability/test_2d_composability.py
@@ -244,7 +244,6 @@ class TestFullyShard2DTraining(FSDPTest):
             ref_model.parameters(), model.named_parameters()
         ):
             full_grad = param.grad.full_tensor()
-            ref_grad = ref_param.grad
             self.assertEqual(ref_param.grad, full_grad)
         ref_optim.step()
@@ -285,7 +284,7 @@ class TestFullyShard2DTraining(FSDPTest):
         # called, but they will just be no-ops without issuing any kernels.
         # We prefer to keep the no-op check at the c10d level, not in FSDP.
         inp = torch.randn((4, mlp_dim), device="cuda")  # same on all ranks
-        for iter_idx in range(10):
+        for _ in range(10):
             ref_optim.zero_grad()
             optim.zero_grad()
@@ -583,9 +582,7 @@ class TestNew2dParallelTraining(DTensorTestBase):
             "net1": ColwiseParallel(),
             "net2": RowwiseParallel(),
         }
-        model_2d = parallelize_module(
-            SimpleModel().cuda(), mesh_2d["tp"], parallelize_plan
-        )
+        parallelize_module(SimpleModel().cuda(), mesh_2d["tp"], parallelize_plan)
 
     @with_comms
     @skip_if_lt_x_gpu(4)
@@ -833,7 +830,6 @@ class TestNew2dParallelStateDict(DTensorTestBase):
         # Create a model without wrapper
         torch.manual_seed(0)
         no_wrap_model = simple_model().cuda(self.rank)
-        no_wrap_state_dict = no_wrap_model.state_dict()
        no_wrap_optim = torch.optim.Adam(no_wrap_model.parameters(), lr=0.01)
         no_wrap_model(no_wrap_model.get_input().cuda(self.rank)).sum().backward()
         no_wrap_optim.step()
@@ -890,8 +886,6 @@ class TestNew2dParallelStateDict(DTensorTestBase):
         set_optimizer_state_dict(
             model_2d, optimizers=optim_2d, optim_state_dict=ref_optim_2d_osd
         )
-        new_optim_2d_osd = get_optimizer_state_dict(model_2d, optimizers=optim_2d)
-
         ref_optim_2d_osd_states = ref_optim_2d_osd["state"]
         new_optim_2d_osd_states = optim_2d_osd["state"]
diff --git a/test/distributed/_composable/test_composability/test_pp_composability.py b/test/distributed/_composable/test_composability/test_pp_composability.py
index aa9265cf94c..c82b08bb21f 100644
--- a/test/distributed/_composable/test_composability/test_pp_composability.py
+++ b/test/distributed/_composable/test_composability/test_pp_composability.py
@@ -119,7 +119,7 @@ class ComposabilityTest(MultiProcessTestCase):
     )
     @parametrize("use_new_runtime", [False, True])
     def test_manual_with_data_parallel(self, dp_type, ScheduleClass, use_new_runtime):
-        device = torch.device("cuda", self.device)
+        _device_raii = torch.device("cuda", self.device)
         torch.cuda.set_device(self.device)
         store = torch.distributed.FileStore(self.file_name, self.world_size)
         torch.distributed.init_process_group(
@@ -398,7 +398,7 @@ class ComposabilityTest(MultiProcessTestCase):
         ],
     )
     def test_3d_with_tp_dp_pp(self, ScheduleClass, MixedPrecisionParam):
-        device = torch.device("cuda", self.device)
+        _device_raii = torch.device("cuda", self.device)
         torch.cuda.set_device(self.device)
         store = torch.distributed.FileStore(self.file_name, self.world_size)
         torch.distributed.init_process_group(
diff --git a/test/distributed/_composable/test_replicate_with_compiler.py b/test/distributed/_composable/test_replicate_with_compiler.py
index 8690bef6cc2..91c3ecb4798 100644
--- a/test/distributed/_composable/test_replicate_with_compiler.py
+++ b/test/distributed/_composable/test_replicate_with_compiler.py
@@ -329,11 +329,11 @@ class ReplicateTest(MultiProcessInductorTestCase):
         code = self._test_bucketing()
         self.assertEqual(counters["inductor"]["ddp_buckets"], 3)
         fc = FileCheck()
-        for i in range(3):
+        for _ in range(3):
             fc.check("cpp_fused_").check(
                 "torch.ops._c10d_functional.all_reduce_coalesced_.default("
             )
-        for i in range(3):
+        for _ in range(3):
             fc.check("torch.ops._c10d_functional.wait_tensor.default")
         fc.run(code)
@@ -342,11 +342,11 @@ class ReplicateTest(MultiProcessInductorTestCase):
         code = self._test_bucketing(init_process_group=False, loop=2)
         self.assertEqual(counters["inductor"]["ddp_buckets"], 3)
         fc = FileCheck()
-        for i in range(3):
+        for _ in range(3):
             fc.check("cpp_fused_").check(
                 "torch.ops._c10d_functional.all_reduce_coalesced_.default("
             )
-        for i in range(3):
+        for _ in range(3):
             fc.check("torch.ops._c10d_functional.wait_tensor.default")
         fc.run(code)
@@ -371,11 +371,11 @@ class ReplicateTest(MultiProcessInductorTestCase):
         code = self._test_bucketing()
         self.assertEqual(counters["inductor"]["ddp_buckets"], 3)
         fc = FileCheck()
-        for i in range(3):
+        for _ in range(3):
             fc.check("aten.flatten.using_ints(").check("cpp_fused_").check(
                 "torch.ops._c10d_functional.all_reduce_.default("
             )
-        for i in range(3):
+        for _ in range(3):
             fc.check("torch.ops._c10d_functional.wait_tensor.default")
         fc.run(code)
@@ -383,11 +383,11 @@ class ReplicateTest(MultiProcessInductorTestCase):
         code = self._test_bucketing(init_process_group=False, loop=2)
         self.assertEqual(counters["inductor"]["ddp_buckets"], 3)
         fc = FileCheck()
-        for i in range(3):
+        for _ in range(3):
             fc.check("aten.flatten.using_ints(").check("cpp_fused_").check(
                 "torch.ops._c10d_functional.all_reduce_.default("
             )
-        for i in range(3):
+        for _ in range(3):
             fc.check("torch.ops._c10d_functional.wait_tensor.default")
         fc.run(code)
diff --git a/test/distributed/_shard/sharded_tensor/ops/test_binary_cmp.py b/test/distributed/_shard/sharded_tensor/ops/test_binary_cmp.py
index bacb36e47f9..094bc0f53d9 100644
--- a/test/distributed/_shard/sharded_tensor/ops/test_binary_cmp.py
+++ b/test/distributed/_shard/sharded_tensor/ops/test_binary_cmp.py
@@ -129,7 +129,7 @@ class TestShardedTensorBinaryOps(ShardedTensorTestBase):
     def test_torch_equal(self):
         """Test torch.equal(ShardedTensor, ShardedTensor)"""
-        spec, alt_spec = self.get_gpu_specs()
+        spec, _ = self.get_gpu_specs()
         st1, st2 = self.get_random_tensors(spec, spec, 10, 10)
         self.assertTrue(torch.equal(st1, st2))
@@ -145,7 +145,7 @@ class TestShardedTensorBinaryOps(ShardedTensorTestBase):
     def test_torch_allclose(self):
         """Test torch.allclose(ShardedTensor, ShardedTensor)"""
-        spec, alt_spec = self.get_gpu_specs()
+        spec, _ = self.get_gpu_specs()
         st1, st2 = self.get_random_tensors(spec, spec, 10, 10)
         self.assertTrue(torch.allclose(st1, st2))
diff --git a/test/distributed/_shard/sharded_tensor/ops/test_init.py b/test/distributed/_shard/sharded_tensor/ops/test_init.py
index bf4cbd924c8..c33136f33ee 100644
--- a/test/distributed/_shard/sharded_tensor/ops/test_init.py
+++ b/test/distributed/_shard/sharded_tensor/ops/test_init.py
@@ -40,8 +40,6 @@ class TestShardedTensorNNInit(ShardedTensorTestBase):
             ],
         )
         h, w = 8, 2
-        expected_h = 2
-        expected_device = torch.device(f"cuda:{self.rank}")
         a, b = 10, 20
         seed = 1234
@@ -75,8 +73,6 @@ class TestShardedTensorNNInit(ShardedTensorTestBase):
             ],
         )
         h, w = 8, 2
-        expected_h = 2
-        expected_device = torch.device(f"cuda:{self.rank}")
         mean, std = 10, 5
         seed = 1234
@@ -110,8 +106,6 @@ class TestShardedTensorNNInit(ShardedTensorTestBase):
             ],
         )
         h, w = 8, 2
-        expected_h = 2
-        expected_device = torch.device(f"cuda:{self.rank}")
         a, mode, nonlinearity = 0, "fan_in", "leaky_relu"
         seed = 1234
diff --git a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
index 730b2c2c0ac..fb8b9778073 100644
--- a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
+++ b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py
@@ -456,7 +456,7 @@ class TestLocalTensor(ShardedTensorTestBase):
         with self.assertRaisesRegex(
             NotImplementedError, "Only single local shard is supported."
         ):
-            local_shard = st.local_tensor()
+            st.local_tensor()
 
 
 class TestShardedTensorChunked(ShardedTensorTestBase):
@@ -981,7 +980,6 @@ class TestShardedTensorChunked(ShardedTensorTestBase):
         # Validate remote shards.
         remote_shards = st.remote_shards()
         self.assertEqual(3, len(remote_shards))
-        owners = {}
         for rpc_rank, shards in remote_shards.items():
             self.assertEqual(2, len(shards))
             for remote_shard in shards:
@@ -1364,14 +1363,14 @@ class TestShardedTensorChunked(ShardedTensorTestBase):
             with self.assertRaisesRegex(RuntimeError, "Local rank at save time was"):
                 with load_with_process_group(pg):
                     # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load
-                    state_dict_deser = torch.load(buffer, weights_only=False)
+                    torch.load(buffer, weights_only=False)
         else:
             with self.assertRaisesRegex(
                 RuntimeError, "Local world size at save time was"
             ):
                 with load_with_process_group(pg):
                     # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load
-                    state_dict_deser = torch.load(buffer, weights_only=False)
+                    torch.load(buffer, weights_only=False)
 
         dist.destroy_process_group()
         buffer.seek(0)
@@ -1379,7 +1378,7 @@ class TestShardedTensorChunked(ShardedTensorTestBase):
             RuntimeError, "Need to initialize default process group"
         ):
             # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load
-            state_dict_deser = torch.load(buffer, weights_only=False)
+            torch.load(buffer, weights_only=False)
         rpc.shutdown()
 
     @with_comms
@@ -1396,8 +1395,8 @@ class TestShardedTensorChunked(ShardedTensorTestBase):
                     "rank:3/cuda:3",
                 ],
             )
-            st1 = sharded_tensor.empty(spec, 10, 20, init_rrefs=True)
-            st2 = sharded_tensor.empty(spec, 10, 20)
+            sharded_tensor.empty(spec, 10, 20, init_rrefs=True)
+            sharded_tensor.empty(spec, 10, 20)
 
         create_tensors()
         self.assertEqual(0, len(sharded_tensor.api._sharded_tensor_map))
@@ -2204,7 +2203,6 @@ class TestShardedTensorEnumerable(ShardedTensorTestBase):
         else:
             self.assertEqual(2, len(remote_shards))
 
-        owners = {}
         for rpc_rank, shards in remote_shards.items():
             self.assertEqual(2, len(shards))
             for remote_shard in shards:
@@ -2418,10 +2416,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             placement=f"rank:{self.rank}/cuda:{self.rank}",
         )
         with self.assertRaisesRegex(ValueError, "Shard tensor size does not match"):
-            local_shard_from_wrong_meta = sharded_tensor.Shard(
-                local_tensor,
-                metadata=wrong_local_shard_metadata,
-            )
+            sharded_tensor.Shard(local_tensor, metadata=wrong_local_shard_metadata)
 
     @with_comms
     @skip_if_lt_x_gpu(4)
@@ -2696,7 +2691,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
         empty_local_shards = []
         with self.assertRaisesRegex(ValueError, "have no local shards on all ranks"):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 empty_local_shards, [10, 10], init_rrefs=True
             )
@@ -2706,7 +2701,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
         with self.assertRaisesRegex(
             ValueError, "Only torch.strided layout is currently supported"
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_layout_shards, [10, 10], init_rrefs=True
             )
@@ -2719,23 +2714,19 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "Only torch.contiguous_format memory_format is currently supported",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_memory_format_shards, [10, 10], init_rrefs=True
             )
 
         with self.assertRaisesRegex(ValueError, "Shard tensor size does not match"):
-            wrong_size_shards = [
-                sharded_tensor.Shard(
-                    torch.randn(2, 3, device=f"cuda:{self.rank}"), local_shard_metadata
-                )
-            ]
+            sharded_tensor.Shard(
+                torch.randn(2, 3, device=f"cuda:{self.rank}"), local_shard_metadata
+            )
 
         with self.assertRaisesRegex(
             ValueError, "Local shard tensor device does not match"
         ):
-            wrong_device_shards = [
-                sharded_tensor.Shard(torch.randn(5, 5), local_shard_metadata)
-            ]
+            sharded_tensor.Shard(torch.randn(5, 5), local_shard_metadata)
@@ -2756,7 +2747,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "ShardedTensor global_size property does not match from different ranks!",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_dtype_shards, tensor_overall_size, init_rrefs=True
             )
@@ -2771,7 +2762,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "ShardedTensor dtype property does not match from different ranks!",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_dtype_shards, [10, 10], init_rrefs=True
             )
@@ -2788,7 +2779,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "ShardedTensor requires_grad property does not match from different ranks!",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_requires_grad_shards, [10, 10], init_rrefs=True
             )
@@ -2818,7 +2809,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
         with self.assertRaisesRegex(
             ValueError, "Local shards' tensor pin_memory property need to be the same"
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_pin_memory_local_shards, [10, 10], init_rrefs=True
             )
@@ -2832,7 +2823,7 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
             ValueError,
             "ShardedTensor pin_memory property does not match from different ranks!",
         ):
-            st = sharded_tensor.init_from_local_shards(
+            sharded_tensor.init_from_local_shards(
                 wrong_pin_memory_shards_cross_ranks, [10, 10], init_rrefs=True
             )
@@ -2945,19 +2936,15 @@ class TestShardedTensorFromLocalShards(ShardedTensorTestBase):
         with self.assertRaisesRegex(
             ValueError, "Shard tensor size does not match with metadata.shard_lengths"
         ):
-            wrong_size_shards = [
-                sharded_tensor.Shard(
-                    torch.randn(2, 3, device=f"cuda:{self.rank}"), local_shard_metadata
-                )
-            ]
+            sharded_tensor.Shard(
+                torch.randn(2, 3, device=f"cuda:{self.rank}"), local_shard_metadata
+            )
 
         with self.assertRaisesRegex(
             ValueError,
             "Local shard tensor device does not match with local Shard's placement",
         ):
-            wrong_device_shards = [
-                sharded_tensor.Shard(torch.randn(5, 5), local_shard_metadata)
-            ]
+            sharded_tensor.Shard(torch.randn(5, 5), local_shard_metadata)
 
         wrong_dtype_shards = [
             sharded_tensor.Shard(
diff --git a/test/distributed/_shard/sharding_plan/test_sharding_plan.py b/test/distributed/_shard/sharding_plan/test_sharding_plan.py
index 4625bebf41d..7310c43bb4a 100644
--- a/test/distributed/_shard/sharding_plan/test_sharding_plan.py
+++ b/test/distributed/_shard/sharding_plan/test_sharding_plan.py
@@ -42,7 +42,7 @@ class ChunkAllShardingPlanner(ShardingPlanner):
     def build_plan(self, module: nn.Module) -> ShardingPlan:
         named_params = module.named_parameters()
         plan = {}
-        for name, param in named_params:
+        for name, _ in named_params:
             plan[name] = ChunkShardingSpec(self.dim, placements=self.devices)
 
         return ShardingPlan(plan=plan)
diff --git a/test/distributed/_tensor/debug/test_comm_mode.py b/test/distributed/_tensor/debug/test_comm_mode.py
index 3428bca2c83..fb194f46197 100644
--- a/test/distributed/_tensor/debug/test_comm_mode.py
+++ b/test/distributed/_tensor/debug/test_comm_mode.py
@@ -92,7 +92,6 @@ class TestCommMode(TestCase):
         self.assertEqual(comm_counts[c10d_functional.reduce_scatter_tensor], 1)
 
     def test_comm_mode_with_dtensor(self):
-        world_pg = self.world_pg
         mesh = DeviceMesh(self.device_type, list(range(self.world_size)))
 
         def f(x, y):
@@ -118,8 +117,6 @@ class TestCommMode(TestCase):
         if not torch.cuda.is_available():
             return
 
-        world_pg = self.world_pg
-
         inp = torch.rand(2, 8, 16).cuda()
         all_gather_out = inp.new_empty(self.world_size * 2, 8, 16)
@@ -202,7 +199,7 @@ class TestCommMode(TestCase):
         self.checksAssert(comm_mode, c10d_ops.reduce_scatter_, 1, 1)
 
         # tests c10d reduce_scatter_tensor_coalesced
-        with comm_mode as A, dist._coalescing_manager() as B:
+        with comm_mode, dist._coalescing_manager():
             dist.reduce_scatter_tensor(all_gather_out, inp)
 
         self.checksAssert(comm_mode, c10d_ops.reduce_scatter_tensor_coalesced_, 1, 1)
diff --git a/test/distributed/_tensor/debug/test_comm_mode_features.py b/test/distributed/_tensor/debug/test_comm_mode_features.py
index fc19cddb58f..aa54491f4ce 100644
--- a/test/distributed/_tensor/debug/test_comm_mode_features.py
+++ b/test/distributed/_tensor/debug/test_comm_mode_features.py
@@ -251,7 +251,7 @@ class TestCommModeFeatures(DTensorTestBase):
             comm_mode.comm_module_counts,
             {"Global": {"forward": {}, "backward": {}}},
         )
-        output_tp = model(inp)
+        model(inp)
 
         model_args = ModelArgs(dropout_p=0.0)
         model2 = Transformer(model_args).to(device=self.device_type)
@@ -264,7 +264,7 @@ class TestCommModeFeatures(DTensorTestBase):
         comm_mode = CommDebugMode()
         with comm_mode:
-            output = model2(inp)
+            model2(inp)
 
         # checks to see if all collectives were correctly traced at the module-level
         self.assertEqual(
diff --git a/test/distributed/_tensor/test_dtensor.py b/test/distributed/_tensor/test_dtensor.py
index bf2613f4e67..0e62bbf2ee8 100644
--- a/test/distributed/_tensor/test_dtensor.py
+++ b/test/distributed/_tensor/test_dtensor.py
@@ -155,14 +155,12 @@ class DTensorTest(DTensorTestBase):
         device_mesh = DeviceMesh(self.device_type, list(range(self.world_size)))
         shard0_spec = [Shard(0)]
         local_tensor = torch.randn(4, 8)
-        global_shape = torch.Size([self.world_size * 4, 8])
         dist_tensor = DTensor.from_local(local_tensor, device_mesh, shard0_spec)
         # won't affect stride
         self.assertEqual(dist_tensor.stride(), (8, 1))
 
         shard1_spec = [Shard(1)]
         local_tensor = torch.randn(8, 4)
-        global_shape = torch.Size([8, self.world_size * 4])
         dist_tensor = DTensor.from_local(local_tensor, device_mesh, shard1_spec)
         # will affect stride after DT initialized
         self.assertEqual(dist_tensor.stride(), (4 * self.world_size, 1))
@@ -170,7 +168,6 @@ class DTensorTest(DTensorTestBase):
         # if initialized from a transposed mat
         local_tensor = torch.randn(8, 4, 8)
         local_tensor_t = local_tensor.permute(1, 2, 0)
-        global_shape = torch.Size([4, self.world_size * 8, 8])
         self.assertEqual(local_tensor_t.stride(), (8, 1, 32))
         dist_tensor = DTensor.from_local(local_tensor_t, device_mesh, shard1_spec)
         global_stride = (8 * self.world_size, 1, 32 * self.world_size)
@@ -257,7 +254,7 @@ class DTensorTest(DTensorTestBase):
         with self.assertRaisesRegex(
             RuntimeError, "Please pass both shape and stride at the same time."
         ):
-            dtensor = DTensor.from_local(
+            DTensor.from_local(
                 tensor_list[self.rank],
                 device_mesh,
                 (Shard(0),),
@@ -267,7 +264,7 @@ class DTensorTest(DTensorTestBase):
         with self.assertRaisesRegex(
             RuntimeError, "Please pass both shape and stride at the same time."
         ):
-            dtensor = DTensor.from_local(
+            DTensor.from_local(
                 tensor_list[self.rank],
                 device_mesh,
                 (Shard(0),),
@@ -1043,7 +1040,7 @@ class DTensorLogTest(LoggingTestCase):
         env["MASTER_PORT"] = "12345"
         env["MASTER_ADDR"] = "localhost"
 
-        stdout, stderr = self.run_process_no_exception(
+        _, stderr = self.run_process_no_exception(
             """\
 import logging
 import torch
diff --git a/test/distributed/_tensor/test_dtensor_compile.py b/test/distributed/_tensor/test_dtensor_compile.py
index 17939ba4785..356f5ff2c42 100644
--- a/test/distributed/_tensor/test_dtensor_compile.py
+++ b/test/distributed/_tensor/test_dtensor_compile.py
@@ -234,8 +234,8 @@ class TestDTensorCompile(torch._dynamo.test_case.TestCase):
                 requires_grad=x.requires_grad,
             )
 
-        out = fn(x)
-        out2 = torch.compile(fn, backend="eager")(x)
+        fn(x)
+        torch.compile(fn, backend="eager")(x)
 
     def test_dtensor_constructor_w_dynamo_disable(self):
         mesh = DeviceMesh(self.device_type, torch.arange(self.world_size))
@@ -599,7 +599,7 @@ class TestDTensorCompile(torch._dynamo.test_case.TestCase):
         @torch.compile(backend=cnt)
         def fn(x):
-            dt = DTensor.from_local(x, mesh, [placement], run_check=False)
+            DTensor.from_local(x, mesh, [placement], run_check=False)
 
         x = torch.ones(4, 4, requires_grad=True)
@@ -659,7 +659,7 @@ class TestDTensorCompile(torch._dynamo.test_case.TestCase):
         x2 = x_dt.redistribute(mesh, [Replicate()], async_op=True)
         x2 = x2.to_local()
         self.assertTrue(isinstance(x2, AsyncCollectiveTensor))
-        out = opt_fn(x2)
+        opt_fn(x2)
         # The important part: we get a wait_tensor() in the graph.
         # At runtime, the input to the graph is an AsyncCollectiveTensor,
         # and inside the graph we need to issue a wait() to synchronize.
@@ -880,8 +878,6 @@ class TestDTensorCompileE2E(DTensorTestBase):
             mesh_dim_names=["dp", "tp"],
         )
 
-        fsdp_pg = twod_mesh.get_group(mesh_dim=0)
-
         inp = torch.rand(20, 10, device=self.device_type)
         parallelize_plan = {
             "mlp_0.net1": ColwiseParallel(),
diff --git a/test/distributed/_tensor/test_random_ops.py b/test/distributed/_tensor/test_random_ops.py
index d5aa7efb8fa..7b918b76993 100644
--- a/test/distributed/_tensor/test_random_ops.py
+++ b/test/distributed/_tensor/test_random_ops.py
@@ -249,7 +249,7 @@ class DistTensorRandomOpTest(DTensorTestBase):
         device_mesh = DeviceMesh(self.device_type, torch.arange(self.world_size))
 
         # seed synchronization happens after the first `distribute_tensor` call
-        dtensor = distribute_tensor(
+        distribute_tensor(
             torch.empty([self.world_size], device="cuda"), device_mesh, [Shard(0)]
         )
         self.assertEqual(seed_from_rank_0, random._rng_tracker.get_seed("parallel-rng"))
diff --git a/test/distributed/_tensor/test_redistribute.py b/test/distributed/_tensor/test_redistribute.py
index 7b7531692fa..34ae3514ae1 100644
--- a/test/distributed/_tensor/test_redistribute.py
+++ b/test/distributed/_tensor/test_redistribute.py
@@ -309,7 +309,7 @@ class RedistributeTest(DTensorTestBase):
         shard_tensor = distribute_tensor(local_tensor, device_mesh, shard_spec)
         self.assertEqual(shard_tensor.placements[0].dim, 1)
         reshard_tensor = shard_tensor.redistribute(device_mesh, shard_minus_spec)
-        self.assertEqual(shard_tensor.placements[0].dim, 1)
+        self.assertEqual(reshard_tensor.placements[0].dim, 1)
 
     @with_comms
     def test_redistribute_uneven_sharding(self):
diff --git a/test/distributed/_tensor/test_tensor_ops.py b/test/distributed/_tensor/test_tensor_ops.py
index f9153c126bc..b389c682456 100644
--- a/test/distributed/_tensor/test_tensor_ops.py
+++ b/test/distributed/_tensor/test_tensor_ops.py
@@ -622,7 +622,7 @@ class DistTensorOpsTest(DTensorTestBase):
         self.assertEqual(misses, 2)
 
         # convert to fp32 again and see if there's cache hit
-        fp32_sharded_dtensor1 = bf16_sharded_dtensor1.float()
+        bf16_sharded_dtensor1.float()
         hits, misses, _, _ = _get_sharding_prop_cache_info()
         # by now we should have cache hit
         self.assertEqual(hits, 1)
diff --git a/test/distributed/_tensor/test_utils.py b/test/distributed/_tensor/test_utils.py
index f9ebf57d1dc..a9798f9d434 100644
--- a/test/distributed/_tensor/test_utils.py
+++ b/test/distributed/_tensor/test_utils.py
@@ -133,7 +133,6 @@ class UtilTest(DTensorTestBase):
             global_tensor_shape, global_mesh, placements
         )
         assert global_mesh.get_coordinate is not None
-        dp_replic_rank = global_mesh.get_local_rank("dp_replic")
         dp_shard_rank = global_mesh.get_local_rank("dp_shard")
         tp_rank = global_mesh.get_local_rank("tp")
         shard_idx_on_dim_0 = tp_rank * dp_shard_size + dp_shard_rank
diff --git a/test/distributed/_tensor/test_xla_integration.py b/test/distributed/_tensor/test_xla_integration.py
index 970b1a195df..2c11dd16ab9 100644
--- a/test/distributed/_tensor/test_xla_integration.py
+++ b/test/distributed/_tensor/test_xla_integration.py
@@ -150,7 +150,7 @@ class DTensorXLAIntegrationTest(TestCase):
             shard_spec = [Shard(0)]
             # annoate fc1 and fc2
             if isinstance(mod, nn.Linear):
-                for name, param in mod.named_parameters():
+                for _, param in mod.named_parameters():
                     # annotate the parameter tensors directly
                     distribute_tensor(param, mesh, shard_spec)
diff --git a/test/distributed/algorithms/quantization/test_quantization.py b/test/distributed/algorithms/quantization/test_quantization.py
index 94a1c763474..6713d4cc3f5 100644
--- a/test/distributed/algorithms/quantization/test_quantization.py
+++ b/test/distributed/algorithms/quantization/test_quantization.py
@@ -1,4 +1,5 @@
 # Owner(s): ["oncall: distributed"]
+# ruff: noqa: F841
 
 import os
 import sys
diff --git a/test/distributed/checkpoint/e2e/test_e2e_save_and_load.py b/test/distributed/checkpoint/e2e/test_e2e_save_and_load.py
index 4b0f3d6e045..a9a67f8b304 100644
--- a/test/distributed/checkpoint/e2e/test_e2e_save_and_load.py
+++ b/test/distributed/checkpoint/e2e/test_e2e_save_and_load.py
@@ -277,7 +277,7 @@ class TestE2ESaveAndLoad(DTensorTestBase, VerifyStateDictMixin):
             self.assertEqual(loss, dist_loss)
 
         dist_msd, dist_osd = get_state_dict(dist_model, optimizers=dist_optim)
-        model_sd, optim_sd = get_state_dict(model, optimizers=optim)
+        model_sd, _ = get_state_dict(model, optimizers=optim)
 
         self._verify_msd(model_sd, dist_msd)
         self._verify_osd_by_load(model, optim, self._optim(model), dist_osd)
diff --git a/test/distributed/checkpoint/e2e/test_fine_tuning.py b/test/distributed/checkpoint/e2e/test_fine_tuning.py
index b91b48e6f4c..799d304ab54 100644
--- a/test/distributed/checkpoint/e2e/test_fine_tuning.py
+++ b/test/distributed/checkpoint/e2e/test_fine_tuning.py
@@ -96,7 +96,7 @@ class TestFineTuning(DTensorTestBase):
         optim = torch.optim.Adam(model.parameters(), lr=1e-3)
 
         # Training
-        for i in range(3):
+        for _ in range(3):
             batch = torch.rand(32, DIM, device="cuda")
             loss = model(batch).sum()
             loss.backward()
@@ -161,7 +161,7 @@ class TestFineTuning(DTensorTestBase):
             self.assertEqual(i, 0)
 
         # Training
-        for j in range(3):
+        for _ in range(3):
             batch = torch.rand(32, DIM, device="cuda")
             loss = model(batch).sum()
             loss.backward()
diff --git a/test/distributed/checkpoint/test_checkpoint.py b/test/distributed/checkpoint/test_checkpoint.py
index 050f7df25da..7c6923800fd 100644
--- a/test/distributed/checkpoint/test_checkpoint.py
+++ b/test/distributed/checkpoint/test_checkpoint.py
@@ -85,11 +85,9 @@ class TestDistributedCheckpointing(ShardedTensorTestBase):
         )
 
         st = sharded_tensor.zeros(spec, 4, 4, dtype=torch.float64)
-        mapping = {}
-
         md = _create_default_local_metadata({"st": st})
-        st_md = md.state_dict_metadata["st"]
 
+        st_md = md.state_dict_metadata["st"]
         self.assertEqual(1, len(st_md.chunks))
 
     @with_comms(init_rpc=False)
diff --git a/test/distributed/checkpoint/test_fsdp_tp_checkpoint_conversion.py b/test/distributed/checkpoint/test_fsdp_tp_checkpoint_conversion.py
index 5f98aa82191..8e49edf1472 100644
--- a/test/distributed/checkpoint/test_fsdp_tp_checkpoint_conversion.py
+++ b/test/distributed/checkpoint/test_fsdp_tp_checkpoint_conversion.py
@@ -86,7 +86,6 @@ class TestFsdpTpCheckpointConversion(DTensorTestBase):
         tp_model.load_state_dict(tp_state_dict)
 
         # Check parameters are equal after loading.
-        tp_state_dict_after_load = tp_model.state_dict()
         for fsdp_item, tp_item in zip(fsdp_state_dict.items(), tp_state_dict.items()):
             fsdp_k, fsdp_v = fsdp_item
             tp_k, tp_v = tp_item
diff --git a/test/distributed/checkpoint/test_hsdp_checkpoint.py b/test/distributed/checkpoint/test_hsdp_checkpoint.py
index 23ca7c9463b..dc9c8518962 100644
--- a/test/distributed/checkpoint/test_hsdp_checkpoint.py
+++ b/test/distributed/checkpoint/test_hsdp_checkpoint.py
@@ -120,7 +120,6 @@ class TestHSDPCheckpoint(DTensorTestBase):
         )
         model.load_state_dict(state_dict_to_save["model"])
-        state_dict_after_load = model.state_dict()
 
         # After loading, the current model state dict should be the same as state_dict_to_save.
         for (k1, v1), (k2, v2) in zip(
             state_dict_to_save["model"].items(), model.state_dict().items()
diff --git a/test/distributed/checkpoint/test_nested_dict.py b/test/distributed/checkpoint/test_nested_dict.py
index 4b873210f42..bf9a61fe114 100644
--- a/test/distributed/checkpoint/test_nested_dict.py
+++ b/test/distributed/checkpoint/test_nested_dict.py
@@ -43,7 +43,7 @@ class TestFlattening(TestCase):
             "k3": ["x", 99, [{"k3": "y"}]],
         }
 
-        flatten_dict, mapping = flatten_state_dict(state_dict)
+        _, mapping = flatten_state_dict(state_dict)
         """
         flatten_dict:
         {'k0': [1], 'k2.0': tensor([1]), 'k2.1': 99, 'k2.2.0.k3': tensor(1), 'k3': ['x', 99, [{'k3': 'y'}]]}
diff --git a/test/distributed/checkpoint/test_save_load_api.py b/test/distributed/checkpoint/test_save_load_api.py
index 5f215d35da8..862f59f00da 100644
--- a/test/distributed/checkpoint/test_save_load_api.py
+++ b/test/distributed/checkpoint/test_save_load_api.py
@@ -40,21 +40,19 @@ class TestSaveAndLoadAPI(DTensorTestBase):
         device_mesh = init_device_mesh(self.device_type, (self.world_size,))
         model = FSDP(model, device_mesh=device_mesh)
         dcp.save(model.state_dict(), checkpoint_id=os.path.join(self.temp_dir, "first"))
-        sd = dcp.load(
-            model.state_dict(), checkpoint_id=os.path.join(self.temp_dir, "first")
-        )
+        dcp.load(model.state_dict(), checkpoint_id=os.path.join(self.temp_dir, "first"))
 
         with patch.object(
             dcp.FileSystemReader, "validate_checkpoint_id", return_value=False
-        ) as m1:
+        ):
             with patch.object(
                 dcp.FileSystemWriter, "validate_checkpoint_id", return_value=False
-            ) as m2:
+            ):
                 dcp.save(
                     model.state_dict(),
                     checkpoint_id=os.path.join(self.temp_dir, "second"),
                 )
-                sd = dcp.load(
+                dcp.load(
                     model.state_dict(),
                     checkpoint_id=os.path.join(self.temp_dir, "second"),
                 )
@@ -62,7 +60,7 @@ class TestSaveAndLoadAPI(DTensorTestBase):
         with self.assertRaisesRegex(RuntimeError, "Cannot detect"):
             dcp.save(model.state_dict(), checkpoint_id="abc://abc.abc")
         with self.assertRaisesRegex(RuntimeError, "Cannot detect"):
-            sd = dcp.load(model.state_dict(), checkpoint_id="abc://abc.abc")
+            dcp.load(model.state_dict(), checkpoint_id="abc://abc.abc")
 
 
 if __name__ == "__main__":
diff --git a/test/distributed/checkpoint/test_state_dict.py b/test/distributed/checkpoint/test_state_dict.py
index b6f4b9bed3c..581968335fd 100644
--- a/test/distributed/checkpoint/test_state_dict.py
+++ b/test/distributed/checkpoint/test_state_dict.py
@@ -81,7 +81,7 @@ class TestStateDict(DTensorTestBase, VerifyStateDictMixin):
         # Train 10 steps.
         _dist_optim = [dist_optim] if not isinstance(dist_optim, list) else dist_optim
-        for i in range(10):
+        for _ in range(10):
             optim.zero_grad()
             for d_optim in _dist_optim:
                 d_optim.zero_grad()
diff --git a/test/distributed/checkpoint/test_state_dict_utils.py b/test/distributed/checkpoint/test_state_dict_utils.py
index 1bab6be151e..04ddd764418 100644
--- a/test/distributed/checkpoint/test_state_dict_utils.py
+++ b/test/distributed/checkpoint/test_state_dict_utils.py
@@ -104,7 +104,7 @@ class TestStateDictUtils(DTensorTestBase):
             return tensor, dist_tensor
 
         ltensor, ldtensor = [], []
-        for i in range(10):
+        for _ in range(10):
             tensor, dtensor = create_dtensor()
             ltensor.append(tensor)
             ltensor.append(torch.ones(10, device=torch.device("cuda")))
diff --git a/test/distributed/elastic/multiprocessing/api_test.py b/test/distributed/elastic/multiprocessing/api_test.py
index 98ff8f1a309..b41b45d7e41 100644
--- a/test/distributed/elastic/multiprocessing/api_test.py
+++ b/test/distributed/elastic/multiprocessing/api_test.py
@@ -259,7 +259,7 @@ class _StartProcessesTest(TestCase):
     ) -> None:
         mp_queue = mp.get_context("spawn").Queue()
         child_nproc = 2
-        ctx = mp.spawn(
+        mp.spawn(
            start_processes_zombie_test,
            nprocs=1,
            args=(entrypoint, mp_queue, self.log_dir(), child_nproc),
diff --git a/test/distributed/elastic/rendezvous/c10d_rendezvous_backend_test.py b/test/distributed/elastic/rendezvous/c10d_rendezvous_backend_test.py
index 89329c380f3..2685d0e2da0 100644
--- a/test/distributed/elastic/rendezvous/c10d_rendezvous_backend_test.py
+++ b/test/distributed/elastic/rendezvous/c10d_rendezvous_backend_test.py
@@ -165,7 +165,7 @@ class CreateBackendTest(TestCase):
     def test_create_backend_returns_backend_if_is_host_is_not_specified_and_store_already_exists(
         self,
     ) -> None:
-        store = TCPStore(  # type: ignore[call-arg] # noqa: F841
+        TCPStore(  # type: ignore[call-arg] # noqa: F841
             self._expected_endpoint_host, self._expected_endpoint_port, is_master=True
         )
diff --git a/test/distributed/elastic/rendezvous/dynamic_rendezvous_test.py b/test/distributed/elastic/rendezvous/dynamic_rendezvous_test.py
index a65a042a244..8eb54041c23 100644
--- a/test/distributed/elastic/rendezvous/dynamic_rendezvous_test.py
+++ b/test/distributed/elastic/rendezvous/dynamic_rendezvous_test.py
@@ -99,7 +99,7 @@ class RendezvousTimeoutTest(TestCase):
                 ValueError,
                 rf"^The join timeout \({join_timeout}\) must be positive.$",
             ):
-                timeout = RendezvousTimeout(join_timeout)
+                RendezvousTimeout(join_timeout)
 
 
 class NodeDescTest(TestCase):
@@ -1637,7 +1637,7 @@ class CreateHandlerTest(TestCase):
 def _ignore_exception(exception_type: Exception, fn: Callable):
     try:
         fn()
-    except exception_type as e:
+    except exception_type:
         pass
diff --git a/test/distributed/elastic/rendezvous/rendezvous_backend_test.py b/test/distributed/elastic/rendezvous/rendezvous_backend_test.py
index fef7545fcd2..107a3bbfbad 100644
--- a/test/distributed/elastic/rendezvous/rendezvous_backend_test.py
+++ b/test/distributed/elastic/rendezvous/rendezvous_backend_test.py
@@ -70,7 +70,7 @@ class RendezvousBackendTestMixin(ABC):
         self.assertTrue(has_set)
 
     def test_set_state_sets_backend_state_if_token_is_current(self) -> None:
-        state1, token1, has_set1 = self._set_state(b"x")
+        _, token1, has_set1 = self._set_state(b"x")
 
         state2, token2, has_set2 = self._set_state(b"y", token1)
@@ -80,7 +80,7 @@ class RendezvousBackendTestMixin(ABC):
     def test_set_state_returns_current_backend_state_if_token_is_old(self) -> None:
-        state1, token1, _ = self._set_state(b"x")
+        _, token1, _ = self._set_state(b"x")
 
         state2, token2, _ = self._set_state(b"y", token1)
diff --git a/test/distributed/elastic/timer/file_based_local_timer_test.py b/test/distributed/elastic/timer/file_based_local_timer_test.py
index c06f3520bac..39d215f9319 100644
--- a/test/distributed/elastic/timer/file_based_local_timer_test.py
+++ b/test/distributed/elastic/timer/file_based_local_timer_test.py
@@ -113,7 +113,7 @@ if not (IS_WINDOWS or IS_MACOS):
             num_clients = 10
             num_requests_per_client = 10
             processes = []
-            for i in range(num_clients):
+            for _ in range(num_clients):
                 p = mp.Process(
                     target=func, args=(num_requests_per_client, self.file_path)
                 )
@@ -190,7 +190,7 @@ if not (IS_WINDOWS or IS_MACOS):
             """
             client = timer.FileTimerClient(file_path)
             sem.release()
-            for i in range(0, n):
+            for _ in range(0, n):
                 client.acquire("test_scope", 0)
                 time.sleep(interval)
diff --git a/test/distributed/fsdp/test_checkpoint_wrapper.py b/test/distributed/fsdp/test_checkpoint_wrapper.py
index d1f189fed4d..06aa6900430 100644
--- a/test/distributed/fsdp/test_checkpoint_wrapper.py
+++ b/test/distributed/fsdp/test_checkpoint_wrapper.py
@@ -159,7 +159,7 @@ class CheckpointWrapperTest(TestCase):
                 if use_reentrant
                 else CheckpointImpl.NO_REENTRANT,
             )
-            for i in range(self.n):
+            for _ in range(self.n):
                 l = nn.Sequential(
                     nn.Linear(256, 256), nn.Linear(256, 256), nn.Linear(256, 256)
                 )
diff --git a/test/distributed/fsdp/test_fsdp_dtensor_state_dict.py b/test/distributed/fsdp/test_fsdp_dtensor_state_dict.py
index 97e7d56b97b..281f11d6d66 100644
--- a/test/distributed/fsdp/test_fsdp_dtensor_state_dict.py
+++ b/test/distributed/fsdp/test_fsdp_dtensor_state_dict.py
@@ -303,13 +303,13 @@ class TestFSDPWithDeviceMeshAndDTensor(DTensorTestBase):
         with self.assertRaisesRegex(
             RuntimeError, "DeviceMesh is not compatible with LOCAL_STATE_DICT."
         ):
             with FSDP.state_dict_type(model, StateDictType.LOCAL_STATE_DICT):
-                state_dict = model.state_dict()
+                model.state_dict()
 
         with self.assertRaisesRegex(
             RuntimeError, "DeviceMesh is not compatible with LOCAL_STATE_DICT."
         ):
             with FSDP.state_dict_type(model, StateDictType.LOCAL_STATE_DICT):
-                optim_state_dict = FSDP.optim_state_dict(model, optim)
+                FSDP.optim_state_dict(model, optim)
 
 
 instantiate_parametrized_tests(TestFSDPWithDeviceMeshAndDTensor)
diff --git a/test/distributed/fsdp/test_fsdp_fine_tune.py b/test/distributed/fsdp/test_fsdp_fine_tune.py
index 446d1be04f0..754c33bfdfd 100644
--- a/test/distributed/fsdp/test_fsdp_fine_tune.py
+++ b/test/distributed/fsdp/test_fsdp_fine_tune.py
@@ -364,9 +364,8 @@ class TestFSDPFineTune(FSDPTest):
         )
         torch.manual_seed(self.rank + 1)
         losses = []
-        for idx in range(6):
+        for _ in range(6):
             frozen_input = torch.randn((4, 4), device="cuda", requires_grad=False)
-            learnable_input = torch.randn((4, 4), device="cuda", requires_grad=True)
             for _model, _optim in ((model, model_optim), (ref_model, ref_model_optim)):
                 loss = _model(frozen_input, frozen_input).sum()
                 losses.append(loss)
diff --git a/test/distributed/fsdp/test_fsdp_freezing_weights.py b/test/distributed/fsdp/test_fsdp_freezing_weights.py
index 7d662cfcba3..0ffe6054bd3 100644
--- a/test/distributed/fsdp/test_fsdp_freezing_weights.py
+++ b/test/distributed/fsdp/test_fsdp_freezing_weights.py
@@ -182,7 +182,7 @@ class TestFreezingWeights(FSDPTest):
         criterion = nn.CrossEntropyLoss()
         optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
 
-        for iteration in range(3):
+        for _ in range(3):
             out = model(batch)
             fake_loss = criterion(out, target)
             optimizer.zero_grad()
diff --git a/test/distributed/fsdp/test_fsdp_memory.py b/test/distributed/fsdp/test_fsdp_memory.py
index ea200e9ae10..08ab6049ced 100644
--- a/test/distributed/fsdp/test_fsdp_memory.py
+++ b/test/distributed/fsdp/test_fsdp_memory.py
@@ -108,8 +108,6 @@ class TestFSDPMemory(FSDPTest):
     def _dist_train(self, with_checkpoint, expected, model_hidden_dim, iterations):
         gpu_id = self.rank
-        world_size = self.world_size
-
         batch = torch.randn(size=(2, 3, 224, 224)).cuda()
 
         model = create_model(
diff --git a/test/distributed/fsdp/test_fsdp_misc.py b/test/distributed/fsdp/test_fsdp_misc.py
index 2bd0d719a31..39b9b2d317d 100644
--- a/test/distributed/fsdp/test_fsdp_misc.py
+++ b/test/distributed/fsdp/test_fsdp_misc.py
@@ -278,9 +278,9 @@ class TestFSDPMiscMultiProcess(FSDPTest):
         )
         x = torch.randn(10, 10, device="cuda")
         y = torch.randn(10, 10, device="cuda")
-        for i in range(4):
+        for _ in range(4):
             if use_second_layer:
-                a, b = fsdp(x, y)
+                a, _ = fsdp(x, y)
             else:
                 a = fsdp(x, y)
             loss = a.sum()
@@ -509,7 +509,7 @@ class TestFSDPMiscMultiProcess(FSDPTest):
     def test_fsdp_cpu_training(self):
         """Tests FSDP training on CPU."""
         gloo_pg = dist.new_group(backend="gloo")
-        for ss in [
+        for ss in [  # noqa: F841
             ShardingStrategy.NO_SHARD,
             ShardingStrategy.FULL_SHARD,
             ShardingStrategy.SHARD_GRAD_OP,
@@ -857,13 +857,13 @@ class TestFSDPMiscMultiThread(FSDPTestMultiThread):
         torch.cuda.set_device(self.rank)
         # Test CPU
         no_params = nn.ReLU()
-        module = FSDP(no_params)
+        FSDP(no_params)
         # Test CUDA
         no_params = nn.ReLU().cuda()
-        module = FSDP(no_params)
+        FSDP(no_params)
         # Test CPU + device_id
         no_params = nn.ReLU()
-        module = FSDP(no_params, device_id=torch.cuda.current_device())
+        FSDP(no_params, device_id=torch.cuda.current_device())
         # For modules with no params, wrong device_id will raise error about
         # inconsistency between compute_device and device_id, since compute_device
         # is computed as torch.cuda.current_device when there are no params.
diff --git a/test/distributed/fsdp/test_fsdp_mixed_precision.py b/test/distributed/fsdp/test_fsdp_mixed_precision.py
index 30b628b6a3c..0e4b0d0497c 100644
--- a/test/distributed/fsdp/test_fsdp_mixed_precision.py
+++ b/test/distributed/fsdp/test_fsdp_mixed_precision.py
@@ -1139,7 +1139,6 @@ class TestFSDPDifferentSubmodulePrecision(FSDPTest):
         model = SaveForwardInputsModel(
             forward_inputs=forward_inputs, cast_forward_inputs=False
         ).cuda()
-        c1, c2 = model.c1, model.c2
         x = torch.zeros(2, 100, device="cuda")
 
         # float16 on one submodule and float32 on everything else
diff --git a/test/distributed/fsdp/test_fsdp_multiple_wrapping.py b/test/distributed/fsdp/test_fsdp_multiple_wrapping.py
index caa4c6cb98a..42ef25b9d6f 100644
--- a/test/distributed/fsdp/test_fsdp_multiple_wrapping.py
+++ b/test/distributed/fsdp/test_fsdp_multiple_wrapping.py
@@ -45,7 +45,7 @@ class TestMultipleWrapping(FSDPTest):
         model = FSDP(inner_model).cuda()
         optim = SGD(model.parameters(), lr=0.1)
 
-        for i in range(3):
+        for _ in range(3):
             input = torch.rand((1, 5), dtype=torch.float).cuda()
             input.requires_grad = True
             output = model(input)
diff --git a/test/distributed/fsdp/test_fsdp_optim_state.py b/test/distributed/fsdp/test_fsdp_optim_state.py
index 6926a486c8c..df8f5419c63 100644
--- a/test/distributed/fsdp/test_fsdp_optim_state.py
+++ b/test/distributed/fsdp/test_fsdp_optim_state.py
@@ -1510,7 +1510,7 @@ class TestFSDPOptimState(FSDPTest):
         ) = self._init_nested_model(wrap=False, use_multiple_param_groups=False)
         if should_check_method_fn("rekey_optim_state_dict"):
             with context_fn():
-                rekeyed_osd = FSDP.rekey_optim_state_dict(
+                FSDP.rekey_optim_state_dict(
                     fsdp_osd,  # from `full_optim_state_dict()`
                     OptimStateKeyType.PARAM_ID,
                     nonwrapped_model,
@@ -1650,7 +1650,7 @@ class TestFSDPOptimState(FSDPTest):
         )
 
         # Make optim1 has a different state.
-        for i in range(5):
+        for _ in range(5):
             batch = torch.rand(5, 8).cuda()
             loss = models[1](batch).sum()
             loss.backward()
@@ -1765,7 +1765,7 @@ class TestFSDPOptimState(FSDPTest):
         initializer = self._model_class[model_class]
 
         # First, run a wrapped model with full world size for a few iterations
-        model1, optim1, optim_input1 = initializer(
+        model1, optim1, _ = initializer(
             wrap=True,
             use_multiple_param_groups=use_multiple_param_groups,
         )
@@ -1788,7 +1788,7 @@ class TestFSDPOptimState(FSDPTest):
             new_group = dist.distributed_c10d._get_default_group()
         # Second, run a wrapped model with (possibly) halved world size and
         # (possibly) differing `optim_input` across ranks
-        model2, optim2, optim_input2 = initializer(
+        model2, optim2, _ = initializer(
             wrap=True,
             group=new_group,
             use_multiple_param_groups=use_multiple_param_groups,
@@ -1861,7 +1861,8 @@ class TestFSDPOptimState(FSDPTest):
             FSDP.optim_state_dict(model, optim), osd, check_same_param_keys=True
         )
         step()
-        osd_to_load = FSDP.optim_state_dict_to_load(
+
+        osd_to_load = FSDP.optim_state_dict_to_load(  # noqa: F841
             model, optim, osd, load_directly=True
         )
         self._check_same_state(
@@ -1994,7 +1995,7 @@ class TestFSDPOptimState(FSDPTest):
             loss.backward()
             fsdp_optim.step()
         orig_state_dict = deepcopy(fsdp_optim.state_dict())
-        optim_state_dict = FSDP.optim_state_dict(fsdp_model, fsdp_optim)
+        FSDP.optim_state_dict(fsdp_model, fsdp_optim)
         FSDP.optim_state_dict_to_load(
             fsdp_model,
             fsdp_optim,
diff --git a/test/distributed/fsdp/test_fsdp_state_dict.py b/test/distributed/fsdp/test_fsdp_state_dict.py
index a246375caba..0a8a4f57684 100644
--- a/test/distributed/fsdp/test_fsdp_state_dict.py
+++ b/test/distributed/fsdp/test_fsdp_state_dict.py
@@ -966,7 +966,7 @@ class TestFSDPStateDict(FSDPTest):
             setattr(module, LINEAR_SKIP, linear_skip)
             return fsdp, linear_skip_tensor_names
 
-        fsdp, linear_skip_tensor_names = _create_module()
+        fsdp, _ = _create_module()
         # Run a forward pass
         inp = torch.randn((1, 10), device=torch.cuda.current_device())
         loss = fsdp(inp)
diff --git a/test/distributed/fsdp/test_fsdp_unshard_params.py b/test/distributed/fsdp/test_fsdp_unshard_params.py
index fe8a00892e2..e2eea11ac2e 100644
--- a/test/distributed/fsdp/test_fsdp_unshard_params.py
+++ b/test/distributed/fsdp/test_fsdp_unshard_params.py
@@ -634,7 +634,7 @@ class TestUnshardParams(TestUnshardParamsBase):
         model = FSDP(model, auto_wrap_policy=ModuleWrapPolicy((nn.Sequential,)))
         with FSDP.summon_full_params(model[0]):
             # Check that the summoned module does not have its flat parameter
-            for param_name, param in model[0].named_parameters():
+            for param_name, _ in model[0].named_parameters():
                 self.assertFalse(FLAT_PARAM in param_name)
         self.assertGreater(len(list(model[0].parameters())), 1)
diff --git a/test/distributed/fsdp/test_fsdp_use_orig_params.py b/test/distributed/fsdp/test_fsdp_use_orig_params.py
index e477c043c4d..996f1840454 100644
--- a/test/distributed/fsdp/test_fsdp_use_orig_params.py
+++ b/test/distributed/fsdp/test_fsdp_use_orig_params.py
@@ -260,7 +260,7 @@ class TestFSDPUseOrigParamsMultipleParamGroups(FSDPTest):
         model = FSDP(copy.deepcopy(base_model), self.process_group, **fsdp_kwargs)
         model = torch.compile(model)
         optim = torch.optim.Adam(model.parameters(), lr=1e-2)
-        for i in range(10):
+        for _ in range(10):
             losses = []
             inp = ref_model.get_input(torch.device("cuda"))
             for _model, _optim in ((ref_model, ref_optim), (model, optim)):
diff --git a/test/distributed/fsdp/test_utils.py b/test/distributed/fsdp/test_utils.py
index adc338dcf9a..739100f1d3e 100644
--- a/test/distributed/fsdp/test_utils.py
+++ b/test/distributed/fsdp/test_utils.py
@@ -118,7 +118,7 @@ class TestUtils(TestCase):
             x.fill_(0)
 
         x = nn.utils.rnn.pack_padded_sequence(x, seq_length)
-        x, h = rnn(x)
+        x, _ = rnn(x)
         x = _apply_to_tensors(fill_fn, x)
         x, _ = nn.utils.rnn.pad_packed_sequence(x)
         self.assertEqual(torch.sum(x), 0)
diff --git a/test/distributed/launcher/launch_test.py b/test/distributed/launcher/launch_test.py
index b8312de37fa..1ef7fa7e284 100644
--- a/test/distributed/launcher/launch_test.py
+++ b/test/distributed/launcher/launch_test.py
@@ -41,7 +41,6 @@ class LaunchTest(unittest.TestCase):
     def test_launch_without_env(self):
         nnodes = 1
         nproc_per_node = 4
-        world_size = nnodes * nproc_per_node
         sock = get_socket_with_port()
         with closing(sock):
             master_port = sock.getsockname()[1]
diff --git a/test/distributed/pipelining/model_registry.py b/test/distributed/pipelining/model_registry.py
index da081451372..05d4e54176f 100644
--- a/test/distributed/pipelining/model_registry.py
+++ b/test/distributed/pipelining/model_registry.py
@@ -114,7 +114,7 @@ class CustomLinearDx(Function):
     @staticmethod
     def backward(ctx, grad_output):
-        input_val, weight, bias = ctx.saved_tensors
+        input_val, weight, _ = ctx.saved_tensors
         grad_input = grad_output.mm(weight)
         ctx.module.cached_context[ctx.layer_idx].append(grad_output.clone())
         ctx.module.cached_context[str(ctx.layer_idx) + "_input"].append(
@@ -131,7 +131,7 @@ class CustomLinearDxDw(Function):
     @staticmethod
     def backward(ctx, grad_output):
-        input_val, weight, bias = ctx.saved_tensors
+        input_val, weight, _ = ctx.saved_tensors
         grad_input = grad_output.mm(weight)
         grad_weight = grad_output.t().mm(input_val)
         grad_bias = grad_output.sum(0)
diff --git a/test/distributed/pipelining/test_backward.py b/test/distributed/pipelining/test_backward.py
index a19092d8a21..218da6b07ed 100644
--- a/test/distributed/pipelining/test_backward.py
+++ b/test/distributed/pipelining/test_backward.py
@@ -74,7 +74,7 @@ class StageBackwardTests(TestCase):
         # Forward, then backward of loss with respect to inputs
         out = mod(x)
         loss = loss_fn(out, target)
-        dinputs, param_groups = stage_backward_input(
+        dinputs, _param_groups = stage_backward_input(
             stage_outputs_or_loss=(loss,),
             output_grads=None,
             input_values=[x],
@@ -88,7 +88,7 @@ class StageBackwardTests(TestCase):
         torch.testing.assert_close(x.grad, ref_x.grad)
         torch.testing.assert_close(dinputs[0], ref_x.grad)
 
-        for name, p in mod.named_parameters():
+        for _, p in mod.named_parameters():
             # Check that the weight gradients were not updated
             self.assertEqual(p.grad, None)
@@ -109,7 +109,7 @@ class StageBackwardTests(TestCase):
         # Forward, then backward of loss with respect to inputs
         out = mod(x)
         loss = loss_fn(out, target)
-        dinputs, param_groups = stage_backward_input(
+        _dinputs, param_groups = stage_backward_input(
             stage_outputs_or_loss=(loss,),
             output_grads=None,
             input_values=[x],
@@ -157,7 +157,7 @@ class StageBackwardTests(TestCase):
         for x in inputs:
             out = mod(x)
             loss = loss_fn(out, target)
-            dinputs, param_groups = stage_backward_input(
+            _dinputs, param_groups = stage_backward_input(
                 stage_outputs_or_loss=(loss,),
                 output_grads=None,
                 input_values=[x],
diff --git a/test/distributed/pipelining/test_schedule.py b/test/distributed/pipelining/test_schedule.py
index d1025f786c6..639a0fc2d0b 100644
--- a/test/distributed/pipelining/test_schedule.py
+++ b/test/distributed/pipelining/test_schedule.py
@@ -264,7 +264,7 @@ class TestSchedulePlan(TestCase):
                 ]
                 schedule = ScheduleClass(stages, num_microbatches)
-                formatted_pipeline_order = _format_pipeline_order(
+                _formatted_pipeline_order = _format_pipeline_order(
                     schedule.pipeline_order
                 )
@@ -305,10 +305,7 @@ class TestSchedulePlan(TestCase):
                 for i in range(num_local_stages)
             ]
             schedule = ScheduleClass(stages, num_microbatches)
-            formatted_pipeline_order = _format_pipeline_order(
-                schedule.pipeline_order
-            )
-            # print(formatted_pipeline_order)
+            _format_pipeline_order(schedule.pipeline_order)
 
             def stage_to_rank(stage):
                 return stage % group_size
diff --git a/test/distributed/pipelining/test_schedule_multiproc.py b/test/distributed/pipelining/test_schedule_multiproc.py
index f41c06b6b31..b4e5ef3eaa3 100644
--- a/test/distributed/pipelining/test_schedule_multiproc.py
+++ b/test/distributed/pipelining/test_schedule_multiproc.py
@@ -151,7 +151,7 @@ class ScheduleTest(MultiProcContinousTest):
                 schedule.step(x)
             elif self.rank == self.world_size - 1:
                 losses = []
-                out = schedule.step(target=target, losses=losses)
+                schedule.step(target=target, losses=losses)
             else:
                 schedule.step()
@@ -412,7 +412,6 @@ class ScheduleTest(MultiProcContinousTest):
             if hasattr(ScheduleClass, "num_microbatches")
             else 8
         )
-        input_args = x.chunk(num_microbatches)[0]
         stages = [
             PipelineStage(
                 stage_module,
@@ -548,7 +547,6 @@ class ScheduleTest(MultiProcContinousTest):
         loss_fn = torch.nn.MSELoss(reduction="sum")
 
         # Create a pipeline stage to wrap that submodule
-        input_args = x.chunk(num_microbatches)[0]
         stage_indices = rank_stages[self.rank]
         print(f"Rank {self.rank} stages: {stage_indices}")
         submod_names = [f"layers.{i}" for i in stage_indices]
@@ -582,7 +580,7 @@ class ScheduleTest(MultiProcContinousTest):
                 schedule.step(x)
             elif self.rank == self.world_size - 1:
                 losses = []
-                out = schedule.step(target=target, losses=losses)
+                schedule.step(target=target, losses=losses)
             else:
                 schedule.step()
         self.assertEqual(
@@ -887,7 +885,6 @@ class ScheduleTest(MultiProcContinousTest):
         # Create a pipeline stage to wrap that submodule
         chunks = 2
-        input_args = x.chunk(chunks)[0]
         stages = [
             PipelineStage(
                 stage_module,
diff --git a/test/distributed/pipelining/test_stage.py b/test/distributed/pipelining/test_stage.py
index b02e7e25aff..450e719377f 100644
--- a/test/distributed/pipelining/test_stage.py
+++ b/test/distributed/pipelining/test_stage.py
@@ -310,9 +310,6 @@ class StageTest(MultiProcContinousTest):
         full_mod.to(self.device)
         stage_mod = full_mod.get_submodule(f"layers.{self.rank}")
 
-        x = torch.randn(batch_size, d_hid, device=self.device)
-        target = torch.randn(batch_size, d_hid, device=self.device)
-
         stage_with_dw_builder = PipelineStage(
             stage_mod,
             self.rank,
diff --git a/test/distributed/pipelining/test_unflatten.py b/test/distributed/pipelining/test_unflatten.py
index 9e63c3b8084..ba0b3c62a2f 100644
--- a/test/distributed/pipelining/test_unflatten.py
+++ b/test/distributed/pipelining/test_unflatten.py
@@ -58,7 +58,7 @@ class UnflattenTests(TestCase):
         # Check qualnames
         for stage_idx in range(pipe.num_stages):
             stage_mod = pipe.get_stage_module(stage_idx)
-            for param_name, param in stage_mod.named_parameters():
+            for param_name, _ in stage_mod.named_parameters():
                 assert (
                     param_name in orig_state_dict
                 ), f"{param_name} not in original state dict"
diff --git a/test/distributed/tensor/parallel/test_micro_pipeline_tp.py b/test/distributed/tensor/parallel/test_micro_pipeline_tp.py
index 5502116284a..abde0ca518d 100644
--- a/test/distributed/tensor/parallel/test_micro_pipeline_tp.py
+++ b/test/distributed/tensor/parallel/test_micro_pipeline_tp.py
@@ -87,7 +87,9 @@ class
MicroPipelineTPTest(TestCase): a = all_gather_tensor(inp, gather_dim=0, group=group.group_name) b = all_gather_tensor(inp, gather_dim=1, group=group.group_name) c = _fp8_all_gather(inp, gather_dim=0, group_name=group.group_name) - d = _fp8_all_gather(inp, gather_dim=1, group_name=group.group_name) + d = _fp8_all_gather( # noqa: F841 + inp, gather_dim=1, group_name=group.group_name + ) return a, b, c inp = torch.rand(64, 32, device="cuda") diff --git a/test/distributed/tensor/parallel/test_tp_examples.py b/test/distributed/tensor/parallel/test_tp_examples.py index 43662c4d6cf..e448953c676 100644 --- a/test/distributed/tensor/parallel/test_tp_examples.py +++ b/test/distributed/tensor/parallel/test_tp_examples.py @@ -311,7 +311,7 @@ class DistTensorParallelExampleTest(DTensorTestBase): torch.manual_seed(0) steps = 10 if type(model) is torch.float64 else 1 - for iter in range(steps): + for _ in range(steps): inp = torch.randint( model_args.vocab_size, inp_size, device=self.device_type ) diff --git a/test/distributed/tensor/parallel/test_tp_style.py b/test/distributed/tensor/parallel/test_tp_style.py index 28ff10bab09..6c965edf73d 100644 --- a/test/distributed/tensor/parallel/test_tp_style.py +++ b/test/distributed/tensor/parallel/test_tp_style.py @@ -223,7 +223,7 @@ class TensorParallelStyleTest(DTensorTestBase): AssertionError, "input_layouts and desired_input_layouts should have same length!", ): - prepare_inps_dimension_mismatch = PrepareModuleInput( + PrepareModuleInput( input_layouts=Shard(0), desired_input_layouts=(Replicate(), None) ) # Raise assertion error if module inputs and input_layouts do not have same length. diff --git a/test/distributed/test_c10d_common.py b/test/distributed/test_c10d_common.py index 87937b74d33..4fd0e43d706 100644 --- a/test/distributed/test_c10d_common.py +++ b/test/distributed/test_c10d_common.py @@ -182,7 +182,7 @@ class TimeoutTest(TestCase): threads.append(t) t.start() - for i, thread in enumerate(threads): + for _, thread in enumerate(threads): thread.join() # we expect the world_size-1 threads to have failed @@ -583,14 +583,14 @@ class CommonDistributedDataParallelTest: ) ) with err_ctx: - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointOnceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, find_unused_parameters=True, ) # test passes when static_graph is true - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointOnceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, @@ -615,7 +615,7 @@ class CommonDistributedDataParallelTest: ) ) with err_ctx: - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointTwiceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, @@ -623,7 +623,7 @@ class CommonDistributedDataParallelTest: ) with err_ctx: - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointTwiceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, @@ -641,7 +641,7 @@ class CommonDistributedDataParallelTest: process_group = self._get_process_group() for use_bucket_view in (True, False): # Test passes when static_graph=True. 
- model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointTwiceModule(use_reentrant=use_reentrant), process_group=process_group, use_bucket_view=use_bucket_view, @@ -656,7 +656,7 @@ class CommonDistributedDataParallelTest: """ process_group = self._get_process_group() for use_bucket_view in (True, False): - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.DynamicCheckpointTwiceModule(use_reentrant=False), process_group=process_group, use_bucket_view=use_bucket_view, @@ -675,7 +675,7 @@ class CommonDistributedDataParallelTest: """ process_group = self._get_process_group() for use_bucket_view in (True, False): - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.DynamicCheckpointTwiceModuleWeightSharing(use_reentrant=False), process_group=process_group, use_bucket_view=use_bucket_view, @@ -719,7 +719,7 @@ class CommonDistributedDataParallelTest: process_group = self._get_process_group() torch.cuda.set_device(self.rank) for use_bucket_view in (True, False): - model = self._test_ddp_checkpointing( + self._test_ddp_checkpointing( self.CheckpointTwiceModuleWeightSharing(), process_group=process_group, use_bucket_view=use_bucket_view, @@ -737,7 +737,7 @@ class CommonDistributedDataParallelTest: "Expect `start_powerSGD_iter` > 1 if `use_error_feedback` or `warm_start` is enabled, " "because PowerSGD can only be applied after the first two iterations in DDP.", ): - state = powerSGD.PowerSGDState( + powerSGD.PowerSGDState( process_group=None, matrix_approximation_rank=1, start_powerSGD_iter=start_powerSGD_iter, diff --git a/test/distributed/test_c10d_functional_native.py b/test/distributed/test_c10d_functional_native.py index b1c99145311..4db81a0b21d 100644 --- a/test/distributed/test_c10d_functional_native.py +++ b/test/distributed/test_c10d_functional_native.py @@ -429,7 +429,7 @@ class TestWithNCCL(MultiProcessTestCase): input = torch.full((10, 10), float(self.rank), device=self.device) self.assertEqual(torch._C._distributed_c10d._get_work_registry_size(), 0) - output = torch.ops._c10d_functional.all_reduce( + torch.ops._c10d_functional.all_reduce( input, "avg", "default", @@ -550,7 +550,7 @@ class CompileTest(TestCase): assert "= torch.ops._c10d_functional.wait_tensor.default" not in code # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -596,7 +596,7 @@ class CompileTest(TestCase): assert "= torch.ops._c10d_functional.wait_tensor.default" not in code # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (args,)) + out = AOTIRunnerUtil.run("cuda", func, (args,)) # noqa: F841 torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -708,7 +708,7 @@ class CompileTest(TestCase): assert "= torch.ops._c10d_functional.wait_tensor.default" not in code # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -742,7 +742,7 @@ class CompileTest(TestCase): ) # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (args,)) + out = AOTIRunnerUtil.run("cuda", func, (args,)) # noqa: F841 torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "This is a GPU test!") @@ -764,7 +764,7 @@ class CompileTest(TestCase): ) # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + 
AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -790,7 +790,7 @@ class CompileTest(TestCase): ) # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -910,7 +910,7 @@ class CompileTest(TestCase): ) # Test aoti - out = AOTIRunnerUtil.run("cuda", func, (arg,)) + AOTIRunnerUtil.run("cuda", func, (arg,)) torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") diff --git a/test/distributed/test_c10d_gloo.py b/test/distributed/test_c10d_gloo.py index ee1e04a2be7..968238be5cf 100644 --- a/test/distributed/test_c10d_gloo.py +++ b/test/distributed/test_c10d_gloo.py @@ -1920,7 +1920,7 @@ class DistributedDataParallelTest( ddp_state_dict = torch.load(checkpoint_path, map_location=map_location) for model in [ddp_withload, model_withload]: - for p in ddp_withload.parameters(): + for p in model.parameters(): with torch.no_grad(): p.zero_() ddp_withload.load_state_dict(ddp_state_dict) @@ -1973,7 +1973,8 @@ class DistributedDataParallelTest( This unit test verifies whether the Future object is passed properly. The callback function creates a Future object and sets a value to it. """ - store = c10d.FileStore(self.file_name, self.world_size) + store = c10d.FileStore(self.file_name, self.world_size) # noqa: F841 + process_group = self._get_process_group() # Test on CPU diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index 35fb3217ce2..930f3811c4d 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -366,7 +366,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase): thread.start() # We would get stuck here due to d2h if we didn't abort. - t_cpu = t.cpu() + t.cpu() thread.join() @@ -741,7 +741,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase): # First allreduce to initialize default PG's communicator. pg.allreduce(t).wait() # PG1 is an PG without comms initialized, since we don't call collective on it - new_pg1 = c10d.new_group([0, 1]) + new_pg1 = c10d.new_group([0, 1]) # noqa: F841 new_pg2 = c10d.new_group([0, 1]) t2 = torch.rand(10, 10, device=device) @@ -807,7 +807,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase): # 'timeout' kwarg (or its kwdefault) taking precedence opts = dist.ProcessGroupNCCL.Options() opts._timeout = timedelta(seconds=123) - with warnings.catch_warnings(record=True) as w: + with warnings.catch_warnings(record=True): dist.init_process_group(**base_opts, pg_options=opts) # TODO(whc) i verified that we are indeed emitting this warning, and i can't figure out why i can't catch it. # self.assertEqual(len(w), 1) @@ -1266,30 +1266,26 @@ class DistributedDataParallelTest( "DistributedDataParallel device_ids and output_device arguments only work with " "single-device/multiple-device GPU modules or CPU modules", ): - ddp_model = DistributedDataParallel( + DistributedDataParallel( model, output_device=gpus[1], process_group=process_group ) with self.assertRaisesRegex( ValueError, "device_ids can only be None or contain a single element." 
): - ddp_model = DistributedDataParallel( - model, device_ids=gpus, process_group=process_group - ) + DistributedDataParallel(model, device_ids=gpus, process_group=process_group) with self.assertRaisesRegex( ValueError, "input module must be on the same type of devices" ): model.fc1 = model.fc1.cpu() - ddp_model = DistributedDataParallel(model, process_group=process_group) + DistributedDataParallel(model, process_group=process_group) model = model.cpu() with self.assertRaisesRegex( ValueError, "device_ids can only be None or contain a single element." ): - ddp_model = DistributedDataParallel( - model, device_ids=gpus, process_group=process_group - ) + DistributedDataParallel(model, device_ids=gpus, process_group=process_group) def _test_fp16(self, gradient_as_bucket_view=False): process_group = self._get_process_group() @@ -1940,11 +1936,9 @@ class DistributedDataParallelTest( ), named_msg, ) - for j, ((param_name, p), p_ddp) in enumerate( - zip( - m_child.named_parameters(), - m_ddp_child.parameters(), - ) + for (param_name, p), p_ddp in zip( + m_child.named_parameters(), + m_ddp_child.parameters(), ): named_msg = ( layer_name + "." + param_name + " " + iter_msg @@ -2010,15 +2004,13 @@ class DistributedDataParallelTest( m = ConvNet(layer_devs, layer_formats, layer_dtypes) if self.rank == 0: - m_ddp = DistributedDataParallel( - m, device_ids=[dev0], process_group=process_group - ) + DistributedDataParallel(m, device_ids=[dev0], process_group=process_group) else: with self.assertRaisesRegex( RuntimeError, ".* appears not to match strides of the same param in process 0", ): - m_ddp = DistributedDataParallel( + DistributedDataParallel( m, device_ids=[dev0], process_group=process_group ) @@ -2356,7 +2348,7 @@ class DistributedDataParallelTest( process_group=process_group, ) - for i in range(3): + for _ in range(3): m.zero_grad(set_to_none=try_set_to_none) m(1).sum().backward() @@ -2701,7 +2693,7 @@ class WorkHookTest(MultiProcessTestCase): pg._register_on_completion_hook(hook) tensor = torch.ones([2, 3]).cuda(self.rank) * self.rank work_count = 3 - for i in range(work_count): + for _ in range(work_count): work += 1 pg.broadcast([tensor]).wait() @@ -2806,7 +2798,7 @@ class NcclErrorHandlingTest(MultiProcessTestCase): # Run some GPU operations to make sure cuda has not gotten stuck. # It was observed cuda could get stuck if NCCL communicators were # not properly aborted before throwing RuntimeError. - a = torch.rand(10).cuda(self.rank) + torch.rand(10).cuda(self.rank) elif self.rank == 1: # Clean up structures (ex: files for FileStore before going down) del process_group @@ -2947,7 +2939,7 @@ class NcclErrorHandlingTest(MultiProcessTestCase): os.environ["TORCH_NCCL_BLOCKING_WAIT"] = val store = c10d.FileStore(self.file_name, self.world_size) with self.assertRaises(RuntimeError): - process_group = c10d.ProcessGroupNCCL(store, self.rank, self.world_size) + c10d.ProcessGroupNCCL(store, self.rank, self.world_size) @requires_nccl() @skip_if_lt_x_gpu(3) @@ -4223,7 +4215,7 @@ class NCCLTraceTestBase(MultiProcessTestCase): def _join_processes(self, fn): # We need to patch sys.exit() as skip_if will use sys.exit() and # the exit code from this process will not be caught.
- with mock.patch("sys.exit") as exit_mock: + with mock.patch("sys.exit"): fn() super()._join_processes(fn) @@ -4231,7 +4223,7 @@ class NCCLTraceTestBase(MultiProcessTestCase): proc = torch.multiprocessing.get_context("spawn").Process self.children_pipes = [] parent_pipes = [] - for i in range(self.world_size): + for _ in range(self.world_size): parent_conn, child_conn = torch.multiprocessing.Pipe() self.children_pipes.append(child_conn) parent_pipes.append(parent_conn) @@ -4346,7 +4338,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg._enable_collectives_timing() device = self.local_device a = torch.full((3, 4), float(self.rank), device=device) - for i in range(2): + for _ in range(2): f = pg.allreduce(a) f.wait() torch.cuda.synchronize(device=device) @@ -4372,7 +4364,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg._enable_collectives_timing() device = self.local_device a = torch.full((3, 4), float(self.rank), device=device) - for i in range(2): + for _ in range(2): f = pg.allreduce(a) f.wait() torch.cuda.synchronize(device=device) @@ -4420,7 +4412,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg = self._create_process_group_nccl() device = self.local_device a = torch.full((3, 4), float(self.rank), device=device) - for i in range(2): + for _ in range(2): f = pg.allreduce(a) f.wait() torch.cuda.synchronize(device=device) @@ -4436,7 +4428,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg = self._create_process_group_nccl() device = self.local_device a = torch.full((3, 4), float(self.rank), device=device) - for i in range(2): + for _ in range(2): # test some other primitives to make sure # their strings are valid xs = [torch.ones(3, 4, device=device)] @@ -4496,7 +4488,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg = self._create_process_group_nccl() device = self.local_device # send more works than the buffer size to overwrite the previous entry - for i in range(12): + for _ in range(12): a = [torch.ones(3, 4, device=device)] pg.broadcast(a).wait() torch.cuda.synchronize(device=device) @@ -4611,7 +4603,7 @@ class NCCLTraceTest(NCCLTraceTestBase): th.start() # fill the cuda buffer, at around 1024 events # this will stall - for i in range(2000): + for _ in range(2000): a = a + a th.join() else: @@ -4646,7 +4638,7 @@ class NCCLTraceTest(NCCLTraceTestBase): num_coalesced_ops = 20 ops_per_coalesce = len(op_sizes_per_coalesce) - for i in range(num_coalesced_ops): + for _ in range(num_coalesced_ops): ops = [] for input_sizes in op_sizes_per_coalesce: tensor = torch.zeros(input_sizes).to(self.local_device) @@ -4745,7 +4737,7 @@ class NCCLTraceTest(NCCLTraceTestBase): pg._enable_collectives_timing() num_repeats = 10 ops_per_repeat = len(op_sizes) - for i in range(num_repeats): + for _ in range(num_repeats): for input_sizes in op_sizes: tensor = torch.zeros(input_sizes).to(self.local_device) if self.rank == 0: @@ -5047,7 +5039,7 @@ class NcclErrorDumpTest(NCCLTraceTestBase): # Block the current stream on the NCCL stream work.wait() # Run some GPU operations - a = torch.rand(10).cuda(self.rank) + torch.rand(10).cuda(self.rank) elif self.rank == 1: # Clean up structures (ex: files for FileStore before going down) del process_group @@ -5108,7 +5100,6 @@ class ProcessGroupNCCLLargerScaleTest(MultiProcessTestCase): tensor = torch.full((1,), self.rank).cuda(device) ng1 = c10d.split_group(pg, [[0, 1], [2, 3, 4, 5, 6, 7]]) - backend1 = ng1._get_backend(torch.device(device)) # comm split happens eagerly since device_id is passed to init_process_group. 
self.assertEqual(backend.comm_split_count(), 1) diff --git a/test/distributed/test_c10d_ops_nccl.py b/test/distributed/test_c10d_ops_nccl.py index f0249877c63..73bad39956c 100644 --- a/test/distributed/test_c10d_ops_nccl.py +++ b/test/distributed/test_c10d_ops_nccl.py @@ -162,7 +162,6 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): @requires_nccl() @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_allreduce_ops(self): - device_count = torch.cuda.device_count() pg = self.pg local_device_id = self.rank_to_GPU[self.rank][0] @@ -303,9 +302,8 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): pg = self.pg rank = self.rank_to_GPU[self.rank][0] with torch.cuda.device(rank): - for i in range(10): + for _ in range(10): xs = [torch.FloatTensor([1]).cuda(rank)] - ys = [torch.FloatTensor([4]).cuda(rank)] for _ in range(30): pg.allreduce(xs[0]).wait() @@ -410,7 +408,7 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): output_tensors.append([t.cuda(device=gpu) for t in output_per_gpu]) expected_output.append([t.cuda(device=gpu) for t in expected_per_gpu]) - result = allgather(output_tensors, tensors) + allgather(output_tensors, tensors) # Verification self.assertEqual(output_tensors, expected_output) @@ -558,7 +556,7 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): # init output output_ts = [] - for rank in range(self.world_size): + for _ in range(self.world_size): output_ts.append(torch.tensor([-1]).cuda(device_id)) with self.assertRaisesRegex(ValueError, "invalid root rank"): @@ -914,7 +912,6 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): @requires_nccl() @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_send_recv(self): - pg = self.pg device = self.rank_to_GPU[self.rank][0] # Generate the same random tensor @@ -930,7 +927,6 @@ class ProcessGroupNCCLOpTest(MultiProcContinousTest): @requires_nccl() @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_send_recv_complex(self): - pg = self.pg device = self.rank_to_GPU[self.rank][0] # Generate the same random tensor diff --git a/test/distributed/test_c10d_ucc.py b/test/distributed/test_c10d_ucc.py index b7f778656d6..de61a4a8739 100644 --- a/test/distributed/test_c10d_ucc.py +++ b/test/distributed/test_c10d_ucc.py @@ -755,7 +755,7 @@ class DistributedDataParallelTest( ddp_state_dict = torch.load(checkpoint_path, map_location=map_location) for model in [ddp_withload, model_withload]: - for p in ddp_withload.parameters(): + for p in model.parameters(): with torch.no_grad(): p.zero_() ddp_withload.load_state_dict(ddp_state_dict) diff --git a/test/distributed/test_collective_utils.py b/test/distributed/test_collective_utils.py index 727850680aa..ee93d56efb8 100644 --- a/test/distributed/test_collective_utils.py +++ b/test/distributed/test_collective_utils.py @@ -57,7 +57,7 @@ class TestCollectiveUtils(MultiProcessTestCase): Ensure broadcast has no dependency on torch.distributed when run in single process. """ func = mock.MagicMock() - res = broadcast(data_or_fn=func, rank=0) + broadcast(data_or_fn=func, rank=0) func.assert_called_once() def test_broadcast_result_raises_exceptions_from_func( @@ -98,7 +98,7 @@ class TestCollectiveUtils(MultiProcessTestCase): Ensure all_gather has no dependency on torch.distributed when run in single process. 
""" func = mock.MagicMock() - res = all_gather(data_or_fn=func) + all_gather(data_or_fn=func) func.assert_called_once() def test_all_gather_result_raises_exceptions_from_func( diff --git a/test/distributed/test_data_parallel.py b/test/distributed/test_data_parallel.py index 9ef576ec1df..26f64df90d9 100644 --- a/test/distributed/test_data_parallel.py +++ b/test/distributed/test_data_parallel.py @@ -791,8 +791,8 @@ class TestDataParallel(TestCase): ), named_msg, ) - for j, ((param_name, p), p_dp) in enumerate( - zip(m_child.named_parameters(), m_dp_child.parameters()) + for (param_name, p), p_dp in zip( + m_child.named_parameters(), m_dp_child.parameters() ): named_msg = ( layer_name + "." + param_name + " " + iter_msg diff --git a/test/distributed/test_device_mesh.py b/test/distributed/test_device_mesh.py index 54665934e52..b39ffd375f2 100644 --- a/test/distributed/test_device_mesh.py +++ b/test/distributed/test_device_mesh.py @@ -88,7 +88,7 @@ class DeviceMeshTest(DTensorTestBase): def test_assert_invalid_mesh_tensor(self): mesh = torch.arange(self.world_size).to(self.rank) with self.assertRaises(ValueError): - device_mesh = DeviceMesh(self.device_type, mesh) + DeviceMesh(self.device_type, mesh) @with_comms() def test_2d_mesh_non_eager_init_subgroup(self): @@ -144,7 +144,7 @@ class DeviceMeshTest(DTensorTestBase): RuntimeError, "Optional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.", ): - local_rank = mesh_2d.get_local_rank() + mesh_2d.get_local_rank() @with_comms def test_get_local_rank(self): @@ -258,7 +258,7 @@ class DeviceMeshTest(DTensorTestBase): ): # test init_device_mesh with an invalid device type that contains a GPU index mesh_shape = (2, self.world_size // 2) - mesh_2d = init_device_mesh( + init_device_mesh( "cuda:0", mesh_shape=mesh_shape, mesh_dim_names=("dp", "tp") ) @@ -453,7 +453,7 @@ class InitDeviceMeshTest(DTensorTestBase): RuntimeError, "Each mesh_dim_name must be unique.", ): - mesh = init_device_mesh( + init_device_mesh( self.device_type, (2, 4), mesh_dim_names=["dp", "dp"], @@ -465,7 +465,7 @@ class InitDeviceMeshTest(DTensorTestBase): RuntimeError, "mesh_shape and mesh_dim_names should have same length!", ): - mesh = init_device_mesh( + init_device_mesh( self.device_type, (8,), mesh_dim_names=["dp", "tp"], @@ -483,7 +483,7 @@ class TestDeviceMeshGetItem(DTensorTestBase): RuntimeError, "Cannot slice a DeviceMesh without mesh_dim_names!" ): mesh = init_device_mesh(self.device_type, (2, 4)) - child_mesh = mesh["DP"] + mesh["DP"] @with_comms def test_raises_invalid_mesh_dim_name(self): @@ -493,7 +493,7 @@ class TestDeviceMeshGetItem(DTensorTestBase): mesh = init_device_mesh( self.device_type, (2, 4), mesh_dim_names=mesh_dim_names ) - child_mesh = mesh[child_mesh_dim_name] + mesh[child_mesh_dim_name] @with_comms def test_get_item_2d(self): @@ -514,7 +514,6 @@ class TestDeviceMeshGetItem(DTensorTestBase): tp_group_idx = self.rank // 4 self.assertEqual(tp_mesh.mesh, pg_ranks_by_dim_name["TP"][tp_group_idx]) - dp_mesh = mesh_2d["DP"] dp_group_idx = self.rank % 4 self.assertEqual(mesh_2d["DP"].mesh, pg_ranks_by_dim_name["DP"][dp_group_idx]) @@ -564,17 +563,15 @@ class TestDeviceMeshGetItem(DTensorTestBase): def test_cache_and_reuse_submesh_slice_result(self): mesh = init_device_mesh(self.device_type, (2, 4), mesh_dim_names=("dp", "tp")) - dp_mesh = mesh["dp"] ref_pg_count = _world.group_count # When we call the "dp" slice second time, it should not create any new pg. # As we are just using the cached result so the pg count should be the same. 
- dp_mesh_2 = mesh["dp"] self.assertEqual(ref_pg_count, _world.group_count) # When we call the "tp" slice, it should not create a new pg, as the "tp" slice would # just reuse the parent mesh pg. - tp_mesh = mesh["tp"] + mesh["tp"] self.assertEqual(_world.group_count, ref_pg_count) @with_comms @@ -603,7 +600,7 @@ class TestDeviceMeshGetItem(DTensorTestBase): KeyError, "Invalid mesh_dim_names", ): - cp_dp_mesh = mesh_3d["cp", "dp"] + mesh_3d["cp", "dp"] @with_comms def test_flatten_mesh_3d(self): @@ -767,9 +764,9 @@ class TestMeshEnv(DTensorTestBase): ) with FakeTensorMode(): - dp_mesh = mesh_2d["DP"] - tp_mesh = mesh_2d["TP"] - dp_tp_mesh = mesh_2d["DP", "TP"] + mesh_2d["DP"] + mesh_2d["TP"] + mesh_2d["DP", "TP"] class DeviceMeshCollectiveTest(DTensorTestBase): diff --git a/test/distributed/test_dynamo_distributed.py b/test/distributed/test_dynamo_distributed.py index cdf834cb69e..df132dae265 100644 --- a/test/distributed/test_dynamo_distributed.py +++ b/test/distributed/test_dynamo_distributed.py @@ -421,7 +421,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): self.weight2 = nn.Parameter(torch.randn(512, 512)) def forward(self, x, y): - u0, u1 = y.tolist() + u0, _ = y.tolist() x = torch.cat([x, x]) y = x @ self.weight1 z = (x + y @ self.weight2) * u0 @@ -442,7 +442,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): self.weight2 = nn.Parameter(torch.randn(512, 512)) def forward(self, x, y): - u0, u1 = y.tolist() + u0, _ = y.tolist() a = torch.ones(u0) x = torch.cat([x, x]) y = x @ self.weight1 @@ -466,7 +466,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): def forward(self, x, y): # partition one (contains the u0 def) - u0, u1 = y.tolist() + u0, _ = y.tolist() x = torch.cat([x, x]) y1 = x @ self.weight1 # partition two (contains the variable) @@ -511,7 +511,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): ): super().__init__() layers = [] - for l in range(2): + for _ in range(2): layer = nn.ModuleList( [ nn.LayerNorm(96), @@ -529,7 +529,7 @@ class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): for m in self.layers: x = x.reshape(B * F, T, H) x = m[0](x) - x, attn = m[1].forward(x, x, x) + x, _ = m[1].forward(x, x, x) x = x.reshape(B, F, T, H) return x @@ -937,8 +937,8 @@ class TestMultiProc(DynamoDistributedMultiProcTestCase): @torch.compile() def f(x, y): - zx = x.shape - zy = y.shape + zx = x.shape # noqa: F841 + zy = y.shape # noqa: F841 return x.sum() + y.sum() if self.rank == 0: @@ -967,10 +967,10 @@ class TestMultiProc(DynamoDistributedMultiProcTestCase): @torch.compile() def f(x, y): - z = y + z = y # noqa: F841 print("woof") - zx = x.shape - zy = y.shape + zx = x.shape # noqa: F841 + zy = y.shape # noqa: F841 return x.sum() + y.sum() if self.rank == 0: @@ -999,8 +999,8 @@ class TestMultiProc(DynamoDistributedMultiProcTestCase): @torch.compile() def f(x, y): - zx = x.shape - zy = y.shape + zx = x.shape # noqa: F841 + zy = y.shape # noqa: F841 return x.sum() + y.sum() if self.rank == 0: @@ -1405,7 +1405,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase): model = DDP(model, device_ids=self.device_ids) hidden_states = torch.randn(B, S, H * D).to(device) - attention_scores = model(hidden_states) + model(hidden_states) torch.cuda.synchronize() @patch.object(config, "optimize_ddp", True) @@ -1461,7 +1461,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase): model = DDP(model, device_ids=self.device_ids) hidden_states = torch.randn(B, S, H * 
D).to(device) - attention_scores = model(hidden_states) + model(hidden_states) torch.cuda.synchronize() @patch.object(config, "optimize_ddp", True) @@ -1723,7 +1723,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase): def test_fsdp_orig_params_assert(self): # Test with basic FSDP wrapping (outer wrap around whole model) - m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") + m, inputs, _ = get_model(f"cuda:{self.rank}") fsdp_m = FSDP(m, use_orig_params=False) fsdp_m = torch.compile(fsdp_m) self.assertRaisesRegex( diff --git a/test/distributed/test_functional_api.py b/test/distributed/test_functional_api.py index e401076de7a..b31fdeb94e6 100644 --- a/test/distributed/test_functional_api.py +++ b/test/distributed/test_functional_api.py @@ -130,7 +130,7 @@ class TestExpand(MultiThreadedTestCase): tag, rankset, group_size = ft_c._expand_group(dist.group.WORLD, "bla") self.assertEqual("bla", tag) - my_pg, others = new_subgroups(group_size=2) + my_pg, _ = new_subgroups(group_size=2) tag, rankset, group_size = ft_c._expand_group(my_pg) self.assertEqual(c10d._get_group_tag(my_pg), tag) self.assertEqual(dist.get_process_group_ranks(my_pg), rankset) @@ -588,7 +588,7 @@ class TestCollectivesWithDistributedBackend(DistributedTestBase): def allreduce(t, pg): return ft_c.all_reduce(t, "sum", pg) - compiled_allreduce = torch.compile(allreduce, fullgraph=True) + compiled_allreduce = torch.compile(allreduce, fullgraph=True) # noqa: F841 dist.init_process_group( backend="fake", rank=0, @@ -615,9 +615,7 @@ class TestCollectivesWithDistributedBackend(DistributedTestBase): return batch * 5 compiled_func = torch.compile(func) - ret = compiled_func( - torch.ones((100,), device=device), self.process_group, self.rank - ) + compiled_func(torch.ones((100,), device=device), self.process_group, self.rank) dist.barrier() @@ -715,7 +713,7 @@ class TestFunctionalAutograd(MultiThreadedTestCase): out = compiled(t, self.world_size) out.backward() - res, codes = run_and_get_code(run_with_backward) + _, codes = run_and_get_code(run_with_backward) for code in codes: FileCheck().check_count( "_c10d_functional.all_to_all_single.default", 1, exactly=True diff --git a/test/distributed/test_inductor_collectives.py b/test/distributed/test_inductor_collectives.py index 92a2fd6ee2c..31d65b1c592 100644 --- a/test/distributed/test_inductor_collectives.py +++ b/test/distributed/test_inductor_collectives.py @@ -411,7 +411,7 @@ class TestCollectivesMultiProc(DynamoDistributedMultiProcTestCase): y = self.emb(x) last_dim = y.dim() - 1 y = y.transpose_(0, last_dim).contiguous() - res = _functional_collectives.all_gather_tensor(y, 0, ranks, tag) + _functional_collectives.all_gather_tensor(y, 0, ranks, tag) out = y.transpose_(0, last_dim).contiguous() return out diff --git a/test/distributed/test_launcher.py b/test/distributed/test_launcher.py index e2bd1a510d1..decae9d1c7c 100644 --- a/test/distributed/test_launcher.py +++ b/test/distributed/test_launcher.py @@ -35,7 +35,6 @@ class TestDistributedLaunch(TestCase): def test_launch_user_script(self): nnodes = 1 nproc_per_node = 4 - world_size = nnodes * nproc_per_node sock = get_socket_with_port() with closing(sock): master_port = sock.getsockname()[1] diff --git a/test/distributed/test_store.py b/test/distributed/test_store.py index b2976abd087..bbd075e93a6 100644 --- a/test/distributed/test_store.py +++ b/test/distributed/test_store.py @@ -553,7 +553,7 @@ class LibUvTCPStoreTest(TCPStoreTest): ) with self.assertRaisesRegex(NotImplementedError, err_msg_reg): - store = 
dist.TCPStore( + dist.TCPStore( addr, port, 1, @@ -748,7 +748,7 @@ class RendezvousTCPTest(TestCase): url = self.create_tcp_url() test_store_timeout = timedelta(seconds=0.1) gen0 = dist.rendezvous(url + "&rank=0", timeout=timedelta(seconds=10)) - store0, rank0, size0 = next(gen0) + store0, _, _ = next(gen0) store0.set_timeout(test_store_timeout) # this should time out in 0.1s. If the timeout passed into rendezvous was # not respected, it will take much longer to time out. @@ -766,7 +766,7 @@ class RendezvousTCPTest(TestCase): url = self.create_tcp_url() test_store_timeout = timedelta(seconds=0.1) gen0 = dist.rendezvous(url + "&rank=0", timeout=timedelta(seconds=10)) - store0, rank0, size0 = next(gen0) + store0, _, _ = next(gen0) store0.set_timeout(test_store_timeout) # this should time out in 10s. If the timeout passed into rendezvous was # not respected, it will take much longer to time out. @@ -787,7 +787,7 @@ class RendezvousTCPTest(TestCase): def test_tcp_store_url_with_libuv(self): url = self.create_tcp_url() gen0 = dist.rendezvous(url + "&rank=0&use_libuv=1") - store0, rank0, size0 = next(gen0) + store0, _, _ = next(gen0) self.assertTrue(store0.libuvBackend) @@ -1078,7 +1078,7 @@ class TestClientProtocol(TestCase): thread = threading.Thread(target=listen) thread.start() - store = dist.TCPStore( + dist.TCPStore( host_name="localhost", port=port, world_size=2, diff --git a/test/distributed/test_symmetric_memory.py b/test/distributed/test_symmetric_memory.py index 72d53b94cc1..3c33567c795 100644 --- a/test/distributed/test_symmetric_memory.py +++ b/test/distributed/test_symmetric_memory.py @@ -332,7 +332,6 @@ class SymmetricMemoryTest(MultiProcessTestCase): K = 32 group = dist.group.WORLD rank = self.rank - world_size = self.world_size torch.manual_seed(42 + rank) A_shard = torch.rand(BATCH, M // self.world_size, K, device="cuda") @@ -428,7 +427,6 @@ class SymmetricMemoryTest(MultiProcessTestCase): K = 32 group = dist.group.WORLD rank = self.rank - world_size = self.world_size if gather_dim == 0: leading_dims = (BATCH // self.world_size, M) @@ -513,7 +511,6 @@ class SymmetricMemoryTest(MultiProcessTestCase): K = 32 group = dist.group.WORLD rank = self.rank - world_size = self.world_size torch.manual_seed(42 + rank) A = torch.rand(BATCH, M, K, device="cuda") @@ -546,7 +543,6 @@ class SymmetricMemoryTest(MultiProcessTestCase): K = 32 group = dist.group.WORLD rank = self.rank - world_size = self.world_size torch.manual_seed(42 + rank) A = torch.rand(BATCH, M, K, device="cuda").to(torch.float8_e4m3fn) diff --git a/test/distributions/test_distributions.py b/test/distributions/test_distributions.py index 8de1c1dce87..1756bf5afa8 100644 --- a/test/distributions/test_distributions.py +++ b/test/distributions/test_distributions.py @@ -1314,7 +1314,7 @@ class TestDistributions(DistributionsTestCase): if not msk.all(): counts = np.concatenate([counts[msk], np.sum(counts[~msk], keepdims=True)]) pmf = np.concatenate([pmf[msk], np.sum(pmf[~msk], keepdims=True)]) - chisq, p = scipy.stats.chisquare(counts, pmf * num_samples) + _, p = scipy.stats.chisquare(counts, pmf * num_samples) self.assertGreater(p, failure_rate, message) def _check_enumerate_support(self, dist, examples): @@ -1912,9 +1912,7 @@ class TestDistributions(DistributionsTestCase): @set_default_dtype(torch.double) def test_one_hot_categorical_2d(self): probabilities = [[0.1, 0.2, 0.3], [0.5, 0.3, 0.2]] - probabilities_1 = [[1.0, 0.0], [0.0, 1.0]] p = torch.tensor(probabilities, requires_grad=True) - s = torch.tensor(probabilities_1,
requires_grad=True) self.assertEqual(OneHotCategorical(p).sample().size(), (2, 3)) self.assertEqual( OneHotCategorical(p).sample(sample_shape=(3, 4)).size(), (3, 4, 2, 3) @@ -2074,13 +2072,11 @@ class TestDistributions(DistributionsTestCase): @set_default_dtype(torch.double) def test_relaxed_one_hot_categorical_2d(self): probabilities = [[0.1, 0.2, 0.3], [0.5, 0.3, 0.2]] - probabilities_1 = [[1.0, 0.0], [0.0, 1.0]] temp = torch.tensor([3.0], requires_grad=True) # The lower the temperature, the more unstable the log_prob gradcheck is # w.r.t. the sample. Values below 0.25 empirically fail the default tol. temp_2 = torch.tensor([0.25], requires_grad=True) p = torch.tensor(probabilities, requires_grad=True) - s = torch.tensor(probabilities_1, requires_grad=True) self.assertEqual(RelaxedOneHotCategorical(temp, p).sample().size(), (2, 3)) self.assertEqual( RelaxedOneHotCategorical(temp, p).sample(sample_shape=(3, 4)).size(), @@ -3939,7 +3935,7 @@ class TestDistributions(DistributionsTestCase): for dim in range(2, 5): log_probs = [] lkj = LKJCholesky(dim, concentration=1.0, validate_args=True) - for i in range(2): + for _ in range(2): sample = lkj.sample() sample_tril = tril_matrix_to_vec(sample, diag=-1) log_prob = lkj.log_prob(sample) @@ -6241,7 +6237,7 @@ class TestLazyLogitsInitialization(DistributionsTestCase): except NotImplementedError: pass self.assertNotIn("probs", dist.__dict__, msg=message) - batch_shape, event_shape = dist.batch_shape, dist.event_shape + dist.batch_shape, dist.event_shape self.assertNotIn("probs", dist.__dict__, msg=message) def test_lazy_probs_initialization(self): @@ -6258,7 +6254,7 @@ class TestLazyLogitsInitialization(DistributionsTestCase): except NotImplementedError: pass self.assertNotIn("logits", dist.__dict__, msg=message) - batch_shape, event_shape = dist.batch_shape, dist.event_shape + dist.batch_shape, dist.event_shape self.assertNotIn("logits", dist.__dict__, msg=message) @@ -6565,6 +6561,7 @@ class TestFunctors(DistributionsTestCase): expected_jac = sum( [t1.log_abs_det_jacobian(x1, y1), t2.log_abs_det_jacobian(x2, y2)] ) + self.assertEqual(actual_jac, expected_jac) def test_stack_transform(self): x1 = -1 * torch.arange(1, 101, dtype=torch.float) @@ -6628,18 +6625,18 @@ class TestValidation(DistributionsTestCase): for v in torch.tensor([-2.0, -1.0, 0.0, 1.0, 2.0]): # samples with incorrect shape must throw ValueError only try: - log_prob = d_val.log_prob(v) + d_val.log_prob(v) except ValueError: pass # get sample of correct shape val = torch.full(d_val.batch_shape + d_val.event_shape, v) # check samples with incorrect support try: - log_prob = d_val.log_prob(val) + d_val.log_prob(val) except ValueError as e: if e.args and "must be within the support" in e.args[0]: try: - log_prob = d_nonval.log_prob(val) + d_nonval.log_prob(val) except RuntimeError: pass diff --git a/test/dynamo/test_activation_checkpointing.py b/test/dynamo/test_activation_checkpointing.py index d02140c6e2a..b41bc285a2e 100644 --- a/test/dynamo/test_activation_checkpointing.py +++ b/test/dynamo/test_activation_checkpointing.py @@ -1260,7 +1260,7 @@ Non-primal fwd outputs from model w/o backward hook: {mod_no_hook_fwd_outputs_no super().__init__() def forward(self, x, ys): - a = torch.sin(x) + a = torch.sin(x) # noqa: F841 b = torch.cos(ys[0]) c = torch.cos(ys[1]) return (x, [b, c]) diff --git a/test/dynamo/test_aot_autograd.py b/test/dynamo/test_aot_autograd.py index f8cc7e16da2..9f76483f2d5 100644 --- a/test/dynamo/test_aot_autograd.py +++ b/test/dynamo/test_aot_autograd.py @@ -453,7 
+453,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): a = torch.randn(3, 3, requires_grad=True) b = torch.randn(3, 3, requires_grad=True) a1, a2 = a.clone(), a.clone() - b1, b2 = b.clone(), b.clone() + _, b2 = b.clone(), b.clone() failure_reason = None @@ -481,7 +481,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): c = torch.randn(3, 3, requires_grad=True) d = torch.randn(3, 3, requires_grad=True) c3, c4 = c.clone(), c.clone() - d3, d4 = d.clone(), d.clone() + _, d4 = d.clone(), d.clone() f = torch._dynamo.optimize(cc, guard_fail_fn=guard_fail_fn)(F()) f(c3, c3, 3, 3) @@ -507,7 +507,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): b = torch.randn(3, 3, requires_grad=True) z = a a1, a2 = a.clone(), a.clone() - b1, b2 = b.clone(), b.clone() + _, b2 = b.clone(), b.clone() failure_reason = None @@ -543,7 +543,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): a = torch.randn(3, 3, requires_grad=True) b = torch.randn(3, 3, requires_grad=True) a1, a2 = a.clone(), a.clone() - b1, b2 = b.clone(), b.clone() + _, b2 = b.clone(), b.clone() failure_reason = None @@ -571,7 +571,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): c = torch.randn(3, 3, requires_grad=True) d = torch.randn(3, 3, requires_grad=True) c3, c4 = c.clone(), c.clone() - d3, d4 = d.clone(), d.clone() + _, d4 = d.clone(), d.clone() f = torch._dynamo.optimize(cc, guard_fail_fn=guard_fail_fn)(F()) f([3, 2, 1], [4, 5, 6], c3, c3) @@ -593,7 +593,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): a = torch.randn(3, 3, requires_grad=True) b = torch.randn(3, 3, requires_grad=True) a1, a2 = a.clone(), a.clone() - b1, b2 = b.clone(), b.clone() + _, b2 = b.clone(), b.clone() failure_reason = None @@ -621,7 +621,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): c = torch.randn(3, 3, requires_grad=True) d = torch.randn(3, 3, requires_grad=True) c3, c4 = c.clone(), c.clone() - d3, d4 = d.clone(), d.clone() + _, d4 = d.clone(), d.clone() f = torch._dynamo.optimize(cc, guard_fail_fn=guard_fail_fn)(F()) f(c3, c3) @@ -642,7 +642,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): a = torch.randn(3, 3, requires_grad=True) b = torch.randn(3, 3, requires_grad=True) a1, a2, a3, a4 = a.clone(), a.clone(), a.clone(), a.clone() - b1, b2, b3, b4 = b.clone(), b.clone(), b.clone(), b.clone() + _, b2, b3, b4 = b.clone(), b.clone(), b.clone(), b.clone() failure_reason = None @@ -670,7 +670,7 @@ class AotAutogradFallbackTests(torch._inductor.test_case.TestCase): c = torch.randn(3, 3, requires_grad=True) d = torch.randn(3, 3, requires_grad=True) c3, c4 = c.clone(), c.clone() - d3, d4 = d.clone(), d.clone() + _, d4 = d.clone(), d.clone() f = torch._dynamo.optimize(cc, guard_fail_fn=guard_fail_fn)(F()) f(a3, b3, c3, c3) @@ -1017,7 +1017,7 @@ SeqNr|OrigAten|SrcFn|FwdSrcFn activities=[torch.profiler.ProfilerActivity.CPU], record_shapes=True, ) as kineto_prof: - res = model_instance(*args) + model_instance(*args) bwd_set = set() prof_str = "SeqNr|Thread|FwdThread|Name\n" for event in kineto_prof.events(): @@ -1191,7 +1191,7 @@ SeqNr|OrigAten|SrcFn|FwdSrcFn x = torch.randn(3, requires_grad=True) with self.assertRaisesRegex(RuntimeError, "Cannot access data pointer"): - y = torch.compile(f, backend="aot_eager", fullgraph=True)(x) + torch.compile(f, backend="aot_eager", fullgraph=True)(x) self.assertTrue(backward_called) # We don't know how to catch multiple mutations to the same memory 
location diff --git a/test/dynamo/test_aot_autograd_cache.py b/test/dynamo/test_aot_autograd_cache.py index 6dba4f0b9ee..228c45a4ff6 100644 --- a/test/dynamo/test_aot_autograd_cache.py +++ b/test/dynamo/test_aot_autograd_cache.py @@ -157,7 +157,7 @@ class AOTAutogradCacheTests(InductorTestCase): with torch.autograd._force_original_view_tracking(True): compiled_fn = torch.compile(fn) - out = compiled_fn(torch.rand(2, 3)) + compiled_fn(torch.rand(2, 3)) self.assertEqual(counters["aot_autograd"]["autograd_cache_miss"], 1) self.assertEqual(counters["aot_autograd"]["autograd_cache_bypass"], 1) @@ -654,7 +654,7 @@ class AOTAutogradCachePicklerTests(torch._dynamo.test_case.TestCase): def fn(x): return x.sin().cos() - def fn2(x): + def fn2(x): # noqa: F841 y = x.sin() z = y.cos() return z diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py index 3b2ee9ad8d6..5ba4c71b3ea 100644 --- a/test/dynamo/test_autograd_function.py +++ b/test/dynamo/test_autograd_function.py @@ -760,7 +760,7 @@ class GraphModule(torch.nn.Module): def backward(ctx, gO): return torch.tensor(float("nan")).expand(10, 10) - def run_fn(a): + def run_fn(a): # noqa: F841 out = MyFunc2.apply(a) return out.sum() @@ -837,11 +837,11 @@ class GraphModule(torch.nn.Module): x = torch.randn(5, 5, requires_grad=True) y = torch.randn(5, 5, requires_grad=True) - q, p = Identity.apply(x, y) + Identity.apply(x, y) a = torch.rand(1, 2) b = torch.rand(1, requires_grad=True) - view_a = MyFn.apply(a) + MyFn.apply(a) a = torch.ones(2, requires_grad=True) b = torch.ones(2, requires_grad=True) @@ -860,7 +860,7 @@ class GraphModule(torch.nn.Module): MyFn2.apply(c, d) base = torch.rand(10, requires_grad=True) - foo = MyFn3.apply(base, False) + MyFn3.apply(base, False) test() opt_test = torch.compile(test, backend="eager") diff --git a/test/dynamo/test_backends.py b/test/dynamo/test_backends.py index 3d4443978e5..84379aa599c 100644 --- a/test/dynamo/test_backends.py +++ b/test/dynamo/test_backends.py @@ -267,9 +267,8 @@ class TestCustomBackendAPI(torch._dynamo.test_case.TestCase): self.assertTrue(backend_run) def test_lookup_backend(self): - from torch._dynamo import list_backends, lookup_backend + from torch._dynamo import lookup_backend - backends = list_backends() backend_run = False def my_compiler(gm, example_inputs): diff --git a/test/dynamo/test_backward_higher_order_ops.py b/test/dynamo/test_backward_higher_order_ops.py index 2f48c41f7bb..14e3f2e044c 100644 --- a/test/dynamo/test_backward_higher_order_ops.py +++ b/test/dynamo/test_backward_higher_order_ops.py @@ -247,8 +247,6 @@ class GraphModule(torch.nn.Module): with compiled_autograd._enable(compiler_fn): out.backward(grad_out) - graph = None - if __name__ == "__main__": from torch._dynamo.test_case import run_tests diff --git a/test/dynamo/test_bytecode_utils.py b/test/dynamo/test_bytecode_utils.py index 0e8b74c6fdb..fa906a2ac16 100644 --- a/test/dynamo/test_bytecode_utils.py +++ b/test/dynamo/test_bytecode_utils.py @@ -518,7 +518,7 @@ def fn(): insts = bytecode_transformation.bytecode_from_template(fn, noprefix=False) self.assertEqual(insts[-1].opname, "NOP") insts_i = 0 - for i, inst in enumerate(dis_insts): + for inst in dis_insts: if inst.opname == "RETURN_CONST": self.assertEqual(insts[insts_i].opname, "LOAD_CONST") insts_i += 1 @@ -538,7 +538,7 @@ def fn(): x = x + 1 except NotImplementedError: x = x + 1 - except Exception as e: + except Exception: x = x + 1 return x diff --git a/test/dynamo/test_compiler_bisector.py 
b/test/dynamo/test_compiler_bisector.py index 70ef1c12d27..a5a350c0d1a 100644 --- a/test/dynamo/test_compiler_bisector.py +++ b/test/dynamo/test_compiler_bisector.py @@ -43,7 +43,7 @@ class TestCompilerBisector(TestCase): return lib def test_bad_decomp(self): - mod = import_module("torch._inductor.compile_fx") + import_module("torch._inductor.compile_fx") def bad_exp_decomp(self, rate=1, generator=None): assert generator is None @@ -86,7 +86,7 @@ class TestCompilerBisector(TestCase): vq_compiled = torch.compile(vq) x = torch.randn(4, 400, 256).cuda() with torch._dynamo.utils.preserve_rng_state(): - out = vq(x) + vq(x) out_compiled = vq_compiled(x) return not out_compiled.isnan().any() @@ -150,7 +150,6 @@ class TestCompilerBisector(TestCase): self.assertTrue("inductor_fallback_random" in out.debug_info) def test_crossref(self): - test_ns = "bisect_ops" with _scoped_library(self.test_ns, "FRAGMENT") as lib: lib.define("foo(Tensor x) -> Tensor") op = self.get_op("foo") diff --git a/test/dynamo/test_comptime.py b/test/dynamo/test_comptime.py index 17cf9ef13e7..15d78758fdb 100644 --- a/test/dynamo/test_comptime.py +++ b/test/dynamo/test_comptime.py @@ -117,7 +117,7 @@ def forward(self, L_x_ : torch.Tensor): return y + 3 - def munge_disas(s): + def munge_disas(s): # noqa: F841 re.sub( r"^(?: +\d+)?(?: +(-->)) \+\d+ ([A-Za-z0-9_]+)", "\1 \3", @@ -271,7 +271,7 @@ y = FakeTensor(..., size=(2,)) y = g(y) return y + 3 - def munge_filenames(s): + def munge_filenames(s): # noqa: F841 return re.sub(r'File "[^"]+", line \d+', 'File "X", line X', s) f(torch.randn(2)) @@ -389,7 +389,7 @@ y = FakeTensor(..., size=(2,)) @torch.compile(backend=cnt) def f(x): y = x * 2 - lit = 2 + lit = 2 # noqa: F841 @comptime def _(ctx): diff --git a/test/dynamo/test_ctx_manager.py b/test/dynamo/test_ctx_manager.py index e8076436c7d..a9d9606635e 100644 --- a/test/dynamo/test_ctx_manager.py +++ b/test/dynamo/test_ctx_manager.py @@ -268,15 +268,13 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): cur_stream.wait_stream(new_stream) x = torch.add(x, 4) - is_idle = cur_stream.query() + cur_stream.query() cur_stream.synchronize() with torch.cuda.stream(new_stream): x = torch.add(x, 5) new_stream.synchronize() - is_equal = cur_stream == new_stream - x = torch.relu(x) x = torch.cos(x) return x @@ -439,7 +437,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): x = torch.add(x, 3) event = cur_stream.record_event() - is_idle = event.query() + event.query() new_stream.wait_event(event) with torch.cuda.stream(new_stream): @@ -481,7 +479,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): x = torch.add(x, 3) event = cur_stream.record_event() - is_idle = event.query() + event.query() new_stream.wait_event(event) with torch.cuda.stream(new_stream): @@ -567,7 +565,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): real_device = real.device real_dtype = real.dtype - graph, guards = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) + graph, _ = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) exported = graph(torch.tensor([0.5])) self.assertEqual(exported.device, real_device) self.assertEqual(exported.dtype, real_dtype) @@ -676,7 +674,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): real_device = real.device real_dtype = real.dtype - graph, guards = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) + graph, _ = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) exported = graph(torch.tensor([0.5])) self.assertEqual(exported.device, 
real_device) self.assertEqual(exported.dtype, real_dtype) @@ -850,7 +848,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): real_device = real.device real_dtype = real.dtype - graph, guards = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) + graph, _ = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) exported = graph(torch.tensor([0.5])) self.assertEqual(exported.device, real_device) self.assertEqual(exported.dtype, real_dtype) @@ -876,7 +874,7 @@ class CtxManagerTests(torch._dynamo.test_case.TestCase): real_device = real.device real_dtype = real.dtype - graph, guards = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) + graph, _ = torch._dynamo.export(module)(torch.tensor([[0.0, 0], [0, 0]])) exported = graph(torch.tensor([0.5])) self.assertEqual(exported.device, real_device) self.assertEqual(exported.dtype, real_dtype) @@ -1297,7 +1295,7 @@ class GraphModule(torch.nn.Module): eager = EagerAndRecordGraphs() torch.compile(fn, backend=eager, fullgraph=False)(torch.randn(())) - def check_graph(actual, expected): + def check_graph(actual, expected): # noqa: F841 self.assertExpectedInline(actual, expected) graph = eager.graphs[0] @@ -1342,7 +1340,7 @@ class GraphModule(torch.nn.Module): for i in range(2): torch._dynamo.reset() - ctx_wrapper, mode = ctx_wrappers[i] + ctx_wrapper, _ = ctx_wrappers[i] ctx_wrapper_inverse, mode_inverse = ctx_wrappers[(i + 1) % 2] def fn(x): @@ -1373,7 +1371,7 @@ class GraphModule(torch.nn.Module): for i in range(2): torch._dynamo.reset() - ctx_wrapper, mode = ctx_wrappers[i] + ctx_wrapper, _ = ctx_wrappers[i] ctx_wrapper_inverse, mode_inverse = ctx_wrappers[(i + 1) % 2] def fn(x): diff --git a/test/dynamo/test_cudagraphs.py b/test/dynamo/test_cudagraphs.py index 58985655f72..ee34e421cbf 100644 --- a/test/dynamo/test_cudagraphs.py +++ b/test/dynamo/test_cudagraphs.py @@ -63,7 +63,7 @@ class TestAotCudagraphs(torch._dynamo.test_case.TestCase): @torch.compile(backend="cudagraphs") def fn(x, y): - for i in range(N_ITERS): + for _ in range(N_ITERS): loss = model(x, y).sum() loss.backward() @@ -80,7 +80,7 @@ class TestAotCudagraphs(torch._dynamo.test_case.TestCase): @torch.compile(backend="cudagraphs") def fn(x, y): - for i in range(N_ITERS): + for _ in range(N_ITERS): loss = model(x, y).sum() loss.backward() @@ -96,7 +96,7 @@ class TestAotCudagraphs(torch._dynamo.test_case.TestCase): @torch.compile(backend="cudagraphs") def fn(x, y): - for i in range(N_ITERS): + for _ in range(N_ITERS): loss = model(x, y).sum() loss.backward() diff --git a/test/dynamo/test_debug_utils.py b/test/dynamo/test_debug_utils.py index d4622c6e601..f86bdbb8584 100644 --- a/test/dynamo/test_debug_utils.py +++ b/test/dynamo/test_debug_utils.py @@ -45,7 +45,7 @@ def forward(self, x_1): """, # NOQA: B950 ) - fp64_model, fp64_examples = debug_utils.cast_to_fp64(fx, (x,)) + _, fp64_examples = debug_utils.cast_to_fp64(fx, (x,)) self.assertEqual(fp64_examples, (x.to(torch.float64),)) self.assertExpectedInline( @@ -79,7 +79,7 @@ def forward(self, x_1): _tensor_constant0 ) _tensor_constant0 = None - index: "f32[6144, 4190]" = torch.ops.aten.index.Tensor( + index: "f32[6144, 4190]" = torch.ops.aten.index.Tensor( # noqa: F841 primals_48, [None, lift_fresh_copy] ) lift_fresh_copy = None diff --git a/test/dynamo/test_decorators.py b/test/dynamo/test_decorators.py index bf24225f66a..bdf506416c0 100644 --- a/test/dynamo/test_decorators.py +++ b/test/dynamo/test_decorators.py @@ -83,7 +83,7 @@ class DecoratorTests(torch._dynamo.test_case.TestCase): # This 
behavior is not ideal, but supporting it would add overhead # to callsites of eval_frame.innermost_fn. A warning would also be very noisy. - w = torch._dynamo.disable(fn=wrapper, recursive=True) + torch._dynamo.disable(fn=wrapper, recursive=True) def test_disable_nn_modules_forward_hook(self): class SimpleLinear(torch.nn.Module): @@ -543,7 +543,7 @@ class DecoratorTests(torch._dynamo.test_case.TestCase): return v1, v2, v3, v4, v5, v6, v7, v8, v9 a, b, c = A(), B(), C() - v1, v2, v3, v4, v5, v6, v7, v8, v9 = fn(a, b, c) + v1, v2, v3, v4, v5, _, v7, v8, v9 = fn(a, b, c) self.assertEqual(v1, (A, 1)) self.assertEqual(v2, (A, 2)) diff --git a/test/dynamo/test_exc.py b/test/dynamo/test_exc.py index 6ae15a139e9..2a3eb5cea59 100644 --- a/test/dynamo/test_exc.py +++ b/test/dynamo/test_exc.py @@ -92,7 +92,7 @@ from user code: raise NotImplementedError # Ensure graph break is not possible - for i in range(3): + for _ in range(3): comptime(f) torch.compile(fn001, backend="eager")(torch.randn(1)) diff --git a/test/dynamo/test_exceptions.py b/test/dynamo/test_exceptions.py index d6613d84560..40f9a3b8395 100644 --- a/test/dynamo/test_exceptions.py +++ b/test/dynamo/test_exceptions.py @@ -32,7 +32,7 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): try: x = torch.sin(x) raise NotImplementedError - except (NotImplementedError, AttributeError) as e: + except (NotImplementedError, AttributeError): x = torch.sigmoid(x) return x @@ -89,7 +89,7 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): try: x = torch.sin(x) raise NotImplementedError("Not implemented") - except NotImplementedError as e: + except NotImplementedError: x = torch.sigmoid(x) try: x = torch.cos(x) @@ -131,7 +131,7 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): try: x = torch.cos(x) raise NotImplementedError("Not implemented") - except NotImplementedError as e: + except NotImplementedError: x = torch.sigmoid(x) raise @@ -144,10 +144,10 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): return x x = torch.randn(4) - ref = fn(x) + fn(x) # Cant use fullgraph=True because RERAISE is not supported opt_fn = torch.compile(fn, backend="eager") - res = opt_fn(x) + opt_fn(x) # TODO(anijain2305) - does not work with fullgraph=True def test_exception_with_ctx_manager(self): @@ -157,7 +157,7 @@ class ExceptionTests(torch._dynamo.test_case.TestCase): with torch.no_grad(): x = torch.sin(x) raise NotImplementedError("Not implemented") - except NotImplementedError as e: + except NotImplementedError: x = torch.sigmoid(x) return x diff --git a/test/dynamo/test_export.py b/test/dynamo/test_export.py index 40c549bedb0..ef537692173 100644 --- a/test/dynamo/test_export.py +++ b/test/dynamo/test_export.py @@ -48,9 +48,9 @@ class ExportTests(torch._dynamo.test_case.TestCase): lc_key = state[0] lc_val = state[1] bar = [] - for i in range(0, 4): + for _ in range(0, 4): bar2 = [] - for j in range(0, 3): + for _ in range(0, 3): bar2.append( lc_key + lc_val + torch.tensor([0.1, 0.25, 0.4, 0.5, 0.1]) ) @@ -96,7 +96,7 @@ class ExportTests(torch._dynamo.test_case.TestCase): def func(x, y): return x - exported = torch._dynamo.export(func, same_signature=False)(*inps) + torch._dynamo.export(func, same_signature=False)(*inps) def test_no_tensor_computation(self): inp = [torch.randn(3)] @@ -645,9 +645,9 @@ def forward(self, x, y): lc_key = state[0] lc_val = state[1] bar = [] - for i in range(0, 4): + for _ in range(0, 4): bar2 = [] - for j in range(0, 3): + for _ in range(0, 3): bar2.append( lc_key + lc_val + torch.tensor([0.1, 0.25, 0.4, 0.5, 
0.1]) ) @@ -1394,7 +1394,7 @@ def forward(self, x, y): module = MyModule() real_result = module(torch.tensor([1.0, 1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) + graph, _ = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) # Tensor input can be almost anything here, and the result will capture what we # made constant at compile time. @@ -1418,7 +1418,7 @@ def forward(self, x, y): module = MyModule() real_result = module(torch.tensor([1.0, 1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) + graph, _ = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) # Tensor input can be almost anything here, and the result will capture what we # made constant at compile time. @@ -1442,7 +1442,7 @@ def forward(self, x, y): module = MyModule() real_result = module(torch.tensor([1.0, 1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) + graph, _ = torch._dynamo.export(module)(torch.tensor([1.0, 1.0])) # Tensor input can be almost anything here, and the result will capture what we # made constant at compile time. @@ -1464,7 +1464,7 @@ def forward(self, x, y): module = MyModule() real_result = module(torch.tensor([2.0, 2.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([2.0, 2.0])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2.0, 2.0])) # Tensor input can be almost anything here, and the result will capture what we # made constant at compile time. @@ -1493,7 +1493,7 @@ def forward(self, x, y): # X is negative, so .item() < 0, which means we return y self.assertEqual(real_result, torch.tensor([0.5])) - graph, guards = torch._dynamo.export(module)(torch.tensor([-1])) + graph, _ = torch._dynamo.export(module)(torch.tensor([-1])) result = graph(torch.tensor([2])) # X is positive, but we compiled helper_fn to return None, so it will still return y self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1520,7 +1520,7 @@ def forward(self, x, y): # X is positive, so .item() > 0, which means we return y * x self.assertEqual(real_result, torch.tensor([1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([2])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2])) result = graph(torch.tensor([-0.5])) # X is negative, but we compiled helper_fn to return x, so it will still return y * x self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1547,7 +1547,7 @@ def forward(self, x, y): # X is negative, so .item() < 0, which means we return y self.assertEqual(real_result, torch.tensor([0.5])) - graph, guards = torch._dynamo.export(module)(torch.tensor([-1])) + graph, _ = torch._dynamo.export(module)(torch.tensor([-1])) result = graph(torch.tensor([2])) # X is positive, but we compiled helper_fn to return None, so it will still return y self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1574,7 +1574,7 @@ def forward(self, x, y): # X is positive, so .item() > 0, which means we return y * x self.assertEqual(real_result, torch.tensor([1.0])) - graph, guards = torch._dynamo.export(module)(torch.tensor([2])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2])) result = graph(torch.tensor([-0.5])) # X is negative, but we compiled helper_fn to return x, so it will still return y * x self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1601,7 +1601,7 @@ def forward(self, x, y): # X is positive, so .item() > 0, which means we return y * x self.assertEqual(real_result, torch.tensor([1.0])) - graph, guards = 
torch._dynamo.export(module)(torch.tensor([2])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2])) result = graph(torch.tensor([-0.5])) # X is negative, but we compiled helper_fn to return x, so it will still return y * x self.assertTrue(torch._dynamo.utils.same(result, real_result)) @@ -1622,7 +1622,7 @@ def forward(self, x, y): module = MyModule() module.val = "A" resA = module(torch.tensor([2])) - graph, guards = torch._dynamo.export(module)(torch.tensor([2])) + graph, _ = torch._dynamo.export(module)(torch.tensor([2])) module.val = "B" resB = graph(torch.tensor([2])) self.assertTrue(torch._dynamo.utils.same(resA, resB)) @@ -1647,7 +1647,7 @@ def forward(self, x, y): model = Bob(0.5, 0.3) inp = torch.ones(3, 4) - graph, guards = torch._dynamo.export(model)(inp) + graph, _ = torch._dynamo.export(model)(inp) self.assertEqual(model(inp), graph(inp)) def test_export_with_constant_in_unspecialized_nn_module(self): @@ -1707,7 +1707,7 @@ def forward(self, x, y): return x.cos() with self.assertRaises(AssertionError): - graph, _ = torch._dynamo.export( + torch._dynamo.export( f, (torch.randn(5)), aten_graph=False, @@ -1889,7 +1889,7 @@ def forward(self, x, y): mods = [Module(), Module2()] for mod in mods: x = torch.randn(2, 2) - out_graph, guards = torch._dynamo.export(mod)(x) + out_graph, _ = torch._dynamo.export(mod)(x) self.assertExpectedInline( out_graph.code.strip(), """\ @@ -1978,7 +1978,7 @@ def forward(self, l_x_): torch._dynamo.exc.Unsupported, "zero-sized tensor", ): - out_graph, _ = torch._dynamo.export(mod)(xs) + torch._dynamo.export(mod)(xs) def test_export_meta_val(self): def f(x, y, z): @@ -2785,7 +2785,7 @@ def forward(self, x): return b y = torch.tensor([8, 8, 6]) - gm, _ = torch._dynamo.export( + torch._dynamo.export( f, aten_graph=True, tracing_mode="symbolic", @@ -3039,7 +3039,7 @@ def forward(self, x): return self.my_lin(x) mod, input_tensor = BasicModule(), torch.randn(2, 3) - gm, guard = torch._dynamo.export(mod, aten_graph=True)(input_tensor) + gm, _ = torch._dynamo.export(mod, aten_graph=True)(input_tensor) ref = mod(x=input_tensor) res = gm(x=input_tensor) self.assertTrue(torch._dynamo.utils.same(ref, res)) @@ -3058,9 +3058,7 @@ def forward(self, x): torch.randn(2, 3), torch.randn(2, 3), ) - gm, guard = torch._dynamo.export(mod, aten_graph=True)( - input_tensor, input_tensor2 - ) + gm, _ = torch._dynamo.export(mod, aten_graph=True)(input_tensor, input_tensor2) ref = mod(input_tensor, input_tensor2) res = gm(input_tensor, input_tensor2) self.assertTrue(torch._dynamo.utils.same(ref, res)) @@ -3086,7 +3084,7 @@ def forward(self, x): torch._dynamo.exc.UserError, "Dynamic control flow is not supported at the moment", ): - gm, _ = torch._dynamo.export(f, aten_graph=True)(torch.randn(5, 6)) + torch._dynamo.export(f, aten_graph=True)(torch.randn(5, 6)) @config.patch(assume_static_by_default=False) def test_export_persist_assert(self): @@ -3094,7 +3092,7 @@ def forward(self, x): assert x[0].sum() > 4, "Shape must be more than 4" return x.cos() + x.sin() - gm, guard = torch._dynamo.export(f, aten_graph=True, tracing_mode="symbolic")( + gm, _ = torch._dynamo.export(f, aten_graph=True, tracing_mode="symbolic")( torch.ones(5, 4, 6) ) @@ -3642,7 +3640,7 @@ G['macademia'], accessed at: arglebargle = torch.randn(3) def f(y): - x = arglebargle + x = arglebargle # noqa: F841 return y torch._dynamo.export(f)(torch.randn(3)) @@ -3701,7 +3699,7 @@ G['macademia'], accessed at: inputs = (torch.randn(10, 2, 2),) dynamic_shapes = ({0: torch.export.Dim("dim")},) for aten_graph in 
[True, False]: - gm = torch._dynamo.export( + torch._dynamo.export( model, dynamic_shapes=dynamic_shapes, aten_graph=aten_graph, @@ -4128,7 +4126,7 @@ def forward(self, a, b, l_x_, d_true_branch, c_false_branch): with self.assertRaises( torch._dynamo.exc.Unsupported, ): - out_graph, _ = torch._dynamo.export(mod, xs) + torch._dynamo.export(mod, xs) def test_param_buffer_safe_from_mutation_simple(self): class Module(torch.nn.Module): @@ -4170,7 +4168,7 @@ def forward(self, a, b, l_x_, d_true_branch, c_false_branch): return x.sum() + self.buffer1.sum() + self.child(x) gm, _ = torch._dynamo.export(Module(), torch.ones(5), aten_graph=False) - for name, buffer in gm.named_buffers(): + for _, buffer in gm.named_buffers(): self.assertTrue(torch.allclose(buffer, torch.zeros(5))) def test_predispatch_with_higher_order(self): diff --git a/test/dynamo/test_frame_init.py b/test/dynamo/test_frame_init.py index 97aac1870e9..f0dec020fbc 100644 --- a/test/dynamo/test_frame_init.py +++ b/test/dynamo/test_frame_init.py @@ -105,7 +105,7 @@ class FrameInitTests(torch._dynamo.test_case.TestCase): ) return None - for callback in [callback1, callback2]: + for _ in [callback1, callback2]: torch._dynamo.reset() expected_varargs_output = target_with_varargs( 1, 2, 3, 4, name1=1, name2=2, name3=3 diff --git a/test/dynamo/test_functions.py b/test/dynamo/test_functions.py index 1cedc01a2b2..b7dbe07fdea 100644 --- a/test/dynamo/test_functions.py +++ b/test/dynamo/test_functions.py @@ -587,7 +587,7 @@ class FunctionTests(torch._dynamo.test_case.TestCase): @make_test def test_range2(x, y): r = x + y - for i in range(x.size(0) + 2): + for _ in range(x.size(0) + 2): r = r / y return r @@ -1128,7 +1128,7 @@ class FunctionTests(torch._dynamo.test_case.TestCase): @make_test def test_module_constant(x, y): r = x + y - for i in range(torch._dynamo.testing.three): + for _ in range(torch._dynamo.testing.three): r = r / y return r @@ -2661,7 +2661,6 @@ class GraphModule(torch.nn.Module): dynamo_result = torch.compile(fn, backend=cnts)(udf_mul, udf_mul, x) eager_result = fn(udf_mul, udf_mul, x) - gm = backend.graphs[0] self.assertEqual(eager_result, dynamo_result) if torch._dynamo.config.assume_static_by_default: self.assertExpectedInline( @@ -2708,7 +2707,6 @@ class GraphModule(torch.nn.Module): dynamo_result = torch.compile(fn, backend=cnts)(udf_mul, udf_add, x) eager_result = fn(udf_mul, udf_add, x) - gm = backend.graphs[0] self.assertEqual(eager_result, dynamo_result) if torch._dynamo.config.assume_static_by_default: self.assertExpectedInline( @@ -2759,7 +2757,6 @@ class GraphModule(torch.nn.Module): dynamo_result = torch.compile(fn, backend=cnts)(udf_mul, x) eager_result = fn(udf_mul, x) - gm = backend.graphs[0] self.assertEqual(eager_result, dynamo_result) if torch._dynamo.config.assume_static_by_default: self.assertExpectedInline( @@ -2807,7 +2804,6 @@ class GraphModule(torch.nn.Module): dynamo_result = torch.compile(fn, backend=cnts)(udf_mul2, x) eager_result = fn(udf_mul2, x) - gm = backend.graphs[0] self.assertEqual(eager_result, dynamo_result) if torch._dynamo.config.assume_static_by_default: self.assertExpectedInline( @@ -2853,7 +2849,7 @@ class GraphModule(torch.nn.Module): x = torch.randn(2, 2) fn = torch.compile(fn, backend=cnts, fullgraph=True) - dynamo_result = fn(lambda0, lambda1, x) + fn(lambda0, lambda1, x) self.assertEqual(cnts.frame_count, 1) fn(lambda1, lambda0, x) @@ -2880,7 +2876,7 @@ class GraphModule(torch.nn.Module): x = torch.randn(2, 2) fn2 = torch.compile(fn2, backend=cnts, fullgraph=True) - 
dynamo_result = fn2(lambda0, lambda1, [x]) + fn2(lambda0, lambda1, [x]) self.assertEqual(cnts.frame_count, 1) # start over lambda4 = functools.partial(multiply, y=3, x=torch.randn(3, 3)) @@ -3047,7 +3043,7 @@ class GraphModule(torch.nn.Module): opt_fn_dtype = torch.compile(func_dtype, backend=cnts_1) a = torch.zeros(3, dtype=typ) for arg in dt_args: - r = opt_fn_dtype(a, arg) + opt_fn_dtype(a, arg) # each should produce an identical arg self.assertEqual(cnts_1.frame_count, 1) @@ -3055,7 +3051,7 @@ class GraphModule(torch.nn.Module): opt_fn_info = torch.compile(func_info, backend=cnts_2) info_args = [info_func(dt) for dt in dt_args] for arg in info_args: - r = opt_fn_info(a, arg) + opt_fn_info(a, arg) # each should produce an identical arg self.assertEqual(cnts_2.frame_count, 1) @@ -3259,7 +3255,7 @@ class GraphModule(torch.nn.Module): test(10, 1, -3) # Fuzz testing - for i in range(100): + for _ in range(100): args = self.gen_random_range_args() print("testing :", args) test(*args) @@ -3285,7 +3281,7 @@ class GraphModule(torch.nn.Module): test(range(10, 20, 2), 1, expected=12) # Fuzz testing - for i in range(100): + for _ in range(100): range_args = self.gen_random_range_args() r = range(*range_args) @@ -3348,7 +3344,7 @@ class GraphModule(torch.nn.Module): return slice(r_item(), r_item(), r_item(False)) # Fuzz testing - for i in range(100): + for _ in range(100): range_args = self.gen_random_range_args() r = range(*range_args) # generate random slice @@ -3384,8 +3380,8 @@ class GraphModule(torch.nn.Module): idx_size = [10] idx_size[random.randint(0, 0)] = random.randint(1, 8) t = tuple(idx_size) - src_size = [random.randint(1, 5) + s for s in idx_size] - idx = torch.empty(t) + src_size = [random.randint(1, 5) + s for s in idx_size] # noqa: F841 + idx = torch.empty(t) # noqa: F841 fn() @@ -3412,7 +3408,7 @@ class GraphModule(torch.nn.Module): ) t1 = make_q_tensor() t2 = make_kv_tensor() - t3 = t1 + t2 + t3 = t1 + t2 # noqa: F841 func() @@ -3420,7 +3416,7 @@ class GraphModule(torch.nn.Module): @torch.compile(backend="eager") def fn(): t = torch.ones(2) - y = t.to("meta") + y = t.to("meta") # noqa: F841 fn() @@ -3581,7 +3577,7 @@ class GraphModule(torch.nn.Module): y += 1 return x - l = list(zip([a, b], map(f, [1, 2, 3, 4]))) + l = list(zip([a, b], map(f, [1, 2, 3, 4]))) # noqa: F841 return a + y @make_test @@ -4182,7 +4178,6 @@ class DefaultsTests(torch._dynamo.test_case.TestCase): disallowed(g) - f_opt = torch._dynamo opt_f = torch.compile(f, backend="eager") opt_f() f() diff --git a/test/dynamo/test_graph_deduplication.py b/test/dynamo/test_graph_deduplication.py index 544dea24021..cc95db79ff3 100644 --- a/test/dynamo/test_graph_deduplication.py +++ b/test/dynamo/test_graph_deduplication.py @@ -28,7 +28,7 @@ class GraphDededuplicationTests(TestCase): return z def fn(x, y): - o0 = inner_fn(x, y) + _o0 = inner_fn(x, y) o1 = torch.sin(y) o2 = inner_fn(x, o1) o3 = inner_fn(x, y) @@ -448,7 +448,7 @@ class GraphModule(torch.nn.Module): def fn(x, y): x0 = torch.sin(x) - y0 = torch.cos(y) + _y0 = torch.cos(y) # o0 = inner_fn(x0, y0) # o1 = inner_fn(x0, o0) o2 = inner_fn2(x0, y) diff --git a/test/dynamo/test_graph_region_tracker.py b/test/dynamo/test_graph_region_tracker.py index c701ede3d4c..584079f1b1d 100644 --- a/test/dynamo/test_graph_region_tracker.py +++ b/test/dynamo/test_graph_region_tracker.py @@ -56,7 +56,7 @@ class GraphRegionTrackerTests(TestCase): return z def fn(x, y): - o0 = inner_fn(x, y) + _o0 = inner_fn(x, y) o1 = torch.sin(y) o2 = inner_fn(x, o1) o3 = inner_fn(x, y) diff --git 
a/test/dynamo/test_higher_order_ops.py b/test/dynamo/test_higher_order_ops.py index ae3fd328b70..e2f12052145 100644 --- a/test/dynamo/test_higher_order_ops.py +++ b/test/dynamo/test_higher_order_ops.py @@ -425,7 +425,7 @@ class GraphModule(torch.nn.Module): def test_wrap_pytree_kwargs(self): def f(x, y, z): def fn(*, x, y, z): - z1, z2 = z + z1, _ = z return (x * 2) + y + z1 return wrap(fn, x=x, y=y, z=z) @@ -459,7 +459,6 @@ class GraphModule(torch.nn.Module): def test_capture_constants(self): x = torch.randn(3, 3) - y = 4.0 def fn(x, y, z): if z: @@ -1719,9 +1718,6 @@ class GraphModule(torch.nn.Module): self._test_wrap_simple(f, default_args_generator((x, y, 8)), arg_count) def test_map_subgraph_name_is_valid(self): - backend = EagerAndRecordGraphs() - cnt = CompileCounterWithBackend(backend) - xs = torch.randn(2, 3, 3) y = torch.randn(3) @@ -1760,8 +1756,6 @@ def forward(self, child : torch.Tensor, l_y_ : torch.Tensor): ) def test_map_multi_return(self): - cnt = CompileCounter() - def f(x): return control_flow.map(lambda x: (x.sin(), x.sin()), x) @@ -1790,8 +1784,6 @@ def forward(self, child : torch.Tensor): ) def test_map_pytree_return(self): - cnt = CompileCounter() - def _construct_pytree(a): return (a, [[[a]]], a, (a, (a,), a), {"a": a}) @@ -1840,9 +1832,6 @@ def forward(self, child : torch.Tensor): self.assertEqual(cnt.frame_count, 0) def test_map_symint_input(self): - backend = EagerAndRecordGraphs() - cnt = CompileCounterWithBackend(backend) - def fn(x, y): def inner(x, y): return torch.sin(x + y) @@ -1874,9 +1863,6 @@ def forward(self, child : torch.Tensor, const_unused : int): ) def test_map_lowers_to_graph(self): - backend = EagerAndRecordGraphs() - cnt = CompileCounterWithBackend(backend) - def fn(x, y): def inner(x, y): return torch.sin(x + y) @@ -1933,7 +1919,7 @@ def forward(self, child : torch.Tensor, const_unused : int): rand_44.reshape(2, 8), ] for x in inps: - compiled_ret = torch.compile( + compiled_ret = torch.compile( # noqa: F841 control_flow.map, backend=backend, fullgraph=True )(inner, x) eager_sin, eager_transpose, eager_view = map_dense(inner, (x,), ()) @@ -2920,7 +2906,7 @@ class GraphModule(torch.nn.Module): return control_flow.map(inner, xs, y).sin() - result = map_f(xs, y) + map_f(xs, y) gm = backend.graphs[0] actual_stack = self._get_source_fn_stack(gm, {"cos", "add", "sin"}) @@ -3095,7 +3081,6 @@ def forward(self, L_a_ : torch.SymInt, L_b_ : torch.SymInt, L_c_ : torch.SymInt, return torch.cond(pred, true_fn, false_fn, [pytree_in]) backend = EagerAndRecordGraphs() - cnt = CompileCounterWithBackend(backend) compiled_res = torch.compile(fn, backend=backend)(pred, inp) eager_res = fn(pred, inp) self.assertEqual(compiled_res, eager_res) @@ -3252,7 +3237,7 @@ class GraphModule(torch.nn.Module): msg = "hints_wrapper - key hints not provided" with self.assertRaisesRegex(RuntimeError, msg): - compiled_res = torch.compile(fn_with_hints, backend=cnt)(x, y) + torch.compile(fn_with_hints, backend=cnt)(x, y) def test_hints_wrapper_incorrect_type(self): def fn_with_hints(x, y): @@ -3271,7 +3256,7 @@ class GraphModule(torch.nn.Module): msg = r"hints must be a dict containing int, float, bool or str value," with self.assertRaisesRegex(RuntimeError, msg): - compiled_res = torch.compile(fn_with_hints, backend=cnt)(x, y) + torch.compile(fn_with_hints, backend=cnt)(x, y) def test_hints_wrapper_pytree_inputs(self): def fn_with_hints(x, y): @@ -3284,9 +3269,6 @@ class GraphModule(torch.nn.Module): ) return res - backend = EagerAndRecordGraphs() - cnt = 
CompileCounterWithBackend(backend) - x = torch.randn(2, 4) y = torch.ones(4) @@ -3515,10 +3497,10 @@ class HigherOrderOpVmapGuardTests(LoggingTestCase): return torch.vmap(lambda x: x.sin())(x) x = torch.zeros(3, 3, 4, 5) - y = torch.vmap(fn, randomness="same")(x) + torch.vmap(fn, randomness="same")(x) self.assertEqual(len(records), 0) # sanity check - y = torch.vmap(fn, randomness="different")(x) + torch.vmap(fn, randomness="different")(x) self.assertGreater(len(records), 0) record = self.getRecord(records, "pyfunctorch") self.assertIn( @@ -5891,9 +5873,9 @@ class GraphModule(torch.nn.Module): return torch.vmap(lambda x: x.sin())(x) x = torch.zeros(3, 3, 4, 5) - y = torch.vmap(fn)(x) + torch.vmap(fn)(x) # should not recompile on second call. See Pytorch issue #118493 - y = torch.vmap(fn)(x) + torch.vmap(fn)(x) @xfailIfTorchDynamo @config.patch(error_on_recompile=True) @@ -5903,7 +5885,7 @@ class GraphModule(torch.nn.Module): return torch.vmap(lambda x: x.sin())(x) x = torch.zeros(3, 3, 4, 5) - y = torch.vmap(fn)(x) + torch.vmap(fn)(x) with self.assertRaises(torch._dynamo.exc.RecompileError): fn(x) diff --git a/test/dynamo/test_hooks.py b/test/dynamo/test_hooks.py index 91fb5279419..29ff1ddf93f 100644 --- a/test/dynamo/test_hooks.py +++ b/test/dynamo/test_hooks.py @@ -102,7 +102,7 @@ class HooksTests(torch._dynamo.test_case.TestCase): def test_tensor_register_hook_repeated_handle_return(self): def fn(x, y, z): handle = x.register_hook(lambda grad: grad * 2) - h2 = handle + h2 = handle # noqa: F841 z = z * z return x, y * y, z, handle, handle diff --git a/test/dynamo/test_logging.py b/test/dynamo/test_logging.py index 83f7306de06..1ca1ed24592 100644 --- a/test/dynamo/test_logging.py +++ b/test/dynamo/test_logging.py @@ -487,7 +487,7 @@ LoweringException: AssertionError: def test_distributed_rank_logging(self): env = dict(os.environ) env["TORCH_LOGS"] = "dynamo" - stdout, stderr = self.run_process_no_exception( + _, stderr = self.run_process_no_exception( """\ import torch.distributed as dist import logging @@ -796,7 +796,7 @@ TRACE FX call mul from test_logging.py:N in fn (LoggingTests.test_trace_call_pre env = dict(os.environ) env["TORCH_LOGS"] = "dynamo" env["TORCH_LOGS_OUT"] = file_path - stdout, stderr = self.run_process_no_exception( + _, stderr = self.run_process_no_exception( """\ import torch @torch.compile(backend="eager") diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py index e00b7bb8abe..ae1fff3587c 100644 --- a/test/dynamo/test_misc.py +++ b/test/dynamo/test_misc.py @@ -1,4 +1,5 @@ # Owner(s): ["module: dynamo"] +# ruff: noqa: F841 import abc import collections import collections.abc diff --git a/test/dynamo/test_modes.py b/test/dynamo/test_modes.py index 303c7b748d0..d5b3b1ab6b3 100644 --- a/test/dynamo/test_modes.py +++ b/test/dynamo/test_modes.py @@ -620,7 +620,7 @@ class TorchFunctionModeTests(torch._dynamo.test_case.TestCase): return prefix_lengths[b] >= kv # This runs in fullgraph already - mask = create_block_mask(prefix_lm, 8, None, 512, 512, _compile=True) + create_block_mask(prefix_lm, 8, None, 512, 512, _compile=True) def test_register_hook(self): import functools @@ -641,7 +641,7 @@ class TorchFunctionModeTests(torch._dynamo.test_case.TestCase): x = torch.ones(4, requires_grad=True) with torch.device("cpu"): - out = torch.compile(mod, fullgraph=True)(x) + torch.compile(mod, fullgraph=True)(x) if __name__ == "__main__": diff --git a/test/dynamo/test_modules.py b/test/dynamo/test_modules.py index 1a8907efde9..1cc62d5851f 100644 --- 
a/test/dynamo/test_modules.py +++ b/test/dynamo/test_modules.py @@ -1,4 +1,5 @@ # Owner(s): ["module: dynamo"] +# ruff: noqa: F841 import collections import contextlib @@ -3051,7 +3052,7 @@ class OptimizedModuleTest(torch._dynamo.test_case.TestCase): self.x = res return self.Linear2(res) - N, D_in, H, D_out, inner = 2, 2, 2, 2, 4 + N, D_in, H, inner = 2, 2, 2, 4 model = ReplayMutation(D_in, H, inner) model2 = copy.deepcopy(model) input = torch.ones(N, D_in) diff --git a/test/dynamo/test_prim_hop_base.py b/test/dynamo/test_prim_hop_base.py index 9094a83cb55..ffd7bc03ab7 100644 --- a/test/dynamo/test_prim_hop_base.py +++ b/test/dynamo/test_prim_hop_base.py @@ -160,10 +160,10 @@ class GraphModule(torch.nn.Module): return invoke_quant_test(inner, (x, y), scheme="nf4") with self.assertRaisesRegex(RuntimeError, "aliases of the inputs"): - out = f(inner, x, y) + f(inner, x, y) with self.assertRaisesRegex(RuntimeError, "inputs are mutated"): - out = f(inner2, x, y) + f(inner2, x, y) def test_eager_call(self): def inner(x, y): diff --git a/test/dynamo/test_profiler.py b/test/dynamo/test_profiler.py index 8d18dcd7bb6..9a7a892d8b0 100644 --- a/test/dynamo/test_profiler.py +++ b/test/dynamo/test_profiler.py @@ -101,7 +101,7 @@ class DynamoProfilerTests(torch._dynamo.test_case.TestCase): with TemporaryFileName() as fname: et.register_callback(fname) et.start() - out = opt_fn(*inputs) + opt_fn(*inputs) et.stop() et.unregister_callback() diff --git a/test/dynamo/test_reorder_logs.py b/test/dynamo/test_reorder_logs.py index b67013079fa..0b22ca50c18 100644 --- a/test/dynamo/test_reorder_logs.py +++ b/test/dynamo/test_reorder_logs.py @@ -171,7 +171,7 @@ class ReorderLogsTests(torch._dynamo.test_case.TestCase): counters.clear() with torch._dynamo.config.patch(reorderable_logging_functions={custom_log}): opt_f = torch.compile(backend="eager")(f) - opt_out = opt_f(x) + opt_f(x) self.assertEqual(sum(counters["graph_break"].values()), 1) self.assertEqual(custom_logs[0], "moo") diff --git a/test/dynamo/test_repros.py b/test/dynamo/test_repros.py index 8a6550cead4..160084589b5 100644 --- a/test/dynamo/test_repros.py +++ b/test/dynamo/test_repros.py @@ -271,7 +271,7 @@ class _ReversibleFunction(torch.autograd.Function): # split duplicated tensor hidden_states, attn_output = torch.chunk(hidden_states, 2, dim=-1) - for layer_id, (layer, layer_head_mask) in enumerate(zip(layers, head_mask)): + for layer in layers: if output_hidden_states is True: all_hidden_states.append(hidden_states) @@ -650,7 +650,7 @@ class XSoftmax(torch.autograd.Function): @staticmethod def backward(self, grad_output): - (output, rmask) = self.saved_tensors + output, _ = self.saved_tensors inputGrad = softmax_backward_data(self, grad_output, output, self.dim, output) return inputGrad, None, None @@ -1342,6 +1342,8 @@ class ReproTests(torch._dynamo.test_case.TestCase): @torch._dynamo.config.patch(error_on_recompile=True) @torch.fx.experimental._config.patch(use_duck_shape=False) def test_dynamic_shape_disable_duck_size(self): + # noqa: F841 + class TestModel(nn.Module): def __init__( self, @@ -1357,11 +1359,11 @@ class ReproTests(torch._dynamo.test_case.TestCase): x1 = torch.rand(2, 5, 10, 10).to(memory_format=torch.channels_last) x2 = torch.rand(2, 5, 4, 8).to(memory_format=torch.channels_last) - o1_ref = main_model(x1, 4) - o1 = opt_model(x1, 4) + main_model(x1, 4) + opt_model(x1, 4) - o2_ref = main_model(x2, 20) - o2 = opt_model(x2, 20) + main_model(x2, 20) + opt_model(x2, 20) def test_chunk_reformer_ff(self): input = torch.randn([1, 4096, 
256]) @@ -1483,7 +1485,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): self.assertEqual(cnt.frame_count, 2) self.assertEqual(cnt.op_count, 2) # rand, rand try: - graph, _ = torch._dynamo.export(fn)() + _, _ = torch._dynamo.export(fn)() # See https://github.com/pytorch/pytorch/pull/87490 self.fail("unexpected export success") except torch._dynamo.exc.Unsupported: @@ -1713,7 +1715,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): ) x = torch.rand([111, 262], device=device) - y2 = forward_aot(x, 2) # previously failed + forward_aot(x, 2) # previously failed def test_issue175(self): n_heads = 2 @@ -2323,7 +2325,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): class Mod(torch.nn.Module): def forward(self, listy): x = listy[3:5] - for i in range(10): + for _ in range(10): z = torch.abs(torch.randn(10)) + 1 x[0] = z return x @@ -2619,7 +2621,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): def forward(self, inp): res = 0 - for name, buffer in self.named_buffers(): + for _, buffer in self.named_buffers(): res += buffer.sum() return inp.cos() + res @@ -2718,7 +2720,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): def forward(self, inp): res = torch.zeros(3, 3) - for mod in self.modules(): + for _ in self.modules(): res += self.fc(inp) return res @@ -2799,7 +2801,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): for (sh, st, dt, dev, rg) in args ] - opt_foo = torch.compile(foo, backend="aot_eager_decomp_partition") + torch.compile(foo, backend="aot_eager_decomp_partition") with torch.cuda.amp.autocast(enabled=True): ref = foo(*args)[0] res = foo(*args)[0] @@ -2974,7 +2976,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): # Repro of huggingface graph break inside loop in `get_parameter_dtype`. # Skip only the inner frame that has loop that contains graph break. 
def inner(x): - for i in range(3): + for _ in range(3): x += 1 torch._dynamo.graph_break() return x @@ -3647,7 +3649,6 @@ class ReproTests(torch._dynamo.test_case.TestCase): ref2 = fn(x, inp_list2) ref3 = fn(x, inp_list3) - cnt = torch._dynamo.testing.CompileCounter() opt_fn = torch.compile(fn, fullgraph=True) opt_ret1 = opt_fn(x, inp_list1) @@ -4143,7 +4144,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): return x.sin() counter = CompileCounter() - compiled_fn = torch.compile(fn, backend=counter)(torch.randn([2, 2]), []) + torch.compile(fn, backend=counter)(torch.randn([2, 2]), []) self.assertEqual(counter.frame_count, 1) def test_graph_break_on_jit_isinstance(self): @@ -4295,7 +4296,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): torch._dynamo.reset() torch._dynamo.utils.clear_compilation_metrics() - res = torch.compile(fn, backend="aot_eager")(x) + torch.compile(fn, backend="aot_eager")(x) all_metrics = torch._dynamo.utils.get_compilation_metrics() @@ -4399,7 +4400,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): compiled_fn = torch.compile(func, backend=cnt, fullgraph=True) requires_grad = func is not func1 - for i in range(0, 5): + for _ in range(0, 5): # Inputs eager_a = torch.ones([6], requires_grad=requires_grad) compiled_a = torch.ones([6], requires_grad=requires_grad) @@ -4487,7 +4488,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): pass def fn(x, y): - ucm = UserCtxManager() + ucm = UserCtxManager() # noqa: F841 return x * x cnt = torch._dynamo.testing.CompileCounter() @@ -4538,11 +4539,11 @@ class ReproTests(torch._dynamo.test_case.TestCase): e = base[:, 8:10] f = base[:, 10:12] f2 = base[:, 10:14] - out = fn(a, b, c, d, e, f) + fn(a, b, c, d, e, f) with self.assertRaisesRegex( AssertionError, "is being compiled with dynamic shapes" ): - out2 = fn(a, b, c, d, e, f2) + fn(a, b, c, d, e, f2) def test_user_ctor_ctx_manager_custom_init(self): class UserCtxManager: @@ -4556,7 +4557,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): pass def fn(x, y): - ucm = UserCtxManager(y) + ucm = UserCtxManager(y) # noqa: F841 return x * y[0] cnt = torch._dynamo.testing.CompileCounter() @@ -4580,7 +4581,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): def fn(x, counter): x = x * x - ucm = UserCtxManager(counter) + ucm = UserCtxManager(counter) # noqa: F841 return x * x cnt = torch._dynamo.testing.CompileCounter() @@ -4588,7 +4589,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): x = torch.rand([2, 2]) self.assertEqual(opt_fn(x, counter), fn(x, counter)) self.assertEqual(counter[0], 2) - for i in range(0, 10): + for _ in range(0, 10): opt_fn(x, counter) self.assertEqual(counter[0], 12) if torch._dynamo.config.assume_static_by_default: @@ -4719,7 +4720,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): def test_invalid_seq_unpack(self): def myfn(arg): - (a, b) = arg + (a, b) = arg # noqa: F841 def fn(): return myfn((1, 2, 3)) @@ -4804,13 +4805,13 @@ class ReproTests(torch._dynamo.test_case.TestCase): a = torch.randn(2, 4) a_ref = a.clone() - out_ref = foo(a_ref) + foo(a_ref) f_compiled = torch.compile(foo, backend="aot_eager") with self.assertRaisesRegex( RuntimeError, "encountered a mutation on a view chain of length 2, where view 1 was an as_strided", ): - out = f_compiled(a) + f_compiled(a) def test_dont_aggressively_write_assert(self): record_graph = torch._dynamo.testing.EagerAndRecordGraphs() @@ -5620,7 +5621,7 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): random_op = torch.compile(random_op) 
params = {"from": -10, "to": 10} tensor = torch.randn([2, 3]) - res = random_op(tensor, params) + random_op(tensor, params) # https://github.com/pytorch/pytorch/issues/131019 def test_tensor_uniform(self): @@ -5631,7 +5632,7 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): uniform_op = torch.compile(uniform_op) params = {"from": -10, "to": 10} tensor = torch.randn([2, 3]) - res = uniform_op(tensor, params) + uniform_op(tensor, params) def test_data_attr_mutation_after_saved_for_bw(self): def f(x): @@ -5771,7 +5772,7 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): @torch.compile(backend="aot_eager_decomp_partition") def f(x, l): - z = x.sin() + z = x.sin() # noqa: F841 y = x + 1 # graph input has its storage mutated torch.ops.fsdp.copy_.default(x, y) @@ -5860,14 +5861,14 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): opt_mod = torch.compile(mod, backend="eager") x = torch.randn(1, 1) - ref = mod(x) - res = opt_mod(x) + ref = mod(x) # noqa: F841 + res = opt_mod(x) # noqa: F841 mod.submod.multipliers = [3.3, 4.4] # Since guard_nn_modules is False, this will not recompile with torch._dynamo.config.patch(error_on_recompile=True): - ref = mod(x) - res = opt_mod(x) + ref = mod(x) # noqa: F841 + res = opt_mod(x) # noqa: F841 def test_optimized_module_training(self): mod = torch.nn.Linear(3, 3) @@ -6170,9 +6171,9 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): x = torch.randn(4) opt_fn = torch.compile(fn, backend="eager", fullgraph=True) - ref = fn(config, x) + fn(config, x) cloned_config = copy.deepcopy(config) - res = opt_fn(cloned_config, x) + opt_fn(cloned_config, x) self.assertEqual(fn(config, x), opt_fn(config, x)) self.assertEqual(cloned_config.baz, 4) @@ -6230,7 +6231,7 @@ def forward(self, s0 : torch.SymInt, s1 : torch.SymInt, L_x_ : torch.Tensor): x = torch.ones(2) with torch.no_grad(): - y = model(x) + model(x) def test_typed_dict(self): class LlavaImagePixelInputs(TypedDict): diff --git a/test/dynamo/test_resume.py b/test/dynamo/test_resume.py index 057392ca655..42103a7878e 100644 --- a/test/dynamo/test_resume.py +++ b/test/dynamo/test_resume.py @@ -13,7 +13,7 @@ def fn_creator(): torch._dynamo.graph_break() x = x + var1 - def inner_fn(): + def inner_fn(): # noqa: F841 return var2 return x diff --git a/test/dynamo/test_structured_trace.py b/test/dynamo/test_structured_trace.py index 3801fd24cc1..9dd3f3038f9 100644 --- a/test/dynamo/test_structured_trace.py +++ b/test/dynamo/test_structured_trace.py @@ -824,7 +824,7 @@ def forward(self, x, y): trace_log.addHandler(payload_handler) def f(x): - y = x + 1 + y = x + 1 # noqa: F841 raise RuntimeError("boo") try: diff --git a/test/dynamo/test_subclasses.py b/test/dynamo/test_subclasses.py index d7ec7b6f5b6..f2bc6d3748d 100644 --- a/test/dynamo/test_subclasses.py +++ b/test/dynamo/test_subclasses.py @@ -126,7 +126,6 @@ def get_view_test_cases(): def mk_dense_subclass_dense_subclass(): values = torch.randn(10, 5) offsets = torch.tensor([0, 3, 6, 10]) - offsets2 = offsets.detach().clone() return nested_view_from_values_offsets( nested_view_from_values_offsets(values, offsets).values(), offsets ) @@ -136,7 +135,7 @@ def get_view_test_cases(): def mk_subclass_dense_subclass_dense(): x = get_jagged_tensor(((2, 3, 4), 3), None, requires_grad=True)[0].clone() offsets2 = x.offsets().detach().clone() - nt_view = nested_view_from_values_offsets(x.values(), offsets2).values() + nested_view_from_values_offsets(x.values(), 
offsets2).values() yield mk_subclass_dense_subclass_dense, "subclass_dense_subclass_dense" @@ -544,7 +543,7 @@ class SubclassTests(torch._dynamo.test_case.TestCase): input = torch.ones(2, 2) - res = fn(input) + fn(input) def test_torch_function_state_guards(self): cnt = torch._dynamo.testing.CompileCounter() @@ -556,9 +555,9 @@ class SubclassTests(torch._dynamo.test_case.TestCase): input = torch.ones(2, 2) with torch._C.DisableTorchFunctionSubclass(): - res = fn(input) + fn(input) - res = fn(input) + fn(input) self.assertEqual(cnt.frame_count, 2) @@ -1160,7 +1159,7 @@ class GraphModule(torch.nn.Module): ) ff = torch.func.functionalize(f) - ff_out = ff(t_clone) + ff_out = ff(t_clone) # noqa: F841 # frame count and op count are incremented due to re-compilation check_count_and_graph( 2, @@ -1187,7 +1186,7 @@ class GraphModule(torch.nn.Module): x = torch._to_functional_tensor(t_clone2) torch._mirror_autograd_meta_to(t_clone2, x) torch._enable_functionalization(reapply_views=False) - aot_f_out = f(x) + aot_f_out = f(x) # noqa: F841 finally: torch._disable_functionalization() @@ -1334,7 +1333,7 @@ class GraphModule(torch.nn.Module): x = DoubleSizeMaybeAddGeThreeTensor(inp) torch._dynamo.mark_dynamic(x, 0) - res = fn(x) + res = fn(x) # noqa: F841 # During fakeifying, we end up allocating a separate symint # for the outer and inner tensor (in this test, s0 is unused). expected_var_to_val = { @@ -3270,7 +3269,7 @@ Eq(s12, s10)""", x_inner = torch.ones(4) x = TwoTensor(x_inner, x_inner) x_view = x.view(2, 2) - out = f(x_view) + out = f(x_view) # noqa: F841 # NJT1 -> Dense -> NJT2 -> Dense view # During view replay, the Dense -> NJT2 part will construct an intermediate, diff --git a/test/dynamo/test_subgraphs.py b/test/dynamo/test_subgraphs.py index 7d1e7855c21..0cac9499b9d 100644 --- a/test/dynamo/test_subgraphs.py +++ b/test/dynamo/test_subgraphs.py @@ -342,7 +342,7 @@ class SubGraphTests(torch._dynamo.test_case.TestCase): tmp = [a + 1, b + 2, a + b] x = a x = unsupported(x, x) - for i in range(3): + for _ in range(3): x += tmp.pop(-1) return x @@ -369,7 +369,6 @@ class SubGraphTests(torch._dynamo.test_case.TestCase): opt_fn = torch.compile(fn, backend=cnt_dynamic, dynamic=True) start = 2 end = 12 - steps = end - start for i in range(start, end): opt_fn(torch.randn(i), torch.randn(i)) @@ -557,7 +556,7 @@ class SubGraphTests(torch._dynamo.test_case.TestCase): cnt = torch._dynamo.testing.CompileCounter() opt_fn = torch.compile(fn, backend=cnt) v3, it3 = opt_fn(v1) - v4, it4 = opt_fn(v1) + v4, _ = opt_fn(v1) self.assertEqual(v2.tolist(), v3.tolist()) self.assertEqual(v2.tolist(), v4.tolist()) self.assertEqual(list(it2), list(it3))
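
The recurring fix across these hunks is the pyflakes/Ruff F841 cleanup ("local variable is assigned to but never used"): delete the dead binding and keep only the call when the call's side effect is the point under test, rename an unused loop or unpacking target to _, or keep the binding and suppress the warning per line with # noqa: F841 (or file-wide with # ruff: noqa: F841, as the hunks for test_misc.py and test_modules.py do). A minimal sketch of the three forms follows; the function bodies here are hypothetical illustrations, not code taken from the patch:

    import torch

    def before(x):
        y = torch.sin(x)    # F841: y is bound but never read
        for i in range(3):  # loop index i is unused
            x = x + 1
        return x

    def after(x):
        torch.sin(x)        # dead binding dropped; the call itself stays
        for _ in range(3):  # unused loop variable renamed to _
            x = x + 1
        z = x * 2  # noqa: F841  (binding kept deliberately, lint silenced)
        return x

Dropping only the unused binding while keeping the call, as these hunks do for results of torch.compile and torch._dynamo.export, leaves test behavior unchanged while satisfying the linter.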