Enable all PIE rules on ruff (#165814)

This PR enables all PIE rules in ruff. Several rules from this family were already enabled; the newly added rules are:
```
PIE796  Enum contains duplicate value: {value}
PIE808  Unnecessary start argument in range
```
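
For context, here is a minimal sketch of the patterns the two new rules flag; the class and loop below are illustrative, not taken from this PR:
```
import enum


class Color(enum.Enum):
    RED = 1
    GREEN = 2
    CRIMSON = 1  # PIE796: duplicate value silently makes CRIMSON an alias of RED


for i in range(0, 10):  # PIE808: the 0 start is the default; write range(10)
    print(i)
```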

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165814
Approved by: https://github.com/ezyang
Authored by Yuanyuan Chen on 2025-10-18 07:36:18 +00:00; committed by PyTorch MergeBot
Parent: a0948d4d23
Commit: fdab48a7c1
92 changed files with 200 additions and 205 deletions
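
Nearly everything in the diff below is the mechanical PIE808 cleanup of `range(0, n)` into `range(n)`. A change of this shape is typically produced by ruff's autofix, e.g. `ruff check --select PIE808 --fix` (an illustrative invocation, not taken from the PR).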


@ -85,7 +85,7 @@ class WeightOnlyInt8QuantHandler:
cur_state_dict[f"{fqn}.weight"] = int8_weight
cur_state_dict[f"{fqn}.scales"] = scales.to(mod.weight.dtype)
elif isinstance(mod, ConditionalFeedForward):
for weight_idx in range(0, 3):
for weight_idx in range(3):
weight_name = f"w{weight_idx + 1}"
scales_name = f"scales{weight_idx + 1}"
weight = getattr(mod, weight_name)


@ -74,7 +74,7 @@ def unroll(uf, IndexType, InType, OutType, use_weights, isa, fused, use_offsets)
)
code.append(" " + OutType + "* op = &out[rangeIndex * block_size];")
for i in range(0, uf):
for i in range(uf):
j = 8 * i
code.append(" __m256 vop" + str(j) + " = _mm256_setzero_ps();")
@ -158,7 +158,7 @@ def unroll(uf, IndexType, InType, OutType, use_weights, isa, fused, use_offsets)
"&input[idx_pref_T0 * fused_block_size];"
)
for i in range(0, uf):
for i in range(uf):
j = 8 * i
cachelinesize = 64
byteoffset = sizeof[InType] * j
@ -170,7 +170,7 @@ def unroll(uf, IndexType, InType, OutType, use_weights, isa, fused, use_offsets)
code.append(" if (!normalize_by_lengths || length == 0) {")
else:
code.append(" if (!normalize_by_lengths || lengths[rangeIndex] == 0) {")
for i in range(0, uf):
for i in range(uf):
j = 8 * i
code.append(" _mm256_storeu_ps(&op[" + str(j) + "], vop" + str(j) + ");")
code.append(" } else {")
@ -181,7 +181,7 @@ def unroll(uf, IndexType, InType, OutType, use_weights, isa, fused, use_offsets)
code.append(
" __m256 vlen_inv = _mm256_set1_ps(1.0f / lengths[rangeIndex]);"
)
for i in range(0, uf):
for i in range(uf):
j = 8 * i
code.append(
" _mm256_storeu_ps(&op["


@ -204,12 +204,7 @@ select = [
"NPY",
"PERF",
"PGH004",
"PIE790",
"PIE794",
"PIE800",
"PIE804",
"PIE807",
"PIE810",
"PIE",
"PLC0131", # type bivariance
"PLC0132", # type param mismatch
"PLC1802", # len({expression}) used as condition without comparison


@ -190,7 +190,7 @@ class TestActivationSparsifier(TestCase):
if features is None:
assert torch.all(mask * input_data == output)
else:
for feature_idx in range(0, len(features)):
for feature_idx in range(len(features)):
feature = torch.Tensor(
[features[feature_idx]], device=input_data.device
).long()
@ -378,7 +378,7 @@ class TestActivationSparsifier(TestCase):
# some dummy data
data_list = []
num_data_points = 5
for _ in range(0, num_data_points):
for _ in range(num_data_points):
rand_data = torch.randn(16, 1, 28, 28)
activation_sparsifier.model(rand_data)
data_list.append(rand_data)


@ -143,7 +143,7 @@ class TestBaseDataScheduler(TestCase):
# checking step count
step_cnt = 5
for _ in range(0, step_cnt):
for _ in range(step_cnt):
sparsifier.step()
scheduler.step()


@ -123,7 +123,7 @@ class _BaseDataSparsiferTestCase(TestCase):
step_count = 3
for _ in range(0, step_count):
for _ in range(step_count):
sparsifier.step()
for some_data in all_data:
name, data, _ = self._get_name_data_config(some_data)


@ -472,8 +472,8 @@ class TestNearlyDiagonalSparsifier(TestCase):
else:
height, width = mask.shape
dist_to_diagonal = nearliness // 2
for row in range(0, height):
for col in range(0, width):
for row in range(height):
for col in range(width):
if abs(row - col) <= dist_to_diagonal:
assert mask[row, col] == 1
else:


@ -79,7 +79,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
dist.init_process_group(
store=store, rank=self.rank, world_size=self.world_size, backend="gloo"
)
group = list(range(0, self.world_size))
group = list(range(self.world_size))
group_id = dist.group.WORLD
self._test_all_gather(
group, group_id, self.rank, dtype=torch.float32, qtype=DQuantType.FP16
@ -94,7 +94,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
dist.init_process_group(
store=store, rank=self.rank, world_size=self.world_size, backend="gloo"
)
group = list(range(0, self.world_size))
group = list(range(self.world_size))
group_id = dist.group.WORLD
self._test_all_gather(
group, group_id, self.rank, dtype=torch.float32, qtype=DQuantType.BFP16
@ -111,7 +111,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
dist.init_process_group(
store=store, rank=self.rank, world_size=self.world_size, backend="nccl"
)
group = list(range(0, self.world_size))
group = list(range(self.world_size))
group_id = dist.new_group(range(self.world_size))
rank_to_GPU = init_multigpu_helper(self.world_size, BACKEND)
self._test_all_to_all(
@ -135,7 +135,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
dist.init_process_group(
store=store, rank=self.rank, world_size=self.world_size, backend="nccl"
)
group = list(range(0, self.world_size))
group = list(range(self.world_size))
group_id = dist.new_group(range(self.world_size))
rank_to_GPU = init_multigpu_helper(self.world_size, BACKEND)
self._test_all_to_all(
@ -158,7 +158,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
dist.init_process_group(
store=store, rank=self.rank, world_size=self.world_size, backend="nccl"
)
group = list(range(0, self.world_size))
group = list(range(self.world_size))
group_id = dist.new_group(range(self.world_size))
rank_to_GPU = init_multigpu_helper(self.world_size, BACKEND)
self._test_all_to_all_single(
@ -181,7 +181,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
dist.init_process_group(
store=store, rank=self.rank, world_size=self.world_size, backend="nccl"
)
group = list(range(0, self.world_size))
group = list(range(self.world_size))
group_id = dist.new_group(range(self.world_size))
rank_to_GPU = init_multigpu_helper(self.world_size, BACKEND)
self._test_all_to_all_single(


@ -66,7 +66,7 @@ if TEST_WITH_DEV_DBG_ASAN:
def create_sharded_tensor(rank, world_size, shards_per_rank, shard_size=8):
shards_metadata = []
local_shards = []
for idx in range(0, world_size * shards_per_rank):
for idx in range(world_size * shards_per_rank):
shard_rank = idx // shards_per_rank
shard_md = ShardMetadata(
shard_offsets=[idx * shard_size],


@ -45,7 +45,7 @@ if TEST_WITH_DEV_DBG_ASAN:
def create_sharded_tensor(rank, world_size, shards_per_rank):
shards_metadata = []
local_shards = []
for idx in range(0, world_size * shards_per_rank):
for idx in range(world_size * shards_per_rank):
shard_rank = idx // shards_per_rank
shard_md = ShardMetadata(
shard_offsets=[idx * 8], shard_sizes=[8], placement=f"rank:{shard_rank}/cpu"


@ -633,7 +633,7 @@ class SimpleElasticAgentTest(unittest.TestCase):
worker_group = agent.get_worker_group()
num_restarts = 3
for _ in range(0, num_restarts):
for _ in range(num_restarts):
agent._restart_workers(worker_group)
self.assertEqual(WorkerState.HEALTHY, worker_group.state)


@ -146,7 +146,7 @@ def echo_large(size: int) -> dict[int, str]:
returns a large output ({0: test0", 1: "test1", ..., (size-1):f"test{size-1}"})
"""
out = {}
for idx in range(0, size):
for idx in range(size):
out[idx] = f"test{idx}"
return out


@ -191,7 +191,7 @@ if not (IS_WINDOWS or IS_MACOS or IS_ARM64):
"""
client = timer.FileTimerClient(file_path)
sem.release()
for _ in range(0, n):
for _ in range(n):
client.acquire("test_scope", 0)
time.sleep(interval)


@ -102,7 +102,7 @@ if not (IS_WINDOWS or IS_MACOS or IS_ARM64):
world_size = 8
processes = []
for i in range(0, world_size):
for i in range(world_size):
if i % 2 == 0:
p = spawn_ctx.Process(target=_stuck_function, args=(i, mp_queue))
else:
@ -110,7 +110,7 @@ if not (IS_WINDOWS or IS_MACOS or IS_ARM64):
p.start()
processes.append(p)
for i in range(0, world_size):
for i in range(world_size):
p = processes[i]
p.join()
if i % 2 == 0:


@ -127,7 +127,7 @@ if not INVALID_PLATFORMS:
interval seconds. Releases the given semaphore once before going to work.
"""
sem.release()
for i in range(0, n):
for i in range(n):
mp_queue.put(TimerRequest(i, "test_scope", 0))
time.sleep(interval)


@ -15,7 +15,7 @@ class CyclingIteratorTest(unittest.TestCase):
def generator(self, epoch, stride, max_epochs):
# generate an continuously incrementing list each epoch
# e.g. [0,1,2] [3,4,5] [6,7,8] ...
return iter([stride * epoch + i for i in range(0, stride)])
return iter([stride * epoch + i for i in range(stride)])
def test_cycling_iterator(self):
stride = 3
@ -25,7 +25,7 @@ class CyclingIteratorTest(unittest.TestCase):
return self.generator(epoch, stride, max_epochs)
it = CyclingIterator(n=max_epochs, generator_fn=generator_fn)
for i in range(0, stride * max_epochs):
for i in range(stride * max_epochs):
self.assertEqual(i, next(it))
with self.assertRaises(StopIteration):


@ -124,7 +124,7 @@ class TestFSDPHybridShard(FSDPTest):
model = MyModel().to(device_type)
num_node_devices = torch.accelerator.device_count()
shard_rank_lists = (
list(range(0, num_node_devices // 2)),
list(range(num_node_devices // 2)),
list(range(num_node_devices // 2, num_node_devices)),
)
shard_groups = (
@ -175,7 +175,7 @@ class TestFSDPHybridShard(FSDPTest):
model = MyModel().to(device_type)
num_node_devices = torch.accelerator.device_count()
shard_rank_lists = (
list(range(0, num_node_devices // 2)),
list(range(num_node_devices // 2)),
list(range(num_node_devices // 2, num_node_devices)),
)
shard_groups = (


@ -802,7 +802,7 @@ class TestLocalDTensorOps(TestDTensorOps):
self.run_opinfo_test(dtype, op)
def test_mean(self):
with LocalTensorMode(frozenset(range(0, self.world_size))):
with LocalTensorMode(frozenset(range(self.world_size))):
self.run_mean()
def test_one_hot(self):
@ -811,7 +811,7 @@ class TestLocalDTensorOps(TestDTensorOps):
def run_opinfo_test(
self, dtype, op, requires_grad=True, sample_inputs_filter=lambda s: True
):
with LocalTensorMode(frozenset(range(0, self.world_size))):
with LocalTensorMode(frozenset(range(self.world_size))):
super().run_opinfo_test(dtype, op, requires_grad, sample_inputs_filter)
def assertEqualOnRank(self, x, y, msg=None, *, rank=0):


@ -536,7 +536,7 @@ class DeviceMeshTestNDim(DTensorTestBase):
# Create shard groups (e.g. (0, 1, 2, 3), (4, 5, 6, 7))
# and assign the correct shard group to each rank
shard_rank_lists = (
list(range(0, self.world_size // 2)),
list(range(self.world_size // 2)),
list(range(self.world_size // 2, self.world_size)),
)
shard_groups = (


@ -5722,11 +5722,11 @@ class TestKL(DistributionsTestCase):
def test_kl_multivariate_normal(self):
set_rng_seed(0) # see Note [Randomized statistical tests]
n = 5 # Number of tests for multivariate_normal
for i in range(0, n):
loc = [torch.randn(4) for _ in range(0, 2)]
for i in range(n):
loc = [torch.randn(4) for _ in range(2)]
scale_tril = [
transform_to(constraints.lower_cholesky)(torch.randn(4, 4))
for _ in range(0, 2)
for _ in range(2)
]
p = MultivariateNormal(loc=loc[0], scale_tril=scale_tril[0])
q = MultivariateNormal(loc=loc[1], scale_tril=scale_tril[1])
@ -5755,10 +5755,10 @@ class TestKL(DistributionsTestCase):
def test_kl_multivariate_normal_batched(self):
b = 7 # Number of batches
loc = [torch.randn(b, 3) for _ in range(0, 2)]
loc = [torch.randn(b, 3) for _ in range(2)]
scale_tril = [
transform_to(constraints.lower_cholesky)(torch.randn(b, 3, 3))
for _ in range(0, 2)
for _ in range(2)
]
expected_kl = torch.stack(
[
@ -5766,7 +5766,7 @@ class TestKL(DistributionsTestCase):
MultivariateNormal(loc[0][i], scale_tril=scale_tril[0][i]),
MultivariateNormal(loc[1][i], scale_tril=scale_tril[1][i]),
)
for i in range(0, b)
for i in range(b)
]
)
actual_kl = kl_divergence(
@ -5777,7 +5777,7 @@ class TestKL(DistributionsTestCase):
def test_kl_multivariate_normal_batched_broadcasted(self):
b = 7 # Number of batches
loc = [torch.randn(b, 3) for _ in range(0, 2)]
loc = [torch.randn(b, 3) for _ in range(2)]
scale_tril = [
transform_to(constraints.lower_cholesky)(torch.randn(b, 3, 3)),
transform_to(constraints.lower_cholesky)(torch.randn(3, 3)),
@ -5788,7 +5788,7 @@ class TestKL(DistributionsTestCase):
MultivariateNormal(loc[0][i], scale_tril=scale_tril[0][i]),
MultivariateNormal(loc[1][i], scale_tril=scale_tril[1]),
)
for i in range(0, b)
for i in range(b)
]
)
actual_kl = kl_divergence(
@ -5800,15 +5800,15 @@ class TestKL(DistributionsTestCase):
def test_kl_lowrank_multivariate_normal(self):
set_rng_seed(0) # see Note [Randomized statistical tests]
n = 5 # Number of tests for lowrank_multivariate_normal
for i in range(0, n):
loc = [torch.randn(4) for _ in range(0, 2)]
cov_factor = [torch.randn(4, 3) for _ in range(0, 2)]
for i in range(n):
loc = [torch.randn(4) for _ in range(2)]
cov_factor = [torch.randn(4, 3) for _ in range(2)]
cov_diag = [
transform_to(constraints.positive)(torch.randn(4)) for _ in range(0, 2)
transform_to(constraints.positive)(torch.randn(4)) for _ in range(2)
]
covariance_matrix = [
cov_factor[i].matmul(cov_factor[i].t()) + cov_diag[i].diag()
for i in range(0, 2)
for i in range(2)
]
p = LowRankMultivariateNormal(loc[0], cov_factor[0], cov_diag[0])
q = LowRankMultivariateNormal(loc[1], cov_factor[1], cov_diag[1])
@ -5861,10 +5861,10 @@ class TestKL(DistributionsTestCase):
def test_kl_lowrank_multivariate_normal_batched(self):
b = 7 # Number of batches
loc = [torch.randn(b, 3) for _ in range(0, 2)]
cov_factor = [torch.randn(b, 3, 2) for _ in range(0, 2)]
loc = [torch.randn(b, 3) for _ in range(2)]
cov_factor = [torch.randn(b, 3, 2) for _ in range(2)]
cov_diag = [
transform_to(constraints.positive)(torch.randn(b, 3)) for _ in range(0, 2)
transform_to(constraints.positive)(torch.randn(b, 3)) for _ in range(2)
]
expected_kl = torch.stack(
[
@ -5876,7 +5876,7 @@ class TestKL(DistributionsTestCase):
loc[1][i], cov_factor[1][i], cov_diag[1][i]
),
)
for i in range(0, b)
for i in range(b)
]
)
actual_kl = kl_divergence(


@ -49,9 +49,9 @@ class ExportTests(torch._dynamo.test_case.TestCase):
lc_key = state[0]
lc_val = state[1]
bar = []
for _ in range(0, 4):
for _ in range(4):
bar2 = []
for _ in range(0, 3):
for _ in range(3):
bar2.append(
lc_key + lc_val + torch.tensor([0.1, 0.25, 0.4, 0.5, 0.1])
)
@ -665,9 +665,9 @@ def forward(self, x, y):
lc_key = state[0]
lc_val = state[1]
bar = []
for _ in range(0, 4):
for _ in range(4):
bar2 = []
for _ in range(0, 3):
for _ in range(3):
bar2.append(
lc_key + lc_val + torch.tensor([0.1, 0.25, 0.4, 0.5, 0.1])
)


@ -3627,7 +3627,7 @@ class GraphModule(torch.nn.Module):
)
test(range(10), slice(1, 10, 2), expected=range(1, 10, 2))
test(range(10), slice(None, 10, None), expected=range(0, 10))
test(range(10), slice(None, 10, None), expected=range(10))
test(range(10), slice(-1, 7, None), expected=range(9, 7))
test(range(10), slice(-1, 7, 2), expected=range(9, 7, 2))
test(range(1, 10, 2), slice(3, 7, 2), expected=range(7, 11, 4))


@ -3047,7 +3047,7 @@ class OptimizedModuleTest(torch._dynamo.test_case.TestCase):
def generate(x, c):
return mod(x) + c
for _ in range(0, 10):
for _ in range(10):
generate(torch.randn(10, 10), 0)
generate(torch.randn(10, 10), 1)
self.assertEqual(cnt.frame_count, 2)


@ -4471,7 +4471,7 @@ class ReproTests(torch._dynamo.test_case.TestCase):
compiled_fn = torch.compile(func, backend=cnt, fullgraph=True)
requires_grad = func is not func1
for _ in range(0, 5):
for _ in range(5):
# Inputs
eager_a = torch.ones([6], requires_grad=requires_grad)
compiled_a = torch.ones([6], requires_grad=requires_grad)
@ -4623,7 +4623,7 @@ class ReproTests(torch._dynamo.test_case.TestCase):
x = torch.rand([2, 2])
self.assertEqual(opt_fn(x, counter), fn(x, counter))
self.assertEqual(counter[0], 2)
for _ in range(0, 10):
for _ in range(10):
opt_fn(x, counter)
self.assertEqual(counter[0], 12)
if torch._dynamo.config.assume_static_by_default:
@ -4784,7 +4784,7 @@ class ReproTests(torch._dynamo.test_case.TestCase):
def test_contains_range_constprop(self):
def fn(x):
# dynamo should const prop to False
if 3 in range(0, 10):
if 3 in range(10):
return x + 1
else:
return x + 2


@ -106,7 +106,7 @@ class MemoryBudgetTest(TestCase):
return f(x, ws)
_, eager_flops = get_mem_and_flops(call)
for budget in range(0, 11):
for budget in range(11):
mem, flops = get_mem_and_flops(call, memory_budget=budget / 10)
if budget <= 5:
# We start saving the matmuls
@ -251,7 +251,7 @@ class MemoryBudgetTest(TestCase):
return f(x, ws)
expected = call()
for budget in range(0, 11):
for budget in range(11):
memory_budget = budget / 10
torch._dynamo.reset()
with config.patch(activation_memory_budget=memory_budget):


@ -1146,7 +1146,7 @@ class TestFxGraphCache(TestCase):
raise unittest.SkipTest(f"requires {GPU_TYPE}")
def fn1(x):
return x + torch.tensor(list(range(0, 12)), device=device)
return x + torch.tensor(list(range(12)), device=device)
def fn2(x):
return x + torch.tensor(list(range(1, 13)), device=device)


@ -1599,7 +1599,7 @@ main()
eager_check()
for i in range(0, 5):
for i in range(5):
with compiled_autograd._enable(compiler_fn):
eager_check()


@ -2095,7 +2095,7 @@ class TestMaxAutotune(TestCase):
# Test loop.
def test_func2(x):
for i in range(0, 10):
for i in range(10):
x = torch.matmul(x, x)
return x


@ -3005,7 +3005,7 @@ class MutationTests(torch._inductor.test_case.TestCase):
mask = offsets < n_elements
x = tl.load(in_ptr0 + offsets, mask=mask)
y = tl.load(in_ptr1 + offsets, mask=mask)
for i in range(0, BLOCK_SIZE):
for i in range(BLOCK_SIZE):
i = tl.multiple_of(i, 1)
output = x + y
tl.store(out_ptr + offsets, output, mask=mask)
@ -3160,7 +3160,7 @@ class MutationTests(torch._inductor.test_case.TestCase):
x = tl.load(x_block_ptr)
# Compute gating
for c2 in range(0, tl.cdiv(C2, BLOCK_SIZE_C2)):
for c2 in range(tl.cdiv(C2, BLOCK_SIZE_C2)):
# Compute block pointers
offs_c2 = c2 * BLOCK_SIZE_C2 + tl.arange(0, BLOCK_SIZE_C2)
o_block_ptr = O_ptr + offs_m[:, None] * C2 + offs_c2[None, :]


@ -32,7 +32,7 @@ class TestXNNPackBackend(unittest.TestCase):
},
)
for _ in range(0, 20):
for _ in range(20):
sample_input = torch.randn(4, 4, 4)
actual_output = scripted_module(sample_input)
expected_output = lowered_module(sample_input)


@ -1292,7 +1292,7 @@ class TestConvolutionNN(NNTestCase):
kernel_x = torch.zeros([3, 1, 1, radius * 2 + 1], device=image.device)
image = torch.nn.functional.conv2d(image, kernel_x, groups=image.shape[-3])
for i in range(0, 128):
for i in range(128):
# This should not fail
reproducer(radius=i)


@ -551,7 +551,7 @@ class TestEmbeddingNNDeviceType(NNTestCase):
# Pull out the bag's indices from indices_1D, and fill any
# remaining space with padding indices
indices_in_bag = []
for item_pos in range(0, max_indices_per_bag):
for item_pos in range(max_indices_per_bag):
if (start + item_pos) < end:
indices_in_bag.append(indices_1D[start + item_pos])
else:


@ -485,7 +485,7 @@ class TestMultiheadAttentionNN(NNTestCase):
)[0]
output_3d = output_3d.transpose(0, 1) # [N, T, D]
for i in range(0, batch_size):
for i in range(batch_size):
output_2d = mta_model(
query[i].unsqueeze(0).transpose(0, 1),
key[i].unsqueeze(0).transpose(0, 1),


@ -1135,7 +1135,7 @@ torch.cuda.synchronize()
for size, kernel_size, stride, dilation, ceil_mode in itertools.product(
sizes, kernel_sizes, strides, dilations, ceil_modes
):
padding = random.sample(range(0, math.floor(kernel_size / 2) + 1), 1)
padding = random.sample(range(math.floor(kernel_size / 2) + 1), 1)
check(
torch.randn(size, device=device, dtype=dtype),
kernel_size,


@ -36,12 +36,12 @@ def check_onnx_opset_operator(
# but the op's attributes can optionally be
# specified as well
assert len(ops) == len(graph.node)
for i in range(0, len(ops)):
for i in range(len(ops)):
assert graph.node[i].op_type == ops[i]["op_name"]
if "attributes" in ops[i]:
attributes = ops[i]["attributes"]
assert len(attributes) == len(graph.node[i].attribute)
for j in range(0, len(attributes)):
for j in range(len(attributes)):
for attribute_field in attributes[j].keys():
assert attributes[j][attribute_field] == getattr(
graph.node[i].attribute[j], attribute_field


@ -1509,7 +1509,7 @@ class TestLRScheduler(TestCase):
14.0 / 3,
29.0 / 6,
]
deltas = [2 * i for i in range(0, 2)]
deltas = [2 * i for i in range(2)]
base_lrs = [1 + delta for delta in deltas]
max_lrs = [5 + delta for delta in deltas]
lr_targets = [[x + delta for x in lr_base_target] for delta in deltas]


@ -1930,7 +1930,7 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
event_list.table()
def _check_all_gpu_present(self, gpu_dict, max_gpu_count):
for i in range(0, max_gpu_count):
for i in range(max_gpu_count):
self.assertEqual(gpu_dict["GPU " + str(i)], 1)
# Do json sanity testing. Checks that all events are between profiler start and end
@ -2139,8 +2139,8 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
step_helper_funcs.append(event)
self.assertEqual(len(prof_steps), 5)
self.assertEqual(len(step_helper_funcs), 5)
for i in range(0, len(step_helper_funcs)):
for j in range(0, len(step_helper_funcs)):
for i in range(len(step_helper_funcs)):
for j in range(len(step_helper_funcs)):
self.assertTrue(
not self._partial_overlap(prof_steps[i], step_helper_funcs[j])
)


@ -275,7 +275,7 @@ class TestFloat8Dtype(TestCase):
IMO simpler to special case e8m0 here.
"""
for biased_exponent in range(0, 256):
for biased_exponent in range(256):
# iterate through all the possible options of guard, round, sticky bits
# for the current exponent
for grs in range(8):


@ -3494,7 +3494,7 @@ class TestIndividualWorkerQueue(TestCase):
max_num_workers = 1
for batch_size in (8, 16, 32, 64):
for num_workers in range(0, min(6, max_num_workers)):
for num_workers in range(min(6, max_num_workers)):
self._run_ind_worker_queue_test(
batch_size=batch_size, num_workers=num_workers + 1
)


@ -520,7 +520,7 @@ class TestIterableDataPipeBasic(TestCase):
self.assertEqual(list(range(9)), list(n))
# Functional Test: Uneven DataPipes
source_numbers = list(range(0, 10)) + [10, 12]
source_numbers = list(range(10)) + [10, 12]
numbers_dp = dp.iter.IterableWrapper(source_numbers)
n1, n2 = numbers_dp.demux(2, lambda x: x % 2)
self.assertEqual([0, 2, 4, 6, 8, 10, 12], list(n1))
@ -1257,7 +1257,7 @@ class TestFunctionalIterDataPipe(TestCase):
)
output1, output2 = list(dp1), list(dp2)
self.assertEqual(list(range(5, 10)), output1)
self.assertEqual(list(range(0, 5)), output2)
self.assertEqual(list(range(5)), output2)
# Functional Test: values of the same classification are lumped together, and unlimited buffer
with warnings.catch_warnings(record=True) as wa:
@ -1271,7 +1271,7 @@ class TestFunctionalIterDataPipe(TestCase):
self.assertRegex(str(wa[-1].message), r"Unlimited buffer size is set")
output1, output2 = list(dp1), list(dp2)
self.assertEqual(list(range(5, 10)), output1)
self.assertEqual(list(range(0, 5)), output2)
self.assertEqual(list(range(5)), output2)
# Functional Test: classifier returns a value outside of [0, num_instance - 1]
dp0 = input_dp.demux(num_instances=1, classifier_fn=lambda x: x % 2)


@ -1385,7 +1385,7 @@ class f(torch.nn.Module):
self.assertEqual(x.storage_offset(), y.storage_offset())
def test_tensor_factory_with_symint(self):
args = list(range(0, 3))
args = list(range(3))
expected = torch.tensor(args)
shape_env = ShapeEnv()
@ -4291,7 +4291,7 @@ def forward(self, arg0_1: "i64[1][1]cpu", arg1_1: "Sym(u1)", arg2_1: "i64[u1][1]
start = start.item()
N = 3
result = X0[start]
for i in range(0, N):
for i in range(N):
result += X0[start + 1 + i]
return result


@ -902,7 +902,7 @@ class TestIndexing(TestCase):
# Set window size
W = 10
# Generate a list of lists, containing overlapping window indices
indices = [range(i, i + W) for i in range(0, N - W)]
indices = [range(i, i + W) for i in range(N - W)]
for i in [len(indices), 100, 32]:
windowed_data = t[indices[:i]]


@ -3153,7 +3153,7 @@ class TestScript(JitTestCase):
eplan = get_execution_plan(dstate)
num_bailouts = eplan.code.num_bailouts()
for i in range(0, num_bailouts):
for i in range(num_bailouts):
eplan.code.request_bailout(i)
self.assertEqual(jitted(x), expected)
@ -5950,7 +5950,7 @@ a")
# type: (int) -> int
prev = 1
v = 1
for i in range(0, x):
for i in range(x):
save = v
v = v + prev
prev = save
@ -10938,7 +10938,7 @@ dedent """
# Test symbolic differentiation
# Run Forward and Backward thrice to trigger autodiff graph
for i in range(0, 3):
for i in range(3):
y = jit_module(x)
y.backward(grad)
x.grad.zero_()
@ -11802,7 +11802,7 @@ dedent """
def fn_zip_enumerate(x, y):
# type: (List[int], List[int]) -> int
sum = 0
for (i, (j, v), k) in zip(x, enumerate(y), range(0, 100)):
for (i, (j, v), k) in zip(x, enumerate(y), range(100)):
sum += i * j * v * k
return sum


@ -243,7 +243,7 @@ class TestTEFuser(JitTestCase):
return x2.sum()
with texpr_reductions_enabled():
a = torch.tensor(list(range(0, 15)), dtype=torch.float, device="cpu")
a = torch.tensor(list(range(15)), dtype=torch.float, device="cpu")
a = a.reshape(5, 3)
scripted = self.checkScript(func, (a,))
self.assertLastGraphAllFused()
@ -259,7 +259,7 @@ class TestTEFuser(JitTestCase):
return x.sum((-2,)) * 2
with texpr_reductions_enabled():
a = torch.tensor(list(range(0, 15)), dtype=torch.float, device="cpu")
a = torch.tensor(list(range(15)), dtype=torch.float, device="cpu")
a = a.reshape(5, 3)
scripted = self.checkScript(func, (a,))
self.assertLastGraphAllFused()
@ -271,7 +271,7 @@ class TestTEFuser(JitTestCase):
return x.sum((0,), keepdim=True, dtype=torch.double) * 2
with texpr_reductions_enabled():
a = torch.tensor(list(range(0, 15)), dtype=torch.float, device="cpu")
a = torch.tensor(list(range(15)), dtype=torch.float, device="cpu")
a = a.reshape(5, 3)
self.checkScript(func, (a,))
@ -2234,7 +2234,7 @@ class TestTEFuser(JitTestCase):
indices = [0, 1, 2, 3]
sets = []
for i in range(0, len(indices) + 1):
for i in range(len(indices) + 1):
for subset in combinations(indices, i):
sets.append(subset) # noqa: PERF402


@ -231,7 +231,7 @@ class TestMatmulCuda(InductorTestCase):
def test_cublas_addmm_alignment(self, dtype):
device = 'cuda'
# perturb X, A, or B alignment
for idx in range(0, 3):
for idx in range(3):
for offset in range(1, 3):
offsets = [0, 0, 0]
offsets[idx] = offset


@ -1900,7 +1900,7 @@ class TestMPS(TestCaseMPS):
res_cpu = torch.linalg.vector_norm(B_cpu, ord=3.5)
self.assertEqual(res_mps, res_cpu)
for dim in range(0, B_mps.dim()):
for dim in range(B_mps.dim()):
res_mps = torch.linalg.vector_norm(B_mps, ord=3.5, dim=dim)
res_cpu = torch.linalg.vector_norm(B_cpu, ord=3.5, dim=dim)
self.assertEqual(res_mps, res_cpu)
@ -2871,8 +2871,8 @@ class TestMPS(TestCaseMPS):
def test_contiguous_slice_2d(self):
def helper(shape):
for i in range(0, shape[0]):
for j in range(0, shape[1]):
for i in range(shape[0]):
for j in range(shape[1]):
t_mps = torch.randn(shape, device="mps")
t_cpu = t_mps.detach().clone().cpu()
@ -3432,12 +3432,12 @@ class TestMPS(TestCaseMPS):
elems = torch.arange(n_tensors * n_tensor_elems, dtype=torch.float32)
tensor_list = []
for i in range(0, n_tensors - 1):
for i in range(n_tensors - 1):
# create a list of contiguous view tensors (view tensor created by the slice op)
t = elems[n_tensor_elems * i : n_tensor_elems * (i + 1)]
tensor_list.append(t)
for i in range(0, n_tensors - 1):
for i in range(n_tensors - 1):
t = tensor_list[i].view(1, n_tensor_elems)
t_mps = t.to("mps")
self.assertEqual(t, t_mps.cpu(), f"i={i}")
@ -4942,7 +4942,7 @@ class TestMPS(TestCaseMPS):
x_mps = fn(torch.zeros(shape, device="mps"), dim=dim)
self.assertEqual(x_cpu, x_mps.cpu())
for fn in [torch.any, torch.all]:
for dim in range(0, 4):
for dim in range(4):
helper(fn, dim)
# 6D tensor reductions
@ -9750,7 +9750,7 @@ class TestGatherScatter(TestCaseMPS):
self.assertEqual(x_cpu, x_mps)
def test_cast_gather_scatter(self):
for _ in range(0, 50):
for _ in range(50):
input = np.random.randint(0, 255, size=(5, 5, 4), dtype=np.uint8)
with torch.no_grad():
s = torch.tensor(input, dtype=torch.uint8, device="mps").unsqueeze(0)


@ -549,7 +549,7 @@ class NumaBindingTest(TestCase):
bound_logical_cpu_indices_0,
# Gets an extra physical core due to odd number of physical cores on numa node
# 3 physical cores total, 2 GPUs: GPU 0 gets 2 physical cores (CPUs 0-3)
set(range(0, 4)),
set(range(4)),
)
bound_logical_cpu_indices_1 = (
@ -677,7 +677,7 @@ class NumaBindingTest(TestCase):
# 1 numa node, 2 L3 caches, 1 physical core per L3 cache = 2 logical CPUs per cache
# L3 cache 0: CPUs 0-1, L3 cache 1: CPUs 2-3
# Both have same number of CPUs, so prefer lower cache key (0)
set(range(0, 2)),
set(range(2)),
)
def test_binds_to_node_0_if_node_stored_as_minus_one(self) -> None:
@ -709,7 +709,7 @@ class NumaBindingTest(TestCase):
# GPU 0 has numa node stored as -1, which is treated as numa node 0
# Each numa node has 1 * 1 * 2 = 2 logical CPUs
# Numa node 0 has CPUs 0-1
set(range(0, 2)),
set(range(2)),
)
def test_callable_entrypoint_basic(self) -> None:


@ -1710,7 +1710,7 @@ class TestReductions(TestCase):
with_extremal=False, atol=None, rtol=None,
exact_dtype=True, with_keepdim=False):
# Test 0-d to 3-d tensors.
for ndims in range(0, 4):
for ndims in range(4):
shape = _rand_shape(ndims, min_size=5, max_size=10)
for n in range(ndims + 1):
for c in combinations(list(range(ndims)), n):
@ -2623,7 +2623,7 @@ class TestReductions(TestCase):
# Generate some random test cases
ops = ['quantile', 'nanquantile']
inputs = [tuple(np.random.randint(2, 10, size=i)) for i in range(1, 4)]
quantiles = [tuple(np.random.rand(i)) for i in range(0, 5)]
quantiles = [tuple(np.random.rand(i)) for i in range(5)]
keepdims = [True, False]
# Add corner cases


@ -295,7 +295,7 @@ class SerializationMixin:
5,
6
]
for i in range(0, 100):
for i in range(100):
data.append(0)
t = torch.tensor(data, dtype=torch.uint8)


@ -5300,7 +5300,7 @@ class TestSparseAny(TestCase):
x_dense = torch.eye(dense_dim, dtype=dtype, device=device)
for sparse_dim_in in range(1, dense_dim):
x_sparse = x_dense.to_sparse(sparse_dim_in)
for sparse_dim_out in range(0, dense_dim):
for sparse_dim_out in range(dense_dim):
if sparse_dim_out == sparse_dim_in:
self.assertTrue(x_sparse.to_sparse(sparse_dim_out).sparse_dim() == sparse_dim_out)
else:

View File

@ -135,7 +135,7 @@ class TestSparseCSRSampler(TestCase):
index_dtype = torch.int32
for n_rows in range(1, 10):
for n_cols in range(1, 10):
for nnz in range(0, n_rows * n_cols + 1):
for nnz in range(n_rows * n_cols + 1):
crow_indices = self._make_crow_indices(
n_rows, n_cols, nnz,
device=device, dtype=index_dtype)


@ -60,7 +60,7 @@ class MultiHeadAttentionLayer(nn.Module):
# Taken from https://github.com/facebookresearch/dlrm/blob/master/dlrm_s_pytorch.py
def create_mlp(ln, sigmoid_layer):
layers = nn.ModuleList()
for i in range(0, len(ln) - 1):
for i in range(len(ln) - 1):
n = ln[i]
m = ln[i + 1]


@ -200,7 +200,7 @@ class TestTensorBoardPyTorchNumpy(BaseTestCase):
bucket_counts=counts.tolist(),
)
ints = torch.tensor(range(0, 100)).float()
ints = torch.tensor(range(100)).float()
nbins = 100
counts = torch.histc(ints, bins=nbins, min=0, max=99)
limits = torch.tensor(range(nbins))


@ -1216,7 +1216,7 @@ class TestTensorExprFuser(BaseTestClass):
@torch.jit.script
def test(x: torch.Tensor, y: torch.Tensor, z: int) -> torch.Tensor:
b = y
for i in range(0, z):
for i in range(z):
a = x + y
b = b + y
return b


@ -8424,7 +8424,7 @@ tensor([[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
def test_Size_iter(self):
for sizes in [iter([1, 2, 3, 4, 5]), range(1, 6)]:
x = torch.Size(sizes)
for i in range(0, 5):
for i in range(5):
self.assertEqual(x[i], i + 1)
def test_t_not_2d_error(self):


@ -1559,7 +1559,7 @@ class TestOldViewOps(TestCase):
self.compare_with_numpy(torch_fn, np_fn, x, device=None, dtype=None)
def _test_atleast_dim(self, torch_fn, np_fn, device, dtype):
for ndims in range(0, 5):
for ndims in range(5):
shape = _rand_shape(ndims, min_size=5, max_size=10)
for _ in range(ndims + 1):
for with_extremal in [False, True]:


@ -1316,7 +1316,7 @@ class TestXNNPACKConv1dTransformPass(TestCase):
groups_list = range(1, 3)
kernel_list = range(1, 4)
stride_list = range(1, 3)
padding_list = range(0, 3)
padding_list = range(3)
dilation_list = range(1, 3)
for hparams in itertools.product(
@ -1401,7 +1401,7 @@ class TestXNNPACKConv1dTransformPass(TestCase):
groups_list = range(1, 3)
kernel_list = range(1, 4)
stride_list = range(1, 3)
padding_list = range(0, 3)
padding_list = range(3)
dilation_list = range(1, 3)
output_features_list = range(1, 3)


@ -147,7 +147,7 @@ def native_layer_norm_backward(
inner_dims = input_shape[axis:]
outer_dims = input_shape[:axis]
inner_dim_indices = list(range(axis, input_ndim))
outer_dim_indices = list(range(0, axis))
outer_dim_indices = list(range(axis))
N = 1
for i in inner_dims:


@ -1248,7 +1248,7 @@ def argument_names(
# signature. Assign names as {varargs}_0, {varargs}_1, ...
assert fullargspec.varargs is not None, "More arguments than expected"
input_strs += [
f"{fullargspec.varargs}_{i}" for i in range(0, len(args) - len(input_strs))
f"{fullargspec.varargs}_{i}" for i in range(len(args) - len(input_strs))
]
elif len(args) < len(fullargspec.args):
# 3. If there are fewer arguments in `args` than `fullargspec.args`,
@ -1538,7 +1538,7 @@ class FlattenInputOutputSignature(torch.fx.Transformer):
}
self.new_args = []
for i in range(0, len(flat_args)):
for i in range(len(flat_args)):
arg = super().placeholder(f"arg{i}", (), {})
if i in matched_input_elements_to_fake:
arg.node.meta["val"] = matched_input_elements_to_fake[i]


@ -151,7 +151,7 @@ class MemoryDep(Dep):
stride_to_index = {s: i for i, s in enumerate(self_strides)}
order = [stride_to_index[s] for s in other_strides]
assert OrderedSet(order) == OrderedSet(range(0, self.num_vars))
assert OrderedSet(order) == OrderedSet(range(self.num_vars))
return order
def get_offset(self) -> sympy.Expr:


@ -1787,7 +1787,7 @@ def _padding_check_valid_input(input, padding, *, dim):
for d in range(1, input_dim):
valid_batch_mode = valid_batch_mode and input.size(d) != 0
else:
for d in range(0, input_dim):
for d in range(input_dim):
valid_non_batch_mode = valid_non_batch_mode and input.size(d) != 0
# allow empty batch size but not other dimensions.


@ -1449,7 +1449,7 @@ def rollaxis(a: ArrayLike, axis, start=0):
# numpy returns a view, here we try returning the tensor itself
# return tensor[...]
return a
axes = list(range(0, n))
axes = list(range(n))
axes.remove(axis)
axes.insert(start, axis)
return a.view(axes)


@ -4738,7 +4738,7 @@ def transpose(a: TensorLikeType, dim0: int, dim1: int) -> TensorLikeType:
if a.ndim <= 1 or dim0 == dim1:
return aten.alias.default(a)
_permutation = list(range(0, a.ndim))
_permutation = list(range(a.ndim))
_permutation[_dim0] = _dim1
_permutation[_dim1] = _dim0
return torch.permute(a, _permutation)


@ -307,7 +307,7 @@ def _tensor_str_with_formatter(self, indent, summarize, formatter1, formatter2=N
_tensor_str_with_formatter(
self[i], indent + 1, summarize, formatter1, formatter2
)
for i in range(0, PRINT_OPTS.edgeitems)
for i in range(PRINT_OPTS.edgeitems)
]
+ ["..."]
+ [
@ -322,7 +322,7 @@ def _tensor_str_with_formatter(self, indent, summarize, formatter1, formatter2=N
_tensor_str_with_formatter(
self[i], indent + 1, summarize, formatter1, formatter2
)
for i in range(0, self.size(0))
for i in range(self.size(0))
]
tensor_str = ("," + "\n" * (dim - 1) + " " * (indent + 1)).join(slices)
@ -406,7 +406,7 @@ def get_summarized_data(self):
if not PRINT_OPTS.edgeitems:
return self.new_empty([0] * self.dim())
elif self.size(0) > 2 * PRINT_OPTS.edgeitems:
start = [self[i] for i in range(0, PRINT_OPTS.edgeitems)]
start = [self[i] for i in range(PRINT_OPTS.edgeitems)]
end = [self[i] for i in range(len(self) - PRINT_OPTS.edgeitems, len(self))]
return torch.stack([get_summarized_data(x) for x in (start + end)])
else:


@ -28,7 +28,7 @@ def get_type_a_related_to_b(
for s in base_name_to_sets_of_related_ops.values():
s_list = list(s)
# add every bidirectional pair
for idx_0 in range(0, len(s_list)):
for idx_0 in range(len(s_list)):
for idx_1 in range(idx_0, len(s_list)):
type_a_related_to_b.add((s_list[idx_0], s_list[idx_1]))
type_a_related_to_b.add((s_list[idx_1], s_list[idx_0]))


@ -158,9 +158,9 @@ class ActivationSparsifier:
# data should be a list [aggregated over each feature only]
if data is None:
out_data = [
0 for _ in range(0, len(features))
0 for _ in range(len(features))
] # create one in case of 1st forward
self.state[name]["mask"] = [0 for _ in range(0, len(features))]
self.state[name]["mask"] = [0 for _ in range(len(features))]
else:
out_data = data # a list
@ -336,7 +336,7 @@ class ActivationSparsifier:
return input_data * mask
else:
# apply per feature, feature_dim
for feature_idx in range(0, len(features)):
for feature_idx in range(len(features)):
feature = (
torch.Tensor([features[feature_idx]])
.long()


@ -99,7 +99,7 @@ def sparsify_model(path_to_model, sparsified_model_dump_path):
sparse_block_shapes (List of tuples)
List of sparse block shapes to be sparsified on
"""
sparsity_levels = [sl / 10 for sl in range(0, 10)]
sparsity_levels = [sl / 10 for sl in range(10)]
sparsity_levels += [0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0]
norms = ["L1", "L2"]


@ -299,7 +299,7 @@ class TestTrainingAwareCallback(TestCase):
self._check_on_train_start(pl_module, callback, sparsifier_args, scheduler_args)
num_epochs = 5
for _ in range(0, num_epochs):
for _ in range(num_epochs):
self._check_on_train_epoch_start(pl_module, callback)
self._simulate_update_param_model(pl_module)
self._check_on_train_epoch_end(pl_module, callback)


@ -53,7 +53,7 @@ class NearlyDiagonalSparsifier(base_sparsifier.BaseSparsifier):
"nearliness cannot be larger than the dimensions of tensor."
)
for row in range(0, height):
for row in range(height):
# Bounds of entries that needs to be set to 1
low = max(0, row - dist_to_diagonal)
high = min(width, row + dist_to_diagonal + 1)


@ -68,10 +68,10 @@ class APoTObserver(ObserverBase):
p_all = []
# create levels
for i in range(0, self.n):
for i in range(self.n):
p_curr = torch.tensor([0])
for j in range(0, (2**self.k - 2) + 1):
for j in range((2**self.k - 2) + 1):
curr_ele = 2 ** (-(i + j * self.n))
p_append = torch.tensor([curr_ele])
p_curr = torch.cat((p_curr, p_append))


@ -1159,7 +1159,7 @@ class FakeQuantPerChannel(torch.autograd.Function):
f"Expecting input to have dtype torch.float32, but got dtype: {input.dtype}"
)
assert axis < input.dim(), f"Expecting axis to be < {input.dim()}"
broadcast_dims = list(range(0, axis)) + list(range(axis + 1, input.ndim))
broadcast_dims = list(range(axis)) + list(range(axis + 1, input.ndim))
unsqueeze_scales = _unsqueeze_multiple(scales, broadcast_dims)
unsqueeze_zero_points = _unsqueeze_multiple(zero_points, broadcast_dims)
temp = torch.round(input * (1.0 / unsqueeze_scales)) + unsqueeze_zero_points


@ -1212,7 +1212,7 @@ class KinetoStepTracker:
"Profiler step count has increased more than 1 - "
f"current_step = {cls._current_step} step dict = {cls._step_dict}"
)
for _ in range(0, delta):
for _ in range(delta):
_kineto_step()
cls._current_step = new_step
return cls._current_step


@ -162,7 +162,7 @@ def coalesce(layout: Layout, profile: LayoutProfile = None) -> Layout:
assert len(layout) >= len(profile)
return make_layout(
chain(
(coalesce(layout[i], profile[i]) for i in range(0, len(profile))), # type: ignore[arg-type]
(coalesce(layout[i], profile[i]) for i in range(len(profile))), # type: ignore[arg-type]
(layout[i] for i in range(len(profile), len(layout))),
)
)
@ -203,7 +203,7 @@ def filter(layout: Layout, profile: LayoutProfile = None) -> Layout:
assert len(layout) >= len(profile)
return make_layout(
chain(
(filter(layout[i], profile[i]) for i in range(0, len(profile))), # type: ignore[arg-type]
(filter(layout[i], profile[i]) for i in range(len(profile))), # type: ignore[arg-type]
(layout[i] for i in range(len(profile), len(layout))),
)
)
@ -233,7 +233,7 @@ def composition(layoutA: Layout, layoutB: LayoutInput) -> Layout:
assert len(layoutA) >= len(layoutB)
return make_layout(
chain(
(composition(layoutA[i], layoutB[i]) for i in range(0, len(layoutB))), # type: ignore[arg-type]
(composition(layoutA[i], layoutB[i]) for i in range(len(layoutB))), # type: ignore[arg-type]
(layoutA[i] for i in range(len(layoutB), len(layoutA))),
)
)
@ -371,7 +371,7 @@ def logical_divide(layoutA: Layout, layoutB: LayoutInput) -> Layout:
chain(
(
logical_divide(layoutA[i], layoutB[i]) # type: ignore[arg-type]
for i in range(0, len(layoutB))
for i in range(len(layoutB))
),
(layoutA[i] for i in range(len(layoutB), len(layoutA))),
)
@ -396,7 +396,7 @@ def logical_product(layoutA: Layout, layoutB: LayoutInput) -> Layout:
chain(
(
logical_product(layoutA[i], layoutB[i]) # type: ignore[arg-type]
for i in range(0, len(layoutB))
for i in range(len(layoutB))
),
(layoutA[i] for i in range(len(layoutB), len(layoutA))),
)
@ -421,14 +421,14 @@ def hier_unzip(
# A layout with shape ((A,a),(B,b),(C,c))
split = make_layout(
hier_unzip(splitter, layoutA[i], layoutB[i]) # type: ignore[arg-type]
for i in range(0, len(layoutB))
for i in range(len(layoutB))
)
# Gather to shape ((A,B,C,...),(a,b,c,...,y,z))
return make_layout(
make_layout(split[i][0] for i in range(0, len(layoutB))), # type: ignore[arg-type]
make_layout(split[i][0] for i in range(len(layoutB))), # type: ignore[arg-type]
make_layout(
chain( # type: ignore[arg-type]
(split[i][1] for i in range(0, len(layoutB))),
(split[i][1] for i in range(len(layoutB))),
(layoutA[i] for i in range(len(layoutB), len(layoutA))),
)
),


@ -1671,7 +1671,7 @@ def _low_contention_all_gather(
local_buf.copy_(tensor)
# pull
symm_mem.barrier()
for step in range(0, world_size):
for step in range(world_size):
remote_rank = (rank - step) % world_size
src_buf = symm_mem.get_buffer(remote_rank, tensor.shape, tensor.dtype)
chunks[remote_rank].copy_(src_buf)
@ -1706,7 +1706,7 @@ def _low_contention_reduce_scatter_with_symm_mem_input(
with _get_backend_stream():
# pull + offline reduction
symm_mem.barrier()
for step in range(0, world_size):
for step in range(world_size):
remote_rank = (rank - step) % world_size
src_buf = symm_mem.get_buffer(
remote_rank,
@ -1743,7 +1743,7 @@ def _low_contention_reduce_scatter_with_workspace(
with _get_backend_stream():
# push + offline reduction
workspace.barrier()
for step in range(0, world_size):
for step in range(world_size):
remote_rank = (rank - step) % world_size
dst_buf = workspace.get_buffer(
remote_rank, chunks[0].shape, chunks[0].dtype, chunks[0].numel() * rank


@ -727,7 +727,7 @@ class MultiprocessContext(PContext):
# pipe. Hence to prevent deadlocks on large return values,
# we opportunistically try queue.get on each join call
# See: https://docs.python.org/2/library/multiprocessing.html#all-platforms
for local_rank in range(0, self.nprocs):
for local_rank in range(self.nprocs):
return_queue = self._ret_vals[local_rank]
if not return_queue.empty():
# save the return values temporarily into a member var


@ -59,7 +59,7 @@ class MultiprocessingRequestQueue(RequestQueue):
def get(self, size, timeout: float) -> list[TimerRequest]:
requests = []
wait = timeout
for _ in range(0, size):
for _ in range(size):
start = time.time()
try:


@ -107,7 +107,7 @@ class DTensorSpec:
# follow default left-to-right device order if shard_order is not specified
tensor_dim_to_mesh_dims: defaultdict[int, list[int]] = defaultdict(list)
mesh_ndim = len(placements)
for mesh_dim in range(0, mesh_ndim):
for mesh_dim in range(mesh_ndim):
# shard_order doesn't work with _StridedShard
if isinstance(placements[mesh_dim], _StridedShard):
return ()


@ -306,7 +306,7 @@ def _all_gather_dtensor(
placements = list(copy.deepcopy(tensor.placements))
# FSDP + TP: [Shard(0), tp_placement] -> [Replicate(), tp_placement]
# HSDP + TP: [Replicate(), Shard(0), tp_placement] -> [Replicate(), Replicate(), tp_placement]
for i in range(0, len(placements) - 1):
for i in range(len(placements) - 1):
placements[i] = Replicate()
tensor = tensor.redistribute(
device_mesh=tensor.device_mesh,


@ -1112,7 +1112,7 @@ def chunk_default(func, *args, **kwargs):
# the input number; it can be counter-intuitive, but it matches dense behavior.
return [
NestedTensor(values=chunk_values[i], **(nested_kwargs[i]))
for i in range(0, len(chunk_values))
for i in range(len(chunk_values))
]
else:
return [


@ -1005,7 +1005,7 @@ def _interpolate_size_to_scales(g: jit_utils.GraphContext, input, output_size, d
if i < 2
else float(output_size[-(dim - i)])
/ float(input.type().sizes()[-(dim - i)])
for i in range(0, dim)
for i in range(dim)
]
scales = g.op(
"Constant", value_t=torch.tensor(scales_constant, dtype=torch.float32)


@ -331,7 +331,7 @@ def unfold(g: jit_utils.GraphContext, input, dimension, size, step):
ndim = symbolic_helper._get_tensor_rank(input)
assert ndim is not None
perm = list(range(0, ndim))
perm = list(range(ndim))
perm.append(perm.pop(dimension))
unsqueeze_list = []


@ -116,7 +116,7 @@ def _interpolate(name, dim, interpolate_mode):
if i < 2
else float(output_size[-(dim - i)])
/ float(input.type().sizes()[-(dim - i)])
for i in range(0, dim)
for i in range(dim)
]
return g.op("Upsample", input, mode_s=interpolate_mode, scales_f=scales)


@ -840,7 +840,7 @@ def t(g: jit_utils.GraphContext, self):
def numpy_T(g: jit_utils.GraphContext, input):
ndim = symbolic_helper._get_tensor_rank(input)
assert ndim is not None
perm = list(reversed(range(0, ndim)))
perm = list(reversed(range(ndim)))
return g.op("Transpose", input, perm_i=perm)
@ -990,7 +990,7 @@ def transpose(g: jit_utils.GraphContext, self, dim0, dim1):
@_onnx_symbolic("aten::permute")
@symbolic_helper.parse_args("v", "is")
def permute(g: jit_utils.GraphContext, self, dims):
if dims == list(range(0, len(dims))):
if dims == list(range(len(dims))):
return self
return g.op("Transpose", self, perm_i=dims)
@ -1368,7 +1368,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
)
ceiled_output_dim = [
math.ceil((dim[i] + 2 * padding[i] - kernel_size[i]) / float(stride[i])) + 1
for i in range(0, len(padding))
for i in range(len(padding))
]
# ensure last pooling starts inside
ceiled_output_dim = [
@ -1377,7 +1377,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
if (((ceiled_output_dim[i] - 1) * stride[i]) >= (dim[i] + padding[i]))
else ceiled_output_dim[i]
)
for i in range(0, len(ceiled_output_dim))
for i in range(len(ceiled_output_dim))
]
padding_ceil = [
(
@ -1392,7 +1392,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
)
)
)
for i in range(0, len(padding))
for i in range(len(padding))
]
# ensure padding is not > kernel_size
padding_ceil = [
@ -1405,7 +1405,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
if ((padding_ceil[i] + 2 * padding[i]) >= (kernel_size[i]))
else int(padding_ceil[i])
)
for i in range(0, len(padding_ceil))
for i in range(len(padding_ceil))
]
return padding_ceil
@ -1697,14 +1697,14 @@ def _adaptive_pool(name, type, tuple_fn, fn=None):
name, "input size not accessible", input
)
# verify if output size % input size = 0 for all dim
mod = [dim[i] % output_size[i] for i in range(0, len(dim))]
mod = [dim[i] % output_size[i] for i in range(len(dim))]
if mod != [0] * len(mod):
if output_size == [1] * len(output_size):
return g.op("GlobalMaxPool", input), None
return symbolic_helper._unimplemented(
name, "output size that are not factor of input size", output_size_value
)
k = [int(dim[i] / output_size[i]) for i in range(0, len(dim))]
k = [int(dim[i] / output_size[i]) for i in range(len(dim))]
# call max_poolxd_with_indices to get indices in the output
if type == "MaxPool":
# pyrefly: ignore # not-callable
@ -2906,7 +2906,7 @@ def unfold(g: jit_utils.GraphContext, input, dimension, size, step):
for low, hi in zip(low_indices, hi_indices)
]
ndim = len(sizes)
perm = list(range(0, ndim))
perm = list(range(ndim))
perm.append(perm.pop(dimension))
unsqueeze = [
symbolic_helper._unsqueeze_helper(


@ -11615,7 +11615,7 @@ def reference_searchsorted(sorted_sequence, boundary, out_int32=False, right=Fal
# numpy searchsorted only supports 1D inputs so we split up ND inputs
orig_shape = boundary.shape
num_splits = np.prod(sorted_sequence.shape[:-1])
splits = range(0, num_splits)
splits = range(num_splits)
sorted_sequence, boundary = sorted_sequence.reshape(num_splits, -1), boundary.reshape(num_splits, -1)
if sorter is not None:
sorter = sorter.reshape(num_splits, -1)
@ -16258,7 +16258,7 @@ op_db: list[OpInfo] = [
aten_backward_name='_prelu_kernel_backward',
ref=lambda x, weight:
np.maximum(0., x) + np.minimum(0., x) *
(weight if x.ndim == 1 else weight.reshape([weight.size if i == 1 else 1 for i in range(0, x.ndim)])),
(weight if x.ndim == 1 else weight.reshape([weight.size if i == 1 else 1 for i in range(x.ndim)])),
dtypes=floating_types_and(torch.bfloat16, torch.float16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,


@ -2896,7 +2896,7 @@ def _multilabelmarginloss_reference(input, target):
sum = 0
for target_index in targets:
for i in range(0, len(input)):
for i in range(len(input)):
if i not in targets:
sum += max(0, 1 - input[target_index] + input[i])
@ -2914,7 +2914,7 @@ def multilabelmarginloss_reference(input, target, reduction='mean'):
n = input.size(0)
dim = input.size(1)
output = input.new(n).zero_()
for i in range(0, n):
for i in range(n):
output[i] = _multilabelmarginloss_reference(input[i], target[i])
if reduction == 'mean':
@ -2955,7 +2955,7 @@ def _multimarginloss_reference(input, target_idx, p, margin, weight):
weight = input.new(len(input)).fill_(1)
output = 0
for i in range(0, len(input)):
for i in range(len(input)):
if i != target_idx:
output += weight[target_idx] * (max(0, (margin - input[target_idx] + input[i])) ** p)
return output
@ -2972,7 +2972,7 @@ def multimarginloss_reference(input, target, p=1, margin=1, weight=None, reducti
n = input.size(0)
dim = input.size(1)
output = input.new(n)
for x in range(0, n):
for x in range(n):
output[x] = _multimarginloss_reference(input[x], target[x], p, margin, weight)
if reduction == 'mean':
@ -2987,7 +2987,7 @@ def multimarginloss_reference(input, target, p=1, margin=1, weight=None, reducti
def cosineembeddingloss_reference(input1, input2, target, margin=0, reduction='mean'):
def _cos(a, b):
cos = a.new(a.size(0))
for i in range(0, a.size(0)):
for i in range(a.size(0)):
cos[i] = (a[i] * b[i]).sum() / ((((a[i] * a[i]).sum() + 1e-12) * ((b[i] * b[i]).sum() + 1e-12)) ** 0.5)
return cos


@ -705,7 +705,7 @@ class LocalDTensorTestBase(DTensorTestBase):
self.skipTest(msg)
def _get_local_tensor_mode(self):
return LocalTensorMode(frozenset(range(0, self.world_size)))
return LocalTensorMode(frozenset(range(self.world_size)))
def setUp(self) -> None:
super().setUp()


@ -658,13 +658,13 @@ class DistributedTest:
return (group, group_id, rank)
def _init_full_group_test(self, **kwargs):
group = list(range(0, dist.get_world_size()))
group = list(range(dist.get_world_size()))
group_id = dist.new_group(**kwargs)
rank = dist.get_rank()
return (group, group_id, rank)
def _init_global_test(self):
group = list(range(0, dist.get_world_size()))
group = list(range(dist.get_world_size()))
group_id = dist.group.WORLD
rank = dist.get_rank()
return (group, group_id, rank)
@ -1114,7 +1114,7 @@ class DistributedTest:
averager = averagers.PeriodicModelAverager(
period=period, warmup_steps=warmup_steps
)
for step in range(0, 20):
for step in range(20):
# Reset the parameters at every step.
param.data = copy.deepcopy(tensor)
for params in model.parameters():
@ -1143,7 +1143,7 @@ class DistributedTest:
averager = averagers.PeriodicModelAverager(
period=period, warmup_steps=warmup_steps
)
for step in range(0, 20):
for step in range(20):
# Reset the parameters at every step.
for param_group in opt.param_groups:
for params in param_group["params"]:
@ -1203,7 +1203,7 @@ class DistributedTest:
averager = averagers.PeriodicModelAverager(
period=period, warmup_steps=warmup_steps
)
for step in range(0, 20):
for step in range(20):
# Reset the parameters at every step.
param.data = copy.deepcopy(tensor)
for params in model.parameters():
@ -1284,7 +1284,7 @@ class DistributedTest:
expected_global_avg_tensor = (
torch.ones_like(param.data) * sum(range(world_size)) / world_size
)
for step in range(0, 25):
for step in range(25):
# Reset the parameters at every step.
param.data = copy.deepcopy(tensor)
for params in model.parameters():
@ -1390,7 +1390,7 @@ class DistributedTest:
for val in ["1", "0"]:
os.environ["TORCH_NCCL_BLOCKING_WAIT"] = val
for src in range(0, world_size):
for src in range(world_size):
send_tensor = _build_tensor(rank + 1, device_id=device_id).fill_(
src
)
@ -1409,7 +1409,7 @@ class DistributedTest:
for req in reqs:
req.wait()
for src in range(0, world_size):
for src in range(world_size):
self.assertEqual(recv_tensors[src], expected_tensors[src])
self._barrier()
@ -1505,7 +1505,7 @@ class DistributedTest:
rank = dist.get_rank()
p2p_op_list = []
for src in range(0, dist.get_world_size()):
for src in range(dist.get_world_size()):
if src == rank:
continue
send_tensor = _build_tensor(rank + 1)
@ -1528,7 +1528,7 @@ class DistributedTest:
rank = dist.get_rank()
p2p_op_list = []
for src in range(0, dist.get_world_size()):
for src in range(dist.get_world_size()):
if src == rank:
continue
send_tensor = _build_tensor(rank + 1)
@ -1602,10 +1602,10 @@ class DistributedTest:
tensor = _build_tensor(rank + 1, device_id=device_id)
profiler_cls = profiler_ctx if profiler_ctx is not None else nullcontext()
with profiler_cls as prof:
for src in range(0, world_size):
for src in range(world_size):
if src == rank:
# Send mode
for dst in range(0, world_size):
for dst in range(world_size):
if dst == rank:
continue
dist.send(tensor, dst)
@ -1674,10 +1674,10 @@ class DistributedTest:
tensor = _build_tensor(send_size)
ctx = profiler_ctx if profiler_ctx is not None else nullcontext()
with ctx as prof:
for src in range(0, dist.get_world_size()):
for src in range(dist.get_world_size()):
if src == rank:
# Send mode
for dst in range(0, dist.get_world_size()):
for dst in range(dist.get_world_size()):
if dst == rank:
continue
dist.send(tensor, dst)
@ -1742,10 +1742,10 @@ class DistributedTest:
ctx = profiler_ctx if profiler_ctx is not None else nullcontext()
with ctx as prof:
for dst in range(0, dist.get_world_size()):
for dst in range(dist.get_world_size()):
if dst == rank:
# Recv mode
for dst in range(0, dist.get_world_size()):
for dst in range(dist.get_world_size()):
if dst == rank:
continue
@ -1846,10 +1846,10 @@ class DistributedTest:
tensor = _build_tensor(send_recv_size, value=rank)
ctx = profiler_ctx if profiler_ctx is not None else nullcontext()
with ctx as prof:
for dst in range(0, world_size):
for dst in range(world_size):
if dst == rank:
# Recv mode
for src in range(0, world_size):
for src in range(world_size):
if src == rank:
continue
output_tensor = _build_tensor(send_recv_size, value=-1)
@ -7480,7 +7480,7 @@ class DistributedTest:
for baseline_iter in baseline_num_iters:
for offset in iteration_offsets:
mapping = dict.fromkeys(
range(0, num_early_join_ranks), baseline_iter
range(num_early_join_ranks), baseline_iter
)
# if num_early_join_ranks > 1, ranks > 0 that will join early
# iterate offset//2 more times than rank 0, to test nodes


@ -166,7 +166,7 @@ class AllReduce:
# collect all data to the list and make them
# all on rank 0 device
tensors = [
data[src_rank][i].to(rank_0_device) for src_rank in range(0, len(data))
data[src_rank][i].to(rank_0_device) for src_rank in range(len(data))
]
# now mimic reduce across all ranks


@ -266,7 +266,7 @@ class CommonDistAutogradTest(RpcAgentTestFixture):
grads = dist_autograd.get_gradients(context_id)
nargs = len(args)
ngrads = 0
for i in range(0, nargs):
for i in range(nargs):
if local_grads[i] is not None:
self.assertIn(args[i], grads)
self.assertEqual(local_grads[i], grads[args[i]])
@ -1973,7 +1973,7 @@ class DistAutogradTest(CommonDistAutogradTest):
DistAutogradTest._test_clean_context_backward_context_id = context_id
# Send the context id to all nodes.
for i in range(0, self.world_size):
for i in range(self.world_size):
if i != self.rank:
rank_distance = (i - self.rank + self.world_size) % self.world_size
rpc.rpc_sync(
@ -1988,7 +1988,7 @@ class DistAutogradTest(CommonDistAutogradTest):
self.assertEqual(self.world_size - 1, len(known_context_ids))
t1 = torch.rand((3, 3), requires_grad=True)
for i in range(0, 100):
for i in range(100):
dst = self._next_rank()
t1 = rpc.rpc_sync(worker_name(dst), torch.add, args=(t1, t1))


@ -1818,7 +1818,7 @@ class RpcTest(RpcAgentTestFixture, RpcTestCommon):
# Spawn multiple threads that send RPCs to ensure keys are correctly
# prefixed when there are multiple RPCs being created/in flight at the
# same time.
dst_ranks = [rank for rank in range(0, self.world_size) if rank != self.rank]
dst_ranks = [rank for rank in range(self.world_size) if rank != self.rank]
def rpc_with_profiling(dst_worker):
with _profile() as prof:
@ -1884,7 +1884,7 @@ class RpcTest(RpcAgentTestFixture, RpcTestCommon):
if self.rank != 1:
return
dst_ranks = [rank for rank in range(0, self.world_size) if rank != self.rank]
dst_ranks = [rank for rank in range(self.world_size) if rank != self.rank]
for dst in dst_ranks:
dst_worker = worker_name(dst)
with _profile() as prof:


@ -439,7 +439,7 @@ class JitTestCase(JitCommonTestCase):
state = model.get_debug_state()
plan = get_execution_plan(state)
num_bailouts = plan.code.num_bailouts()
for i in range(0, num_bailouts):
for i in range(num_bailouts):
plan.code.request_bailout(i)
bailout_outputs = model(*inputs)
self.assertEqual(bailout_outputs, expected)


@ -912,7 +912,7 @@ if has_triton():
b_ptrs = b_ptr + (offs_k[:, None] + offs_bn[None, :])
accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
for k in range(tl.cdiv(K, BLOCK_SIZE_K)):
a = tl.load(a_ptrs, mask=offs_k[None, :] < K - k * BLOCK_SIZE_K, other=0.0)
b = tl.load(b_ptrs, mask=offs_k[:, None] < K - k * BLOCK_SIZE_K, other=0.0)
accumulator = tl.dot(a, b, accumulator)