diff --git a/test/conftest.py b/test/conftest.py index 078e4b3b2b8..de5818bda8f 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -238,7 +238,7 @@ def pytest_pycollect_makemodule(module_path, path, parent) -> Module: @pytest.hookimpl(hookwrapper=True) def pytest_report_teststatus(report, config): - # Add the test time to the verbose output, unforunately I don't think this + # Add the test time to the verbose output, unfortunately I don't think this # includes setup or teardown pluggy_result = yield if not isinstance(report, pytest.TestReport): diff --git a/test/cpp/api/autograd.cpp b/test/cpp/api/autograd.cpp index b7e75acb659..b3d47b71e9a 100644 --- a/test/cpp/api/autograd.cpp +++ b/test/cpp/api/autograd.cpp @@ -584,7 +584,7 @@ TEST(CustomAutogradTest, MarkDirty) { } }; - // Clone here because modifying leafs inplace is not allowed + // Clone here because modifying leaves inplace is not allowed auto x = torch::randn({5, 5}, torch::requires_grad()).clone(); auto version_before = x._version(); auto out = MyFunction::apply(x); diff --git a/test/cpp/api/parallel.cpp b/test/cpp/api/parallel.cpp index 1ec7c463a59..0dab78c3a28 100644 --- a/test/cpp/api/parallel.cpp +++ b/test/cpp/api/parallel.cpp @@ -264,7 +264,7 @@ TEST_F(ParallelTest, DataParallelNumericalEquivalence_MultiCUDA) { input += i; input_dp += i; - // non-prallel training + // non-parallel training torch::optim::SGD optim(model->parameters(), torch::optim::SGDOptions(0.1)); auto output = model->forward(input); auto loss = torch::mse_loss(output, torch::zeros_like(output)); diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/README.md b/test/cpp_extensions/open_registration_extension/torch_openreg/README.md index 3fcc0f1c211..422aa4c8da2 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/README.md +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/README.md @@ -188,7 +188,7 @@ Please refer to [this](https://docs.pytorch.org/docs/main/accelerator/index.html - Device-agnostic APIs - Memory Management - Generator - - Distrubuted + - Distributed - Custom Tensor&Storage - ... - **Improve Tests**: Add more test cases related to the integration mechanism. diff --git a/test/distributed/checkpoint/test_planner.py b/test/distributed/checkpoint/test_planner.py index 86bed29de99..16f7089206e 100644 --- a/test/distributed/checkpoint/test_planner.py +++ b/test/distributed/checkpoint/test_planner.py @@ -216,7 +216,7 @@ class TestSavePlan(TestCase): # Number of plans should remain unchanged self.assertEqual(len(all_plans), len(deduped_plans)) - # Numer of items in the deduped plans should be less than the original plans + # Number of items in the deduped plans should be less than the original plans for new_plan, old_plan in zip(deduped_plans, all_plans): self.assertFalse(_compare_save_plans(new_plan, old_plan)) self.assertTrue(len(new_plan.items) < len(old_plan.items)) diff --git a/test/distributed/tensor/test_attention.py b/test/distributed/tensor/test_attention.py index 66d80f60455..64d86ba3c12 100644 --- a/test/distributed/tensor/test_attention.py +++ b/test/distributed/tensor/test_attention.py @@ -158,7 +158,7 @@ class RingAttentionTest(DTensorTestBase): # parameters because when require_grad is True, resize_ is not # allowed. But requires_grad of cp_q, cp_k, and cp_v are False # now. So we can just use context_parallel() to shard q, k, v. - # In reality, context_paralle() should be used to shard the input. + # In reality, context_parallel() should be used to shard the input. 
# In reality, context_parallel() should only be used to shard # the model inputs (batch). @@ -701,7 +701,7 @@ class CPFlexAttentionTest(DTensorTestBase): ) # TODO: change this for-loop to run_subtests - # Use a for-loop instead of run_subtests because we need to intialize the mask + # Use a for-loop instead of run_subtests because we need to initialize the mask # for each subtest. This can be baked into self._test_cp_flex_attention as # a str argument denoting mask type. for batch_size, max_seq_len, lb_type in itertools.product( diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index c74f8143cc8..c117bc810b1 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -4901,7 +4901,7 @@ class NCCLTraceTest(NCCLTraceTestBase): for p2p_op_idx, input_sizes in zip( range(first_op, coalesced_op, 1), op_sizes_per_coalesce ): - # the indivudal ops inside the coalescing group the individual op metadata, + # the individual ops inside the coalescing group the individual op metadata, # but not the timing info coming from the actual coalesced kernel profiling_name = ( "nccl:recv 0<-1" if self.rank == 0 else "nccl:send 1->0" diff --git a/test/distributed/test_nvshmem.py b/test/distributed/test_nvshmem.py index 8cee8b2a046..8c6d40ced07 100644 --- a/test/distributed/test_nvshmem.py +++ b/test/distributed/test_nvshmem.py @@ -398,7 +398,7 @@ class NVSHMEMAll2AllTest(MultiProcContinuousTest): nsplits, dtype=torch.int64, device=self.device ).copy_(inp_splits) # 2 rows: output splits, output offsets - # Initiallizing all values to -1 to check if they are updated + # Initializing all values to -1 to check if they are updated out_splits_offsets = symm_mem.empty( (2, nsplits), dtype=torch.int64, device=self.device ).fill_(-1) @@ -503,7 +503,7 @@ class NVSHMEMAll2AllTest(MultiProcContinuousTest): (2, nsplits), dtype=torch.int64, device=self.device ) # 2 rows: output splits, output offsets - # Initiallizing all values to -1 to check if they are updated + # Initializing all values to -1 to check if they are updated out_splits_offsets = symm_mem.empty( (2, nsplits), dtype=torch.int64, device=self.device ).fill_(-1) @@ -617,7 +617,7 @@ def dispatch_then_combine(device, align: int, group) -> None: inp_splits ) # 2 rows: output splits, output offsets - # Initiallizing all values to -1 to check if they are updated + # Initializing all values to -1 to check if they are updated out_splits_offsets = symm_mem.empty( (2, nsplits), dtype=torch.int64, device=device ).fill_(-1) @@ -625,7 +625,7 @@ def dispatch_then_combine(device, align: int, group) -> None: # Buffers for combine combine_out = symm_mem.empty(max_out_numel, dtype=dtype, device=device).fill_(-1) # 2 rows: output splits, output offsets - # Initiallizing all values to -1 to check if they are updated + # Initializing all values to -1 to check if they are updated combine_out_splits_offsets = symm_mem.empty( (2, nsplits), dtype=torch.int64, device=device ).fill_(-1) diff --git a/test/distributed/test_symmetric_memory.py b/test/distributed/test_symmetric_memory.py index 1ea6faebf66..f589339f194 100644 --- a/test/distributed/test_symmetric_memory.py +++ b/test/distributed/test_symmetric_memory.py @@ -274,7 +274,7 @@ class SymmetricMemoryTest(MultiProcContinuousTest): self.assertTrue(buf.eq(peer_rank + world.size() // 2).all()) -# We move AsyncTP tests to a seperate test suite because 1) Async TP ops are not +# We move AsyncTP tests to a separate test suite because 1) Async TP ops are not # the core 
symmetric memory APIs, they are more like applications, 2) # MultiProcContinuousTest will skip all the following tests if a test fails ( # we should fix this too). We still want to get the test signals for the core @@ -621,7 +621,7 @@ class AsyncTPTest(MultiProcContinuousTest): # [READ ME FIRST] # The `SymmMemEmptySetDeviceTest` suite parameterizes whether user sets the -# device before calling symm_mem.emtpy. Either way should work. +# device before calling symm_mem.empty. Either way should work. # However, since `set_device` is persistent, we cannot use the # `MultiProcContinuousTest` template because the next function will be # "contaminated", leading to flaky tests (e.g. hang). Therefore, we use diff --git a/test/dynamo/cpython/3_13/mathdata/ieee754.txt b/test/dynamo/cpython/3_13/mathdata/ieee754.txt index a8b8a0a2148..3e986cdb102 100644 --- a/test/dynamo/cpython/3_13/mathdata/ieee754.txt +++ b/test/dynamo/cpython/3_13/mathdata/ieee754.txt @@ -51,7 +51,7 @@ nan >>> INF / INF nan -However unambigous operations with inf return inf: +However unambiguous operations with inf return inf: >>> INF * INF inf >>> 1.5 * INF diff --git a/test/dynamo/cpython/3_13/test_itertools.py b/test/dynamo/cpython/3_13/test_itertools.py index fe32a3491d1..b33fa6b878d 100644 --- a/test/dynamo/cpython/3_13/test_itertools.py +++ b/test/dynamo/cpython/3_13/test_itertools.py @@ -1711,7 +1711,7 @@ class TestBasicOps(__TestCase): t3 = tnew(t1) self.assertTrue(list(t1) == list(t2) == list(t3) == list('abc')) - # test that tee objects are weak referencable + # test that tee objects are weak referenceable a, b = tee(range(10)) p = weakref.proxy(a) self.assertEqual(getattr(p, '__class__'), type(b)) @@ -2243,7 +2243,7 @@ class TestPurePythonRoughEquivalents(__TestCase): t3 = tnew(t1) self.assertTrue(list(t1) == list(t2) == list(t3) == list('abc')) - # test that tee objects are weak referencable + # test that tee objects are weak referenceable a, b = tee(range(10)) p = weakref.proxy(a) self.assertEqual(getattr(p, '__class__'), type(b)) diff --git a/test/dynamo/test_repros.py b/test/dynamo/test_repros.py index 5d5d2281b89..c6138f7574f 100644 --- a/test/dynamo/test_repros.py +++ b/test/dynamo/test_repros.py @@ -5760,7 +5760,7 @@ def forward(self, s77 : torch.SymInt, s27 : torch.SymInt, L_x_ : torch.Tensor): self.assertEqual(func(x, 0), opt_func(x, 0)) def test_grad(self): - # Write to `grad` or `_grad` should reflecte in reading from the other, + # Writes to `grad` or `_grad` should be reflected in reading from the other, # and should be codegen-ed. 
def fn(x, y): x._grad = y + 1 diff --git a/test/export/test_export.py b/test/export/test_export.py index 6a9aff1477a..762ad512ae3 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -3955,7 +3955,7 @@ def forward(self, causal_mask, fill_value): def test_export_custom_op_lib(self): ops_registered_before = set(torch.ops.mylib) - # Assert warning for CompositeImplictAutograd op + # Assert warning for CompositeImplicitAutograd op with torch.library._scoped_library("mylib", "FRAGMENT") as lib: lib.define("foo123(Tensor x) -> Tensor") lib.impl("foo123", lambda x: x.sin(), "CompositeImplicitAutograd") diff --git a/test/export/test_serialize.py b/test/export/test_serialize.py index 0e1eb0140bb..2f68cdf4794 100644 --- a/test/export/test_serialize.py +++ b/test/export/test_serialize.py @@ -2000,7 +2000,7 @@ class TestSaveLoad(TestCase): def test_save_load_with_multiple_empty_tensors(self) -> None: # Test scenario where models have multiple empty tensors - # but with differnt data types. + # but with different data types. class M(torch.nn.Module): def __init__(self): super().__init__() diff --git a/test/functorch/attn_ft.py b/test/functorch/attn_ft.py index c5130e5f8a2..d2aabe51ec9 100644 --- a/test/functorch/attn_ft.py +++ b/test/functorch/attn_ft.py @@ -115,7 +115,7 @@ class BertSelfAttention(nn.Module): # we can then use that as an indirect index into the embedding table values to look up the features for that index # this is just a `gather` primitive op. The resulting tensor will - # have all the dimensions of embeddeding_idx (query_sequence x key_sequence), + # have all the dimensions of embedding_idx (query_sequence x key_sequence), # plus all the dimensions of `embed` that were not indirectly accessed (`embedding_range`). # this form of indirect indexing is more straightforward than either advanced indexing or torch.gather which both # have a lot of dependencies on the positions of indexing tensors. diff --git a/test/functorch/discover_coverage.py b/test/functorch/discover_coverage.py index 6d9d9e7e8a7..2ffdfec1e86 100644 --- a/test/functorch/discover_coverage.py +++ b/test/functorch/discover_coverage.py @@ -36,7 +36,7 @@ def get_public_overridable_apis(pytorch_root="/raid/rzou/pt/debug-cpu"): for module, module_name, src in public_docs: with open(f"{pytorch_root}/{src}") as f: lines = f.readlines() - # APIs eitehr begin with 4 spaces or ".. autofunction::" + # APIs either begin with 4 spaces or ".. autofunction::" api_lines1 = [line.strip() for line in lines if line.startswith(" " * 4)] api_lines2 = [ line.strip()[len(".. autofunction:: ") :] diff --git a/test/functorch/test_aotdispatch.py b/test/functorch/test_aotdispatch.py index 840ead0a281..29b69322d2f 100644 --- a/test/functorch/test_aotdispatch.py +++ b/test/functorch/test_aotdispatch.py @@ -6399,7 +6399,7 @@ def forward(self, primals_1, primals_2, primals_3): # Important pieces of the graph: # - 4 total dense outputs. - # This corresponds to the fact that each user fwd inpt (a, b) + # This corresponds to the fact that each user fwd input (a, b) # will get a gradient that is a TwoTensor subclass, # so (mul_2, mul_3) will be wrapped into a.grad # and (div_1, div_2) will be wrapped into b.grad @@ -8395,7 +8395,7 @@ aot_autograd_module_failures = set( # implementation not traceable or that there is a bug in AOTAutograd. 
torch.nn.TransformerEncoder, # DataDependentOutputException: aten.eq compares a mask input # to a causal mask tensor, to see if Boolean is_causal should be set - # for TrnasformerEncoder layers, MHA and sdp custom kernels + # for TransformerEncoder layers, MHA and sdp custom kernels torch.nn.Transformer, # DataDependentOutputException: aten.equal compares a mask input # to a causal mask tensor, to see if Boolean is_causal should be set # for TransformerEncoder layers, MHA and sdp custom kernels diff --git a/test/functorch/test_control_flow.py b/test/functorch/test_control_flow.py index 8009654b90d..5bfd1f200dd 100644 --- a/test/functorch/test_control_flow.py +++ b/test/functorch/test_control_flow.py @@ -1236,7 +1236,7 @@ def forward(self, pred_1, x_1): from torch.fx.passes.shape_prop import _extract_tensor_metadata, TensorMetadata # This is a helper function that extracts the metadata from the tensor and - # sets the requries_grad flag to false. This is needed as we compare the + # sets the requires_grad flag to false. This is needed as we compare the # metadata of the operands and the gradients def _extract_tensor_metadata_except_requires_grad(arg): metadata = _extract_tensor_metadata(arg) diff --git a/test/fx/test_fx_param_shape_control_flow.py b/test/fx/test_fx_param_shape_control_flow.py index 8972540076f..f3485f94a95 100644 --- a/test/fx/test_fx_param_shape_control_flow.py +++ b/test/fx/test_fx_param_shape_control_flow.py @@ -118,7 +118,7 @@ class TestConstParamShapeInControlFlow(TestCase): graph1_node_targets = [n.target for n in traced_graph.nodes] graph2_node_targets = [n.target for n in traced_graph2.nodes] - # the second graph has an exta relu function call node + # the second graph has an extra relu function call node assert torch.mm in graph1_node_targets and torch.mm in graph2_node_targets assert ( torch.relu not in graph1_node_targets and torch.relu in graph2_node_targets diff --git a/test/fx/test_fx_xform_observer.py b/test/fx/test_fx_xform_observer.py index d9dcb8504ba..8db18f0c55e 100644 --- a/test/fx/test_fx_xform_observer.py +++ b/test/fx/test_fx_xform_observer.py @@ -181,7 +181,7 @@ class TestGraphTransformObserver(TestCase): @torch._inductor.config.patch("trace.provenance_tracking_level", 1) def test_graph_transform_observer_replace(self): - # the node sohuld should not be duplicated + # the node should not be duplicated class Model(torch.nn.Module): def forward(self, x): y = x + 1 diff --git a/test/inductor/test_flex_attention.py b/test/inductor/test_flex_attention.py index abcaf6649d9..a1e5aa3cebc 100644 --- a/test/inductor/test_flex_attention.py +++ b/test/inductor/test_flex_attention.py @@ -1865,7 +1865,7 @@ class TestFlexAttention(InductorTestCase): requires_grad=True, ) query, key, value = make_tensor(), make_tensor(), make_tensor() - # floor_div is not decomposed in decompostion_table is empty + # floor_div is not decomposed when decomposition_table is empty attention = functools.partial(flex_attention, score_mod=score_mod_func) gm = make_fx(attention, decomposition_table={})(query, key, value) self.assertExpectedInline( diff --git a/test/inductor/test_flex_decoding.py b/test/inductor/test_flex_decoding.py index a794f5e6e52..995262b0f21 100644 --- a/test/inductor/test_flex_decoding.py +++ b/test/inductor/test_flex_decoding.py @@ -1188,7 +1188,7 @@ class TestFlexDecoding(InductorTestCase): requires_grad=True, ) query, key, value = make_q(), make_kv(), make_kv() - # floor_div is not decomposed in decompostion_table is empty + # floor_div is not decomposed when 
decomposition_table is empty attention = functools.partial(flex_attention, score_mod=score_mod_func) gm = make_fx(attention, decomposition_table={})(query, key, value) self.assertExpectedInline( diff --git a/test/inductor/test_fxir_backend.py b/test/inductor/test_fxir_backend.py index c17d0bf19b5..72eb37c1e1b 100644 --- a/test/inductor/test_fxir_backend.py +++ b/test/inductor/test_fxir_backend.py @@ -1128,7 +1128,7 @@ class TestReplaceFloorDiv(InductorTestCase): replaced = replace_floor_div(expr) # Check that all floor's were replaced. - # We shoud have no more new FloorDiv's than floor's in the original expression, + # We should have no more new FloorDiv's than floor's in the original expression, # although we can have less due to simplification. self.assertEqual(replaced.count(sympy.floor), 0) self.assertLessEqual( diff --git a/test/inductor/test_loop_ordering.py b/test/inductor/test_loop_ordering.py index efe0fbfc283..c77b3574b22 100644 --- a/test/inductor/test_loop_ordering.py +++ b/test/inductor/test_loop_ordering.py @@ -231,7 +231,7 @@ class LoopOrderingTest(TestCase): return x.to(torch.float32) return x - # Wordaround the issue that call allclose on fp8 tensor triggers error + # Workaround the issue that call allclose on fp8 tensor triggers error # RuntimeError: "mul_cuda" not implemented for 'Float8_e4m3fn' expect = tree_map(_cast, expect) actual = tree_map(_cast, actual) @@ -547,7 +547,7 @@ class LoopOrderingTest(TestCase): # A small amount of extra memory access for: # - store output for the first reduction - # - load input for the second redution + # - load input for the second reduction # - store output for the second reduction expected_numbytes += (M * 2 + 1) * x.itemsize diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index aa140e4c0cb..420a9ee8292 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -384,7 +384,7 @@ class TestMaxAutotune(TestCase): a[:] = torch.randn((M, K), dtype=torch.float16) b = torch.empty_strided((K, N), (1, K), dtype=torch.float16, device=GPU_TYPE) b[:] = torch.randn((K, N), dtype=torch.float16) - # allocate an output with a stride not divisble by 16, so it can't satisfy TMA alignment checks. + # allocate an output with a stride not divisible by 16, so it can't satisfy TMA alignment checks. out = torch.empty_strided((M, N), (N, 1), dtype=torch.float16, device=GPU_TYPE) with ( diff --git a/test/inductor/test_utils.py b/test/inductor/test_utils.py index 7d23457732a..c3dcd966df3 100644 --- a/test/inductor/test_utils.py +++ b/test/inductor/test_utils.py @@ -74,7 +74,7 @@ class TestUtils(TestCase): self.assertEqual(expr.is_integer, None) self.assertEqual(expr.is_nonnegative, None) # replace abs(x) with y - # propagte abs(x) sympy properties. + # propagate abs(x) sympy properties. result = sympy_subs(expr, {expr: Symbol("y")}) self.assertEqual(result.name, "y") self.assertEqual(result.is_integer, None) diff --git a/test/jit/test_backend_nnapi.py b/test/jit/test_backend_nnapi.py index 042c82eca80..3e79b257131 100644 --- a/test/jit/test_backend_nnapi.py +++ b/test/jit/test_backend_nnapi.py @@ -17,7 +17,7 @@ from torch.testing._internal.common_utils import ( # hacky way to skip these tests in fbcode: # during test execution in fbcode, test_nnapi is available during test discovery, # but not during test execution. So we can't try-catch here, otherwise it'll think -# it sees tests but then fails when it tries to actuall run them. 
+# it sees tests but then fails when it tries to actually run them. if not IS_FBCODE: from test_nnapi import TestNNAPI diff --git a/test/jit/test_cuda.py b/test/jit/test_cuda.py index c781c1e4c57..8cfe63faa0e 100644 --- a/test/jit/test_cuda.py +++ b/test/jit/test_cuda.py @@ -292,7 +292,7 @@ class TestCUDA(JitTestCase): default_stream_id: int user_stream_id: int - # The test aims at checking different stream proporties. + # The test aims at checking different stream properties. @torch.jit.script def test_get_stream(): device_index = torch.cuda.current_device() @@ -499,7 +499,7 @@ class TestCUDA(JitTestCase): # Record the CUDA event for operation torch.mm on the current stream # and then test if the elapsed time is greater than 0. This test is also - # an adaption from eager mdoe CUDA tests available at test/test_cuda.py + # an adaption from eager mode CUDA tests available at test/test_cuda.py @torch.jit.script def test_event(): device_index = torch.cuda.current_device() diff --git a/test/jit/test_freezing.py b/test/jit/test_freezing.py index ca1172a2ce7..91ecf6f3629 100644 --- a/test/jit/test_freezing.py +++ b/test/jit/test_freezing.py @@ -563,7 +563,7 @@ class TestFreezing(JitTestCase): self.assertTrue(mf.hasattr("sub1")) self.assertTrue(mf.sub1.hasattr("a")) self.assertFalse(mf.sub1.hasattr("b")) - # sub2 is fully folded becasue self.sub1 and self.sub2.sub are not alias (Scripting bug) + # sub2 is fully folded because self.sub1 and self.sub2.sub are not alias (Scripting bug) self.assertFalse(mf.hasattr("sub2")) input = torch.randn(2, 2) output = m.forward(input) diff --git a/test/jit/test_peephole.py b/test/jit/test_peephole.py index 914d423a519..12b9c3f1834 100644 --- a/test/jit/test_peephole.py +++ b/test/jit/test_peephole.py @@ -152,7 +152,7 @@ class TestPeephole(JitTestCase): self.run_pass("peephole", test.graph) FileCheck().check_not("prim::unchecked_cast").run(test.graph) - # refinement not optimzied out + # refinement not optimized out def is_int_tensor(x): scalar = x.item() if isinstance(scalar, int): diff --git a/test/jit/test_upgraders.py b/test/jit/test_upgraders.py index 22d05052b4f..c2228b2de85 100644 --- a/test/jit/test_upgraders.py +++ b/test/jit/test_upgraders.py @@ -151,7 +151,7 @@ class TestUpgraders(JitTestCase): version = self._load_model_version(loaded_func) self.assertTrue(version == 5) - # make sure we preserve old behaviou + # make sure we preserve old behaviour torch._C._calculate_package_version_based_on_upgraders(current_flag_value) def test_aten_linspace(self): diff --git a/test/lazy/test_extract_compiled_graph.py b/test/lazy/test_extract_compiled_graph.py index 1ea0219066d..844b9fef1af 100644 --- a/test/lazy/test_extract_compiled_graph.py +++ b/test/lazy/test_extract_compiled_graph.py @@ -195,7 +195,7 @@ def maketest(module_cls, exception_msg_pattern=None, ctxmgr=None): class OptimizeTest(unittest.TestCase): test_sub = maketest(ModuleSub) # Same as test_sub but force aten::sub to fallback - # We expect an exception caught because of LTC fallabck. + # We expect an exception caught because of LTC fallback. test_ltc_fallback = maketest( ModuleSub, exception_msg_pattern="fallback.*aten::sub", diff --git a/test/lazy/test_ts_opinfo.py b/test/lazy/test_ts_opinfo.py index 7c467dc6241..3e065395153 100644 --- a/test/lazy/test_ts_opinfo.py +++ b/test/lazy/test_ts_opinfo.py @@ -164,7 +164,7 @@ class TestLazyTensor(JitTestCase): if mark_step: torch._lazy.mark_step() - # y and x should contiue to be aliased after the mark_step call. 
+ # y and x should continue to be aliased after the mark_step call. y.add_(1) return x diff --git a/test/mobile/model_test/README.md b/test/mobile/model_test/README.md index 87c9f9bc910..f176a746c26 100644 --- a/test/mobile/model_test/README.md +++ b/test/mobile/model_test/README.md @@ -81,7 +81,7 @@ python test/mobile/model_test/gen_test_model.py ios The test coverage is based on the number of root ops tested in these test models. The full list of generated ops can be found in: https://github.com/pytorch/pytorch/blob/master/test/mobile/model_test/coverage.yaml -In additional, the simulator tests will also report the percentage of Meta's production ops that are covered. The list of production ops changes overtime, so a Meta employee needs to regularly udpate the list it using +In addition, the simulator tests will also report the percentage of Meta's production ops that are covered. The list of production ops changes over time, so a Meta employee needs to regularly update the list using ``` python test/mobile/model_test/update_production_ops.py ~/fbsource/xplat/pytorch_models/build/all_mobile_model_configs.yaml ``` diff --git a/test/mobile/model_test/update_production_ops.py b/test/mobile/model_test/update_production_ops.py index b4549a585e1..dbec56e6426 100644 --- a/test/mobile/model_test/update_production_ops.py +++ b/test/mobile/model_test/update_production_ops.py @@ -16,10 +16,10 @@ with open(sys.argv[1]) as input_yaml_file: model_infos = yaml.safe_load(input_yaml_file) for info in model_infos: for op in info["root_operators"]: - # aggregate occurance per op + # aggregate occurrence per op root_operators[op] = 1 + (root_operators.get(op, 0)) for op in info["traced_operators"]: - # aggregate occurance per op + # aggregate occurrence per op traced_operators[op] = 1 + (traced_operators.get(op, 0)) # merge dtypes for each kernel for kernal, dtypes in info["kernel_metadata"].items(): diff --git a/test/nn/test_convolution.py b/test/nn/test_convolution.py index 3c3b3f53e52..8c1e1ee7a69 100644 --- a/test/nn/test_convolution.py +++ b/test/nn/test_convolution.py @@ -1009,7 +1009,7 @@ class TestConvolutionNN(NNTestCase): @unittest.skipIf(not TEST_CUDNN, "needs cudnn") def test_conv_cudnn_memory_layout_dominance(self): # desired behavior here is to have the memory_layout of conv.weight to - # dominante the layout of output. + # dominate the layout of output. 
# which is not the same as current behavior, we'll fix this in # following up PRs and remove the `expectedFailure` tag input = torch.randint( @@ -3599,7 +3599,7 @@ class TestConvolutionNNDeviceType(NNTestCase): input_format=input_format, weight_format=weight_format, ) - # test when input chanels is 1 and not converted to channels last + # test when input channels is 1 and not converted to channels last helper( nn.Conv2d, 2, diff --git a/test/nn/test_parametrization.py b/test/nn/test_parametrization.py index da83ed26caa..aee8d4df50e 100644 --- a/test/nn/test_parametrization.py +++ b/test/nn/test_parametrization.py @@ -1395,7 +1395,7 @@ class TestNNParametrization(NNTestCase): eval_out0 = wrapped_m(input) # assert eval gives same result as last training iteration self.assertEqual(eval_out0, last_train_out) - # assert doing more iteartion in eval don't change things + # assert doing more iterations in eval doesn't change things self.assertEqual(eval_out0, wrapped_m(input)) self.assertEqual(last_train_u, spectral_norm_m._u) self.assertEqual(last_train_v, spectral_norm_m._v) @@ -1440,7 +1440,7 @@ class TestNNParametrization(NNTestCase): class SplitAndCat(nn.Module): def right_inverse(self, x): - # split the tensor in two halfs + # split the tensor in two halves return torch.split(x, x.shape[1] // 2) def forward(self, x0, x1): diff --git a/test/nn/test_pruning.py b/test/nn/test_pruning.py index a2ca609af6e..51078cbcf64 100644 --- a/test/nn/test_pruning.py +++ b/test/nn/test_pruning.py @@ -894,14 +894,14 @@ class TestPruningNN(NNTestCase): prune.l1_unstructured(l, "weight_ih_l0", 0.5) assert sum(isinstance(p, torch.nn.Parameter) for p in l._flat_weights) == 3 - # Removing the pruning reparametrization restores the Parameter + # Removing the pruning reparameterization restores the Parameter prune.remove(l, "weight_ih_l0") assert sum(isinstance(p, torch.nn.Parameter) for p in l._flat_weights) == 4 - # Make sure that, upon removal of the reparametrization, the + # Make sure that, upon removal of the reparameterization, the # `._parameters` and `.named_parameters` contain the right params. # Specifically, the original weight ('weight_ih_l0') should be placed - # back in the parameters, while the reparametrization component + # back in the parameters, while the reparameterization component # ('weight_ih_l0_orig') should be removed. 
assert "weight_ih_l0" in l._parameters assert l._parameters["weight_ih_l0"] is not None diff --git a/test/onnx/autograd_helper.py b/test/onnx/autograd_helper.py index 4a3a3eca384..23d42f7b63c 100644 --- a/test/onnx/autograd_helper.py +++ b/test/onnx/autograd_helper.py @@ -3,7 +3,7 @@ import torch -# Autograd funtion that is a replica of the autograd funtion in +# Autograd function that is a replica of the autograd function in # test_utility_funs.py (test_autograd_module_name) class CustomFunction(torch.autograd.Function): @staticmethod diff --git a/test/onnx/exporter/test_api.py b/test/onnx/exporter/test_api.py index a81b7106084..2fefd592ecf 100644 --- a/test/onnx/exporter/test_api.py +++ b/test/onnx/exporter/test_api.py @@ -574,7 +574,7 @@ class TestCustomTranslationTable(common_utils.TestCase): def test_01_specialization_with_run_decomp_is_supported(self): # Phi3RMSNorm changes and redo shape inference after `run_decompositions` call - # We ned this test to make sure everything we do on fx graph is covered by + # We need this test to make sure everything we do on fx graph is covered by # backed_size_oblivious class Phi3RMSNorm(torch.nn.Module): def __init__(self, hidden_size, eps=1e-6): diff --git a/test/onnx/test_pytorch_onnx_onnxruntime.py b/test/onnx/test_pytorch_onnx_onnxruntime.py index 5c11682deed..a474d71d49b 100644 --- a/test/onnx/test_pytorch_onnx_onnxruntime.py +++ b/test/onnx/test_pytorch_onnx_onnxruntime.py @@ -1935,7 +1935,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): class DivModule(torch.nn.Module): def forward(self, x, y): # Add transpose to hide shape/type information - # Otherwise shape and type are still avaiable from input. + # Otherwise shape and type are still available from input. x = x.transpose(1, 2) y = y.transpose(1, 2) return x / y, torch.true_divide(x, y) @@ -3878,7 +3878,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): def test_topk_smallest_unsorted(self): class MyModule(torch.nn.Module): def forward(self, x, k): - # When sorted=False, order of elements in the outout tensors + # When sorted=False, order of elements in the output tensors # are not expected to match between PyTorch and ORT topk_unsorted = torch.topk(x, k, largest=False, sorted=False) topk_sorted = torch.topk(x, k, largest=False, sorted=True) @@ -4361,7 +4361,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): super().__init__() self.weight = torch.nn.Buffer(torch.ones(5)) # torch.nn.Embedding is converted to ONNX::Gather. - # Constant folding will be triggerred for constant inputs. + # Constant folding will be triggered for constant inputs. # This pattern is common for constant mask inputs in transformer models. 
self.embed = torch.nn.Embedding(8, 3) @@ -5389,7 +5389,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): input = torch.randn(7, 3, 5) self._argmin_argmax_model(input) - # Argmin and Argmax with "select_last_index" is not supprted before opset 12 + # Argmin and Argmax with "select_last_index" is not supported before opset 12 # "select_last_index" was added in opset 12 to deal with corner case where the # same value appears multiple times in the tensor @skipIfUnsupportedMinOpsetVersion(12) @@ -10511,7 +10511,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): amax = torch.ones(4) scale = amax / 127.0 zero_point = torch.zeros_like(amax, dtype=torch.int) - # Quantize twice to test differnet branches + # Quantize twice to test different branches y = torch.fake_quantize_per_channel_affine( input, scale, zero_point, 1, 0, 255 ) diff --git a/test/onnx/torchlib/ops_test_data.py b/test/onnx/torchlib/ops_test_data.py index 6adb43044e5..6dd3a39a8d6 100644 --- a/test/onnx/torchlib/ops_test_data.py +++ b/test/onnx/torchlib/ops_test_data.py @@ -275,7 +275,7 @@ def _empty_input_wrangler( def _grid_sample_input_wrangler( args: list[Any], kwargs: dict[str, Any] ) -> tuple[list[Any], dict[str, Any]]: - # Convert string attriute to int as input + # Convert string attribute to int as input inter_mode_options = {"bilinear": 0, "nearest": 1, "bicubic": 2} padding_mode_options = {"zeros": 0, "border": 1, "reflection": 2} args.append(inter_mode_options[kwargs["mode"]]) diff --git a/test/package/generate_bc_packages.py b/test/package/generate_bc_packages.py index 6fa5e81ef5a..52acca1d946 100644 --- a/test/package/generate_bc_packages.py +++ b/test/package/generate_bc_packages.py @@ -11,7 +11,7 @@ torch.package.package_exporter._gate_torchscript_serialization = False def generate_bc_packages(): - """Function to create packages for testing backwards compatiblity""" + """Function to create packages for testing backwards compatibility""" if not IS_FBCODE or IS_SANDCASTLE: from package_a.test_nn_module import TestNnModule diff --git a/test/package/test_load_bc_packages.py b/test/package/test_load_bc_packages.py index 2536f81aaaa..4280736d6e3 100644 --- a/test/package/test_load_bc_packages.py +++ b/test/package/test_load_bc_packages.py @@ -17,7 +17,7 @@ packaging_directory = f"{Path(__file__).parent}/package_bc" class TestLoadBCPackages(PackageTestCase): - """Tests for checking loading has backwards compatiblity""" + """Tests for checking loading has backwards compatibility""" @skipIf( IS_FBCODE or IS_SANDCASTLE, diff --git a/test/package/test_misc.py b/test/package/test_misc.py index 850dec67681..897d250bc67 100644 --- a/test/package/test_misc.py +++ b/test/package/test_misc.py @@ -196,7 +196,7 @@ class TestMisc(PackageTestCase): "Tests that use temporary files are disabled in fbcode", ) def test_load_python_version_from_package(self): - """Tests loading a package with a python version embdded""" + """Tests loading a package with a python version embedded""" importer1 = PackageImporter( f"{Path(__file__).parent}/package_e/test_nn_module.pt" ) diff --git a/test/package/test_model.py b/test/package/test_model.py index 09b10a1ea2f..ea0d2c0788b 100644 --- a/test/package/test_model.py +++ b/test/package/test_model.py @@ -97,7 +97,7 @@ class ModelTest(PackageTestCase): # how they want to save it but the 'server' can always # use the same API to load the package. 
- # The convension is for each model to provide a + # The convention is for each model to provide a # 'model' package with a 'load' function that actual # reads the model out of the archive. @@ -123,7 +123,7 @@ class ModelTest(PackageTestCase): import torch_package_importer as resources # server knows to call model.load() to get the model, - # maybe in the future it passes options as arguments by convension + # maybe in the future it passes options as arguments by convention def load(): return resources.load_pickle('model', 'pickled') """ diff --git a/test/profiler/test_execution_trace.py b/test/profiler/test_execution_trace.py index 2cd51136ab4..3a174b1d66a 100644 --- a/test/profiler/test_execution_trace.py +++ b/test/profiler/test_execution_trace.py @@ -43,7 +43,7 @@ from torch.utils._triton import has_triton # This causes an issue in the multithreading test because we check all events # in that test with their tids. The events that correspond to these lingering # threads all have TID of (uint64_t)(-1) which is invalid. -# The work around is turnning off monitoring thread when tqdm is loaded. +# The work around is turning off monitoring thread when tqdm is loaded. # Since these are unit tests, it is safe to turn off monitor thread. try: import tqdm diff --git a/test/profiler/test_profiler.py b/test/profiler/test_profiler.py index e30e0812b1d..b30d25ec9af 100644 --- a/test/profiler/test_profiler.py +++ b/test/profiler/test_profiler.py @@ -82,7 +82,7 @@ if TYPE_CHECKING: # This causes an issue in the multithreading test because we check all events # in that test with their tids. The events that correspond to these lingering # threads all have TID of (uint64_t)(-1) which is invalid. -# The work around is turnning off monitoring thread when tqdm is loaded. +# The work around is turning off monitoring thread when tqdm is loaded. # Since these are unit tests, it is safe to turn off monitor thread. try: import tqdm diff --git a/test/profiler/test_record_function.py b/test/profiler/test_record_function.py index 03a15f29907..26a6c0edf80 100644 --- a/test/profiler/test_record_function.py +++ b/test/profiler/test_record_function.py @@ -21,7 +21,7 @@ from torch.testing._internal.common_utils import run_tests, TestCase # This causes an issue in the multithreading test because we check all events # in that test with their tids. The events that correspond to these lingering # threads all have TID of (uint64_t)(-1) which is invalid. -# The work around is turnning off monitoring thread when tqdm is loaded. +# The work around is turning off monitoring thread when tqdm is loaded. # Since these are unit tests, it is safe to turn off monitor thread. try: import tqdm diff --git a/test/profiler/test_torch_tidy.py b/test/profiler/test_torch_tidy.py index efbd4b8189d..a0f41114e91 100644 --- a/test/profiler/test_torch_tidy.py +++ b/test/profiler/test_torch_tidy.py @@ -20,7 +20,7 @@ from torch.testing._internal.common_utils import run_tests, TestCase # This causes an issue in the multithreading test because we check all events # in that test with their tids. The events that correspond to these lingering # threads all have TID of (uint64_t)(-1) which is invalid. -# The work around is turnning off monitoring thread when tqdm is loaded. +# The work around is turning off monitoring thread when tqdm is loaded. # Since these are unit tests, it is safe to turn off monitor thread. 
try: import tqdm @@ -425,7 +425,7 @@ class TestTorchTidyProfiler(TestCase): self.assertEqual(state[0][0], "momentum_buffer") self.assertEqual(state[0][1].id, weight_momenumtum_id) - # Check that we handle first step (lazy initalization) and steady state. + # Check that we handle first step (lazy initialization) and steady state. check(cold_start=True) check(cold_start=False) diff --git a/test/quantization/bc/test_backward_compatibility.py b/test/quantization/bc/test_backward_compatibility.py index 911c26defe2..01c546a95a5 100644 --- a/test/quantization/bc/test_backward_compatibility.py +++ b/test/quantization/bc/test_backward_compatibility.py @@ -68,7 +68,7 @@ def get_filenames(self, subname): class TestSerialization(TestCase): - """Test backward compatiblity for serialization and numerics""" + """Test backward compatibility for serialization and numerics""" # Copy and modified from TestCase.assertExpected def _test_op( diff --git a/test/quantization/core/experimental/test_linear.py b/test/quantization/core/experimental/test_linear.py index 6a46b4fc3cc..df668248476 100644 --- a/test/quantization/core/experimental/test_linear.py +++ b/test/quantization/core/experimental/test_linear.py @@ -14,7 +14,7 @@ class TestNonUniformObserver(unittest.TestCase): # weight: fp tensor weight = 1000 * torch.rand(4, 4) - # activtion: fp32 tensor with ~ integer values + # activation: fp32 tensor with ~ integer values activation = torch.randint(low=0, high=255, size=(4, 4), dtype=torch.float) # calculate result from calling linear forward method @@ -41,7 +41,7 @@ class TestNonUniformObserver(unittest.TestCase): # weight: fp tensor weight = 1000 * torch.rand(5, 3) - # activtion: fp32 tensor with ~ integer values + # activation: fp32 tensor with ~ integer values # note: transpose of activation matrix will have dimension (3, 5) activation = torch.randint(low=0, high=255, size=(5, 3), dtype=torch.float) diff --git a/test/quantization/core/test_quantized_op.py b/test/quantization/core/test_quantized_op.py index d8a35264f7d..21330aa7fc0 100644 --- a/test/quantization/core/test_quantized_op.py +++ b/test/quantization/core/test_quantized_op.py @@ -225,7 +225,7 @@ class TestQuantizedOps(TestCase): `output_is_observed`: if specified and is True, we'll append extra output_scale/output_zero_point keyword argument when calling quantized op """ - # Retrives the default parameters from X. + # Retrieves the default parameters from X. X, (scale, zero_point, torch_type) = X if not isinstance(X, torch.Tensor): X = torch.from_numpy(X) @@ -3584,7 +3584,7 @@ class TestDynamicQuantizedOps(TestCase): def test_wrapped_fbgemm_pack_gemm_matrix_fp16_pt2_compliant(self): # We are not using opcheck over here because the output for the op we're testing # (_quantized.wrapped_fbgemm_pack_gemm_matrix_fp16) is not deterministic - # due to the C-struct it's procuding. This would fail the check when we're trying + # due to the C-struct it's producing. This would fail the check when we're trying # to match the result between compiled and eager version. 
# # This is only a temporary solution, long term, we should be able to support PT2 @@ -5572,7 +5572,7 @@ class TestQuantizedConv(TestCase): ) act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -5653,7 +5653,7 @@ class TestQuantizedConv(TestCase): ) act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -6084,7 +6084,7 @@ class TestQuantizedConv(TestCase): ) act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -6210,7 +6210,7 @@ class TestQuantizedConv(TestCase): bias=use_bias ) act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -6515,7 +6515,7 @@ class TestQuantizedConv(TestCase): qconv = torch.ops.quantized.conv1d act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -6586,7 +6586,7 @@ class TestQuantizedConv(TestCase): qconv = torch.ops.quantized.conv1d_relu act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) diff --git a/test/quantization/core/test_quantized_tensor.py b/test/quantization/core/test_quantized_tensor.py index 65633dbf37f..d0c00ad61ba 100644 --- a/test/quantization/core/test_quantized_tensor.py +++ b/test/quantization/core/test_quantized_tensor.py @@ -765,7 +765,7 @@ class TestQuantizedTensor(TestCase): qr = torch.quantize_per_tensor(r, scale, zero_point, dtype=dtype) qr = qr.transpose(0, 1) rqr = qr.dequantize() - # compare transpose + dequantized result with orignal transposed result + # compare transpose + dequantized result with original transposed result self.assertTrue(np.allclose(r.cpu().numpy().transpose([1, 0, 2, 3]), rqr.cpu().numpy(), atol=2 / scale)) qr = torch.quantize_per_tensor(r, scale, zero_point, dtype=dtype) @@ -1210,7 +1210,7 @@ class TestQuantizedTensor(TestCase): if device == 'cpu': self.assertFalse(torch.equal(b, c)) - # a case can't view non-contiguos Tensor + # a case can't view non-contiguous Tensor a_int = torch.randint(0, 100, [1, 2, 3, 4], device=device, dtype=dtype) a = torch._make_per_tensor_quantized_tensor(a_int, scale=scale, zero_point=zero_point) b = a.transpose(1, 2) # swaps 2nd and 3rd dimension diff --git a/test/quantization/fx/test_model_report_fx.py b/test/quantization/fx/test_model_report_fx.py index 51bce95e30a..58c88c48734 100644 --- a/test/quantization/fx/test_model_report_fx.py +++ b/test/quantization/fx/test_model_report_fx.py @@ -946,7 +946,7 @@ class TestFxModelReportClass(QuantizationTestCase): model_report = ModelReport(model_prep, test_detector_set) - # prepare the model for callibration + # prepare the model for calibration prepared_for_callibrate_model = model_report.prepare_detailed_calibration() # see whether observers properly in regular nn.Module @@ -985,7 +985,7 @@ class TestFxModelReportClass(QuantizationTestCase): elif 
isinstance(detector, DynamicStaticDetector): self.assertEqual(len(detector_obs_of_interest_fqns), 4) - # ensure that we can prepare for callibration only once + # ensure that we can prepare for calibration only once with self.assertRaises(ValueError): prepared_for_callibrate_model = model_report.prepare_detailed_calibration() @@ -1037,7 +1037,7 @@ class TestFxModelReportClass(QuantizationTestCase): model_full = TwoThreeOps() model_single = TwoThreeOps() - # prepare and callibrate two different instances of same model + # prepare and calibrate two different instances of same model # prepare the model example_input = model_full.get_example_inputs()[0] current_backend = torch.backends.quantized.engine @@ -1052,11 +1052,11 @@ class TestFxModelReportClass(QuantizationTestCase): # initialize another with a single detector set model_report_single = ModelReport(model_prep_single, single_detector_set) - # prepare the models for callibration + # prepare the models for calibration prepared_for_callibrate_model_full = model_report_full.prepare_detailed_calibration() prepared_for_callibrate_model_single = model_report_single.prepare_detailed_calibration() - # now callibrate the two models + # now calibrate the two models num_iterations = 10 for i in range(num_iterations): example_input = torch.tensor(torch.randint(100, (1, 3, 3, 3)), dtype=torch.float) @@ -1109,12 +1109,12 @@ class TestFxModelReportClass(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1162,12 +1162,12 @@ class TestFxModelReportClass(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) - # now we actually callibrate the models + # now we actually calibrate the models example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1192,7 +1192,7 @@ class TestFxModelReportClass(QuantizationTestCase): self.assertEqual(len(qconfig_mapping.module_name_qconfigs), 2) # only two linears, make sure per channel min max for weight since fbgemm - # also static distribution since a simple single callibration + # also static distribution since a simple single calibration for key in qconfig_mapping.module_name_qconfigs: config = qconfig_mapping.module_name_qconfigs[key] self.assertEqual(config.weight, default_per_channel_weight_observer) @@ -1220,12 +1220,12 @@ class TestFxModelReportClass(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) - # now we actually callibrate the models + # now we actually calibrate the models example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1319,7 +1319,7 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase): detector_set = {InputWeightEqualizationDetector(0.5)} - # get tst model and callibrate + # get tst model and calibrate non_fused = 
self._get_prepped_for_calibration_model(self.TwoBlockComplexNet(), detector_set) fused = self._get_prepped_for_calibration_model(self.TwoBlockComplexNet(), detector_set, fused=True) @@ -1365,12 +1365,12 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase): test_input_weight_detector = InputWeightEqualizationDetector(0.4) detector_set = {test_input_weight_detector} model = self.TwoBlockComplexNet() - # prepare the model for callibration + # prepare the model for calibration prepared_for_callibrate_model, model_report = self._get_prepped_for_calibration_model( model, detector_set ) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1430,7 +1430,7 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase): self.assertEqual(global_max, max(dimension_max)) input_ratio = torch.sqrt((per_channel_max - per_channel_min) / (global_max - global_min)) - # ensure comparision stat passed back is sqrt of range ratios + # ensure comparison stat passed back is sqrt of range ratios # need to get the weight ratios first # make sure per channel min and max are as expected @@ -1474,10 +1474,10 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase): test_input_weight_detector = InputWeightEqualizationDetector(0.4) detector_set = {test_input_weight_detector} model = self.ReluOnly() - # prepare the model for callibration + # prepare the model for calibration prepared_for_callibrate_model, model_report = self._get_prepped_for_calibration_model(model, detector_set) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1531,7 +1531,7 @@ class TestFxDetectOutliers(QuantizationTestCase): def _get_prepped_for_calibration_model(self, model, detector_set, use_outlier_data=False): r"""Returns a model that has been prepared for callibration and corresponding model_report""" - # call the general helper function to callibrate + # call the general helper function to calibrate example_input = model.get_example_inputs()[0] # if we specifically want to test data with outliers replace input @@ -1550,7 +1550,7 @@ class TestFxDetectOutliers(QuantizationTestCase): detector_set = {OutlierDetector(reference_percentile=0.95)} - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model( self.LargeBatchModel(param_size=128), detector_set ) @@ -1594,12 +1594,12 @@ class TestFxDetectOutliers(QuantizationTestCase): detector_set = {outlier_detector, dynamic_static_detector} model = self.LargeBatchModel(param_size=param_size) - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model( model, detector_set ) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1644,12 +1644,12 @@ class TestFxDetectOutliers(QuantizationTestCase): detector_set = {outlier_detector} model = self.LargeBatchModel(param_size=param_size) - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model( model, detector_set ) - # now we actually callibrate the model + # now we actually calibrate the model 
example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1694,16 +1694,16 @@ class TestFxDetectOutliers(QuantizationTestCase): detector_set = {outlier_detector} model = self.LargeBatchModel(param_size=param_size) - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model( model, detector_set, use_outlier_data=True ) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_outlier_inputs()[0] example_input = example_input.to(torch.float) - # now callibrate minimum 30 times to make it above minimum threshold + # now calibrate minimum 30 times to make it above minimum threshold for i in range(30): example_input = model.get_outlier_inputs()[0] example_input = example_input.to(torch.float) @@ -1764,7 +1764,7 @@ class TestFxModelReportVisualizer(QuantizationTestCase): r""" Callibrates the passed in model, generates report, and returns the visualizer """ - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1796,7 +1796,7 @@ class TestFxModelReportVisualizer(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) @@ -1843,7 +1843,7 @@ class TestFxModelReportVisualizer(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) @@ -1953,7 +1953,7 @@ def _get_prepped_for_calibration_model_helper(model, detector_set, example_input model_report = ModelReport(model_prep, detector_set) - # prepare the model for callibration + # prepare the model for calibration prepared_for_callibrate_model = model_report.prepare_detailed_calibration() return (prepared_for_callibrate_model, model_report) diff --git a/test/quantization/fx/test_quantize_fx.py b/test/quantization/fx/test_quantize_fx.py index f6f1128e422..c54c741bcec 100644 --- a/test/quantization/fx/test_quantize_fx.py +++ b/test/quantization/fx/test_quantize_fx.py @@ -1221,7 +1221,7 @@ class TestQuantizeFx(QuantizationTestCase): def checkSerDeser(model, is_dynamic): for module_name in ("linear", "conv"): if hasattr(model, module_name): - # make sure seralization works + # make sure serialization works state_dict = copy.deepcopy(model.state_dict()) all_keys = _get_keys(module_name, is_dynamic) for key in all_keys: @@ -1484,7 +1484,7 @@ class TestQuantizeFx(QuantizationTestCase): def checkSerDeser(model, is_dynamic): module_name = "deconv" if hasattr(model, module_name): - # make sure seralization works + # make sure serialization works state_dict = copy.deepcopy(model.state_dict()) all_keys = _get_keys(module_name, is_dynamic) for key in all_keys: @@ -1569,7 +1569,7 @@ class TestQuantizeFx(QuantizationTestCase): def checkSerDeser(model, is_dynamic): module_name = "deconv" if hasattr(model, module_name): - # make sure seralization works + # make sure serialization works state_dict = copy.deepcopy(model.state_dict()) all_keys = _get_keys(module_name, is_dynamic) for key in all_keys: diff --git a/test/quantization/jit/test_quantize_jit.py b/test/quantization/jit/test_quantize_jit.py 
index c71f7182b70..ec7618fb551 100644 --- a/test/quantization/jit/test_quantize_jit.py +++ b/test/quantization/jit/test_quantize_jit.py @@ -2926,7 +2926,7 @@ class TestQuantizeJitOps(QuantizationTestCase): m._c, "forward", {"": qconfig}, inplace=False ) ) - # Checking the model before fianlize contain unfused patterns + # Checking the model before finalize contain unfused patterns # that numerically matches the model after quantize by checking # number of aten::quantize_per_tensor functions # conv has 3 quantize_per_tensor for activations and 1 for weight diff --git a/test/quantization/pt2e/test_quantize_pt2e.py b/test/quantization/pt2e/test_quantize_pt2e.py index f6d3eae2332..25db7d97d9d 100644 --- a/test/quantization/pt2e/test_quantize_pt2e.py +++ b/test/quantization/pt2e/test_quantize_pt2e.py @@ -1682,7 +1682,7 @@ class TestQuantizePT2E(PT2EQuantizationTestCase): qconfig_mapping.set_object_type(torch.nn.Linear, dynamic_qconfig) # Had to turn off check against fx because fx quant workflow does not seem # to propagate observers for permute node for this model. - # Suprisingly it does propagate it for EmbeddingConvLinearModule + # Surprisingly it does propagate it for EmbeddingConvLinearModule # TODO: Figure out the right behavior for propagation self._test_quantizer( m_eager, @@ -2253,7 +2253,7 @@ class TestQuantizePT2E(PT2EQuantizationTestCase): model = prepare_qat_pt2e(model, composed_quantizer) cur = time.time() # print("prepare time:", cur - prev) - # Without Calibraiton, scale/zero value will have an initialized value of 1.0 + # Without Calibration, scale/zero value will have an initialized value of 1.0 # Per channel quantization needs a proper scale/zero shape/value to work properly. # So we need to run calibration before converting to quantized model. 
model(*example_inputs) diff --git a/test/quantization/pt2e/test_x86inductor_quantizer.py b/test/quantization/pt2e/test_x86inductor_quantizer.py index 9e2e690c21d..dfd591cb941 100644 --- a/test/quantization/pt2e/test_x86inductor_quantizer.py +++ b/test/quantization/pt2e/test_x86inductor_quantizer.py @@ -2464,11 +2464,11 @@ class TestQuantizePT2EX86Inductor(X86InductorQuantTestCase): torch.ops.quantized_decomposed.dequantize_per_channel.default: 2, } node_list = [ - # Q/DQ for first lienar + # Q/DQ for first linear torch.ops.quantized_decomposed.quantize_per_tensor.default, torch.ops.quantized_decomposed.dequantize_per_tensor.default, torch.ops.aten.linear.default, - # Q/DQ for second lienar + # Q/DQ for second linear torch.ops.quantized_decomposed.quantize_per_tensor.default, torch.ops.quantized_decomposed.dequantize_per_tensor.default, torch.ops.aten.linear.default, diff --git a/test/quantization/pt2e/test_xnnpack_quantizer.py b/test/quantization/pt2e/test_xnnpack_quantizer.py index 3baec3f8004..6b9acaaf741 100644 --- a/test/quantization/pt2e/test_xnnpack_quantizer.py +++ b/test/quantization/pt2e/test_xnnpack_quantizer.py @@ -1062,7 +1062,7 @@ class TestXNNPACKQuantizerModels(PT2EQuantizationTestCase): # the result matches exactly after prepare # Note: this currently will always be true since we are inserting observers # the check becomes useful when we add qat examples - # but we can still manully inspect the printed observers to make sure + # but we can still manually inspect the printed observers to make sure # it matches self.assertEqual(after_prepare_result, after_prepare_result_fx) self.assertEqual( diff --git a/test/run_test.py b/test/run_test.py index ca17754b33f..4b7030d4615 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -1626,7 +1626,7 @@ def get_selected_tests(options) -> list[str]: if options.xpu: selected_tests = exclude_tests(XPU_BLOCKLIST, selected_tests, "on XPU") else: - # Exclude all xpu specifc tests otherwise + # Exclude all xpu specific tests otherwise options.exclude.extend(XPU_TEST) # Filter to only run onnx tests when --onnx option is specified diff --git a/test/test_autograd.py b/test/test_autograd.py index bebe89e0965..ee6d9c09282 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -5896,7 +5896,7 @@ Done""", @staticmethod def backward(ctx, grad): - # Create a sparse tensor with non-contigous indices and values + # Create a sparse tensor with non-contiguous indices and values # and return as grad. 
                v = torch.rand(1, 3)
                i = torch.ones(1, 1, dtype=torch.long)
diff --git a/test/test_fx.py b/test/test_fx.py
index 6d581f9d41d..4c4a6d8c619 100644
--- a/test/test_fx.py
+++ b/test/test_fx.py
@@ -204,7 +204,7 @@ def side_effect_func(x: torch.Tensor):
 class TestFX(JitTestCase):
     def setUp(self):
         super().setUp()
-        # Checking for mutable operations whil tracing is feature flagged
+        # Checking for mutable operations while tracing is feature flagged
         # Enable it in testing but not by default
         self.orig_tracer_mutable_flag = (
             torch.fx.proxy.TracerBase.check_mutable_operations
@@ -4198,7 +4198,7 @@ def run_getitem_target():
 class TestOperatorSignatures(JitTestCase):
     def setUp(self):
-        # Checking for mutable operations whil tracing is feature flagged
+        # Checking for mutable operations while tracing is feature flagged
         # Enable it in testing but not by default
         self.orig_tracer_mutable_flag = (
             torch.fx.proxy.TracerBase.check_mutable_operations
@@ -4241,7 +4241,7 @@ class TestFXAPIBackwardCompatibility(JitTestCase):
         super().setUp()
         self.maxDiff = None
-        # Checking for mutable operations whil tracing is feature flagged
+        # Checking for mutable operations while tracing is feature flagged
         # Enable it in testing but not by default
         self.orig_tracer_mutable_flag = (
             torch.fx.proxy.TracerBase.check_mutable_operations
@@ -4597,7 +4597,7 @@ class TestFXAPIBackwardCompatibility(JitTestCase):
 class TestFunctionalTracing(JitTestCase):
     def setUp(self):
         super().setUp()
-        # Checking for mutable operations whil tracing is feature flagged
+        # Checking for mutable operations while tracing is feature flagged
         # Enable it in testing but not by default
         self.orig_tracer_mutable_flag = (
             torch.fx.proxy.TracerBase.check_mutable_operations
diff --git a/test/test_indexing.py b/test/test_indexing.py
index 99d84a65abc..cca7a21165d 100644
--- a/test/test_indexing.py
+++ b/test/test_indexing.py
@@ -247,7 +247,7 @@ class TestIndexing(TestCase):
            x[ri([0, 2, 4]),], torch.tensor([5, 4, 3], dtype=dtype, device=device)
        )
-        # Only validates indexing and setting for Halfs
+        # Only validates indexing and setting for Halves
        if dtype == torch.half:
            reference = consec((10,))
            validate_indexing(reference)
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 16bbf47ec48..01a6dd5c8ec 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -4842,7 +4842,7 @@ class TestLinalg(TestCase):
        self.assertTrue(torch.cuda.tunable.record_untuned_is_enabled())
        make_arg = partial(make_tensor, device=device, dtype=dtype)
-        # offline tuning only handles matmuls on two dimensionsal tensors
+        # offline tuning only handles matmuls on two dimensional tensors
        # matmul that require broadcasting are
        # not supported either.
        # Below we check the different transA and transB combinations.
@@ -4871,7 +4871,7 @@ class TestLinalg(TestCase):
                continue
            # offline tuning only handles batched matmuls on
-            # three dimensionsal tensors
+            # three dimensional tensors
            # matmul that require broadcasting are
            # not supported either.
            # Below we check the different transA and transB combinations.
diff --git a/test/test_mkldnn.py b/test/test_mkldnn.py
index e2ec92fc8da..4e1ef44bb31 100644
--- a/test/test_mkldnn.py
+++ b/test/test_mkldnn.py
@@ -1520,7 +1520,7 @@ class TestMkldnn(TestCase):
        h = torch.randn(num_layers * num_directions, batch_size, hidden_size, dtype=torch.float32)
        c = torch.randn(num_layers * num_directions, batch_size, hidden_size, dtype=torch.float32)
        if fp16:
-            # TODO add traing support when oneDNN support lstm FP16 training
+            # TODO add training support when oneDNN supports lstm FP16 training
            training = False
        model = torch.nn.LSTM(input_size, hidden_size, num_layers, bidirectional=bidirectional, bias=bias, dropout=dropout, batch_first=batch_first).float()
diff --git a/test/test_modules.py b/test/test_modules.py
index e587c67815c..2f881c89b78 100644
--- a/test/test_modules.py
+++ b/test/test_modules.py
@@ -328,7 +328,7 @@ class TestModule(TestCase):
    def _retain_grad(self, obj):
        # gradients needs to be retained to check for grad. This is useful when
-        # non-leafs are present in the graph.
+        # non-leaves are present in the graph.
        def inner_retain_grad(obj):
            if obj.requires_grad:
                obj.retain_grad()
diff --git a/test/test_mps.py b/test/test_mps.py
index 3dd4c8261ff..83d5b46d468 100644
--- a/test/test_mps.py
+++ b/test/test_mps.py
@@ -7842,7 +7842,7 @@ class TestMPS(TestCaseMPS):
        shape = (2, 3, 4, 5, 6)
        x = torch.rand(shape, device="mps")
        self.assertNotEqual(x[0], x[1])
-        # Check that normal distributino is not affected by the same
+        # Check that normal distribution is not affected by the same
        y = torch.normal(torch.zeros(shape, device="mps"), torch.ones(shape, device="mps"))
        self.assertNotEqual(y[0], y[1])
@@ -12644,7 +12644,7 @@ class TestConsistency(TestCaseMPS):
        self.assertEqual(out_mps, out_cpu)
    def test_fmax_mixed_dtypes(self, device):
-        # Regression tesing for https://github.com/pytorch/pytorch/issues/149951
+        # Regression testing for https://github.com/pytorch/pytorch/issues/149951
        # fmax and fmin are implemented as binary metal shaders and they were implemented
        # with the assumption that both args have the same dtype
        x = torch.rand((3, 3), device=device, dtype=torch.float32)
diff --git a/test/test_nn.py b/test/test_nn.py
index cb755992ffc..eac0d887c42 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -1809,17 +1809,17 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""")
            num_params - 1,
        )
-        # Removing the weight norm reparametrization restores the Parameter
+        # Removing the weight norm reparameterization restores the Parameter
        l = torch.nn.utils.remove_weight_norm(l, name=name)
        self.assertEqual(
            sum(isinstance(p, torch.nn.Parameter) for p in l._flat_weights),
            num_params,
        )
-        # Make sure that, upon removal of the reparametrization, the
+        # Make sure that, upon removal of the reparameterization, the
        # `._parameters` and `.named_parameters` contain the right params.
        # Specifically, the original weight ('weight_ih_l0') should be placed
-        # back in the parameters, while the reparametrization components
+        # back in the parameters, while the reparameterization components
        # ('weight_ih_l0_v' and 'weight_ih_l0_g') should be removed.
        self.assertTrue(name in l._parameters)
        self.assertIsNotNone(l._parameters[name])
@@ -7308,7 +7308,7 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""")
            count_tensor
        )
-        # Test batch_norm_backward_elemt gives the same answer for all
+        # Test batch_norm_backward_element gives the same answer for all
        # combinations of contiguous as channels_last input
        for a, b in [
            (torch.channels_last, torch.contiguous_format),
diff --git a/test/test_ops.py b/test/test_ops.py
index 3ec023f3d67..7427de04bf8 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -2647,7 +2647,7 @@ fake_skips = (
    "linalg.eigvals",  # The tensor has a non-zero number of elements, but its data is not allocated yet
    "linalg.eigvalsh",  # aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend
    "linalg.matrix_power",  # Could not run 'aten::eye.m_out' with arguments from the 'Meta' backend
-    # "linalg.pinv",  # Could not run 'aten::pinv.out' with arguments from the 'Meta' backen
+    # "linalg.pinv",  # Could not run 'aten::pinv.out' with arguments from the 'Meta' backend
    "linalg.matrix_rank.hermitian",  # Could not run 'aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend
    "linalg.pinv.hermitian",  # tensor.mH is only supported on matrices or batches of matrices. Got 1-D tensor
    "linalg.solve",  # Could not run 'aten::linalg_solve' with arguments from the 'Meta' backend
diff --git a/test/test_python_dispatch.py b/test/test_python_dispatch.py
index 98fbabff11e..515ce435b72 100644
--- a/test/test_python_dispatch.py
+++ b/test/test_python_dispatch.py
@@ -2520,7 +2520,7 @@ def forward(self, x_1):
                self.last_args = args
                return func(*args, **kwargs)
-        # Value that could not be intepreted as signed int64
+        # Value that could not be interpreted as signed int64
        uarg = 2**63 + 1
        with DummyMode() as m:
            a = torch.full((3, 3), uarg, dtype=torch.uint64)
diff --git a/test/test_quantization.py b/test/test_quantization.py
index 01006e3f6e2..c36c20bb0ca 100644
--- a/test/test_quantization.py
+++ b/test/test_quantization.py
@@ -51,7 +51,7 @@ from quantization.eager.test_quantize_eager_qat import TestQuantizeEagerQAT  # n
 from quantization.eager.test_quantize_eager_qat import TestQuantizeEagerQATNumerics  # noqa: F401
 # 3. Eager mode fusion passes
 from quantization.eager.test_fuse_eager import TestFuseEager  # noqa: F401
-# 4. Testing model numerics between quanitzed and FP32 models
+# 4. Testing model numerics between quantized and FP32 models
 from quantization.eager.test_model_numerics import TestModelNumericsEager  # noqa: F401
 # 5. Tooling: numeric_suite
 from quantization.eager.test_numeric_suite_eager import TestNumericSuiteEager  # noqa: F401
diff --git a/test/test_scaled_matmul_cuda.py b/test/test_scaled_matmul_cuda.py
index 4fb48fe6ccf..204153e971b 100644
--- a/test/test_scaled_matmul_cuda.py
+++ b/test/test_scaled_matmul_cuda.py
@@ -426,7 +426,7 @@ def data_to_nvfp4_with_global_scale(x, block_size):
    # Per-tensor max
    global_max = x.abs().max()
-    # Contants
+    # Constants
    # Global encoding scale for block-scales
    S_enc = FP4_MAX_VAL * F8E4M3_MAX_VAL / global_max
    S_dec = 1. / S_enc
diff --git a/test/test_shape_ops.py b/test/test_shape_ops.py
index b8bb32b658f..24c8122d5ae 100644
--- a/test/test_shape_ops.py
+++ b/test/test_shape_ops.py
@@ -262,7 +262,7 @@ class TestShapeOps(TestCase):
            expected = xn.diagonal(*args)
            self.assertEqual(expected.shape, result.shape)
            self.assertEqual(expected, result)
-        # test non-continguous
+        # test non-contiguous
        xp = x.permute(1, 2, 3, 0)
        result = torch.diagonal(xp, 0, -2, -1)
        expected = xp.numpy().diagonal(0, -2, -1)
diff --git a/test/test_sparse.py b/test/test_sparse.py
index 7776cf8abbf..5150dab4b7c 100644
--- a/test/test_sparse.py
+++ b/test/test_sparse.py
@@ -1333,7 +1333,7 @@ class TestSparse(TestSparseBase):
        res_sparse = t.to_sparse().index_select(0, idx_empty)
        self.assertEqual(res_dense, res_sparse)
-        # non-contigous index
+        # non-contiguous index
        idx = torch.randint(low=0, high=5, size=(10, 2), device=device)[:, 0]
        def run_test(sizes):
diff --git a/test/test_stateless.py b/test/test_stateless.py
index d24194ed460..e8217f2caea 100644
--- a/test/test_stateless.py
+++ b/test/test_stateless.py
@@ -186,7 +186,7 @@ class TestStatelessFunctionalAPI(TestCase):
        cur_rm = module.running_mean
        self.assertEqual(cur_rm, prev_rm)
        self.assertEqual(rm, torch.full((10,), 12.8))
-        # Now run functional without reparametrization and check that the module has
+        # Now run functional without reparameterization and check that the module has
        # been updated
        functional_call(module, {}, x)
        self.assertEqual(module.running_mean, torch.full((10,), 12.8))
diff --git a/test/test_tensorexpr.py b/test/test_tensorexpr.py
index 57be409ab6b..628e45ed8eb 100644
--- a/test/test_tensorexpr.py
+++ b/test/test_tensorexpr.py
@@ -705,7 +705,7 @@ class TestTensorExprFuser(BaseTestClass):
        # d = to_bf16(to_fp32(a) + to_fp32(b) + to_fp32(c))
        # Hence, we simulate NNC computation by feeding fp32 tensors and converting
        # the result tensor back to bf16. The simulation could avoid the numeric
-        # deviation to simplify the result comprasion
+        # deviation to simplify the result comparison
        y = warmup_and_run_forward(traced, rand_a.float(), rand_b.float())
        if torch_fn not in cmp_fns:
            y = y.bfloat16()
diff --git a/test/test_transformers.py b/test/test_transformers.py
index 2dae5e5da11..4dea4312469 100644
--- a/test/test_transformers.py
+++ b/test/test_transformers.py
@@ -4320,8 +4320,8 @@ class TestSDPAXpuOnly(NNTestCase):
        _ = F.scaled_dot_product_attention(q, k, v)
    def test_default_priority_order(self, device):
-        # The default priority order of xpu is overrideable, math, flash, efficient, cudnn
-        # For xpu backend, we need to make sure that overrideable > math > flash
+        # The default priority order of xpu is overridable, math, flash, efficient, cudnn
+        # For xpu backend, we need to make sure that overridable > math > flash
        dtype = torch.bfloat16
        shape = SdpaShape(1, 1, 1, 1)
        make_tensor = partial(torch.rand, shape, device=device, dtype=dtype)
diff --git a/test/torch_np/numpy_tests/core/test_dtype.py b/test/torch_np/numpy_tests/core/test_dtype.py
index 13e42f4b7b4..19b41d877ca 100644
--- a/test/torch_np/numpy_tests/core/test_dtype.py
+++ b/test/torch_np/numpy_tests/core/test_dtype.py
@@ -87,7 +87,7 @@ class TestBuiltin(TestCase):
        assert_raises(TypeError, np.dtype, "l8")
        assert_raises(TypeError, np.dtype, "L8")
-        # XXX: what is 'q'? on my 64-bit ubuntu maching it's int64, same as 'l'
+        # XXX: what is 'q'? on my 64-bit ubuntu machine it's int64, same as 'l'
        # if np.dtype('q').itemsize == 8:
        # assert_raises(TypeError, np.dtype, 'q4')
        # assert_raises(TypeError, np.dtype, 'Q4')
diff --git a/test/torch_np/numpy_tests/core/test_einsum.py b/test/torch_np/numpy_tests/core/test_einsum.py
index a2810808682..45c1d974748 100644
--- a/test/torch_np/numpy_tests/core/test_einsum.py
+++ b/test/torch_np/numpy_tests/core/test_einsum.py
@@ -976,7 +976,7 @@ class TestEinsum(TestCase):
        # Test originally added to cover broken float16 path: gh-20305
        # Likely most are covered elsewhere, at least partially.
        dtype = np.dtype(dtype)
-        # Simple test, designed to excersize most specialized code paths,
+        # Simple test, designed to exercise most specialized code paths,
        # note the +0.5 for floats. This makes sure we use a float value
        # where the results must be exact.
        arr = (np.arange(7) + 0.5).astype(dtype)
@@ -1160,7 +1160,7 @@ class TestEinsum(TestCase):
    @xfail  # (reason="order='F' not supported")
    def test_output_order(self):
        # Ensure output order is respected for optimize cases, the below
-        # conraction should yield a reshaped tensor view
+        # contraction should yield a reshaped tensor view
        # gh-16415
        a = np.ones((2, 3, 5), order="F")
diff --git a/test/torch_np/numpy_tests/core/test_indexing.py b/test/torch_np/numpy_tests/core/test_indexing.py
index 91dae968683..16d89c03219 100644
--- a/test/torch_np/numpy_tests/core/test_indexing.py
+++ b/test/torch_np/numpy_tests/core/test_indexing.py
@@ -375,7 +375,7 @@ class TestIndexing(TestCase):
        assert_array_equal(a[idx], idx)
        # this case must not go into the fast path, note that idx is
-        # a non-contiuguous none 1D array here.
+        # a non-contiguous none 1D array here.
        a[idx] = -1
        res = np.arange(6)
        res[0] = -1
diff --git a/test/torch_np/numpy_tests/core/test_multiarray.py b/test/torch_np/numpy_tests/core/test_multiarray.py
index ba19b62e821..fc2c3435907 100644
--- a/test/torch_np/numpy_tests/core/test_multiarray.py
+++ b/test/torch_np/numpy_tests/core/test_multiarray.py
@@ -900,7 +900,7 @@ class TestScalarIndexing(TestCase):
        assert_raises(IndexError, subscript, a, (np.newaxis, 0))
-        # this assersion fails because 50 > NPY_MAXDIMS = 32
+        # this assertion fails because 50 > NPY_MAXDIMS = 32
        # assert_raises(IndexError, subscript, a, (np.newaxis,)*50)
    @xfail  # (reason="pytorch disallows overlapping assignments")
@@ -3283,7 +3283,7 @@ class TestArgmax(TestCase):
        ([np.nan, 0, 1, 2, 3], 0),
        ([np.nan, 0, np.nan, 2, 3], 0),
        # To hit the tail of SIMD multi-level(x4, x1) inner loops
-        # on variant SIMD widthes
+        # on variant SIMD widths
        ([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1),
        ([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1),
        ([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1),
@@ -3392,7 +3392,7 @@ class TestArgmin(TestCase):
        ([np.nan, 0, 1, 2, 3], 0),
        ([np.nan, 0, np.nan, 2, 3], 0),
        # To hit the tail of SIMD multi-level(x4, x1) inner loops
-        # on variant SIMD widthes
+        # on variant SIMD widths
        ([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1),
        ([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1),
        ([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1),
diff --git a/test/torch_np/numpy_tests/core/test_numerictypes.py b/test/torch_np/numpy_tests/core/test_numerictypes.py
index f8ec84069b6..29b42de7c07 100644
--- a/test/torch_np/numpy_tests/core/test_numerictypes.py
+++ b/test/torch_np/numpy_tests/core/test_numerictypes.py
@@ -30,7 +30,7 @@ skip = functools.partial(skipif, True)
 @xpassIfTorchDynamo_np  # (
-# reason="We do not disctinguish between scalar and array types."
+# reason="We do not distinguish between scalar and array types."
# " Thus, scalars can upcast arrays." # ) class TestCommonType(TestCase): diff --git a/test/torch_np/numpy_tests/lib/test_function_base.py b/test/torch_np/numpy_tests/lib/test_function_base.py index 13dba55837c..7256d81e4c6 100644 --- a/test/torch_np/numpy_tests/lib/test_function_base.py +++ b/test/torch_np/numpy_tests/lib/test_function_base.py @@ -3361,42 +3361,42 @@ class TestPercentile(TestCase): assert_equal(np.percentile(a, 0.3), np.nan) assert_equal(np.percentile(a, 0.3).ndim, 0) - # axis0 zerod + # axis0 zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 0) b[2, 3] = np.nan b[1, 2] = np.nan assert_equal(np.percentile(a, 0.3, 0), b) - # axis0 not zerod + # axis0 not zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 0) b[:, 2, 3] = np.nan b[:, 1, 2] = np.nan assert_equal(np.percentile(a, [0.3, 0.6], 0), b) - # axis1 zerod + # axis1 zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 1) b[1, 3] = np.nan b[1, 2] = np.nan assert_equal(np.percentile(a, 0.3, 1), b) - # axis1 not zerod + # axis1 not zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 1) b[:, 1, 3] = np.nan b[:, 1, 2] = np.nan assert_equal(np.percentile(a, [0.3, 0.6], 1), b) - # axis02 zerod + # axis02 zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, (0, 2)) b[1] = np.nan b[2] = np.nan assert_equal(np.percentile(a, 0.3, (0, 2)), b) - # axis02 not zerod + # axis02 not zeroed b = np.percentile( np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], (0, 2) ) b[:, 1] = np.nan b[:, 2] = np.nan assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b) - # axis02 not zerod with method='nearest' + # axis02 not zeroed with method='nearest' b = np.percentile( np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], diff --git a/test/torch_np/test_ndarray_methods.py b/test/torch_np/test_ndarray_methods.py index f94b03f1f6e..b25faac56cb 100644 --- a/test/torch_np/test_ndarray_methods.py +++ b/test/torch_np/test_ndarray_methods.py @@ -399,7 +399,7 @@ class TestArgmax(TestCase): ([np.nan, 0, 1, 2, 3], 0), ([np.nan, 0, np.nan, 2, 3], 0), # To hit the tail of SIMD multi-level(x4, x1) inner loops - # on variant SIMD widthes + # on variant SIMD widths ([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1), ([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1), ([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1), @@ -534,7 +534,7 @@ class TestArgmin(TestCase): ([np.nan, 0, 1, 2, 3], 0), ([np.nan, 0, np.nan, 2, 3], 0), # To hit the tail of SIMD multi-level(x4, x1) inner loops - # on variant SIMD widthes + # on variant SIMD widths ([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1), ([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1), ([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1), diff --git a/test/torch_np/test_scalars_0D_arrays.py b/test/torch_np/test_scalars_0D_arrays.py index 24bed1c239b..0a21e5fb97b 100644 --- a/test/torch_np/test_scalars_0D_arrays.py +++ b/test/torch_np/test_scalars_0D_arrays.py @@ -68,7 +68,7 @@ class TestArrayScalars(TestCase): assert product.shape == (3,) assert_equal(product, [42, 42 * 2, 42 * 3]) - # repeat with right-mulitply + # repeat with right-multiply product = lst * value assert isinstance(product, np.ndarray) assert product.shape == (3,) diff --git a/test/typing/pass/arithmetic_ops.py b/test/typing/pass/arithmetic_ops.py index 556ef90523e..f0d6cc6fd9f 100644 --- a/test/typing/pass/arithmetic_ops.py +++ b/test/typing/pass/arithmetic_ops.py @@ -19,7 +19,7 @@ assert_type(-TENSOR, Tensor) assert_type(~TENSOR, Tensor) # -# Binary ops that 
+# Binary ops that return a boolean
 #
 # Operator ==