diff --git a/test/conftest.py b/test/conftest.py index 078e4b3b2b8..de5818bda8f 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -238,7 +238,7 @@ def pytest_pycollect_makemodule(module_path, path, parent) -> Module: @pytest.hookimpl(hookwrapper=True) def pytest_report_teststatus(report, config): - # Add the test time to the verbose output, unforunately I don't think this + # Add the test time to the verbose output, unfortunately I don't think this # includes setup or teardown pluggy_result = yield if not isinstance(report, pytest.TestReport): diff --git a/test/cpp/api/autograd.cpp b/test/cpp/api/autograd.cpp index b7e75acb659..b3d47b71e9a 100644 --- a/test/cpp/api/autograd.cpp +++ b/test/cpp/api/autograd.cpp @@ -584,7 +584,7 @@ TEST(CustomAutogradTest, MarkDirty) { } }; - // Clone here because modifying leafs inplace is not allowed + // Clone here because modifying leaves inplace is not allowed auto x = torch::randn({5, 5}, torch::requires_grad()).clone(); auto version_before = x._version(); auto out = MyFunction::apply(x); diff --git a/test/cpp/api/parallel.cpp b/test/cpp/api/parallel.cpp index 1ec7c463a59..0dab78c3a28 100644 --- a/test/cpp/api/parallel.cpp +++ b/test/cpp/api/parallel.cpp @@ -264,7 +264,7 @@ TEST_F(ParallelTest, DataParallelNumericalEquivalence_MultiCUDA) { input += i; input_dp += i; - // non-prallel training + // non-parallel training torch::optim::SGD optim(model->parameters(), torch::optim::SGDOptions(0.1)); auto output = model->forward(input); auto loss = torch::mse_loss(output, torch::zeros_like(output)); diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/README.md b/test/cpp_extensions/open_registration_extension/torch_openreg/README.md index 3fcc0f1c211..422aa4c8da2 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/README.md +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/README.md @@ -188,7 +188,7 @@ Please refer to [this](https://docs.pytorch.org/docs/main/accelerator/index.html - Device-agnostic APIs - Memory Management - Generator - - Distrubuted + - Distributed - Custom Tensor&Storage - ... - **Improve Tests**: Add more test cases related to the integration mechanism. diff --git a/test/distributed/checkpoint/test_planner.py b/test/distributed/checkpoint/test_planner.py index 86bed29de99..16f7089206e 100644 --- a/test/distributed/checkpoint/test_planner.py +++ b/test/distributed/checkpoint/test_planner.py @@ -216,7 +216,7 @@ class TestSavePlan(TestCase): # Number of plans should remain unchanged self.assertEqual(len(all_plans), len(deduped_plans)) - # Numer of items in the deduped plans should be less than the original plans + # Number of items in the deduped plans should be less than the original plans for new_plan, old_plan in zip(deduped_plans, all_plans): self.assertFalse(_compare_save_plans(new_plan, old_plan)) self.assertTrue(len(new_plan.items) < len(old_plan.items)) diff --git a/test/distributed/tensor/test_attention.py b/test/distributed/tensor/test_attention.py index 66d80f60455..64d86ba3c12 100644 --- a/test/distributed/tensor/test_attention.py +++ b/test/distributed/tensor/test_attention.py @@ -158,7 +158,7 @@ class RingAttentionTest(DTensorTestBase): # parameters because when require_grad is True, resize_ is not # allowed. But requires_grad of cp_q, cp_k, and cp_v are False # now. So we can just use context_parallel() to shard q, k, v. - # In reality, context_paralle() should be used to shard the input. + # In reality, context_parallel() should be used to shard the input. 
# In reality, context_parallel() should only be used to shard # the model inputs (batch). @@ -701,7 +701,7 @@ class CPFlexAttentionTest(DTensorTestBase): ) # TODO: change this for-loop to run_subtests - # Use a for-loop instead of run_subtests because we need to intialize the mask + # Use a for-loop instead of run_subtests because we need to initialize the mask # for each subtest. This can be baked into self._test_cp_flex_attention as # a str argument denoting mask type. for batch_size, max_seq_len, lb_type in itertools.product( diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index c74f8143cc8..c117bc810b1 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -4901,7 +4901,7 @@ class NCCLTraceTest(NCCLTraceTestBase): for p2p_op_idx, input_sizes in zip( range(first_op, coalesced_op, 1), op_sizes_per_coalesce ): - # the indivudal ops inside the coalescing group the individual op metadata, + # the individual ops inside the coalescing group the individual op metadata, # but not the timing info coming from the actual coalesced kernel profiling_name = ( "nccl:recv 0<-1" if self.rank == 0 else "nccl:send 1->0" diff --git a/test/distributed/test_nvshmem.py b/test/distributed/test_nvshmem.py index 8cee8b2a046..8c6d40ced07 100644 --- a/test/distributed/test_nvshmem.py +++ b/test/distributed/test_nvshmem.py @@ -398,7 +398,7 @@ class NVSHMEMAll2AllTest(MultiProcContinuousTest): nsplits, dtype=torch.int64, device=self.device ).copy_(inp_splits) # 2 rows: output splits, output offsets - # Initiallizing all values to -1 to check if they are updated + # Initializing all values to -1 to check if they are updated out_splits_offsets = symm_mem.empty( (2, nsplits), dtype=torch.int64, device=self.device ).fill_(-1) @@ -503,7 +503,7 @@ class NVSHMEMAll2AllTest(MultiProcContinuousTest): (2, nsplits), dtype=torch.int64, device=self.device ) # 2 rows: output splits, output offsets - # Initiallizing all values to -1 to check if they are updated + # Initializing all values to -1 to check if they are updated out_splits_offsets = symm_mem.empty( (2, nsplits), dtype=torch.int64, device=self.device ).fill_(-1) @@ -617,7 +617,7 @@ def dispatch_then_combine(device, align: int, group) -> None: inp_splits ) # 2 rows: output splits, output offsets - # Initiallizing all values to -1 to check if they are updated + # Initializing all values to -1 to check if they are updated out_splits_offsets = symm_mem.empty( (2, nsplits), dtype=torch.int64, device=device ).fill_(-1) @@ -625,7 +625,7 @@ def dispatch_then_combine(device, align: int, group) -> None: # Buffers for combine combine_out = symm_mem.empty(max_out_numel, dtype=dtype, device=device).fill_(-1) # 2 rows: output splits, output offsets - # Initiallizing all values to -1 to check if they are updated + # Initializing all values to -1 to check if they are updated combine_out_splits_offsets = symm_mem.empty( (2, nsplits), dtype=torch.int64, device=device ).fill_(-1) diff --git a/test/distributed/test_symmetric_memory.py b/test/distributed/test_symmetric_memory.py index 1ea6faebf66..f589339f194 100644 --- a/test/distributed/test_symmetric_memory.py +++ b/test/distributed/test_symmetric_memory.py @@ -274,7 +274,7 @@ class SymmetricMemoryTest(MultiProcContinuousTest): self.assertTrue(buf.eq(peer_rank + world.size() // 2).all()) -# We move AsyncTP tests to a seperate test suite because 1) Async TP ops are not +# We move AsyncTP tests to a separate test suite because 1) Async TP ops are not # the core 
symmetric memory APIs, they are more like applications, 2) # MultiProcContinuousTest will skip all the following tests if a test fails ( # we should fix this too). We still want to get the test signals for the core @@ -621,7 +621,7 @@ class AsyncTPTest(MultiProcContinuousTest): # [READ ME FIRST] # The `SymmMemEmptySetDeviceTest` suite parameterizes whether user sets the -# device before calling symm_mem.emtpy. Either way should work. +# device before calling symm_mem.empty. Either way should work. # However, since `set_device` is persistent, we cannot use the # `MultiProcContinuousTest` template because the next function will be # "contaminated", leading to flaky tests (e.g. hang). Therefore, we use diff --git a/test/dynamo/cpython/3_13/mathdata/ieee754.txt b/test/dynamo/cpython/3_13/mathdata/ieee754.txt index a8b8a0a2148..3e986cdb102 100644 --- a/test/dynamo/cpython/3_13/mathdata/ieee754.txt +++ b/test/dynamo/cpython/3_13/mathdata/ieee754.txt @@ -51,7 +51,7 @@ nan >>> INF / INF nan -However unambigous operations with inf return inf: +However unambiguous operations with inf return inf: >>> INF * INF inf >>> 1.5 * INF diff --git a/test/dynamo/cpython/3_13/test_itertools.py b/test/dynamo/cpython/3_13/test_itertools.py index fe32a3491d1..b33fa6b878d 100644 --- a/test/dynamo/cpython/3_13/test_itertools.py +++ b/test/dynamo/cpython/3_13/test_itertools.py @@ -1711,7 +1711,7 @@ class TestBasicOps(__TestCase): t3 = tnew(t1) self.assertTrue(list(t1) == list(t2) == list(t3) == list('abc')) - # test that tee objects are weak referencable + # test that tee objects are weak referenceable a, b = tee(range(10)) p = weakref.proxy(a) self.assertEqual(getattr(p, '__class__'), type(b)) @@ -2243,7 +2243,7 @@ class TestPurePythonRoughEquivalents(__TestCase): t3 = tnew(t1) self.assertTrue(list(t1) == list(t2) == list(t3) == list('abc')) - # test that tee objects are weak referencable + # test that tee objects are weak referenceable a, b = tee(range(10)) p = weakref.proxy(a) self.assertEqual(getattr(p, '__class__'), type(b)) diff --git a/test/dynamo/test_repros.py b/test/dynamo/test_repros.py index 5d5d2281b89..c6138f7574f 100644 --- a/test/dynamo/test_repros.py +++ b/test/dynamo/test_repros.py @@ -5760,7 +5760,7 @@ def forward(self, s77 : torch.SymInt, s27 : torch.SymInt, L_x_ : torch.Tensor): self.assertEqual(func(x, 0), opt_func(x, 0)) def test_grad(self): - # Write to `grad` or `_grad` should reflecte in reading from the other, + # Writes to `grad` or `_grad` should be reflected in reading from the other, # and should be codegen-ed. 
def fn(x, y): x._grad = y + 1 diff --git a/test/export/test_export.py b/test/export/test_export.py index 6a9aff1477a..762ad512ae3 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -3955,7 +3955,7 @@ def forward(self, causal_mask, fill_value): def test_export_custom_op_lib(self): ops_registered_before = set(torch.ops.mylib) - # Assert warning for CompositeImplictAutograd op + # Assert warning for CompositeImplicitAutograd op with torch.library._scoped_library("mylib", "FRAGMENT") as lib: lib.define("foo123(Tensor x) -> Tensor") lib.impl("foo123", lambda x: x.sin(), "CompositeImplicitAutograd") diff --git a/test/export/test_serialize.py b/test/export/test_serialize.py index 0e1eb0140bb..2f68cdf4794 100644 --- a/test/export/test_serialize.py +++ b/test/export/test_serialize.py @@ -2000,7 +2000,7 @@ class TestSaveLoad(TestCase): def test_save_load_with_multiple_empty_tensors(self) -> None: # Test scenario where models have multiple empty tensors - # but with differnt data types. + # but with different data types. class M(torch.nn.Module): def __init__(self): super().__init__() diff --git a/test/functorch/attn_ft.py b/test/functorch/attn_ft.py index c5130e5f8a2..d2aabe51ec9 100644 --- a/test/functorch/attn_ft.py +++ b/test/functorch/attn_ft.py @@ -115,7 +115,7 @@ class BertSelfAttention(nn.Module): # we can then use that as an indirect index into the embedding table values to look up the features for that index # this is just a `gather` primitive op. The resulting tensor will - # have all the dimensions of embeddeding_idx (query_sequence x key_sequence), + # have all the dimensions of embedding_idx (query_sequence x key_sequence), # plus all the dimensions of `embed` that were not indirectly accessed (`embedding_range`). # this form of indirect indexing is more straightforward than either advanced indexing or torch.gather which both # have a lot of dependencies on the positions of indexing tensors. diff --git a/test/functorch/discover_coverage.py b/test/functorch/discover_coverage.py index 6d9d9e7e8a7..2ffdfec1e86 100644 --- a/test/functorch/discover_coverage.py +++ b/test/functorch/discover_coverage.py @@ -36,7 +36,7 @@ def get_public_overridable_apis(pytorch_root="/raid/rzou/pt/debug-cpu"): for module, module_name, src in public_docs: with open(f"{pytorch_root}/{src}") as f: lines = f.readlines() - # APIs eitehr begin with 4 spaces or ".. autofunction::" + # APIs either begin with 4 spaces or ".. autofunction::" api_lines1 = [line.strip() for line in lines if line.startswith(" " * 4)] api_lines2 = [ line.strip()[len(".. autofunction:: ") :] diff --git a/test/functorch/test_aotdispatch.py b/test/functorch/test_aotdispatch.py index 840ead0a281..29b69322d2f 100644 --- a/test/functorch/test_aotdispatch.py +++ b/test/functorch/test_aotdispatch.py @@ -6399,7 +6399,7 @@ def forward(self, primals_1, primals_2, primals_3): # Important pieces of the graph: # - 4 total dense outputs. - # This corresponds to the fact that each user fwd inpt (a, b) + # This corresponds to the fact that each user fwd input (a, b) # will get a gradient that is a TwoTensor subclass, # so (mul_2, mul_3) will be wrapped into a.grad # and (div_1, div_2) will be wrapped into b.grad @@ -8395,7 +8395,7 @@ aot_autograd_module_failures = set( # implementation not traceable or that there is a bug in AOTAutograd. 
torch.nn.TransformerEncoder, # DataDependentOutputException: aten.eq compares a mask input # to a causal mask tensor, to see if Boolean is_causal should be set - # for TrnasformerEncoder layers, MHA and sdp custom kernels + # for TransformerEncoder layers, MHA and sdp custom kernels torch.nn.Transformer, # DataDependentOutputException: aten.equal compares a mask input # to a causal mask tensor, to see if Boolean is_causal should be set # for TransformerEncoder layers, MHA and sdp custom kernels diff --git a/test/functorch/test_control_flow.py b/test/functorch/test_control_flow.py index 8009654b90d..5bfd1f200dd 100644 --- a/test/functorch/test_control_flow.py +++ b/test/functorch/test_control_flow.py @@ -1236,7 +1236,7 @@ def forward(self, pred_1, x_1): from torch.fx.passes.shape_prop import _extract_tensor_metadata, TensorMetadata # This is a helper function that extracts the metadata from the tensor and - # sets the requries_grad flag to false. This is needed as we compare the + # sets the requires_grad flag to false. This is needed as we compare the # metadata of the operands and the gradients def _extract_tensor_metadata_except_requires_grad(arg): metadata = _extract_tensor_metadata(arg) diff --git a/test/fx/test_fx_param_shape_control_flow.py b/test/fx/test_fx_param_shape_control_flow.py index 8972540076f..f3485f94a95 100644 --- a/test/fx/test_fx_param_shape_control_flow.py +++ b/test/fx/test_fx_param_shape_control_flow.py @@ -118,7 +118,7 @@ class TestConstParamShapeInControlFlow(TestCase): graph1_node_targets = [n.target for n in traced_graph.nodes] graph2_node_targets = [n.target for n in traced_graph2.nodes] - # the second graph has an exta relu function call node + # the second graph has an extra relu function call node assert torch.mm in graph1_node_targets and torch.mm in graph2_node_targets assert ( torch.relu not in graph1_node_targets and torch.relu in graph2_node_targets diff --git a/test/fx/test_fx_xform_observer.py b/test/fx/test_fx_xform_observer.py index d9dcb8504ba..8db18f0c55e 100644 --- a/test/fx/test_fx_xform_observer.py +++ b/test/fx/test_fx_xform_observer.py @@ -181,7 +181,7 @@ class TestGraphTransformObserver(TestCase): @torch._inductor.config.patch("trace.provenance_tracking_level", 1) def test_graph_transform_observer_replace(self): - # the node sohuld should not be duplicated + # the node should not be duplicated class Model(torch.nn.Module): def forward(self, x): y = x + 1 diff --git a/test/inductor/test_flex_attention.py b/test/inductor/test_flex_attention.py index abcaf6649d9..a1e5aa3cebc 100644 --- a/test/inductor/test_flex_attention.py +++ b/test/inductor/test_flex_attention.py @@ -1865,7 +1865,7 @@ class TestFlexAttention(InductorTestCase): requires_grad=True, ) query, key, value = make_tensor(), make_tensor(), make_tensor() - # floor_div is not decomposed in decompostion_table is empty + # floor_div is not decomposed when decomposition_table is empty attention = functools.partial(flex_attention, score_mod=score_mod_func) gm = make_fx(attention, decomposition_table={})(query, key, value) self.assertExpectedInline( diff --git a/test/inductor/test_flex_decoding.py b/test/inductor/test_flex_decoding.py index a794f5e6e52..995262b0f21 100644 --- a/test/inductor/test_flex_decoding.py +++ b/test/inductor/test_flex_decoding.py @@ -1188,7 +1188,7 @@ class TestFlexDecoding(InductorTestCase): requires_grad=True, ) query, key, value = make_q(), make_kv(), make_kv() - # floor_div is not decomposed in decompostion_table is empty + # floor_div is not decomposed when 
decomposition_table is empty attention = functools.partial(flex_attention, score_mod=score_mod_func) gm = make_fx(attention, decomposition_table={})(query, key, value) self.assertExpectedInline( diff --git a/test/inductor/test_fxir_backend.py b/test/inductor/test_fxir_backend.py index c17d0bf19b5..72eb37c1e1b 100644 --- a/test/inductor/test_fxir_backend.py +++ b/test/inductor/test_fxir_backend.py @@ -1128,7 +1128,7 @@ class TestReplaceFloorDiv(InductorTestCase): replaced = replace_floor_div(expr) # Check that all floor's were replaced. - # We shoud have no more new FloorDiv's than floor's in the original expression, + # We should have no more new FloorDiv's than floor's in the original expression, # although we can have less due to simplification. self.assertEqual(replaced.count(sympy.floor), 0) self.assertLessEqual( diff --git a/test/inductor/test_loop_ordering.py b/test/inductor/test_loop_ordering.py index efe0fbfc283..c77b3574b22 100644 --- a/test/inductor/test_loop_ordering.py +++ b/test/inductor/test_loop_ordering.py @@ -231,7 +231,7 @@ class LoopOrderingTest(TestCase): return x.to(torch.float32) return x - # Wordaround the issue that call allclose on fp8 tensor triggers error + # Workaround the issue that call allclose on fp8 tensor triggers error # RuntimeError: "mul_cuda" not implemented for 'Float8_e4m3fn' expect = tree_map(_cast, expect) actual = tree_map(_cast, actual) @@ -547,7 +547,7 @@ class LoopOrderingTest(TestCase): # A small amount of extra memory access for: # - store output for the first reduction - # - load input for the second redution + # - load input for the second reduction # - store output for the second reduction expected_numbytes += (M * 2 + 1) * x.itemsize diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index aa140e4c0cb..420a9ee8292 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -384,7 +384,7 @@ class TestMaxAutotune(TestCase): a[:] = torch.randn((M, K), dtype=torch.float16) b = torch.empty_strided((K, N), (1, K), dtype=torch.float16, device=GPU_TYPE) b[:] = torch.randn((K, N), dtype=torch.float16) - # allocate an output with a stride not divisble by 16, so it can't satisfy TMA alignment checks. + # allocate an output with a stride not divisible by 16, so it can't satisfy TMA alignment checks. out = torch.empty_strided((M, N), (N, 1), dtype=torch.float16, device=GPU_TYPE) with ( diff --git a/test/inductor/test_utils.py b/test/inductor/test_utils.py index 7d23457732a..c3dcd966df3 100644 --- a/test/inductor/test_utils.py +++ b/test/inductor/test_utils.py @@ -74,7 +74,7 @@ class TestUtils(TestCase): self.assertEqual(expr.is_integer, None) self.assertEqual(expr.is_nonnegative, None) # replace abs(x) with y - # propagte abs(x) sympy properties. + # propagate abs(x) sympy properties. result = sympy_subs(expr, {expr: Symbol("y")}) self.assertEqual(result.name, "y") self.assertEqual(result.is_integer, None) diff --git a/test/jit/test_backend_nnapi.py b/test/jit/test_backend_nnapi.py index 042c82eca80..3e79b257131 100644 --- a/test/jit/test_backend_nnapi.py +++ b/test/jit/test_backend_nnapi.py @@ -17,7 +17,7 @@ from torch.testing._internal.common_utils import ( # hacky way to skip these tests in fbcode: # during test execution in fbcode, test_nnapi is available during test discovery, # but not during test execution. So we can't try-catch here, otherwise it'll think -# it sees tests but then fails when it tries to actuall run them. 
+# it sees tests but then fails when it tries to actually run them. if not IS_FBCODE: from test_nnapi import TestNNAPI diff --git a/test/jit/test_cuda.py b/test/jit/test_cuda.py index c781c1e4c57..8cfe63faa0e 100644 --- a/test/jit/test_cuda.py +++ b/test/jit/test_cuda.py @@ -292,7 +292,7 @@ class TestCUDA(JitTestCase): default_stream_id: int user_stream_id: int - # The test aims at checking different stream proporties. + # The test aims at checking different stream properties. @torch.jit.script def test_get_stream(): device_index = torch.cuda.current_device() @@ -499,7 +499,7 @@ class TestCUDA(JitTestCase): # Record the CUDA event for operation torch.mm on the current stream # and then test if the elapsed time is greater than 0. This test is also - # an adaption from eager mdoe CUDA tests available at test/test_cuda.py + # an adaption from eager mode CUDA tests available at test/test_cuda.py @torch.jit.script def test_event(): device_index = torch.cuda.current_device() diff --git a/test/jit/test_freezing.py b/test/jit/test_freezing.py index ca1172a2ce7..91ecf6f3629 100644 --- a/test/jit/test_freezing.py +++ b/test/jit/test_freezing.py @@ -563,7 +563,7 @@ class TestFreezing(JitTestCase): self.assertTrue(mf.hasattr("sub1")) self.assertTrue(mf.sub1.hasattr("a")) self.assertFalse(mf.sub1.hasattr("b")) - # sub2 is fully folded becasue self.sub1 and self.sub2.sub are not alias (Scripting bug) + # sub2 is fully folded because self.sub1 and self.sub2.sub are not alias (Scripting bug) self.assertFalse(mf.hasattr("sub2")) input = torch.randn(2, 2) output = m.forward(input) diff --git a/test/jit/test_peephole.py b/test/jit/test_peephole.py index 914d423a519..12b9c3f1834 100644 --- a/test/jit/test_peephole.py +++ b/test/jit/test_peephole.py @@ -152,7 +152,7 @@ class TestPeephole(JitTestCase): self.run_pass("peephole", test.graph) FileCheck().check_not("prim::unchecked_cast").run(test.graph) - # refinement not optimzied out + # refinement not optimized out def is_int_tensor(x): scalar = x.item() if isinstance(scalar, int): diff --git a/test/jit/test_upgraders.py b/test/jit/test_upgraders.py index 22d05052b4f..c2228b2de85 100644 --- a/test/jit/test_upgraders.py +++ b/test/jit/test_upgraders.py @@ -151,7 +151,7 @@ class TestUpgraders(JitTestCase): version = self._load_model_version(loaded_func) self.assertTrue(version == 5) - # make sure we preserve old behaviou + # make sure we preserve old behaviour torch._C._calculate_package_version_based_on_upgraders(current_flag_value) def test_aten_linspace(self): diff --git a/test/lazy/test_extract_compiled_graph.py b/test/lazy/test_extract_compiled_graph.py index 1ea0219066d..844b9fef1af 100644 --- a/test/lazy/test_extract_compiled_graph.py +++ b/test/lazy/test_extract_compiled_graph.py @@ -195,7 +195,7 @@ def maketest(module_cls, exception_msg_pattern=None, ctxmgr=None): class OptimizeTest(unittest.TestCase): test_sub = maketest(ModuleSub) # Same as test_sub but force aten::sub to fallback - # We expect an exception caught because of LTC fallabck. + # We expect an exception caught because of LTC fallback. test_ltc_fallback = maketest( ModuleSub, exception_msg_pattern="fallback.*aten::sub", diff --git a/test/lazy/test_ts_opinfo.py b/test/lazy/test_ts_opinfo.py index 7c467dc6241..3e065395153 100644 --- a/test/lazy/test_ts_opinfo.py +++ b/test/lazy/test_ts_opinfo.py @@ -164,7 +164,7 @@ class TestLazyTensor(JitTestCase): if mark_step: torch._lazy.mark_step() - # y and x should contiue to be aliased after the mark_step call. 
+ # y and x should continue to be aliased after the mark_step call. y.add_(1) return x diff --git a/test/mobile/model_test/README.md b/test/mobile/model_test/README.md index 87c9f9bc910..f176a746c26 100644 --- a/test/mobile/model_test/README.md +++ b/test/mobile/model_test/README.md @@ -81,7 +81,7 @@ python test/mobile/model_test/gen_test_model.py ios The test coverage is based on the number of root ops tested in these test models. The full list of generated ops can be found in: https://github.com/pytorch/pytorch/blob/master/test/mobile/model_test/coverage.yaml -In additional, the simulator tests will also report the percentage of Meta's production ops that are covered. The list of production ops changes overtime, so a Meta employee needs to regularly udpate the list it using +In addition, the simulator tests will also report the percentage of Meta's production ops that are covered. The list of production ops changes over time, so a Meta employee needs to regularly update the list using ``` python test/mobile/model_test/update_production_ops.py ~/fbsource/xplat/pytorch_models/build/all_mobile_model_configs.yaml ``` diff --git a/test/mobile/model_test/update_production_ops.py b/test/mobile/model_test/update_production_ops.py index b4549a585e1..dbec56e6426 100644 --- a/test/mobile/model_test/update_production_ops.py +++ b/test/mobile/model_test/update_production_ops.py @@ -16,10 +16,10 @@ with open(sys.argv[1]) as input_yaml_file: model_infos = yaml.safe_load(input_yaml_file) for info in model_infos: for op in info["root_operators"]: - # aggregate occurance per op + # aggregate occurrence per op root_operators[op] = 1 + (root_operators.get(op, 0)) for op in info["traced_operators"]: - # aggregate occurance per op + # aggregate occurrence per op traced_operators[op] = 1 + (traced_operators.get(op, 0)) # merge dtypes for each kernel for kernal, dtypes in info["kernel_metadata"].items(): diff --git a/test/nn/test_convolution.py b/test/nn/test_convolution.py index 3c3b3f53e52..8c1e1ee7a69 100644 --- a/test/nn/test_convolution.py +++ b/test/nn/test_convolution.py @@ -1009,7 +1009,7 @@ class TestConvolutionNN(NNTestCase): @unittest.skipIf(not TEST_CUDNN, "needs cudnn") def test_conv_cudnn_memory_layout_dominance(self): # desired behavior here is to have the memory_layout of conv.weight to - # dominante the layout of output. + # dominate the layout of output. 
# which is not the same as current behavior, we'll fix this in # following up PRs and remove the `expectedFailure` tag input = torch.randint( @@ -3599,7 +3599,7 @@ class TestConvolutionNNDeviceType(NNTestCase): input_format=input_format, weight_format=weight_format, ) - # test when input chanels is 1 and not converted to channels last + # test when input channels is 1 and not converted to channels last helper( nn.Conv2d, 2, diff --git a/test/nn/test_parametrization.py b/test/nn/test_parametrization.py index da83ed26caa..aee8d4df50e 100644 --- a/test/nn/test_parametrization.py +++ b/test/nn/test_parametrization.py @@ -1395,7 +1395,7 @@ class TestNNParametrization(NNTestCase): eval_out0 = wrapped_m(input) # assert eval gives same result as last training iteration self.assertEqual(eval_out0, last_train_out) - # assert doing more iteartion in eval don't change things + # assert doing more iterations in eval doesn't change things self.assertEqual(eval_out0, wrapped_m(input)) self.assertEqual(last_train_u, spectral_norm_m._u) self.assertEqual(last_train_v, spectral_norm_m._v) @@ -1440,7 +1440,7 @@ class TestNNParametrization(NNTestCase): class SplitAndCat(nn.Module): def right_inverse(self, x): - # split the tensor in two halfs + # split the tensor in two halves return torch.split(x, x.shape[1] // 2) def forward(self, x0, x1): diff --git a/test/nn/test_pruning.py b/test/nn/test_pruning.py index a2ca609af6e..51078cbcf64 100644 --- a/test/nn/test_pruning.py +++ b/test/nn/test_pruning.py @@ -894,14 +894,14 @@ class TestPruningNN(NNTestCase): prune.l1_unstructured(l, "weight_ih_l0", 0.5) assert sum(isinstance(p, torch.nn.Parameter) for p in l._flat_weights) == 3 - # Removing the pruning reparametrization restores the Parameter + # Removing the pruning reparameterization restores the Parameter prune.remove(l, "weight_ih_l0") assert sum(isinstance(p, torch.nn.Parameter) for p in l._flat_weights) == 4 - # Make sure that, upon removal of the reparametrization, the + # Make sure that, upon removal of the reparameterization, the # `._parameters` and `.named_parameters` contain the right params. # Specifically, the original weight ('weight_ih_l0') should be placed - # back in the parameters, while the reparametrization component + # back in the parameters, while the reparameterization component # ('weight_ih_l0_orig') should be removed. 
assert "weight_ih_l0" in l._parameters assert l._parameters["weight_ih_l0"] is not None diff --git a/test/onnx/autograd_helper.py b/test/onnx/autograd_helper.py index 4a3a3eca384..23d42f7b63c 100644 --- a/test/onnx/autograd_helper.py +++ b/test/onnx/autograd_helper.py @@ -3,7 +3,7 @@ import torch -# Autograd funtion that is a replica of the autograd funtion in +# Autograd function that is a replica of the autograd function in # test_utility_funs.py (test_autograd_module_name) class CustomFunction(torch.autograd.Function): @staticmethod diff --git a/test/onnx/exporter/test_api.py b/test/onnx/exporter/test_api.py index a81b7106084..2fefd592ecf 100644 --- a/test/onnx/exporter/test_api.py +++ b/test/onnx/exporter/test_api.py @@ -574,7 +574,7 @@ class TestCustomTranslationTable(common_utils.TestCase): def test_01_specialization_with_run_decomp_is_supported(self): # Phi3RMSNorm changes and redo shape inference after `run_decompositions` call - # We ned this test to make sure everything we do on fx graph is covered by + # We need this test to make sure everything we do on fx graph is covered by # backed_size_oblivious class Phi3RMSNorm(torch.nn.Module): def __init__(self, hidden_size, eps=1e-6): diff --git a/test/onnx/test_pytorch_onnx_onnxruntime.py b/test/onnx/test_pytorch_onnx_onnxruntime.py index 5c11682deed..a474d71d49b 100644 --- a/test/onnx/test_pytorch_onnx_onnxruntime.py +++ b/test/onnx/test_pytorch_onnx_onnxruntime.py @@ -1935,7 +1935,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): class DivModule(torch.nn.Module): def forward(self, x, y): # Add transpose to hide shape/type information - # Otherwise shape and type are still avaiable from input. + # Otherwise shape and type are still available from input. x = x.transpose(1, 2) y = y.transpose(1, 2) return x / y, torch.true_divide(x, y) @@ -3878,7 +3878,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): def test_topk_smallest_unsorted(self): class MyModule(torch.nn.Module): def forward(self, x, k): - # When sorted=False, order of elements in the outout tensors + # When sorted=False, order of elements in the output tensors # are not expected to match between PyTorch and ORT topk_unsorted = torch.topk(x, k, largest=False, sorted=False) topk_sorted = torch.topk(x, k, largest=False, sorted=True) @@ -4361,7 +4361,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): super().__init__() self.weight = torch.nn.Buffer(torch.ones(5)) # torch.nn.Embedding is converted to ONNX::Gather. - # Constant folding will be triggerred for constant inputs. + # Constant folding will be triggered for constant inputs. # This pattern is common for constant mask inputs in transformer models. 
self.embed = torch.nn.Embedding(8, 3) @@ -5389,7 +5389,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): input = torch.randn(7, 3, 5) self._argmin_argmax_model(input) - # Argmin and Argmax with "select_last_index" is not supprted before opset 12 + # Argmin and Argmax with "select_last_index" is not supported before opset 12 # "select_last_index" was added in opset 12 to deal with corner case where the # same value appears multiple times in the tensor @skipIfUnsupportedMinOpsetVersion(12) @@ -10511,7 +10511,7 @@ class TestONNXRuntime(onnx_test_common._TestONNXRuntime): amax = torch.ones(4) scale = amax / 127.0 zero_point = torch.zeros_like(amax, dtype=torch.int) - # Quantize twice to test differnet branches + # Quantize twice to test different branches y = torch.fake_quantize_per_channel_affine( input, scale, zero_point, 1, 0, 255 ) diff --git a/test/onnx/torchlib/ops_test_data.py b/test/onnx/torchlib/ops_test_data.py index 6adb43044e5..6dd3a39a8d6 100644 --- a/test/onnx/torchlib/ops_test_data.py +++ b/test/onnx/torchlib/ops_test_data.py @@ -275,7 +275,7 @@ def _empty_input_wrangler( def _grid_sample_input_wrangler( args: list[Any], kwargs: dict[str, Any] ) -> tuple[list[Any], dict[str, Any]]: - # Convert string attriute to int as input + # Convert string attribute to int as input inter_mode_options = {"bilinear": 0, "nearest": 1, "bicubic": 2} padding_mode_options = {"zeros": 0, "border": 1, "reflection": 2} args.append(inter_mode_options[kwargs["mode"]]) diff --git a/test/package/generate_bc_packages.py b/test/package/generate_bc_packages.py index 6fa5e81ef5a..52acca1d946 100644 --- a/test/package/generate_bc_packages.py +++ b/test/package/generate_bc_packages.py @@ -11,7 +11,7 @@ torch.package.package_exporter._gate_torchscript_serialization = False def generate_bc_packages(): - """Function to create packages for testing backwards compatiblity""" + """Function to create packages for testing backwards compatibility""" if not IS_FBCODE or IS_SANDCASTLE: from package_a.test_nn_module import TestNnModule diff --git a/test/package/test_load_bc_packages.py b/test/package/test_load_bc_packages.py index 2536f81aaaa..4280736d6e3 100644 --- a/test/package/test_load_bc_packages.py +++ b/test/package/test_load_bc_packages.py @@ -17,7 +17,7 @@ packaging_directory = f"{Path(__file__).parent}/package_bc" class TestLoadBCPackages(PackageTestCase): - """Tests for checking loading has backwards compatiblity""" + """Tests for checking loading has backwards compatibility""" @skipIf( IS_FBCODE or IS_SANDCASTLE, diff --git a/test/package/test_misc.py b/test/package/test_misc.py index 850dec67681..897d250bc67 100644 --- a/test/package/test_misc.py +++ b/test/package/test_misc.py @@ -196,7 +196,7 @@ class TestMisc(PackageTestCase): "Tests that use temporary files are disabled in fbcode", ) def test_load_python_version_from_package(self): - """Tests loading a package with a python version embdded""" + """Tests loading a package with a python version embedded""" importer1 = PackageImporter( f"{Path(__file__).parent}/package_e/test_nn_module.pt" ) diff --git a/test/package/test_model.py b/test/package/test_model.py index 09b10a1ea2f..ea0d2c0788b 100644 --- a/test/package/test_model.py +++ b/test/package/test_model.py @@ -97,7 +97,7 @@ class ModelTest(PackageTestCase): # how they want to save it but the 'server' can always # use the same API to load the package. 
- # The convension is for each model to provide a + # The convention is for each model to provide a # 'model' package with a 'load' function that actual # reads the model out of the archive. @@ -123,7 +123,7 @@ class ModelTest(PackageTestCase): import torch_package_importer as resources # server knows to call model.load() to get the model, - # maybe in the future it passes options as arguments by convension + # maybe in the future it passes options as arguments by convention def load(): return resources.load_pickle('model', 'pickled') """ diff --git a/test/profiler/test_execution_trace.py b/test/profiler/test_execution_trace.py index 2cd51136ab4..3a174b1d66a 100644 --- a/test/profiler/test_execution_trace.py +++ b/test/profiler/test_execution_trace.py @@ -43,7 +43,7 @@ from torch.utils._triton import has_triton # This causes an issue in the multithreading test because we check all events # in that test with their tids. The events that correspond to these lingering # threads all have TID of (uint64_t)(-1) which is invalid. -# The work around is turnning off monitoring thread when tqdm is loaded. +# The work around is turning off monitoring thread when tqdm is loaded. # Since these are unit tests, it is safe to turn off monitor thread. try: import tqdm diff --git a/test/profiler/test_profiler.py b/test/profiler/test_profiler.py index e30e0812b1d..b30d25ec9af 100644 --- a/test/profiler/test_profiler.py +++ b/test/profiler/test_profiler.py @@ -82,7 +82,7 @@ if TYPE_CHECKING: # This causes an issue in the multithreading test because we check all events # in that test with their tids. The events that correspond to these lingering # threads all have TID of (uint64_t)(-1) which is invalid. -# The work around is turnning off monitoring thread when tqdm is loaded. +# The work around is turning off monitoring thread when tqdm is loaded. # Since these are unit tests, it is safe to turn off monitor thread. try: import tqdm diff --git a/test/profiler/test_record_function.py b/test/profiler/test_record_function.py index 03a15f29907..26a6c0edf80 100644 --- a/test/profiler/test_record_function.py +++ b/test/profiler/test_record_function.py @@ -21,7 +21,7 @@ from torch.testing._internal.common_utils import run_tests, TestCase # This causes an issue in the multithreading test because we check all events # in that test with their tids. The events that correspond to these lingering # threads all have TID of (uint64_t)(-1) which is invalid. -# The work around is turnning off monitoring thread when tqdm is loaded. +# The work around is turning off monitoring thread when tqdm is loaded. # Since these are unit tests, it is safe to turn off monitor thread. try: import tqdm diff --git a/test/profiler/test_torch_tidy.py b/test/profiler/test_torch_tidy.py index efbd4b8189d..a0f41114e91 100644 --- a/test/profiler/test_torch_tidy.py +++ b/test/profiler/test_torch_tidy.py @@ -20,7 +20,7 @@ from torch.testing._internal.common_utils import run_tests, TestCase # This causes an issue in the multithreading test because we check all events # in that test with their tids. The events that correspond to these lingering # threads all have TID of (uint64_t)(-1) which is invalid. -# The work around is turnning off monitoring thread when tqdm is loaded. +# The work around is turning off monitoring thread when tqdm is loaded. # Since these are unit tests, it is safe to turn off monitor thread. 
try: import tqdm @@ -425,7 +425,7 @@ class TestTorchTidyProfiler(TestCase): self.assertEqual(state[0][0], "momentum_buffer") self.assertEqual(state[0][1].id, weight_momenumtum_id) - # Check that we handle first step (lazy initalization) and steady state. + # Check that we handle first step (lazy initialization) and steady state. check(cold_start=True) check(cold_start=False) diff --git a/test/quantization/bc/test_backward_compatibility.py b/test/quantization/bc/test_backward_compatibility.py index 911c26defe2..01c546a95a5 100644 --- a/test/quantization/bc/test_backward_compatibility.py +++ b/test/quantization/bc/test_backward_compatibility.py @@ -68,7 +68,7 @@ def get_filenames(self, subname): class TestSerialization(TestCase): - """Test backward compatiblity for serialization and numerics""" + """Test backward compatibility for serialization and numerics""" # Copy and modified from TestCase.assertExpected def _test_op( diff --git a/test/quantization/core/experimental/test_linear.py b/test/quantization/core/experimental/test_linear.py index 6a46b4fc3cc..df668248476 100644 --- a/test/quantization/core/experimental/test_linear.py +++ b/test/quantization/core/experimental/test_linear.py @@ -14,7 +14,7 @@ class TestNonUniformObserver(unittest.TestCase): # weight: fp tensor weight = 1000 * torch.rand(4, 4) - # activtion: fp32 tensor with ~ integer values + # activation: fp32 tensor with ~ integer values activation = torch.randint(low=0, high=255, size=(4, 4), dtype=torch.float) # calculate result from calling linear forward method @@ -41,7 +41,7 @@ class TestNonUniformObserver(unittest.TestCase): # weight: fp tensor weight = 1000 * torch.rand(5, 3) - # activtion: fp32 tensor with ~ integer values + # activation: fp32 tensor with ~ integer values # note: transpose of activation matrix will have dimension (3, 5) activation = torch.randint(low=0, high=255, size=(5, 3), dtype=torch.float) diff --git a/test/quantization/core/test_quantized_op.py b/test/quantization/core/test_quantized_op.py index d8a35264f7d..21330aa7fc0 100644 --- a/test/quantization/core/test_quantized_op.py +++ b/test/quantization/core/test_quantized_op.py @@ -225,7 +225,7 @@ class TestQuantizedOps(TestCase): `output_is_observed`: if specified and is True, we'll append extra output_scale/output_zero_point keyword argument when calling quantized op """ - # Retrives the default parameters from X. + # Retrieves the default parameters from X. X, (scale, zero_point, torch_type) = X if not isinstance(X, torch.Tensor): X = torch.from_numpy(X) @@ -3584,7 +3584,7 @@ class TestDynamicQuantizedOps(TestCase): def test_wrapped_fbgemm_pack_gemm_matrix_fp16_pt2_compliant(self): # We are not using opcheck over here because the output for the op we're testing # (_quantized.wrapped_fbgemm_pack_gemm_matrix_fp16) is not deterministic - # due to the C-struct it's procuding. This would fail the check when we're trying + # due to the C-struct it's producing. This would fail the check when we're trying # to match the result between compiled and eager version. 
# # This is only a temporary solution, long term, we should be able to support PT2 @@ -5572,7 +5572,7 @@ class TestQuantizedConv(TestCase): ) act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -5653,7 +5653,7 @@ class TestQuantizedConv(TestCase): ) act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -6084,7 +6084,7 @@ class TestQuantizedConv(TestCase): ) act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -6210,7 +6210,7 @@ class TestQuantizedConv(TestCase): bias=use_bias ) act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -6515,7 +6515,7 @@ class TestQuantizedConv(TestCase): qconv = torch.ops.quantized.conv1d act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) @@ -6586,7 +6586,7 @@ class TestQuantizedConv(TestCase): qconv = torch.ops.quantized.conv1d_relu act_qdtypes = [torch.quint8] - # Only qnnpack qengine supportes qint8 + # Only qnnpack qengine supports qint8 if qengine_is_qnnpack() and torch.backends.xnnpack.enabled: act_qdtypes.append(torch.qint8) diff --git a/test/quantization/core/test_quantized_tensor.py b/test/quantization/core/test_quantized_tensor.py index 65633dbf37f..d0c00ad61ba 100644 --- a/test/quantization/core/test_quantized_tensor.py +++ b/test/quantization/core/test_quantized_tensor.py @@ -765,7 +765,7 @@ class TestQuantizedTensor(TestCase): qr = torch.quantize_per_tensor(r, scale, zero_point, dtype=dtype) qr = qr.transpose(0, 1) rqr = qr.dequantize() - # compare transpose + dequantized result with orignal transposed result + # compare transpose + dequantized result with original transposed result self.assertTrue(np.allclose(r.cpu().numpy().transpose([1, 0, 2, 3]), rqr.cpu().numpy(), atol=2 / scale)) qr = torch.quantize_per_tensor(r, scale, zero_point, dtype=dtype) @@ -1210,7 +1210,7 @@ class TestQuantizedTensor(TestCase): if device == 'cpu': self.assertFalse(torch.equal(b, c)) - # a case can't view non-contiguos Tensor + # a case can't view non-contiguous Tensor a_int = torch.randint(0, 100, [1, 2, 3, 4], device=device, dtype=dtype) a = torch._make_per_tensor_quantized_tensor(a_int, scale=scale, zero_point=zero_point) b = a.transpose(1, 2) # swaps 2nd and 3rd dimension diff --git a/test/quantization/fx/test_model_report_fx.py b/test/quantization/fx/test_model_report_fx.py index 51bce95e30a..58c88c48734 100644 --- a/test/quantization/fx/test_model_report_fx.py +++ b/test/quantization/fx/test_model_report_fx.py @@ -946,7 +946,7 @@ class TestFxModelReportClass(QuantizationTestCase): model_report = ModelReport(model_prep, test_detector_set) - # prepare the model for callibration + # prepare the model for calibration prepared_for_callibrate_model = model_report.prepare_detailed_calibration() # see whether observers properly in regular nn.Module @@ -985,7 +985,7 @@ class TestFxModelReportClass(QuantizationTestCase): elif 
isinstance(detector, DynamicStaticDetector): self.assertEqual(len(detector_obs_of_interest_fqns), 4) - # ensure that we can prepare for callibration only once + # ensure that we can prepare for calibration only once with self.assertRaises(ValueError): prepared_for_callibrate_model = model_report.prepare_detailed_calibration() @@ -1037,7 +1037,7 @@ class TestFxModelReportClass(QuantizationTestCase): model_full = TwoThreeOps() model_single = TwoThreeOps() - # prepare and callibrate two different instances of same model + # prepare and calibrate two different instances of same model # prepare the model example_input = model_full.get_example_inputs()[0] current_backend = torch.backends.quantized.engine @@ -1052,11 +1052,11 @@ class TestFxModelReportClass(QuantizationTestCase): # initialize another with a single detector set model_report_single = ModelReport(model_prep_single, single_detector_set) - # prepare the models for callibration + # prepare the models for calibration prepared_for_callibrate_model_full = model_report_full.prepare_detailed_calibration() prepared_for_callibrate_model_single = model_report_single.prepare_detailed_calibration() - # now callibrate the two models + # now calibrate the two models num_iterations = 10 for i in range(num_iterations): example_input = torch.tensor(torch.randint(100, (1, 3, 3, 3)), dtype=torch.float) @@ -1109,12 +1109,12 @@ class TestFxModelReportClass(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1162,12 +1162,12 @@ class TestFxModelReportClass(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) - # now we actually callibrate the models + # now we actually calibrate the models example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1192,7 +1192,7 @@ class TestFxModelReportClass(QuantizationTestCase): self.assertEqual(len(qconfig_mapping.module_name_qconfigs), 2) # only two linears, make sure per channel min max for weight since fbgemm - # also static distribution since a simple single callibration + # also static distribution since a simple single calibration for key in qconfig_mapping.module_name_qconfigs: config = qconfig_mapping.module_name_qconfigs[key] self.assertEqual(config.weight, default_per_channel_weight_observer) @@ -1220,12 +1220,12 @@ class TestFxModelReportClass(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) - # now we actually callibrate the models + # now we actually calibrate the models example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1319,7 +1319,7 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase): detector_set = {InputWeightEqualizationDetector(0.5)} - # get tst model and callibrate + # get tst model and calibrate non_fused = 
self._get_prepped_for_calibration_model(self.TwoBlockComplexNet(), detector_set) fused = self._get_prepped_for_calibration_model(self.TwoBlockComplexNet(), detector_set, fused=True) @@ -1365,12 +1365,12 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase): test_input_weight_detector = InputWeightEqualizationDetector(0.4) detector_set = {test_input_weight_detector} model = self.TwoBlockComplexNet() - # prepare the model for callibration + # prepare the model for calibration prepared_for_callibrate_model, model_report = self._get_prepped_for_calibration_model( model, detector_set ) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1430,7 +1430,7 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase): self.assertEqual(global_max, max(dimension_max)) input_ratio = torch.sqrt((per_channel_max - per_channel_min) / (global_max - global_min)) - # ensure comparision stat passed back is sqrt of range ratios + # ensure comparison stat passed back is sqrt of range ratios # need to get the weight ratios first # make sure per channel min and max are as expected @@ -1474,10 +1474,10 @@ class TestFxDetectInputWeightEqualization(QuantizationTestCase): test_input_weight_detector = InputWeightEqualizationDetector(0.4) detector_set = {test_input_weight_detector} model = self.ReluOnly() - # prepare the model for callibration + # prepare the model for calibration prepared_for_callibrate_model, model_report = self._get_prepped_for_calibration_model(model, detector_set) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1531,7 +1531,7 @@ class TestFxDetectOutliers(QuantizationTestCase): def _get_prepped_for_calibration_model(self, model, detector_set, use_outlier_data=False): r"""Returns a model that has been prepared for callibration and corresponding model_report""" - # call the general helper function to callibrate + # call the general helper function to calibrate example_input = model.get_example_inputs()[0] # if we specifically want to test data with outliers replace input @@ -1550,7 +1550,7 @@ class TestFxDetectOutliers(QuantizationTestCase): detector_set = {OutlierDetector(reference_percentile=0.95)} - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model( self.LargeBatchModel(param_size=128), detector_set ) @@ -1594,12 +1594,12 @@ class TestFxDetectOutliers(QuantizationTestCase): detector_set = {outlier_detector, dynamic_static_detector} model = self.LargeBatchModel(param_size=param_size) - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model( model, detector_set ) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1644,12 +1644,12 @@ class TestFxDetectOutliers(QuantizationTestCase): detector_set = {outlier_detector} model = self.LargeBatchModel(param_size=param_size) - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model( model, detector_set ) - # now we actually callibrate the model + # now we actually calibrate the model 
example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1694,16 +1694,16 @@ class TestFxDetectOutliers(QuantizationTestCase): detector_set = {outlier_detector} model = self.LargeBatchModel(param_size=param_size) - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = self._get_prepped_for_calibration_model( model, detector_set, use_outlier_data=True ) - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_outlier_inputs()[0] example_input = example_input.to(torch.float) - # now callibrate minimum 30 times to make it above minimum threshold + # now calibrate minimum 30 times to make it above minimum threshold for i in range(30): example_input = model.get_outlier_inputs()[0] example_input = example_input.to(torch.float) @@ -1764,7 +1764,7 @@ class TestFxModelReportVisualizer(QuantizationTestCase): r""" Callibrates the passed in model, generates report, and returns the visualizer """ - # now we actually callibrate the model + # now we actually calibrate the model example_input = model.get_example_inputs()[0] example_input = example_input.to(torch.float) @@ -1796,7 +1796,7 @@ class TestFxModelReportVisualizer(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) @@ -1843,7 +1843,7 @@ class TestFxModelReportVisualizer(QuantizationTestCase): model = TwoThreeOps() - # get tst model and callibrate + # get tst model and calibrate prepared_for_callibrate_model, mod_report = _get_prepped_for_calibration_model_helper( model, detector_set, model.get_example_inputs()[0] ) @@ -1953,7 +1953,7 @@ def _get_prepped_for_calibration_model_helper(model, detector_set, example_input model_report = ModelReport(model_prep, detector_set) - # prepare the model for callibration + # prepare the model for calibration prepared_for_callibrate_model = model_report.prepare_detailed_calibration() return (prepared_for_callibrate_model, model_report) diff --git a/test/quantization/fx/test_quantize_fx.py b/test/quantization/fx/test_quantize_fx.py index f6f1128e422..c54c741bcec 100644 --- a/test/quantization/fx/test_quantize_fx.py +++ b/test/quantization/fx/test_quantize_fx.py @@ -1221,7 +1221,7 @@ class TestQuantizeFx(QuantizationTestCase): def checkSerDeser(model, is_dynamic): for module_name in ("linear", "conv"): if hasattr(model, module_name): - # make sure seralization works + # make sure serialization works state_dict = copy.deepcopy(model.state_dict()) all_keys = _get_keys(module_name, is_dynamic) for key in all_keys: @@ -1484,7 +1484,7 @@ class TestQuantizeFx(QuantizationTestCase): def checkSerDeser(model, is_dynamic): module_name = "deconv" if hasattr(model, module_name): - # make sure seralization works + # make sure serialization works state_dict = copy.deepcopy(model.state_dict()) all_keys = _get_keys(module_name, is_dynamic) for key in all_keys: @@ -1569,7 +1569,7 @@ class TestQuantizeFx(QuantizationTestCase): def checkSerDeser(model, is_dynamic): module_name = "deconv" if hasattr(model, module_name): - # make sure seralization works + # make sure serialization works state_dict = copy.deepcopy(model.state_dict()) all_keys = _get_keys(module_name, is_dynamic) for key in all_keys: diff --git a/test/quantization/jit/test_quantize_jit.py b/test/quantization/jit/test_quantize_jit.py 
index c71f7182b70..ec7618fb551 100644 --- a/test/quantization/jit/test_quantize_jit.py +++ b/test/quantization/jit/test_quantize_jit.py @@ -2926,7 +2926,7 @@ class TestQuantizeJitOps(QuantizationTestCase): m._c, "forward", {"": qconfig}, inplace=False ) ) - # Checking the model before fianlize contain unfused patterns + # Checking the model before finalize contain unfused patterns # that numerically matches the model after quantize by checking # number of aten::quantize_per_tensor functions # conv has 3 quantize_per_tensor for activations and 1 for weight diff --git a/test/quantization/pt2e/test_quantize_pt2e.py b/test/quantization/pt2e/test_quantize_pt2e.py index f6d3eae2332..25db7d97d9d 100644 --- a/test/quantization/pt2e/test_quantize_pt2e.py +++ b/test/quantization/pt2e/test_quantize_pt2e.py @@ -1682,7 +1682,7 @@ class TestQuantizePT2E(PT2EQuantizationTestCase): qconfig_mapping.set_object_type(torch.nn.Linear, dynamic_qconfig) # Had to turn off check against fx because fx quant workflow does not seem # to propagate observers for permute node for this model. - # Suprisingly it does propagate it for EmbeddingConvLinearModule + # Surprisingly it does propagate it for EmbeddingConvLinearModule # TODO: Figure out the right behavior for propagation self._test_quantizer( m_eager, @@ -2253,7 +2253,7 @@ class TestQuantizePT2E(PT2EQuantizationTestCase): model = prepare_qat_pt2e(model, composed_quantizer) cur = time.time() # print("prepare time:", cur - prev) - # Without Calibraiton, scale/zero value will have an initialized value of 1.0 + # Without Calibration, scale/zero value will have an initialized value of 1.0 # Per channel quantization needs a proper scale/zero shape/value to work properly. # So we need to run calibration before converting to quantized model. 
model(*example_inputs) diff --git a/test/quantization/pt2e/test_x86inductor_quantizer.py b/test/quantization/pt2e/test_x86inductor_quantizer.py index 9e2e690c21d..dfd591cb941 100644 --- a/test/quantization/pt2e/test_x86inductor_quantizer.py +++ b/test/quantization/pt2e/test_x86inductor_quantizer.py @@ -2464,11 +2464,11 @@ class TestQuantizePT2EX86Inductor(X86InductorQuantTestCase): torch.ops.quantized_decomposed.dequantize_per_channel.default: 2, } node_list = [ - # Q/DQ for first lienar + # Q/DQ for first linear torch.ops.quantized_decomposed.quantize_per_tensor.default, torch.ops.quantized_decomposed.dequantize_per_tensor.default, torch.ops.aten.linear.default, - # Q/DQ for second lienar + # Q/DQ for second linear torch.ops.quantized_decomposed.quantize_per_tensor.default, torch.ops.quantized_decomposed.dequantize_per_tensor.default, torch.ops.aten.linear.default, diff --git a/test/quantization/pt2e/test_xnnpack_quantizer.py b/test/quantization/pt2e/test_xnnpack_quantizer.py index 3baec3f8004..6b9acaaf741 100644 --- a/test/quantization/pt2e/test_xnnpack_quantizer.py +++ b/test/quantization/pt2e/test_xnnpack_quantizer.py @@ -1062,7 +1062,7 @@ class TestXNNPACKQuantizerModels(PT2EQuantizationTestCase): # the result matches exactly after prepare # Note: this currently will always be true since we are inserting observers # the check becomes useful when we add qat examples - # but we can still manully inspect the printed observers to make sure + # but we can still manually inspect the printed observers to make sure # it matches self.assertEqual(after_prepare_result, after_prepare_result_fx) self.assertEqual( diff --git a/test/run_test.py b/test/run_test.py index ca17754b33f..4b7030d4615 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -1626,7 +1626,7 @@ def get_selected_tests(options) -> list[str]: if options.xpu: selected_tests = exclude_tests(XPU_BLOCKLIST, selected_tests, "on XPU") else: - # Exclude all xpu specifc tests otherwise + # Exclude all xpu specific tests otherwise options.exclude.extend(XPU_TEST) # Filter to only run onnx tests when --onnx option is specified diff --git a/test/test_autograd.py b/test/test_autograd.py index bebe89e0965..ee6d9c09282 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -5896,7 +5896,7 @@ Done""", @staticmethod def backward(ctx, grad): - # Create a sparse tensor with non-contigous indices and values + # Create a sparse tensor with non-contiguous indices and values # and return as grad. 
                v = torch.rand(1, 3)
                i = torch.ones(1, 1, dtype=torch.long)
diff --git a/test/test_fx.py b/test/test_fx.py
index 6d581f9d41d..4c4a6d8c619 100644
--- a/test/test_fx.py
+++ b/test/test_fx.py
@@ -204,7 +204,7 @@ def side_effect_func(x: torch.Tensor):
 class TestFX(JitTestCase):
     def setUp(self):
         super().setUp()
-        # Checking for mutable operations whil tracing is feature flagged
+        # Checking for mutable operations while tracing is feature flagged
         # Enable it in testing but not by default
         self.orig_tracer_mutable_flag = (
             torch.fx.proxy.TracerBase.check_mutable_operations
@@ -4198,7 +4198,7 @@ def run_getitem_target():
 class TestOperatorSignatures(JitTestCase):
     def setUp(self):
-        # Checking for mutable operations whil tracing is feature flagged
+        # Checking for mutable operations while tracing is feature flagged
         # Enable it in testing but not by default
         self.orig_tracer_mutable_flag = (
             torch.fx.proxy.TracerBase.check_mutable_operations
@@ -4241,7 +4241,7 @@ class TestFXAPIBackwardCompatibility(JitTestCase):
         super().setUp()
         self.maxDiff = None
-        # Checking for mutable operations whil tracing is feature flagged
+        # Checking for mutable operations while tracing is feature flagged
         # Enable it in testing but not by default
         self.orig_tracer_mutable_flag = (
             torch.fx.proxy.TracerBase.check_mutable_operations
@@ -4597,7 +4597,7 @@ class TestFXAPIBackwardCompatibility(JitTestCase):
 class TestFunctionalTracing(JitTestCase):
     def setUp(self):
         super().setUp()
-        # Checking for mutable operations whil tracing is feature flagged
+        # Checking for mutable operations while tracing is feature flagged
         # Enable it in testing but not by default
         self.orig_tracer_mutable_flag = (
             torch.fx.proxy.TracerBase.check_mutable_operations
diff --git a/test/test_indexing.py b/test/test_indexing.py
index 99d84a65abc..cca7a21165d 100644
--- a/test/test_indexing.py
+++ b/test/test_indexing.py
@@ -247,7 +247,7 @@ class TestIndexing(TestCase):
            x[ri([0, 2, 4]),], torch.tensor([5, 4, 3], dtype=dtype, device=device)
        )
-        # Only validates indexing and setting for Halfs
+        # Only validates indexing and setting for Halves
        if dtype == torch.half:
            reference = consec((10,))
            validate_indexing(reference)
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 16bbf47ec48..01a6dd5c8ec 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -4842,7 +4842,7 @@ class TestLinalg(TestCase):
        self.assertTrue(torch.cuda.tunable.record_untuned_is_enabled())
        make_arg = partial(make_tensor, device=device, dtype=dtype)
-        # offline tuning only handles matmuls on two dimensionsal tensors
+        # offline tuning only handles matmuls on two dimensional tensors
        # matmul that require broadcasting are
        # not supported either.
        # Below we check the different transA and transB combinations.
@@ -4871,7 +4871,7 @@ class TestLinalg(TestCase):
                continue
            # offline tuning only handles batched matmuls on
-            # three dimensionsal tensors
+            # three dimensional tensors
            # matmul that require broadcasting are
            # not supported either.
            # Below we check the different transA and transB combinations.
diff --git a/test/test_mkldnn.py b/test/test_mkldnn.py
index e2ec92fc8da..4e1ef44bb31 100644
--- a/test/test_mkldnn.py
+++ b/test/test_mkldnn.py
@@ -1520,7 +1520,7 @@ class TestMkldnn(TestCase):
        h = torch.randn(num_layers * num_directions, batch_size, hidden_size, dtype=torch.float32)
        c = torch.randn(num_layers * num_directions, batch_size, hidden_size, dtype=torch.float32)
        if fp16:
-            # TODO add traing support when oneDNN support lstm FP16 training
+            # TODO add training support when oneDNN supports lstm FP16 training
            training = False
        model = torch.nn.LSTM(input_size, hidden_size, num_layers, bidirectional=bidirectional, bias=bias, dropout=dropout, batch_first=batch_first).float()
diff --git a/test/test_modules.py b/test/test_modules.py
index e587c67815c..2f881c89b78 100644
--- a/test/test_modules.py
+++ b/test/test_modules.py
@@ -328,7 +328,7 @@ class TestModule(TestCase):
    def _retain_grad(self, obj):
        # gradients needs to be retained to check for grad. This is useful when
-        # non-leafs are present in the graph.
+        # non-leaves are present in the graph.
        def inner_retain_grad(obj):
            if obj.requires_grad:
                obj.retain_grad()
diff --git a/test/test_mps.py b/test/test_mps.py
index 3dd4c8261ff..83d5b46d468 100644
--- a/test/test_mps.py
+++ b/test/test_mps.py
@@ -7842,7 +7842,7 @@ class TestMPS(TestCaseMPS):
        shape = (2, 3, 4, 5, 6)
        x = torch.rand(shape, device="mps")
        self.assertNotEqual(x[0], x[1])
-        # Check that normal distributino is not affected by the same
+        # Check that normal distribution is not affected by the same
        y = torch.normal(torch.zeros(shape, device="mps"), torch.ones(shape, device="mps"))
        self.assertNotEqual(y[0], y[1])
@@ -12644,7 +12644,7 @@ class TestConsistency(TestCaseMPS):
        self.assertEqual(out_mps, out_cpu)
    def test_fmax_mixed_dtypes(self, device):
-        # Regression tesing for https://github.com/pytorch/pytorch/issues/149951
+        # Regression testing for https://github.com/pytorch/pytorch/issues/149951
        # fmax and fmin are implemented as binary metal shaders and they were implemented
        # with the assumption that both args have the same dtype
        x = torch.rand((3, 3), device=device, dtype=torch.float32)
diff --git a/test/test_nn.py b/test/test_nn.py
index cb755992ffc..eac0d887c42 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -1809,17 +1809,17 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""")
            num_params - 1,
        )
-        # Removing the weight norm reparametrization restores the Parameter
+        # Removing the weight norm reparameterization restores the Parameter
        l = torch.nn.utils.remove_weight_norm(l, name=name)
        self.assertEqual(
            sum(isinstance(p, torch.nn.Parameter) for p in l._flat_weights),
            num_params,
        )
-        # Make sure that, upon removal of the reparametrization, the
+        # Make sure that, upon removal of the reparameterization, the
        # `._parameters` and `.named_parameters` contain the right params.
        # Specifically, the original weight ('weight_ih_l0') should be placed
-        # back in the parameters, while the reparametrization components
+        # back in the parameters, while the reparameterization components
        # ('weight_ih_l0_v' and 'weight_ih_l0_g') should be removed.
        self.assertTrue(name in l._parameters)
        self.assertIsNotNone(l._parameters[name])
@@ -7308,7 +7308,7 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""")
            count_tensor
        )
-        # Test batch_norm_backward_elemt gives the same answer for all
+        # Test batch_norm_backward_element gives the same answer for all
        # combinations of contiguous as channels_last input
        for a, b in [
            (torch.channels_last, torch.contiguous_format),
diff --git a/test/test_ops.py b/test/test_ops.py
index 3ec023f3d67..7427de04bf8 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -2647,7 +2647,7 @@ fake_skips = (
    "linalg.eigvals",  # The tensor has a non-zero number of elements, but its data is not allocated yet
    "linalg.eigvalsh",  # aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend
    "linalg.matrix_power",  # Could not run 'aten::eye.m_out' with arguments from the 'Meta' backend
-    # "linalg.pinv",  # Could not run 'aten::pinv.out' with arguments from the 'Meta' backen
+    # "linalg.pinv",  # Could not run 'aten::pinv.out' with arguments from the 'Meta' backend
    "linalg.matrix_rank.hermitian",  # Could not run 'aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend
    "linalg.pinv.hermitian",  # tensor.mH is only supported on matrices or batches of matrices. Got 1-D tensor
    "linalg.solve",  # Could not run 'aten::linalg_solve' with arguments from the 'Meta' backend
diff --git a/test/test_python_dispatch.py b/test/test_python_dispatch.py
index 98fbabff11e..515ce435b72 100644
--- a/test/test_python_dispatch.py
+++ b/test/test_python_dispatch.py
@@ -2520,7 +2520,7 @@ def forward(self, x_1):
                self.last_args = args
                return func(*args, **kwargs)
-        # Value that could not be intepreted as signed int64
+        # Value that could not be interpreted as signed int64
        uarg = 2**63 + 1
        with DummyMode() as m:
            a = torch.full((3, 3), uarg, dtype=torch.uint64)
diff --git a/test/test_quantization.py b/test/test_quantization.py
index 01006e3f6e2..c36c20bb0ca 100644
--- a/test/test_quantization.py
+++ b/test/test_quantization.py
@@ -51,7 +51,7 @@ from quantization.eager.test_quantize_eager_qat import TestQuantizeEagerQAT  # n
 from quantization.eager.test_quantize_eager_qat import TestQuantizeEagerQATNumerics  # noqa: F401
 # 3. Eager mode fusion passes
 from quantization.eager.test_fuse_eager import TestFuseEager  # noqa: F401
-# 4. Testing model numerics between quanitzed and FP32 models
+# 4. Testing model numerics between quantized and FP32 models
 from quantization.eager.test_model_numerics import TestModelNumericsEager  # noqa: F401
 # 5. Tooling: numeric_suite
 from quantization.eager.test_numeric_suite_eager import TestNumericSuiteEager  # noqa: F401
diff --git a/test/test_scaled_matmul_cuda.py b/test/test_scaled_matmul_cuda.py
index 4fb48fe6ccf..204153e971b 100644
--- a/test/test_scaled_matmul_cuda.py
+++ b/test/test_scaled_matmul_cuda.py
@@ -426,7 +426,7 @@ def data_to_nvfp4_with_global_scale(x, block_size):
    # Per-tensor max
    global_max = x.abs().max()
-    # Contants
+    # Constants
    # Global encoding scale for block-scales
    S_enc = FP4_MAX_VAL * F8E4M3_MAX_VAL / global_max
    S_dec = 1. / S_enc
diff --git a/test/test_shape_ops.py b/test/test_shape_ops.py
index b8bb32b658f..24c8122d5ae 100644
--- a/test/test_shape_ops.py
+++ b/test/test_shape_ops.py
@@ -262,7 +262,7 @@ class TestShapeOps(TestCase):
            expected = xn.diagonal(*args)
            self.assertEqual(expected.shape, result.shape)
            self.assertEqual(expected, result)
-        # test non-continguous
+        # test non-contiguous
        xp = x.permute(1, 2, 3, 0)
        result = torch.diagonal(xp, 0, -2, -1)
        expected = xp.numpy().diagonal(0, -2, -1)
diff --git a/test/test_sparse.py b/test/test_sparse.py
index 7776cf8abbf..5150dab4b7c 100644
--- a/test/test_sparse.py
+++ b/test/test_sparse.py
@@ -1333,7 +1333,7 @@ class TestSparse(TestSparseBase):
        res_sparse = t.to_sparse().index_select(0, idx_empty)
        self.assertEqual(res_dense, res_sparse)
-        # non-contigous index
+        # non-contiguous index
        idx = torch.randint(low=0, high=5, size=(10, 2), device=device)[:, 0]
        def run_test(sizes):
diff --git a/test/test_stateless.py b/test/test_stateless.py
index d24194ed460..e8217f2caea 100644
--- a/test/test_stateless.py
+++ b/test/test_stateless.py
@@ -186,7 +186,7 @@ class TestStatelessFunctionalAPI(TestCase):
        cur_rm = module.running_mean
        self.assertEqual(cur_rm, prev_rm)
        self.assertEqual(rm, torch.full((10,), 12.8))
-        # Now run functional without reparametrization and check that the module has
+        # Now run functional without reparameterization and check that the module has
        # been updated
        functional_call(module, {}, x)
        self.assertEqual(module.running_mean, torch.full((10,), 12.8))
diff --git a/test/test_tensorexpr.py b/test/test_tensorexpr.py
index 57be409ab6b..628e45ed8eb 100644
--- a/test/test_tensorexpr.py
+++ b/test/test_tensorexpr.py
@@ -705,7 +705,7 @@ class TestTensorExprFuser(BaseTestClass):
        # d = to_bf16(to_fp32(a) + to_fp32(b) + to_fp32(c))
        # Hence, we simulate NNC computation by feeding fp32 tensors and converting
        # the result tensor back to bf16. The simulation could avoid the numeric
-        # deviation to simplify the result comprasion
+        # deviation to simplify the result comparison
        y = warmup_and_run_forward(traced, rand_a.float(), rand_b.float())
        if torch_fn not in cmp_fns:
            y = y.bfloat16()
diff --git a/test/test_transformers.py b/test/test_transformers.py
index 2dae5e5da11..4dea4312469 100644
--- a/test/test_transformers.py
+++ b/test/test_transformers.py
@@ -4320,8 +4320,8 @@ class TestSDPAXpuOnly(NNTestCase):
        _ = F.scaled_dot_product_attention(q, k, v)
    def test_default_priority_order(self, device):
-        # The default priority order of xpu is overrideable, math, flash, efficient, cudnn
-        # For xpu backend, we need to make sure that overrideable > math > flash
+        # The default priority order of xpu is overridable, math, flash, efficient, cudnn
+        # For xpu backend, we need to make sure that overridable > math > flash
        dtype = torch.bfloat16
        shape = SdpaShape(1, 1, 1, 1)
        make_tensor = partial(torch.rand, shape, device=device, dtype=dtype)
diff --git a/test/torch_np/numpy_tests/core/test_dtype.py b/test/torch_np/numpy_tests/core/test_dtype.py
index 13e42f4b7b4..19b41d877ca 100644
--- a/test/torch_np/numpy_tests/core/test_dtype.py
+++ b/test/torch_np/numpy_tests/core/test_dtype.py
@@ -87,7 +87,7 @@ class TestBuiltin(TestCase):
        assert_raises(TypeError, np.dtype, "l8")
        assert_raises(TypeError, np.dtype, "L8")
-        # XXX: what is 'q'? on my 64-bit ubuntu maching it's int64, same as 'l'
+        # XXX: what is 'q'? on my 64-bit ubuntu machine it's int64, same as 'l'
        # if np.dtype('q').itemsize == 8:
        # assert_raises(TypeError, np.dtype, 'q4')
        # assert_raises(TypeError, np.dtype, 'Q4')
diff --git a/test/torch_np/numpy_tests/core/test_einsum.py b/test/torch_np/numpy_tests/core/test_einsum.py
index a2810808682..45c1d974748 100644
--- a/test/torch_np/numpy_tests/core/test_einsum.py
+++ b/test/torch_np/numpy_tests/core/test_einsum.py
@@ -976,7 +976,7 @@ class TestEinsum(TestCase):
        # Test originally added to cover broken float16 path: gh-20305
        # Likely most are covered elsewhere, at least partially.
        dtype = np.dtype(dtype)
-        # Simple test, designed to excersize most specialized code paths,
+        # Simple test, designed to exercise most specialized code paths,
        # note the +0.5 for floats. This makes sure we use a float value
        # where the results must be exact.
        arr = (np.arange(7) + 0.5).astype(dtype)
@@ -1160,7 +1160,7 @@ class TestEinsum(TestCase):
    @xfail  # (reason="order='F' not supported")
    def test_output_order(self):
        # Ensure output order is respected for optimize cases, the below
-        # conraction should yield a reshaped tensor view
+        # contraction should yield a reshaped tensor view
        # gh-16415
        a = np.ones((2, 3, 5), order="F")
diff --git a/test/torch_np/numpy_tests/core/test_indexing.py b/test/torch_np/numpy_tests/core/test_indexing.py
index 91dae968683..16d89c03219 100644
--- a/test/torch_np/numpy_tests/core/test_indexing.py
+++ b/test/torch_np/numpy_tests/core/test_indexing.py
@@ -375,7 +375,7 @@ class TestIndexing(TestCase):
        assert_array_equal(a[idx], idx)
        # this case must not go into the fast path, note that idx is
-        # a non-contiuguous none 1D array here.
+        # a non-contiguous none 1D array here.
        a[idx] = -1
        res = np.arange(6)
        res[0] = -1
diff --git a/test/torch_np/numpy_tests/core/test_multiarray.py b/test/torch_np/numpy_tests/core/test_multiarray.py
index ba19b62e821..fc2c3435907 100644
--- a/test/torch_np/numpy_tests/core/test_multiarray.py
+++ b/test/torch_np/numpy_tests/core/test_multiarray.py
@@ -900,7 +900,7 @@ class TestScalarIndexing(TestCase):
        assert_raises(IndexError, subscript, a, (np.newaxis, 0))
-        # this assersion fails because 50 > NPY_MAXDIMS = 32
+        # this assertion fails because 50 > NPY_MAXDIMS = 32
        # assert_raises(IndexError, subscript, a, (np.newaxis,)*50)
    @xfail  # (reason="pytorch disallows overlapping assignments")
@@ -3283,7 +3283,7 @@ class TestArgmax(TestCase):
        ([np.nan, 0, 1, 2, 3], 0),
        ([np.nan, 0, np.nan, 2, 3], 0),
        # To hit the tail of SIMD multi-level(x4, x1) inner loops
-        # on variant SIMD widthes
+        # on variant SIMD widths
        ([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1),
        ([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1),
        ([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1),
@@ -3392,7 +3392,7 @@ class TestArgmin(TestCase):
        ([np.nan, 0, 1, 2, 3], 0),
        ([np.nan, 0, np.nan, 2, 3], 0),
        # To hit the tail of SIMD multi-level(x4, x1) inner loops
-        # on variant SIMD widthes
+        # on variant SIMD widths
        ([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1),
        ([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1),
        ([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1),
diff --git a/test/torch_np/numpy_tests/core/test_numerictypes.py b/test/torch_np/numpy_tests/core/test_numerictypes.py
index f8ec84069b6..29b42de7c07 100644
--- a/test/torch_np/numpy_tests/core/test_numerictypes.py
+++ b/test/torch_np/numpy_tests/core/test_numerictypes.py
@@ -30,7 +30,7 @@ skip = functools.partial(skipif, True)
 @xpassIfTorchDynamo_np  # (
-# reason="We do not disctinguish between scalar and array types."
+# reason="We do not distinguish between scalar and array types."
# " Thus, scalars can upcast arrays." # ) class TestCommonType(TestCase): diff --git a/test/torch_np/numpy_tests/lib/test_function_base.py b/test/torch_np/numpy_tests/lib/test_function_base.py index 13dba55837c..7256d81e4c6 100644 --- a/test/torch_np/numpy_tests/lib/test_function_base.py +++ b/test/torch_np/numpy_tests/lib/test_function_base.py @@ -3361,42 +3361,42 @@ class TestPercentile(TestCase): assert_equal(np.percentile(a, 0.3), np.nan) assert_equal(np.percentile(a, 0.3).ndim, 0) - # axis0 zerod + # axis0 zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 0) b[2, 3] = np.nan b[1, 2] = np.nan assert_equal(np.percentile(a, 0.3, 0), b) - # axis0 not zerod + # axis0 not zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 0) b[:, 2, 3] = np.nan b[:, 1, 2] = np.nan assert_equal(np.percentile(a, [0.3, 0.6], 0), b) - # axis1 zerod + # axis1 zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 1) b[1, 3] = np.nan b[1, 2] = np.nan assert_equal(np.percentile(a, 0.3, 1), b) - # axis1 not zerod + # axis1 not zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 1) b[:, 1, 3] = np.nan b[:, 1, 2] = np.nan assert_equal(np.percentile(a, [0.3, 0.6], 1), b) - # axis02 zerod + # axis02 zeroed b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, (0, 2)) b[1] = np.nan b[2] = np.nan assert_equal(np.percentile(a, 0.3, (0, 2)), b) - # axis02 not zerod + # axis02 not zeroed b = np.percentile( np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], (0, 2) ) b[:, 1] = np.nan b[:, 2] = np.nan assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b) - # axis02 not zerod with method='nearest' + # axis02 not zeroed with method='nearest' b = np.percentile( np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], diff --git a/test/torch_np/test_ndarray_methods.py b/test/torch_np/test_ndarray_methods.py index f94b03f1f6e..b25faac56cb 100644 --- a/test/torch_np/test_ndarray_methods.py +++ b/test/torch_np/test_ndarray_methods.py @@ -399,7 +399,7 @@ class TestArgmax(TestCase): ([np.nan, 0, 1, 2, 3], 0), ([np.nan, 0, np.nan, 2, 3], 0), # To hit the tail of SIMD multi-level(x4, x1) inner loops - # on variant SIMD widthes + # on variant SIMD widths ([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1), ([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1), ([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1), @@ -534,7 +534,7 @@ class TestArgmin(TestCase): ([np.nan, 0, 1, 2, 3], 0), ([np.nan, 0, np.nan, 2, 3], 0), # To hit the tail of SIMD multi-level(x4, x1) inner loops - # on variant SIMD widthes + # on variant SIMD widths ([1] * (2 * 5 - 1) + [np.nan], 2 * 5 - 1), ([1] * (4 * 5 - 1) + [np.nan], 4 * 5 - 1), ([1] * (8 * 5 - 1) + [np.nan], 8 * 5 - 1), diff --git a/test/torch_np/test_scalars_0D_arrays.py b/test/torch_np/test_scalars_0D_arrays.py index 24bed1c239b..0a21e5fb97b 100644 --- a/test/torch_np/test_scalars_0D_arrays.py +++ b/test/torch_np/test_scalars_0D_arrays.py @@ -68,7 +68,7 @@ class TestArrayScalars(TestCase): assert product.shape == (3,) assert_equal(product, [42, 42 * 2, 42 * 3]) - # repeat with right-mulitply + # repeat with right-multiply product = lst * value assert isinstance(product, np.ndarray) assert product.shape == (3,) diff --git a/test/typing/pass/arithmetic_ops.py b/test/typing/pass/arithmetic_ops.py index 556ef90523e..f0d6cc6fd9f 100644 --- a/test/typing/pass/arithmetic_ops.py +++ b/test/typing/pass/arithmetic_ops.py @@ -19,7 +19,7 @@ assert_type(-TENSOR, Tensor) assert_type(~TENSOR, Tensor) # -# Binary ops that 
+# Binary ops that return a boolean
 #
 # Operator ==