diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b88247df27..2bbb8797b78 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -374,7 +374,7 @@ cmake_dependent_option( "Build the lazy Torchscript backend, not compatible with mobile builds" ON "NOT INTERN_BUILD_MOBILE" OFF) cmake_dependent_option(BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF) -cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler" +cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin folder" OFF "USE_CUDA" OFF) cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON "CPU_AARCH64" OFF) diff --git a/c10/metal/special_math.h b/c10/metal/special_math.h index 29a45ff4c30..d80dfea9f03 100644 --- a/c10/metal/special_math.h +++ b/c10/metal/special_math.h @@ -1,4 +1,4 @@ -// Implementation of specal math functions for Metal +// Implementation of special math functions for Metal #pragma once #include #include diff --git a/c10/mobile/CPUProfilingAllocator.cpp b/c10/mobile/CPUProfilingAllocator.cpp index d01cdd2b1d2..c484811db91 100644 --- a/c10/mobile/CPUProfilingAllocator.cpp +++ b/c10/mobile/CPUProfilingAllocator.cpp @@ -34,7 +34,7 @@ struct MemEvent { bool overlaps(const MemBlock& a, const MemBlock& b) { // two blocks dont overlap if // |---a--------|--------------b--------| - // strat_a end_a <= start_b end_b + // start_a end_a <= start_b end_b return !( (a.end_offset <= b.start_offset) || (b.end_offset <= a.start_offset)); } diff --git a/c10/util/Bitset.h b/c10/util/Bitset.h index 782cefbd922..f1d521bd7e5 100644 --- a/c10/util/Bitset.h +++ b/c10/util/Bitset.h @@ -33,7 +33,7 @@ struct bitset final { constexpr bitset() noexcept = default; constexpr bitset(const bitset&) noexcept = default; constexpr bitset(bitset&&) noexcept = default; - // there is an issure for gcc 5.3.0 when define default function as constexpr + // there is an issue for gcc 5.3.0 when define default function as constexpr // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68754. 
bitset& operator=(const bitset&) noexcept = default; bitset& operator=(bitset&&) noexcept = default; diff --git a/caffe2/serialize/crc_alt.h b/caffe2/serialize/crc_alt.h index 9d1c4f1dc7d..8c3e85df46a 100644 --- a/caffe2/serialize/crc_alt.h +++ b/caffe2/serialize/crc_alt.h @@ -38,7 +38,7 @@ uint32_t crc32_combine (uint32_t crcA, uint32_t crcB, size_t lengthB); /// compute CRC32 (bitwise algorithm) uint32_t crc32_bitwise (const void* data, size_t length, uint32_t previousCrc32 = 0); -/// compute CRC32 (half-byte algoritm) +/// compute CRC32 (half-byte algorithm) uint32_t crc32_halfbyte(const void* data, size_t length, uint32_t previousCrc32 = 0); #ifdef CRC32_USE_LOOKUP_TABLE_BYTE @@ -96,7 +96,7 @@ uint32_t crc32_16bytes_prefetch(const void* data, size_t length, uint32_t previo #define __BIG_ENDIAN 4321 #endif -// define endianess and some integer data types +// define endianness and some integer data types #if defined(_MSC_VER) || defined(__MINGW32__) // Windows always little endian #define __BYTE_ORDER __LITTLE_ENDIAN @@ -168,7 +168,7 @@ namespace /// zlib's CRC32 polynomial const uint32_t Polynomial = 0xEDB88320; - /// swap endianess + /// swap endianness static inline uint32_t swap(uint32_t x) { #if defined(__GNUC__) || defined(__clang__) @@ -229,7 +229,7 @@ uint32_t crc32_bitwise(const void* data, size_t length, uint32_t previousCrc32) } -/// compute CRC32 (half-byte algoritm) +/// compute CRC32 (half-byte algorithm) uint32_t crc32_halfbyte(const void* data, size_t length, uint32_t previousCrc32) { uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF @@ -662,7 +662,7 @@ uint32_t crc32_combine(uint32_t crcA, uint32_t crcB, size_t lengthB) // - if you append length(B) zeros to A and call it A' (think of it as AAAA000) // and prepend length(A) zeros to B and call it B' (think of it as 0000BBB) // then exists a C' = A' ^ B' - // - remember: if you XOR someting with zero, it remains unchanged: X ^ 0 = X + // - remember: if you XOR something with zero, it remains unchanged: X ^ 0 = X // - that means C' = A concat B so that crc(A concat B) = crc(C') = crc(A') ^ crc(B') // - the trick is to compute crc(A') based on crc(A) // and crc(B') based on crc(B) diff --git a/caffe2/serialize/inline_container.h b/caffe2/serialize/inline_container.h index 47bd7886dc9..7c13b2d6ec5 100644 --- a/caffe2/serialize/inline_container.h +++ b/caffe2/serialize/inline_container.h @@ -76,7 +76,7 @@ typedef struct mz_zip_archive mz_zip_archive; // 2) Writing with 1-pass sequential access // -> We must take care not to require updating values that have already // been written. We place the variable-length index at the end and do -// not put any indicies into the header to fulfill this constraint. +// not put any index into the header to fulfill this constraint. // The model.json, which contains all the metadata information, // should be written as the last file. 
One reason is that the size of tensor diff --git a/caffe2/serialize/inline_container_test.cc b/caffe2/serialize/inline_container_test.cc index 489751522fb..785e93a0a22 100644 --- a/caffe2/serialize/inline_container_test.cc +++ b/caffe2/serialize/inline_container_test.cc @@ -519,7 +519,7 @@ TEST(PyTorchStreamWriterAndReader, SaveAndLoadWithAllocator) { std::tie(data_ptr, size) = reader.getRecord("key1", &overrideAllocator); EXPECT_EQ(overrideAllocator.getAllocatedBytes(), kBytes1); EXPECT_EQ(baseAllocator.getAllocatedBytes(), allocBytes); - // allcoate with base allocator + // allocate with base allocator std::tie(data_ptr, size) = reader.getRecord("key1"); EXPECT_EQ(overrideAllocator.getAllocatedBytes(), kBytes1); EXPECT_EQ(baseAllocator.getAllocatedBytes(), allocBytes + kBytes1); diff --git a/setup.py b/setup.py index a980a5f3521..31e78d0245d 100644 --- a/setup.py +++ b/setup.py @@ -1106,7 +1106,7 @@ class build_ext(setuptools.command.build_ext.build_ext): continue self.copy_file(source_lib, target_lib) # Delete old rpath and add @loader_lib to the rpath - # This should prevent delocate from attempting to package another instance + # This should prevent delocate from attempting to package another instance # of OpenMP library in torch wheel as well as loading two libomp.dylib into # the address space, as libraries are cached by their unresolved names install_name_tool_args = [ diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index 38b3bf4eca2..df2366e3324 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -1060,7 +1060,7 @@ class OutputGraph(OutputGraphCommon): def module_key_name(*names: Any) -> str: # create a new unique name name = "_".join(map(str, names)) - # Strip _buffers[..]/_parmeters[..]/_modules[..] names + # Strip _buffers[..]/_parameters[..]/_modules[..] names name = re.sub( r"\._(?:modules|parameters|buffers)\[(['\"])([^'\"\]]+)\1\]", r".\2", name ) @@ -2217,7 +2217,7 @@ class OutputGraph(OutputGraphCommon): backend_fake_mode = torch._subclasses.FakeTensorMode( shape_env=old_fake_mode.shape_env, ) - # TODO(voz): Ostensibily, this should be scoped and + # TODO(voz): Ostensibly, this should be scoped and # restore back to old_fake_mode, but doing so currently violates # a lot of fake_tensor ownership assumptions and runs afoul of detect_fake_mode self.tracing_context.fake_mode = backend_fake_mode @@ -3414,7 +3414,7 @@ class SubgraphTracer(fx.Tracer): if proxy in self.lifted_freevars: return self.lifted_freevars[proxy] - # We first lift proxy to parent's graph then lift to current grpah's input + # We first lift proxy to parent's graph then lift to current graph's input # so that when we bind symints of the sizes in current graph, those symints # would already be lifted as inputs to parent graph. if proxy.tracer != self.parent: @@ -3462,7 +3462,7 @@ class SubgraphTracer(fx.Tracer): def track_produced_symints( self, example_value: Any, e_proxy: Union[LazyProxy, torch.fx.Proxy] ) -> None: - # When binding the symbols in an exmaple_value, we bind the symbols + # When binding the symbols in an example_value, we bind the symbols # to the proxy's associated Tracer instead of current tracer. # This is because: # 1.
We may be calling wrap_tensors during speculate_subgraph because diff --git a/torch/_dynamo/symbolic_convert.py b/torch/_dynamo/symbolic_convert.py index d6deeb5abe3..95b428a62c4 100644 --- a/torch/_dynamo/symbolic_convert.py +++ b/torch/_dynamo/symbolic_convert.py @@ -2089,7 +2089,7 @@ class InstructionTranslatorBase( def _raise_exception_variable(self, val: VariableTracker) -> NoReturn: # User can raise exception in 2 ways # 1) raise exception type - raise NotImplementedError - # 2) raise exception instance - raise NotImplemetedError("foo") + # 2) raise exception instance - raise NotImplementedError("foo") # 1) when user raises exception type val = self._create_exception_type(val) @@ -2140,7 +2140,7 @@ class InstructionTranslatorBase( try: self._raise_exception_variable(val) finally: - # Update __cause__/__supppress_context__ in the raised exception + # Update __cause__/__suppress_context__ in the raised exception curr_exc = self.exn_vt_stack.get_current_exception() cause = self._create_exception_type(from_vt) curr_exc.call_setattr(self, ConstantVariable("__cause__"), cause) # type: ignore[arg-type, union-attr, assignment] @@ -2417,8 +2417,8 @@ class InstructionTranslatorBase( # Users can check exception in 3 ways # 1) except NotImplementedError --> BuiltinVariable - # 2) except CustomException --> UserDefinedExceptionClasVariable - # 3) except (NotImplemetedError, AttributeError) -> TupleVariable + # 2) except CustomException --> UserDefinedExceptionClassVariable + # 3) except (NotImplementedError, AttributeError) -> TupleVariable if not isinstance( expected_exc_types, diff --git a/torch/_dynamo/variables/dicts.py b/torch/_dynamo/variables/dicts.py index d0448e0c500..4f1f84a55b0 100644 --- a/torch/_dynamo/variables/dicts.py +++ b/torch/_dynamo/variables/dicts.py @@ -54,7 +54,7 @@ if TYPE_CHECKING: from torch._dynamo.symbolic_convert import InstructionTranslator -# [Adding a new supported class within the keys of ConstDictVarialble] +# [Adding a new supported class within the keys of ConstDictVariable] # - Add its tracker type to is_hashable # - (perhaps) Define how it is compared in _HashableTracker._eq_impl diff --git a/torch/_export/converter.py b/torch/_export/converter.py index 1a928f011bb..ae8b7c2786c 100644 --- a/torch/_export/converter.py +++ b/torch/_export/converter.py @@ -765,7 +765,7 @@ class TS2FXGraphConverter: raise ValueError(f"Unsupported JitType ({input_type}) when get device") def convert_prim_GetAttr(self, node: torch._C.Node): - # Build fully qulified name + # Build fully qualified name attr_fqn = get_attribute_fqn_from_ts_node(self.name_to_attribute_fqn, node) output_name = node.output().debugName() self.name_to_attribute_fqn[output_name] = attr_fqn @@ -1455,7 +1455,7 @@ DEBUG: (TORCH_LOGS="+export" ), additionally ) gm = graph_converter.convert() - # Post-proccessing step to deal with quantized operators. + # Post-processing step to deal with quantized operators. replace_quantized_ops_with_standard_ops(gm) log.info("GraphModule: %s", gm.print_readable(print_output=False)) diff --git a/torch/_export/utils.py b/torch/_export/utils.py index cc7cbee8dff..fe5de55a189 100644 --- a/torch/_export/utils.py +++ b/torch/_export/utils.py @@ -1477,7 +1477,7 @@ def register_module_as_pytree_input_node(cls: type[torch.nn.Module]) -> None: flattened, _ = flatten_fn(obj) # NOTE: This helper function will replicate an nn.Module in the exactly same - # structure to be used together with _reparametrize_module. This will + # structure to be used together with _reparametrize_module.
This will # create a clone of the module with the new parameters and buffers without # affecting the original module. def copy_module(mod: torch.nn.Module): diff --git a/torch/_functorch/_aot_autograd/autograd_cache.py b/torch/_functorch/_aot_autograd/autograd_cache.py index f60bf274b8f..e9df75de7a8 100644 --- a/torch/_functorch/_aot_autograd/autograd_cache.py +++ b/torch/_functorch/_aot_autograd/autograd_cache.py @@ -771,7 +771,7 @@ class GenericAOTAutogradCacheEntry(Generic[TForward, TBackward]): maybe_subclass_meta: Optional[SubclassMeta] num_fw_outs_saved_for_bw: Optional[int] - # Used by RuntimeWrapepr + # Used by RuntimeWrapper indices_of_inps_to_detach: list[int] # Time taken to trace/compile the forward diff --git a/torch/_functorch/compile_utils.py b/torch/_functorch/compile_utils.py index cdf2e1855a0..49a1adacab6 100644 --- a/torch/_functorch/compile_utils.py +++ b/torch/_functorch/compile_utils.py @@ -99,7 +99,7 @@ def fx_graph_cse(fx_g: torch.fx.graph.Graph): # so it's not worth CSEing. or get_aten_target(n) is aten.empty or n in nodes_that_alias_outputs - # This CSE pass currently doesn't handle re-propogation of unbacked + # This CSE pass currently doesn't handle re-propagation of unbacked # meta where it'll sometimes eliminate a _local_scalar_dense but not # replace the meta of downstream users. eg. one bug we've seen is: # diff --git a/torch/_functorch/config.py b/torch/_functorch/config.py index 89fd9076191..3dd2529b1b1 100644 --- a/torch/_functorch/config.py +++ b/torch/_functorch/config.py @@ -20,7 +20,7 @@ from torch.utils._config_module import Config, install_config_module # [@compile_ignored: debug] _save_config_ignore = [ - # callable not serializeable + # callable not serializable "joint_custom_pass", ] diff --git a/torch/_higher_order_ops/auto_functionalize.py b/torch/_higher_order_ops/auto_functionalize.py index 68942ee0b90..3f93036836e 100644 --- a/torch/_higher_order_ops/auto_functionalize.py +++ b/torch/_higher_order_ops/auto_functionalize.py @@ -44,7 +44,7 @@ class SchemaHolder: return cls(pytree.tree_unflatten([], tree_spec).schema) -# regsiter_constant allows us to get a tree_spec from pytree.tree_flatten(SchemaHolder(FunctionSchema)). +# register_constant allows us to get a tree_spec from pytree.tree_flatten(SchemaHolder(FunctionSchema)). # The tree_spec is proxable in the graph and we can get back the schema via # schema = pytree.tree_unflatten([], tree_spec).schema pytree.register_constant(SchemaHolder) diff --git a/torch/_higher_order_ops/scan.py b/torch/_higher_order_ops/scan.py index 2c3067f2cce..852339d11ec 100644 --- a/torch/_higher_order_ops/scan.py +++ b/torch/_higher_order_ops/scan.py @@ -312,7 +312,7 @@ def generic_scan(operator, init, xs, dim=0, additional_inputs=()): out_tensor_mask = get_tensor_mask(dummy_out) dummy_out_masked = mask_list(out_tensor_mask, dummy_out) - # Pre-alocate + # Pre-allocate # outs -> Output matrix # idxs -> Index matrix for scatter_ # out: (num_elems, M, N, ...) diff --git a/torch/_higher_order_ops/utils.py b/torch/_higher_order_ops/utils.py index e734bd4df5e..9028964baa9 100644 --- a/torch/_higher_order_ops/utils.py +++ b/torch/_higher_order_ops/utils.py @@ -708,7 +708,7 @@ def _stack_pytree(pytrees): # is partitioned into in order to recover it in saved_tensors_and_symints. # # In saved_tensors_and_symints, we can recover the original args by: -# iterating over the pos list and pop one item from the front of paritioned_args[pos[i]]. 
+# iterating over the pos list and pop one item from the front of partitioned_args[pos[i]]. # We use t_idx and s_idx to keep track of the next index of the item we are going to pop for the two lists. def save_tensors_and_symints_for_backward(ctx, args): assert all( diff --git a/torch/_higher_order_ops/while_loop.py b/torch/_higher_order_ops/while_loop.py index 4ada93c6e47..1fe172433e5 100644 --- a/torch/_higher_order_ops/while_loop.py +++ b/torch/_higher_order_ops/while_loop.py @@ -660,7 +660,7 @@ class WhileLoopStackOutputOp(HigherOrderOperator): # # gx = gy0 * bw(y0, x), # -# where gy0 denotes the graident of loss with respect to y0, and bw(y0, x) denotes the graident of y0 with +# where gy0 denotes the gradient of loss with respect to y0, and bw(y0, x) denotes the gradient of y0 with # respect to x. Note that bw can be computed from forward body_fn easily using torch.autograd.grad. # We could substitute the unknowns gy0, gy1, ..., with chain rule until gy4: # @@ -769,7 +769,7 @@ class WhileLoopAutogradOp(torch.autograd.Function): # Note [Handle inputs that're not differentiable] # When a forward input is non-differentiable e.g. a symint or an integer tensor, their gradients # will be None. However, we don't want to return None in the subgraph because this complicates the - # inductor codegen, where we need to do a non-unform treatment for None and tensors. + # inductor codegen, where we need to do a non-uniform treatment for None and tensors. # So we set up masks and filter the None gradients so that only tensors are returned from each step. carries_tensor_masks = [ bool(isinstance(t, torch.Tensor) and t.dtype.is_floating_point) diff --git a/torch/_inductor/fx_passes/ddp_fusion.py b/torch/_inductor/fx_passes/ddp_fusion.py index 8f55d670058..0cecccabef2 100644 --- a/torch/_inductor/fx_passes/ddp_fusion.py +++ b/torch/_inductor/fx_passes/ddp_fusion.py @@ -348,7 +348,7 @@ def _scatter_fused_allreduce_waits( # Some descendant users of the orig_comm_blocks may be scheduled before # the fused all_reduce. For example, the user nodes of the very first # all_reduce may be scheduled before the second all_reduce. Since the - # fused all_reduce is inserted right after the last all_reudce, the + # fused all_reduce is inserted right after the last all_reduce, the # order can be wrong. # `incorrect_order_nodes` records these nodes. diff --git a/torch/_inductor/fx_passes/mkldnn_fusion.py b/torch/_inductor/fx_passes/mkldnn_fusion.py index addc6e1ea8e..9bb69bdd11c 100644 --- a/torch/_inductor/fx_passes/mkldnn_fusion.py +++ b/torch/_inductor/fx_passes/mkldnn_fusion.py @@ -991,7 +991,7 @@ if torch._C._has_mkldnn: def _recover_linear(): # convert reshape+linear+reshape to a single linear for applying fusion path. - # concat_linear (pass_number=0) -> mkldnn_linear_pack (pass_numer=1) -> _recover_linear(pass_number=2) + # concat_linear (pass_number=0) -> mkldnn_linear_pack (pass_number=1) -> _recover_linear(pass_number=2) @register_freezing_graph_pattern( CallFunction( aten.reshape.default, diff --git a/torch/_inductor/fx_passes/post_grad.py b/torch/_inductor/fx_passes/post_grad.py index bc5e03ea44f..330fb34e8bb 100644 --- a/torch/_inductor/fx_passes/post_grad.py +++ b/torch/_inductor/fx_passes/post_grad.py @@ -585,7 +585,7 @@ def decompose_scan_to_while_loop(gm: torch.fx.GraphModule): # NOTE [Pre-allocate scan's output buffer] # In order to pre-allocate the output buffer for ys, we rely on the meta of scan's fx_node. 
# However, the meta consists of concrete symints, we need to bind those symints with - # proxies in order to trace the torch.empyt_strided call correctly. + # proxies in order to trace the torch.empty_strided call correctly. # # Also note that basic free symbols of tensor's shapes are guaranteed to be lifted as subgraph inputs # in dynamo so we can always re-construct the sym expression from placeholders. diff --git a/torch/_inductor/output_code.py b/torch/_inductor/output_code.py index 214b52a7491..6f1e192d46f 100644 --- a/torch/_inductor/output_code.py +++ b/torch/_inductor/output_code.py @@ -677,7 +677,7 @@ class CompiledFxGraph(OutputCode): ] else: # On the forward we don't know whether or not - # boxed_foward_device_index is set yet + # boxed_forward_device_index is set yet boxed_forward_device_index = graph_kwargs.get( "boxed_forward_device_index", None ) diff --git a/torch/_inductor/runtime/triton_heuristics.py b/torch/_inductor/runtime/triton_heuristics.py index f88278df88a..61c9f286021 100644 --- a/torch/_inductor/runtime/triton_heuristics.py +++ b/torch/_inductor/runtime/triton_heuristics.py @@ -530,7 +530,7 @@ class CachingAutotuner(KernelInterface): # = regs_per_multiprocessor / (nreg * 32 * num_warps) # < regs_per_multiprocessor / ((regs_per_multiprocessor / max_threads_per_multi_processor) * 32 * num_warps) # = max_threads_per_multi_processor / (32 * num_warps) - # Using a tigher upper bound can reveal more optimization opportunities. + # Using a tighter upper bound can reveal more optimization opportunities. max_blocks_per_sm = max( device_prop.regs_per_multiprocessor // nreg_per_block, 1 ) diff --git a/torch/_library/triton.py b/torch/_library/triton.py index 761279743f3..dc55cb9b349 100644 --- a/torch/_library/triton.py +++ b/torch/_library/triton.py @@ -215,7 +215,7 @@ def triton_op( # the exported program to be high-level and serializable. If we decompose # the custom op to a functional hop and make it a node in exported program, # we need to figure out ways of serializing the hop and its arguments, which can be triton.jited - # functions and triton dtypes. This is undesireble because: + # functions and triton dtypes. This is undesirable because: # - it can be tedious to maintain a layer that serializes the jited function (e.g. with a string) and dtypes. # - exported program will contain the implementation detail (e.g. triton source code) for a specific # backend (GPU), which is probably at a wrong level of abstraction. diff --git a/torch/_ops.py b/torch/_ops.py index 95f78ca7f32..cc7b3ffe2f0 100644 --- a/torch/_ops.py +++ b/torch/_ops.py @@ -530,7 +530,7 @@ class HigherOrderOperator(OperatorBase, abc.ABC): dispatch_key_set = _compute_keyset(args, kwargs, self.non_fallthrough_keys) return self.dispatch(dispatch_key_set.highestPriorityTypeId(), *args, **kwargs) - # NOTE [HigherOrderOprator Schema] + # NOTE [HigherOrderOperator Schema] # Each invocation of a HigherOrderOperator (hop) should have its own schema because # the subgraphs and the arguments can be different even for the same hop. # diff --git a/torch/_refs/__init__.py b/torch/_refs/__init__.py index 58a6e8c3c2a..9224643fe55 100644 --- a/torch/_refs/__init__.py +++ b/torch/_refs/__init__.py @@ -3155,7 +3155,7 @@ def flatten(a: TensorLikeType, start_dim: int = 0, end_dim: int = -1) -> TensorL # Tries to take a view # TODO: we could look at directing collapse_view to skip its meta function here (unsafe_collapse_view) - # Unbacked semnatics: if validty of in-place flattening is undecided we copy. 
+ # Unbacked semantics: if validity of in-place flattening is undecided we copy. new_shape, _new_strides = prims._collapse_view_helper( a, start_dim, end_dim, must_be_valid=None ) diff --git a/torch/ao/quantization/fx/_lower_to_native_backend.py b/torch/ao/quantization/fx/_lower_to_native_backend.py index fa8e7d53e6b..ab6e489d53e 100644 --- a/torch/ao/quantization/fx/_lower_to_native_backend.py +++ b/torch/ao/quantization/fx/_lower_to_native_backend.py @@ -523,7 +523,7 @@ def fold_weight( del original_weights_lookup[str(lookup_counter)] lookup_counter += 1 elif prepack_node is not None: - # remove the foled node + # remove the folded node continue else: # copy other nodes diff --git a/torch/ao/quantization/observer.py b/torch/ao/quantization/observer.py index 20b1252f1be..48c9a708a5d 100644 --- a/torch/ao/quantization/observer.py +++ b/torch/ao/quantization/observer.py @@ -1213,7 +1213,7 @@ class HistogramObserver(UniformQuantizationObserverBase): boundaries_new_histogram = torch.linspace( update_min, update_max, self.bins + 1, device=update_min.device ).to(histogram.device) - # this maps the mid-poits of the histogram to the new histogram's space + # this maps the mid-points of the histogram to the new histogram's space bucket_assignments = ( torch.bucketize(mid_points_histogram, boundaries_new_histogram, right=True) - 1 diff --git a/torch/ao/quantization/pt2e/lowering.py b/torch/ao/quantization/pt2e/lowering.py index 742549dedcf..b4380ca1960 100644 --- a/torch/ao/quantization/pt2e/lowering.py +++ b/torch/ao/quantization/pt2e/lowering.py @@ -12,7 +12,7 @@ def lower_pt2e_quantized_to_x86( model: torch.fx.GraphModule, example_inputs: tuple[torch.Tensor, ...], ) -> torch.fx.GraphModule: - """Lower a PT2E-qantized model to x86 backend. + """Lower a PT2E-quantized model to x86 backend. Args: * `model` (torch.fx.GraphModule): a model quantized by PT2E quantization flow.
diff --git a/torch/csrc/autograd/FunctionsManual.cpp b/torch/csrc/autograd/FunctionsManual.cpp index ff58cfd18ee..fc71ac37e91 100644 --- a/torch/csrc/autograd/FunctionsManual.cpp +++ b/torch/csrc/autograd/FunctionsManual.cpp @@ -4568,7 +4568,7 @@ std::tuple linalg_solve_triangular_backward( if (!grad.defined() || (!A_requires_grad && !B_requires_grad)) { return std::make_tuple(Tensor{}, Tensor{}); } - // We always need to comput G_B + // We always need to compute G_B const Tensor A_H = A.mH(); const Tensor G_B = at::linalg_solve_triangular(A_H, grad, !upper, left, unitriangular); diff --git a/torch/csrc/cuda/Module.cpp b/torch/csrc/cuda/Module.cpp index 971f4071afa..b14323a47bf 100644 --- a/torch/csrc/cuda/Module.cpp +++ b/torch/csrc/cuda/Module.cpp @@ -1035,7 +1035,7 @@ PyObject* THCPModule_cudaGetSyncDebugMode(PyObject* self, PyObject* noargs) { //////////////////////////////////////////////////////////////////////////////// static void registerCudaDeviceProperties(PyObject* module) { - // Add _cudaDevicePropertires class to torch._C + // Add _cudaDeviceProperties class to torch._C auto m = py::handle(module).cast(); // CUuuid is defined in either cuda.h or driver_types.h // hipified to hipUUID which is defined in hip_runtime_api.h diff --git a/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp b/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp index 77dd36b778a..efec39e9eb7 100644 --- a/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp +++ b/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp @@ -103,7 +103,7 @@ class StoreExchange { size_t seq_id_ = 0; }; -// Teturns a pointer of virtual address that is mapped to the physical memory +// Returns a pointer of virtual address that is mapped to the physical memory // held by the handle. void map_block( void** ptr, diff --git a/torch/csrc/distributed/rpc/request_callback_no_python.cpp b/torch/csrc/distributed/rpc/request_callback_no_python.cpp index ef645675af2..46fb40801d2 100644 --- a/torch/csrc/distributed/rpc/request_callback_no_python.cpp +++ b/torch/csrc/distributed/rpc/request_callback_no_python.cpp @@ -72,7 +72,7 @@ c10::intrusive_ptr RequestCallbackNoPython::processMessage( auto retFuture = rrefsReadyFuture->thenAsync( [this, - // std::function must be copyable, hence hae to cast the unique_ptr to + // std::function must be copyable, hence has to cast the unique_ptr to // a shared_ptr here. rpc = std::shared_ptr(std::move(rpc)), messageType = request.type(), diff --git a/torch/csrc/distributed/rpc/rpc_agent.h b/torch/csrc/distributed/rpc/rpc_agent.h index e353c548054..f4b3c586978 100644 --- a/torch/csrc/distributed/rpc/rpc_agent.h +++ b/torch/csrc/distributed/rpc/rpc_agent.h @@ -240,7 +240,7 @@ class TORCH_API RpcAgent { // should be profiled or not. void enableGILProfiling(bool flag); - // Retrieve wheher we should profile GIL wait times or not. + // Retrieve whether we should profile GIL wait times or not. 
bool isGILProfilingEnabled(); // Set type resolver that will be passed to JIT pickler to resolver type Ptr diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index bdcaf71c05d..ac07dc47c55 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -3534,7 +3534,7 @@ class RootGuardManager : public GuardManager { void add_no_tensor_aliasing_guard( std::shared_ptr no_tensor_aliasing_guard) { - // stash a pointer to the _no_tensor_alising_guard + // stash a pointer to the _no_tensor_aliasing_guard _no_tensor_aliasing_guard = no_tensor_aliasing_guard; this->add_relational_guard_resetter(std::move(no_tensor_aliasing_guard)); } diff --git a/torch/csrc/inductor/aoti_runtime/sycl_runtime_wrappers.h b/torch/csrc/inductor/aoti_runtime/sycl_runtime_wrappers.h index 3a2e91c37c9..e2b5e04fc45 100644 --- a/torch/csrc/inductor/aoti_runtime/sycl_runtime_wrappers.h +++ b/torch/csrc/inductor/aoti_runtime/sycl_runtime_wrappers.h @@ -143,7 +143,7 @@ static std::unique_ptr _createKernel( sycl::range<3> localRange(localRangeZ, localRangeY, localRangeX); sycl::nd_range<3> parallelWorkSize(globalRange, localRange); if (sharedMemory) { - // numParams from sycl info = user provided args + sharedMemroyBuffer + // numParams from sycl info = user provided args + sharedMemoryBuffer numParams -= 1; } // Submit the imported kernel. diff --git a/torch/csrc/inductor/cpp_prefix.h b/torch/csrc/inductor/cpp_prefix.h index 8ae212d3d3d..decdef52a1d 100644 --- a/torch/csrc/inductor/cpp_prefix.h +++ b/torch/csrc/inductor/cpp_prefix.h @@ -14,7 +14,7 @@ // Because AOTInductor generated code will copy-paste this cpp_prefix.h for // the CPU backend, we have to make sure the used headers are implemented // in a header-only way, i.e. all the function and class definitions are -// in .h files instead of .cpp files, to avoid ABI backward-compatiblity +// in .h files instead of .cpp files, to avoid ABI backward-compatibility // breakage. #include diff --git a/torch/csrc/jit/OVERVIEW.md b/torch/csrc/jit/OVERVIEW.md index 1ef0522d217..2dd563302fd 100644 --- a/torch/csrc/jit/OVERVIEW.md +++ b/torch/csrc/jit/OVERVIEW.md @@ -441,7 +441,7 @@ The following sections look into each the stages in the script frontend in detai [frontend/tree.h](frontend/tree.h) -Our frontends produce ASTs in the form of Tree objects. Trees are similar to [s-expressions](https://en.wikipedia.org/wiki/S-expression). Leafs (i.e. Atoms) are always strings. Compound trees have a `kind` (e.g `TK_CONST` or `TK_IDENT` defined in [lexer.h](frontend/lexer.h)) and a list of sub-trees. For instance, the Tree for `z.sigmoid() - (x + y)` is: +Our frontends produce ASTs in the form of Tree objects. Trees are similar to [s-expressions](https://en.wikipedia.org/wiki/S-expression). Leaves (i.e. Atoms) are always strings. Compound trees have a `kind` (e.g `TK_CONST` or `TK_IDENT` defined in [lexer.h](frontend/lexer.h)) and a list of sub-trees. 
For instance, the Tree for `z.sigmoid() - (x + y)` is: ``` (- diff --git a/torch/csrc/jit/backends/nnapi/nnapi_backend_lib.cpp b/torch/csrc/jit/backends/nnapi/nnapi_backend_lib.cpp index a5a331d15c2..18c1bc62b8c 100644 --- a/torch/csrc/jit/backends/nnapi/nnapi_backend_lib.cpp +++ b/torch/csrc/jit/backends/nnapi/nnapi_backend_lib.cpp @@ -121,7 +121,7 @@ class NnapiBackend : public PyTorchBackendInterface { shape_compute_module.run_method("prepare", ser_model, inputs) .toTensorList(); - // Create and initialize NnapiComilation object + // Create and initialize NnapiCompilation object comp_ = std::make_unique(); auto weights = dict.at("weights").toTensorVector(); comp_->init(ser_model, weights); diff --git a/torch/csrc/jit/mobile/flatbuffer_loader.cpp b/torch/csrc/jit/mobile/flatbuffer_loader.cpp index 103fadaf3a5..0fb50a5d5dd 100644 --- a/torch/csrc/jit/mobile/flatbuffer_loader.cpp +++ b/torch/csrc/jit/mobile/flatbuffer_loader.cpp @@ -379,7 +379,7 @@ std::unique_ptr FlatbufferLoader::parseFunction( function->append_type(getOrCreateTypeAnnotations(i)); } - // 3. If upgrader is needed, change change the OP instrunction to CALL + // 3. If upgrader is needed, change the OP instruction to CALL // instruction (In next PR, use_upgrader will be parsed to parseInstruction // function and do the actual change) if (use_upgrader) { diff --git a/torch/csrc/jit/mobile/import.cpp b/torch/csrc/jit/mobile/import.cpp index 6a0ba7e038e..ab05e48143e 100644 --- a/torch/csrc/jit/mobile/import.cpp +++ b/torch/csrc/jit/mobile/import.cpp @@ -391,7 +391,7 @@ void BytecodeDeserializer::parseMethods( debug_handles_m_tuple, function.get()); - // 3. If upgrader is needed, change change the OP instrunction to CALL + // 3. If upgrader is needed, change the OP instruction to CALL // instruction (In next PR, use_upgrader will be parsed to parseInstruction // function and do the actual change) if (use_upgrader) { diff --git a/torch/csrc/jit/passes/bailout_graph.cpp b/torch/csrc/jit/passes/bailout_graph.cpp index 7f8d7eedbe6..5bea5e42c0d 100644 --- a/torch/csrc/jit/passes/bailout_graph.cpp +++ b/torch/csrc/jit/passes/bailout_graph.cpp @@ -196,7 +196,7 @@ struct BailOutGraphBuilderForNode { std::shared_ptr buildBailOutGraphFrom(Node* n) { // add graph inputs for guard's input // and loop counts for loops `n` is contained in - // to make sure we can line bailout grap's inputs up properly + // to make sure we can line bailout graph's inputs up properly // with arguments to this BailOut node.
for (auto bi : n->inputs()) { getOrAddInputForValue(bi); diff --git a/torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp b/torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp index 8df57982bc3..a1bcc8d85b7 100644 --- a/torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp +++ b/torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp @@ -1230,7 +1230,7 @@ void removeDequantizeFromInputs(const std::unordered_set& inputs) { TORCH_INTERNAL_ASSERT( dequantized_val->uses().size() == 1, "Expect to have one dequantize node for each use"); - // Replace useses of dequantized_val with the input of + // Replace uses of dequantized_val with the input of // dequantize node dequantized_val->replaceAllUsesWith(dequantize_node->inputs()[0]); dequantize_node->removeAllInputs(); diff --git a/torch/csrc/jit/serialization/callstack_debug_info_serialization.cpp b/torch/csrc/jit/serialization/callstack_debug_info_serialization.cpp index 5c4c65b24ae..4194e5201ce 100644 --- a/torch/csrc/jit/serialization/callstack_debug_info_serialization.cpp +++ b/torch/csrc/jit/serialization/callstack_debug_info_serialization.cpp @@ -162,7 +162,7 @@ InlinedCallStackPtr InlinedCallStackDeserializer::deserialize( } cached_inlined_callstacks_[tup] = cs_ptr; // Invoking move constructor - // It is not clear if copy-ellision can happen since + // It is not clear if copy-elision can happen since // cs_ptr is copied into map above. // This is to help avoid ref count update return cs_ptr; diff --git a/torch/csrc/jit/serialization/pickler_helper.cpp b/torch/csrc/jit/serialization/pickler_helper.cpp index 66b51b07f80..c1d6794ded8 100644 --- a/torch/csrc/jit/serialization/pickler_helper.cpp +++ b/torch/csrc/jit/serialization/pickler_helper.cpp @@ -106,7 +106,7 @@ std::array< GetBackendMetaSerialization() { // The array to save function pointer for BackendMeta serialization. // key is the DeviceType, value is std::pair obj. - // value.first represent get function and value.seconde represent set function + // value.first represent get function and value.second represent set function static std::array< std::optional>, at::COMPILE_TIME_MAX_DEVICE_TYPES> diff --git a/torch/csrc/lazy/core/lazy_graph_executor.cpp b/torch/csrc/lazy/core/lazy_graph_executor.cpp index 754894e6096..c440357f9e1 100644 --- a/torch/csrc/lazy/core/lazy_graph_executor.cpp +++ b/torch/csrc/lazy/core/lazy_graph_executor.cpp @@ -830,7 +830,7 @@ std::shared_ptr LazyGraphExecutor:: const SyncTensorsConfig& config) { SyncTensorCollection coll = CollectSyncTensors(*tensors, config); if (coll.indices.empty()) { - /* Enure previous execution is complete before exiting this + /* Ensure previous execution is complete before exiting this * function */ TensorCollectionBarrier(&coll); return nullptr; diff --git a/torch/csrc/profiler/collection.cpp b/torch/csrc/profiler/collection.cpp index 133951dd817..138ba8c58b0 100644 --- a/torch/csrc/profiler/collection.cpp +++ b/torch/csrc/profiler/collection.cpp @@ -915,7 +915,7 @@ void passEventsToKineto( // on-demand Kineto activity handling. Enabling this path // for Profiler API could cause side effects as much has changed since. // Make a surgical fix here until we holistically assess the on-demand - // vs API path framentation, which has been snowballing in complexity + // vs API path fragmentation, which has been snowballing in complexity // and thus flakiness. 
if (config.global()) { e->kineto_activity_ = activity; diff --git a/torch/csrc/xpu/Module.cpp b/torch/csrc/xpu/Module.cpp index 5398700e932..ff5e82af42f 100644 --- a/torch/csrc/xpu/Module.cpp +++ b/torch/csrc/xpu/Module.cpp @@ -261,7 +261,7 @@ static PyObject* THXPModule_resetAccumulatedMemoryStats( // XPU module initialization static void registerXpuDeviceProperties(PyObject* module) { - // Add _xpuDevicePropertires class to torch._C + // Add _xpuDeviceProperties class to torch._C using namespace c10::xpu; auto get_device_type = [](const DeviceProp& prop) { std::ostringstream stream; diff --git a/torch/distributed/checkpoint/_checkpointer.py b/torch/distributed/checkpoint/_checkpointer.py index d54de9092a9..13b0d627a36 100644 --- a/torch/distributed/checkpoint/_checkpointer.py +++ b/torch/distributed/checkpoint/_checkpointer.py @@ -17,7 +17,7 @@ __all__: list[str] = [] class _Checkpointer: - """This base class specefies a high level API for saving and loading + """This base class specifies a high level API for saving and loading distributed `state_dict` 's. It provides an abstraction over the low-level APIs provided by :py:mod:`torch.distributed.checkpoint.storage`, essentially calling :py:meth: `torch.distributed.state_dict_saver.save` and diff --git a/torch/distributed/checkpoint/format_utils.py b/torch/distributed/checkpoint/format_utils.py index 129b7cf570c..912f983fe2a 100644 --- a/torch/distributed/checkpoint/format_utils.py +++ b/torch/distributed/checkpoint/format_utils.py @@ -80,7 +80,7 @@ class BroadcastingTorchSaveReader(StorageReader): planner = cast(DefaultLoadPlanner, planner) # data is read in on the coordinator rank, and broadcast afterwards - # this incurrs a communication cost, but it avoids having to load + # this incurs a communication cost, but it avoids having to load # the entire checkpoint on each rank, hopefully preventing OOM issues # TODO: read on each host, instead of only the coordinator if self.is_coordinator: diff --git a/torch/distributed/pipelining/stage.py b/torch/distributed/pipelining/stage.py index c18c4d6f678..62746899451 100644 --- a/torch/distributed/pipelining/stage.py +++ b/torch/distributed/pipelining/stage.py @@ -252,7 +252,7 @@ class _PipelineStageBase(ABC): self._outputs_meta = tuple(outputs_meta) # type: ignore[assignment] def get_outputs_meta(self) -> tuple[torch.Tensor, ...]: - """Get the output metadata (meta tensors) reprensenting the outputs of this stage""" + """Get the output metadata (meta tensors) representing the outputs of this stage""" assert self._outputs_meta is not None, ( "Attempted to get_outputs_meta() without configuring output meta" ) @@ -723,7 +723,7 @@ class _PipelineStageBase(ABC): ) self._validate_fwd_outputs(output_tuple) - # We return the original user-provied output, not normalized to tuple. + # We return the original user-provided output, not normalized to tuple. 
# See [Note: pipeline model output type] # return output @@ -1188,7 +1188,7 @@ class _PipelineStage(_PipelineStageBase): # No need to send back to rank 0 # - If user.target is stage_backward: # No need to send assuming submod output is stored locally or - # should be re-calucated in case of activation checkpointing + # should be re-calculated in case of activation checkpointing return None def _create_act_send_info(self): diff --git a/torch/distributed/tensor/_ops/_einsum_strategy.py b/torch/distributed/tensor/_ops/_einsum_strategy.py index 506103d70a5..14dcb395b9b 100644 --- a/torch/distributed/tensor/_ops/_einsum_strategy.py +++ b/torch/distributed/tensor/_ops/_einsum_strategy.py @@ -45,7 +45,7 @@ class EinsumDims: for input_dim in input_dims: dim_char_set.update(input_dim) - # get a determinisitc order of all dim chars + # get a deterministic order of all dim chars all_dim_chars = sorted(dim_char_set) # parse input and output dimensions diff --git a/torch/distributed/tensor/_ops/_tensor_ops.py b/torch/distributed/tensor/_ops/_tensor_ops.py index e3134c26a91..43722c11c2a 100644 --- a/torch/distributed/tensor/_ops/_tensor_ops.py +++ b/torch/distributed/tensor/_ops/_tensor_ops.py @@ -484,7 +484,7 @@ def replicate_tensor_dim( def gen_slice_scatter_strategy(op_schema: OpSchema) -> StrategyType: # 1. number of dimensions in input and src need to match. # 2. number of elements on all non-dim need to match between input and src. - # 3. numer of elements in src in dim need to match the slice size. + # 3. number of elements in src in dim need to match the slice size. # Given the above: # - We suggest for src to follow the sharding of input, except on the scatter dimension, # where our best bet for now is to make them replicated as a fall-back. diff --git a/torch/distributed/tensor/_redistribute.py b/torch/distributed/tensor/_redistribute.py index 463c34c8fb4..a407ba6ca91 100644 --- a/torch/distributed/tensor/_redistribute.py +++ b/torch/distributed/tensor/_redistribute.py @@ -592,7 +592,7 @@ class DTensorRedistributePlanner: current = current_placements[mesh_dim] target = target_placements[mesh_dim] # If target is not Shard, we can directly redistribute since we - # are traversing from innner to outer placements here + # are traversing from inner to outer placements here if isinstance(target, Shard): # If target is Shard, check for nested sharding on the # tensor dim BEFORE the current mesh_dim diff --git a/torch/export/_trace.py b/torch/export/_trace.py index 786391f4bc7..3e7e9d8d991 100644 --- a/torch/export/_trace.py +++ b/torch/export/_trace.py @@ -922,7 +922,7 @@ def _export_to_aten_ir( if decompose_custom_triton_ops else _disable_custom_triton_op_functional_decomposition ) - # This _reparametrize_module makes sure inputs and module.params/buffers have the same fake_mode, + # This _reparametrize_module makes sure inputs and module.params/buffers have the same fake_mode, # otherwise aot_export_module will error out because it sees a mix of fake_modes. # And we want aot_export_module to use the fake_tensor mode in dynamo to keep the pipeline easy to reason about. with ExitStack() as stack: @@ -1843,7 +1843,7 @@ def _export_to_aten_ir_make_fx( ) return gm, sig - # This _reparametrize_module makes sure inputs and module.params/buffers have the same fake_mode, + # This _reparametrize_module makes sure inputs and module.params/buffers have the same fake_mode, # otherwise aot_export_module will error out because it sees a mix of fake_modes.
# And we want aot_export_module to use the fake_tensor mode in dynamo to keep the pipeline easy to reason about. with ExitStack() as stack: diff --git a/torch/export/exported_program.py b/torch/export/exported_program.py index 0373f7669e0..235f72741b4 100644 --- a/torch/export/exported_program.py +++ b/torch/export/exported_program.py @@ -281,7 +281,7 @@ def _split_decomp_table_to_cia_and_python_decomp( for op in list(decomp_table.keys()): # TODO we are silently allowing non-safe(non-functional) ops through a crack # due to core aten decomp table having non-functional entries. Once we have - # a tigher check around core aten decomp, we should warn users about them. + # a tighter check around core aten decomp, we should warn users about them. # Tracking issue: (https://github.com/pytorch/pytorch/issues/135759) # if it is a valid CIA op we can mess with in export, we check if it is: diff --git a/torch/functional.py b/torch/functional.py index 3054f54b7cd..013832d59cf 100644 --- a/torch/functional.py +++ b/torch/functional.py @@ -1829,7 +1829,7 @@ def norm( # noqa: F811 return _VF.norm(input, p, dim=_dim, keepdim=keepdim) # type: ignore[attr-defined] # TODO: when https://github.com/pytorch/pytorch/issues/33782 is fixed - # remove the overloads where dim is an int and replace with BraodcastingList1 + # remove the overloads where dim is an int and replace with BroadcastingList1 # and remove next four lines, replace _dim with dim if dim is not None: if isinstance(dim, (int, torch.SymInt)): diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index fbc28e94dd5..010539d9ff7 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -4522,7 +4522,7 @@ class ShapeEnv: # The order of checking the guards matters. In this specific example: # If True branch guard check precedes False branch and for True branch, y.size(0) check precedes x == True, - # we may have an unnecessary shape speciliazation for y. + # we may have an unnecessary shape specialization for y. def _maybe_specialize_sym_int_with_hint( self, maybe_sym: IntLikeType ) -> IntLikeType: @@ -5830,7 +5830,7 @@ class ShapeEnv: def issue_guard(guard: ShapeGuard) -> None: expr = self.simplify(guard.expr) - # Avoid re-issueing the same guard. + # Avoid re-issuing the same guard. if expr in issued: return diff --git a/torch/fx/experimental/unification/multipledispatch/conflict.py b/torch/fx/experimental/unification/multipledispatch/conflict.py index 44a893ad56a..181e0e8dd16 100644 --- a/torch/fx/experimental/unification/multipledispatch/conflict.py +++ b/torch/fx/experimental/unification/multipledispatch/conflict.py @@ -118,7 +118,7 @@ def edge(a, b, tie_breaker=hash): """A should be checked before B Tie broken by tie_breaker, defaults to ``hash`` """ - # A either supercedes B and B does not supercede A or if B does then call + # A either supersedes B and B does not supersede A or if B does then call # tie_breaker return supercedes(a, b) and ( not supercedes(b, a) or tie_breaker(a) > tie_breaker(b) diff --git a/torch/nativert/OVERVIEW.md b/torch/nativert/OVERVIEW.md index bfe97c9aefc..d8a7d255d92 100644 --- a/torch/nativert/OVERVIEW.md +++ b/torch/nativert/OVERVIEW.md @@ -282,7 +282,7 @@ RuntimeConfigs { Constant folding is the process of finding all of the constant-evaluable subgraphs, evaluating them at startup, and then storing their results as -constants as opposed to re-evaluting them every time. +constants as opposed to re-evaluating them every time. 
To enable constant folding, you can set the following configurations. diff --git a/torch/nested/_internal/sdpa.py b/torch/nested/_internal/sdpa.py index 4e8d430e845..fe385dc5c76 100644 --- a/torch/nested/_internal/sdpa.py +++ b/torch/nested/_internal/sdpa.py @@ -438,7 +438,7 @@ def _view_as_dense( # # this is because needs_broadcast indicates that the batch_size is 1 # # and hence there is only 1 value for seq_len # # (2) The cum_seq_lens are given by [0, {*}_t.size(1), 2 * {*}_t.size(1), -# # ..., outut_batch_size * {*}_t.size(1)] +# # ..., output_batch_size * {*}_t.size(1)] # # (3) Nnz_{*} is given by output_batch_size * {*}_t.size(1) # if q_batch_size_needs_broadcast or not q_t.is_nested: diff --git a/torch/nn/functional.py b/torch/nn/functional.py index f8c59ea967b..bc1e873c428 100644 --- a/torch/nn/functional.py +++ b/torch/nn/functional.py @@ -2229,7 +2229,7 @@ def gumbel_softmax( ).scatter_(dim, index, 1.0) ret = y_hard - y_soft.detach() + y_soft else: - # Reparametrization trick. + # Reparameterization trick. ret = y_soft return ret diff --git a/torch/nn/modules/conv.py b/torch/nn/modules/conv.py index 54c8638c57f..e0923fb7864 100644 --- a/torch/nn/modules/conv.py +++ b/torch/nn/modules/conv.py @@ -1471,7 +1471,7 @@ class _LazyConvXdMixin(LazyModuleMixin): raise NotImplementedError -# LazyConv1d defines weight as a Tensor but derived class defines it as UnitializeParameter +# LazyConv1d defines weight as a Tensor but derived class defines it as UninitializedParameter class LazyConv1d(_LazyConvXdMixin, Conv1d): # type: ignore[misc] r"""A :class:`torch.nn.Conv1d` module with lazy initialization of the ``in_channels`` argument. @@ -1543,7 +1543,7 @@ class LazyConv1d(_LazyConvXdMixin, Conv1d): # type: ignore[misc] return 1 -# LazyConv2d defines weight as a Tensor but derived class defines it as UnitializeParameter +# LazyConv2d defines weight as a Tensor but derived class defines it as UninitializedParameter class LazyConv2d(_LazyConvXdMixin, Conv2d): # type: ignore[misc] r"""A :class:`torch.nn.Conv2d` module with lazy initialization of the ``in_channels`` argument. @@ -1615,7 +1615,7 @@ class LazyConv2d(_LazyConvXdMixin, Conv2d): # type: ignore[misc] return 2 -# LazyConv3d defines weight as a Tensor but derived class defines it as UnitializeParameter +# LazyConv3d defines weight as a Tensor but derived class defines it as UninitializedParameter class LazyConv3d(_LazyConvXdMixin, Conv3d): # type: ignore[misc] r"""A :class:`torch.nn.Conv3d` module with lazy initialization of the ``in_channels`` argument. @@ -1688,7 +1688,7 @@ class LazyConv3d(_LazyConvXdMixin, Conv3d): # type: ignore[misc] return 3 -# LazyConvTranspose1d defines weight as a Tensor but derived class defines it as UnitializeParameter +# LazyConvTranspose1d defines weight as a Tensor but derived class defines it as UninitializedParameter class LazyConvTranspose1d(_LazyConvXdMixin, ConvTranspose1d): # type: ignore[misc] r"""A :class:`torch.nn.ConvTranspose1d` module with lazy initialization of the ``in_channels`` argument. @@ -1760,7 +1760,7 @@ class LazyConvTranspose1d(_LazyConvXdMixin, ConvTranspose1d): # type: ignore[mi return 1 -# LazyConvTranspose2d defines weight as a Tensor but derived class defines it as UnitializeParameter +# LazyConvTranspose2d defines weight as a Tensor but derived class defines it as UninitializedParameter class LazyConvTranspose2d(_LazyConvXdMixin, ConvTranspose2d): # type: ignore[misc] r"""A :class:`torch.nn.ConvTranspose2d` module with lazy initialization of the ``in_channels`` argument.
@@ -1832,7 +1832,7 @@ class LazyConvTranspose2d(_LazyConvXdMixin, ConvTranspose2d): # type: ignore[mi return 2 -# LazyConvTranspose3d defines weight as a Tensor but derived class defines it as UnitializeParameter +# LazyConvTranspose3d defines weight as a Tensor but derived class defines it as UninitializedParameter class LazyConvTranspose3d(_LazyConvXdMixin, ConvTranspose3d): # type: ignore[misc] r"""A :class:`torch.nn.ConvTranspose3d` module with lazy initialization of the ``in_channels`` argument. diff --git a/torch/nn/utils/prune.py b/torch/nn/utils/prune.py index 99a1439ec5c..3c1a8000859 100644 --- a/torch/nn/utils/prune.py +++ b/torch/nn/utils/prune.py @@ -144,7 +144,7 @@ class BasePruningMethod(ABC): method = _get_composite_method(cls, module, name, *args, **kwargs) # at this point we have no forward_pre_hooks but we could have an - # active reparametrization of the tensor if another pruning method + # active reparameterization of the tensor if another pruning method # had been applied (in which case `method` would be a PruningContainer # and not a simple pruning method). diff --git a/torch/onnx/_internal/fx/type_utils.py b/torch/onnx/_internal/fx/type_utils.py index 968f6932801..072f9f10e26 100644 --- a/torch/onnx/_internal/fx/type_utils.py +++ b/torch/onnx/_internal/fx/type_utils.py @@ -164,7 +164,7 @@ _TORCH_DTYPE_TO_ABBREVIATION = { SYM_VALUE_TYPE = Union[torch.SymInt, torch.SymFloat, torch.SymBool] META_VALUE_TYPE = Union[fake_tensor.FakeTensor, SYM_VALUE_TYPE, int, float, bool] -# NOTE: Belows are from torch/fx/node.py +# NOTE: Below are from torch/fx/node.py BaseArgumentTypes = Union[ str, int, diff --git a/torch/testing/_internal/common_quantization.py b/torch/testing/_internal/common_quantization.py index fde4f396b2b..c88f7ad45c7 100644 --- a/torch/testing/_internal/common_quantization.py +++ b/torch/testing/_internal/common_quantization.py @@ -810,7 +810,7 @@ class QuantizationTestCase(TestCase): b = io.BytesIO() torch.save(model_dict, b) b.seek(0) - # weights_only=False as we sometimes get a ScriptObect here (weird) + # weights_only=False as we sometimes get a ScriptObject here (weird) loaded_dict = torch.load(b, weights_only=False) loaded_model.load_state_dict(loaded_dict) ref_out = ref_model(*x) diff --git a/torchgen/_autoheuristic/README.md b/torchgen/_autoheuristic/README.md index 2241785c298..091011d3f47 100644 --- a/torchgen/_autoheuristic/README.md +++ b/torchgen/_autoheuristic/README.md @@ -3,7 +3,7 @@ AutoHeuristic is a framework that allows one to use results from autotuning to l ## How to use AutoHeuristic In general, the following steps have to performed: -- The AutoHeursitic constructor has to be called. +- The AutoHeuristic constructor has to be called. - A script that runs benchmarks in order to collect training data has to be implemented. - The train_decision.py (if you want to learn a decision tree) or train_regression.py (if you want to learn a regression tree) script has to be run in order to learn the heuristic and generate it to code.
diff --git a/torchgen/gen_aoti_c_shim.py b/torchgen/gen_aoti_c_shim.py index 65161200256..ead2a2a1cf4 100644 --- a/torchgen/gen_aoti_c_shim.py +++ b/torchgen/gen_aoti_c_shim.py @@ -678,7 +678,7 @@ def gen_aoti_c_shim_files( # Use "aten" as the device name when dispatch_key is Generic device_name = "aten" if dispatch_key is None else dispatch_key.lower() - # header files were checked in for ABI-compatiblilty checking + # header files were checked in for ABI-compatibility checking header_file_name = f"c_shim_{device_name}.h" new_header = gen_aoti_c_shim( fallback_native_functions,