Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
Fix typos (#30606)
Summary: Should be non-semantic. Uses https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines to find likely typos.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/30606
Differential Revision: D18763028
Pulled By: mrshenli
fbshipit-source-id: 896515a2156d062653408852e6c04b429fc5955c
This commit is contained in:
parent 0bebfe2143
commit e7fe64f6a6
@@ -186,7 +186,7 @@ if [ "$(uname)" == "Linux" ]; then
 build_args+=("USE_REDIS=ON")
 fi
 
-# Use a speciallized onnx namespace in CI to catch hardcoded onnx namespace
+# Use a specialized onnx namespace in CI to catch hardcoded onnx namespace
 build_args+=("ONNX_NAMESPACE=ONNX_NAMESPACE_FOR_C2_CI")
 
 ###############################################################################
@@ -37,7 +37,7 @@ constexpr uint32_t LMASK = 0x7fffffff;
 * Time to get 100000000 std::mt19937 randoms with std::uniform_real_distribution = 0.352087s
 * Time to get 100000000 std::mt19937 randoms with at::uniform_real_distribution = 0.419454s
 *
-* std::mt19937 is faster when used in conjuction with std::uniform_real_distribution,
+* std::mt19937 is faster when used in conjunction with std::uniform_real_distribution,
 * however we can't use std::uniform_real_distribution because of this bug:
 * http://open-std.org/JTC1/SC22/WG21/docs/lwg-active.html#2524. Plus, even if we used
 * std::uniform_real_distribution and filtered out the 1's, it is a different algorithm
@@ -122,10 +122,10 @@ public:
 uint32_t nlo = static_cast<uint32_t>(n);
 uint32_t nhi = static_cast<uint32_t>(n >> 32);
 counter[0] += nlo;
-// if overflow in x has occured, carry over to nhi
+// if overflow in x has occurred, carry over to nhi
 if (counter[0] < nlo) {
 nhi++;
-// if overflow in nhi has occured during carry over,
+// if overflow in nhi has occurred during carry over,
 // propagate that overflow to y and exit to increment z
 // otherwise return
 counter[1] += nhi;

@@ -135,7 +135,7 @@ public:
 }
 }
 } else {
-// if overflow in y has occured during addition,
+// if overflow in y has occurred during addition,
 // exit to increment z
 // otherwise return
 counter[1] += nhi;
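The two hunks above annotate the carry-propagation step of the counter increment: unsigned addition wraps around, so a sum that ends up smaller than one of its operands signals an overflow that must be carried into the next word. A minimal self-contained sketch of that idea follows; the four-word counter layout and the helper name are assumptions made for illustration only, not the actual Philox engine code.

#include <array>
#include <cstdint>

// Sketch: add a 64-bit increment n to a 128-bit counter stored as four 32-bit
// words. Unsigned arithmetic wraps around, so "sum < operand" detects overflow.
void add_with_carry(std::array<uint32_t, 4>& counter, uint64_t n) {
  uint64_t low = (static_cast<uint64_t>(counter[1]) << 32) | counter[0];
  uint64_t sum = low + n;
  counter[0] = static_cast<uint32_t>(sum);
  counter[1] = static_cast<uint32_t>(sum >> 32);
  if (sum < low) {             // the low 64-bit half overflowed: carry into word 2
    if (++counter[2] == 0) {   // and, if that wraps as well, into word 3
      ++counter[3];
    }
  }
}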
@@ -344,7 +344,7 @@ public:
 // its cache each time the kernel is looked up from the dispatch table.
 // A lambda with a capture would be global and share its capture between all kernel lookups.
 // So, instead of making users having to think about it (including the thread-safety
-// issues this causes), let's just forbid stateful lambdas alltogether.
+// issues this causes), let's just forbid stateful lambdas altogether.
 static_assert(guts::is_stateless_lambda<guts::decay_t<Lambda>>::value, "The kernel(x) API for registering a kernel only works for stateless lambdas (i.e. lambdas without captures). If you need a cache, please use the functor based API kernel<Functor>() instead.");
 
 return std::move(*this).kernel(
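The hunk above enforces that only stateless (capture-free) lambdas may be registered through the kernel(x) API. A minimal sketch of how statelessness can be observed at compile time follows, assuming nothing beyond standard C++: a capture-free lambda converts to a plain function pointer while a capturing one does not. This only illustrates the idea; the is_stateless_lambda trait named in the hunk is implemented separately in c10.

#include <type_traits>

int main() {
  auto stateless = [](int x) { return x + 1; };
  int bias = 42;
  auto stateful = [bias](int x) { return x + bias; };

  // A capture-free lambda implicitly converts to a plain function pointer;
  // a capturing lambda does not, which is one simple compile-time distinction.
  static_assert(std::is_convertible<decltype(stateless), int (*)(int)>::value,
                "capture-free lambdas decay to function pointers");
  static_assert(!std::is_convertible<decltype(stateful), int (*)(int)>::value,
                "capturing lambdas do not");
  return 0;
}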
@@ -117,7 +117,7 @@ typedef struct {
 
 /*!
 * \brief C Tensor object, manage memory of DLTensor. This data structure is
-* intended to faciliate the borrowing of DLTensor by another framework. It is
+* intended to facilitate the borrowing of DLTensor by another framework. It is
 * not meant to transfer the tensor. When the borrowing framework doesn't need
 * the tensor, it should call the deleter to notify the host that the resource
 * is no longer needed.

@@ -151,7 +151,7 @@ Tensor einsum(std::string eqn, TensorList tensors) {
 // The internal representation of the left hand side fo the equation (with ellipsis expanded) is stored in input_op_idxes.
 // For each operand, we have a vector mapping each dimension to an internal index.
 // We also keep track of the number of occurrences for each letter (to infer a right hand side if not given) and
-// of the last occurence of each index.
+// of the last occurrence of each index.
 std::vector<std::vector<int64_t>> input_op_idxes; // the parsed operand indices
 std::array<std::int64_t, number_of_letters> num_letter_occurrences; // number of occurrence in the equation of this letter
 num_letter_occurrences.fill(0);

@@ -12,7 +12,7 @@
 #include <ATen/core/EnableNamedTensor.h>
 
 // TensorIterator is a helper class for element-wise operations, such as
-// arithmetic, comparisions, and trigonometric functions. It handles
+// arithmetic, comparisons, and trigonometric functions. It handles
 // broadcasting and type conversions of operands.
 //
 // This is inspired by NumPy's Array Iterator API (NpyIter).

@@ -290,7 +290,7 @@ Tensor diagonal(const Tensor& self, int64_t offset, int64_t dim1_, int64_t dim2_
 storage_offset -= offset * self.stride(dim1);
 }
 
-// construct new size and stride: we drop dim1 and dim2 (maximum first for not changing the index of the minumum)
+// construct new size and stride: we drop dim1 and dim2 (maximum first for not changing the index of the minimum)
 // the new ("joint") dimension is appended to the end of the shape / stride to match numpy semantics
 auto sizes = self.sizes().vec();
 auto strides = self.strides().vec();

@@ -34,7 +34,7 @@ namespace native {
 // YOU ARE NOT OBLIGED TO USE THESE HELPERS---if you're writing something more specialized, please don't try to make
 // them work for your case, but just write something new instead. Here we use helper functions instead of a flat fat
 // macro that implements everything, because the former allows some simple preprocessing that are unique to some
-// operators (more is forseeable) and is more flexible and elegant than the latter.
+// operators (more is foreseeable) and is more flexible and elegant than the latter.
 template <typename Stub>
 static inline Tensor& unary_op_impl_out(Tensor& result, const Tensor& self, Stub& stub) {
 auto iter = TensorIterator::unary_op(result, self,
@@ -260,7 +260,7 @@ inline v8sf exp256_ps(v8sf x) {
 
 tmp = _mm256_floor_ps(fx);
 
-/* if greater, substract 1 */
+/* if greater, subtract 1 */
 //v8sf mask = _mm256_cmpgt_ps(tmp, fx);
 v8sf mask = _mm256_cmp_ps(tmp, fx, _CMP_GT_OS);
 mask = _mm256_and_ps(mask, one);
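The hunk above sits inside a vectorized exp implementation that uses a branchless correction: a per-lane comparison mask is ANDed with 1.0f and then subtracted, so only lanes where tmp is greater than fx get decremented. A standalone sketch of that idiom, written here purely for illustration, follows.

#include <immintrin.h>

// Sketch of the branchless "if greater, subtract 1" idiom: the comparison
// yields an all-ones mask per lane, ANDing it with 1.0f turns that into
// 1.0f or 0.0f, and the subtraction then only affects lanes where tmp > fx.
static inline __m256 subtract_one_where_greater(__m256 tmp, __m256 fx) {
  const __m256 one = _mm256_set1_ps(1.0f);
  __m256 mask = _mm256_cmp_ps(tmp, fx, _CMP_GT_OS);  // all-ones where tmp > fx
  mask = _mm256_and_ps(mask, one);                   // 1.0f where true, 0.0f elsewhere
  return _mm256_sub_ps(tmp, mask);
}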
@@ -8,7 +8,7 @@
 // The gpu_kernel_with_scalars generates specializations that support a
 // single scalar CPU argument, such as from `cuda_tensor + 5`. The CPU scalar
 // is lifted to a kernel paramter instead of copying to device memory.
-// This should be used in conjuction with TensorIterator::allow_cpu_scalars_,
+// This should be used in conjunction with TensorIterator::allow_cpu_scalars_,
 // which is the default for TensorIterator::binary_op. Otherwise, all inputs
 // and the output must be on the GPU.
 //

@@ -5,7 +5,7 @@
 // 1. Graves et al: http://www.cs.toronto.edu/~graves/icml_2006.pdf
 // We use the equations from above link, but note that [1] has 1-based indexing and we (of course) use 0-based.
 // Graves et al call the probabilities y, we use log_probs (also calling them inputs)
-// A few optimizations (simmilar to those here, but also some I didn't take) are described in
+// A few optimizations (similar to those here, but also some I didn't take) are described in
 // 2. Minmin Sun: http://on-demand.gputechconf.com/gtc/2016/presentation/s6383-minmin-sun-speech-recognition.pdf
 
 #include <ATen/TensorUtils.h>

@@ -354,7 +354,7 @@ Tensor _fft_cufft(const Tensor& self, int64_t signal_ndim,
 }
 
 // Now that we have done error check and data_ptr checks, we delegate all
-// futher cuFFT parameter computation and plan creation to the helper class
+// further cuFFT parameter computation and plan creation to the helper class
 // CuFFTConfig in CuFFTUtils.h.
 
 // If plan caching is enabled, we check the cache. Note that this accesses
@@ -141,7 +141,7 @@ namespace {
 // iterations. This would give the accurate result, but is relatively slow and
 // is an overkill for most cases where double's precision suffice.
 //
-// If we directly use sqrt to calculate the root, the convertion from int64_t
+// If we directly use sqrt to calculate the root, the conversion from int64_t
 // to double would lose 11 bits precision.
 //
 // The following solution uses sqrt directly for most cases, and would only
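The comment block above argues that going through double costs precision, since a double carries only a 53-bit significand while an int64_t has up to 63 significant bits. A small sketch of that effect follows; the specific value is hypothetical and chosen only to make the rounding visible.

#include <cstdint>
#include <cstdio>

int main() {
  int64_t x = (int64_t{1} << 62) + 1;       // needs 63 significant bits
  double d = static_cast<double>(x);        // rounded to the nearest double
  int64_t back = static_cast<int64_t>(d);   // the trailing +1 has been lost
  std::printf("%lld -> %lld (difference %lld)\n",
              static_cast<long long>(x),
              static_cast<long long>(back),
              static_cast<long long>(x - back));
  return 0;
}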
@@ -27,7 +27,7 @@ static inline void barf(const char *fmt, ...) {
 }
 
 //note: msg must be a string literal
-//node: In, ##__VA_ARGS '##' supresses the comma if __VA_ARGS__ is empty
+//node: In, ##__VA_ARGS '##' suppresses the comma if __VA_ARGS__ is empty
 #define ASSERTM(cond, msg, ...) \
 if (AT_EXPECT(!(cond), 0)) { \
 barf("%s:%u: %s: Assertion `%s` failed: " msg , __FILE__, __LINE__, __func__, #cond,##__VA_ARGS__); \
@@ -254,7 +254,7 @@
 * It can hinted by a classical problem: Getting each individual digit from a whole integer(Decimal base).
 * A N-digit decimal base number could be view as a N-dimension tensor and the sizes of the tensor are 10.
 * So the value the whole integer is the line index. And the digits could be viewed as the indexes in
-* different dimentions.
+* different dimensions.
 *
 * 2. convert the indexs(coordinates) in the tensor to the memory offset.
 *
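The comment block above uses the digits-of-an-integer analogy: splitting a linear index into per-dimension coordinates is repeated division, and the memory offset is then the stride-weighted sum of those coordinates. A short sketch of both steps follows; the helper names are hypothetical and not taken from the surrounding file.

#include <cstdint>
#include <vector>

// Decompose a linear index into per-dimension coordinates, like reading off
// the digits of a decimal number from least to most significant.
std::vector<int64_t> index_to_coords(int64_t linear_index,
                                     const std::vector<int64_t>& sizes) {
  std::vector<int64_t> coords(sizes.size());
  for (int64_t d = static_cast<int64_t>(sizes.size()) - 1; d >= 0; --d) {
    coords[d] = linear_index % sizes[d];   // like taking the last digit
    linear_index /= sizes[d];              // like dropping the last digit
  }
  return coords;
}

// Convert coordinates to a memory offset as the stride-weighted sum.
int64_t coords_to_offset(const std::vector<int64_t>& coords,
                         const std::vector<int64_t>& strides) {
  int64_t offset = 0;
  for (size_t d = 0; d < coords.size(); ++d) {
    offset += coords[d] * strides[d];
  }
  return offset;
}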
@@ -22,7 +22,7 @@ or run the test independently:
 
 `python -m fastrnns.bench`
 
-should give a good comparision, or you can specify the type of model to run
+should give a good comparison, or you can specify the type of model to run
 
 `python -m fastrnns.bench --rnns cudnn aten jit --group rnns`
 

@@ -225,7 +225,7 @@ void fillInputBlob(
 if (blob == nullptr) {
 blob = workspace->CreateBlob(tensor_kv.first);
 }
-// todo: support gpu and make this function a tempalte
+// todo: support gpu and make this function a template
 int protos_size = tensor_kv.second.protos_size();
 if (protos_size == 1 && iteration > 0) {
 // Do not override the input data if there is only one input data,

@@ -501,7 +501,7 @@ void fillInputBlob(
 if (blob == nullptr) {
 blob = workspace->CreateBlob(tensor_kv.first);
 }
-// todo: support gpu and make this function a tempalte
+// todo: support gpu and make this function a template
 int protos_size = tensor_kv.second.protos_size();
 if (protos_size == 1 && iteration > 0) {
 // Do not override the input data if there is only one input data,
@@ -50,13 +50,13 @@ enum class TensorTypeId : uint8_t {
 VariableTensorId,
 
 // TESTING: This is intended to be a generic testing tensor type id.
-// Don't use it for anything real; its only acceptible use is within a single
+// Don't use it for anything real; its only acceptable use is within a single
 // process test. Use it by creating a TensorImpl with this TensorTypeId, and
 // then registering operators to operate on this type id.
 TESTING_ONLY_GenericWrapperTensorId,
 
 // TESTING: This is intended to be a generic testing tensor type id.
-// Don't use it for anything real; its only acceptible use is within a ingle
+// Don't use it for anything real; its only acceptable use is within a ingle
 // process test. Use it by toggling the mode on and off via
 // TESTING_ONLY_tls_generic_mode_set_enabled and then registering operators
 // to operate on this type id.

@@ -108,7 +108,7 @@ C10_API std::ostream& operator<<(std::ostream&, TensorTypeSet);
 
 // Historically, every tensor only had a single TensorTypeId, and it was
 // always something like CPUTensorId and not something weird like VariableId.
-// For the forseeable future, it will still be possible to extract /that/
+// For the foreseeable future, it will still be possible to extract /that/
 // TensorTypeId, and that's what this function does. It should be used
 // for legacy code that is still using TensorTypeId for things like instanceof
 // checks; if at all possible, refactor the code to stop using TensorTypeId
@@ -110,7 +110,7 @@ namespace {
 
 TEST(BFloat16Math, Substraction) {
 // This test verifies that if only first 7 bits of float's mantisa are
-// changed after substraction, we should have no loss in precision.
+// changed after subtraction, we should have no loss in precision.
 
 // input bits
 // S | Exponent | Mantissa

@@ -86,7 +86,7 @@
 // nvcc has a bug where it doesn't understand __attribute__((deprecated))
 // declarations even when the host compiler supports it. We'll only use this gcc
 // attribute when not cuda, and when using a GCC compiler that doesn't support
-// the c++14 syntax we checked for above (availble in __GNUC__ >= 5)
+// the c++14 syntax we checked for above (available in __GNUC__ >= 5)
 #if !defined(__CUDACC__)
 # define C10_DEFINE_DEPRECATED_USING(TypeName, TypeThingy) using TypeName __attribute__((deprecated)) = TypeThingy;
 #else

@@ -259,7 +259,7 @@ namespace detail {
 * A normalized single-precision floating-point number is represented as:
 * FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127)
 * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision
-* number causes a change of the constructud single-precision number by 2**(-24), i.e. the same ammount.
+* number causes a change of the constructud single-precision number by 2**(-24), i.e. the same amount.
 *
 * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number
 * is zero, the constructed single-precision number has the value of
@@ -69,7 +69,7 @@ struct TempFile {
 };
 
 /// Attempts to return a temporary file or returns `nullopt` if an error
-/// ocurred.
+/// occurred.
 ///
 /// The file returned follows the pattern
 /// `<tmp-dir>/<name-prefix><random-pattern>`, where `<tmp-dir>` is the value of

@@ -178,7 +178,7 @@ bool TensorRTOp::RunOnDevice() {
 }
 
 // We need to do the binding at RunOnDevice time because we only know the
-// exact shapes of the tensors now. In addtion, since TensorRT engine has
+// exact shapes of the tensors now. In addition, since TensorRT engine has
 // max_batch_size, we need to call that multiple times if input batch size
 // exceeeds this limit.
 CAFFE_ENFORCE_EQ(is_input_.size(), nv_dims_.size());

@@ -133,7 +133,7 @@ class CAFFE2_API TensorDeserializer : public BlobDeserializerBase {
 * we have to call Deserialize multiple times to get the complete deserialized
 * Tensor, each call will fill part of the Tensor given the segment begin and
 * end information in proto, therefore we have to pass in the Tensor pointer
-* rather than create a new Tensor everytime.
+* rather than create a new Tensor every time.
 *
 * Precondition: Tensor must be initialized
 */

@@ -294,7 +294,7 @@ static void Caffe2SetCUDAMemoryPool() {
 * GPU present during runtime, at global initialization time we will set
 * the CPU memory allocator to allocate pinned memory.
 *
-* NB: This behavior is probably too agressive. We should consider asking users
+* NB: This behavior is probably too aggressive. We should consider asking users
 * to do on-demand memory pinning (like exposed in PyTorch APIs) instead.
 */
 struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator {
@@ -105,7 +105,7 @@ class CAFFE2_CUDA_API ThreadLocalCUDAObjects {
 cublasHandle_t GetHandle(c10::cuda::CUDAStream cuda_stream) {
 CUDAGuard guard(cuda_stream.device_index());
 // Default construct in the map if it doesn't exist, and return a mutable
-// refernce to it.
+// reference to it.
 auto& r = cublas_handles_[cuda_stream];
 if (r == nullptr) {
 CUBLAS_ENFORCE(cublasCreate(&r));

@@ -304,7 +304,7 @@ ExecutionChains computeGroups(std::vector<OperatorNode>& orig_nodes) {
 }
 }
 
-// We check sync ops on the froniter first and then async ops. This gives us a
+// We check sync ops on the frontier first and then async ops. This gives us a
 // head start to execute sync ops locally while waiting for async ops to
 // finish.
 std::queue<int> q;

@@ -121,7 +121,7 @@ class MatchGraph : public Graph<MatchPredicate<GraphType>> {
 // is rooted at the given rootNode.
 // The flag invertGraphTraversal specify if we should follow out edges or
 // in edges. The default is true which is useful for a functional
-// intepretation of a dataflow graph.
+// interpretation of a dataflow graph.
 SubgraphMatchResultType isSubgraphMatch(
 typename GraphType::NodeRef root,
 const typename MatchGraph::NodeRef& rootCriteriaRef,
@@ -992,7 +992,7 @@ class Operator : public OperatorBase {
 
 // Clear floating point exception flags before RunOnDevice. We will test
 // exception flags afterwards, and raise an error if an exception has
-// happend.
+// happened.
 if (FLAGS_caffe2_operator_throw_if_fp_exceptions ||
 FLAGS_caffe2_operator_throw_if_fp_overflow_exceptions) {
 std::feclearexcept(FE_ALL_EXCEPT);

@@ -164,7 +164,7 @@ struct CompiledExecutionStep;
 * ExecuteStepRecursive will call call compiled() once before the given
 * execution step is run and keep it alive for the length of its execution.
 * This means that, for steps with create_workspace=true, a child workspace
-* will be created everytime the step is executed, and destroyed right
+* will be created every time the step is executed, and destroyed right
 * afterwards.
 */
 struct ExecutionStepWrapper {

@@ -15,7 +15,7 @@ class IOS_CAFFE_EXPORT Caffe2IOSPredictor final {
 public:
 /**
 @allowMetalOperators Allow converting eligible operators to Metal GPU framework accelerated
-operators. Setting this flag to true doesn't gaurantee predictor will be using Metal operators;
+operators. Setting this flag to true doesn't guarantee predictor will be using Metal operators;
 Client code must check usingMetalOperators flag to determine predictor is using them.
 */
 static Caffe2IOSPredictor* NewCaffe2IOSPredictor(const caffe2::NetDef& init_net,
@@ -150,7 +150,7 @@ class MPSImageWrapper {
 * it is still in use. If the parent wrapper contains a static image, we
 * should create a new command buffer because we use static image so it can
 * survive synchronization(commit of the command buffer), which means if we
-* pass on the command buffer the command buffer will be commited in
+* pass on the command buffer the command buffer will be committed in
 * multiple places in the graph. Also since we don't pass on parent's
 * command buffer,we need to synchronize(commit) it since it won't be used
 * in the future.

@@ -235,7 +235,7 @@ bool ConcatOp<Context>::RunOnDevice() {
 auto* output = Output(0);
 
 // We can override default options(Context::GetDeviceType())
-// by explictly passing in device type we want
+// by explicitly passing in device type we want
 Tensor* split = Output(
 1, std::vector<int64_t>(1, InputSize()), at::dtype<int>().device(CPU));
 int* axis_data = split->template mutable_data<int>();

@@ -64,7 +64,7 @@ are sorted by the corresponding KEY.
 "RANGES",
 "Tensor of int32/int64 ranges, of dims (N, M, 2). "
 "Where N is number of examples and M is a size of each example. "
-"Last dimention represents a range in the format (start, lengths)")
+"Last dimension represents a range in the format (start, lengths)")
 .Input(2, "KEY", "Tensor of rank 1 and type int64.")
 .Output(0, "OUTPUT", "1-D tensor of size sum of range lengths")
 .Arg("lengths", "Expected lengths for ranges")
@@ -42,7 +42,7 @@ void ProcessBlob(
 blob_states[key]
 .seen_chunks_ids.insert(proto.content_chunk_id())
 .second,
-"Chunk with the same id has occured twice for: ",
+"Chunk with the same id has occurred twice for: ",
 key);
 CAFFE_ENFORCE(
 proto.content_chunk_id() >= 0 &&

@@ -176,7 +176,7 @@ void ROIAlignForward(
 CAFFE_ENFORCE(
 roi_width >= 0 && roi_height >= 0,
 "ROIs in ROIAlign do not have non-negative size!");
-} else { // backward compatiblity
+} else { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = std::max(roi_width, (float)1.);
 roi_height = std::max(roi_height, (float)1.);
@@ -314,9 +314,9 @@ struct MaxRangeReducerDef {
 "elements in the input slices. Operation doesn't change the shape of "
 "individual blocks. This implementation imitates torch nn.Max operator. "
 "If the maximum value occurs more than once, the operator will return "
-"the first occurence of value. When computing the gradient using the "
+"the first occurrence of value. When computing the gradient using the "
 "backward propagation, the gradient input corresponding to the first "
-"occurence of the maximum value will be used.";
+"occurrence of the maximum value will be used.";
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -119,7 +119,7 @@ void ROIAlignBackwardFeature(
 CAFFE_ENFORCE(
 roi_width >= 0 && roi_height >= 0,
 "ROIs in ROIAlign do not have non-negative size!");
-} else { // backward compatiblity
+} else { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = std::max(roi_width, (T)1.);
 roi_height = std::max(roi_height, (T)1.);

@@ -114,7 +114,7 @@ __global__ void RoIAlignBackwardFeature(
 
 T roi_width = roi_end_w - roi_start_w;
 T roi_height = roi_end_h - roi_start_h;
-if (!continuous_coordinate) { // backward compatiblity
+if (!continuous_coordinate) { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = c10::cuda::compat::max(roi_width, (T)1.);
 roi_height = c10::cuda::compat::max(roi_height, (T)1.);

@@ -161,7 +161,7 @@ void ROIAlignForward(
 CAFFE_ENFORCE(
 roi_width >= 0 && roi_height >= 0,
 "ROIs in ROIAlign do not have non-negative size!");
-} else { // backward compatiblity
+} else { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = std::max(roi_width, (T)1.);
 roi_height = std::max(roi_height, (T)1.);

@@ -103,7 +103,7 @@ __global__ void RoIAlignForward(
 
 T roi_width = roi_end_w - roi_start_w;
 T roi_height = roi_end_h - roi_start_h;
-if (!continuous_coordinate) { // backward compatiblity
+if (!continuous_coordinate) { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = c10::cuda::compat::max(roi_width, (T)1.);
 roi_height = c10::cuda::compat::max(roi_height, (T)1.);

@@ -111,7 +111,7 @@ __global__ void RoIAlignRotatedBackward(
 T roi_height = offset_bottom_rois[4] * spatial_scale;
 T theta = offset_bottom_rois[5] * M_PI / 180.0;
 
-if (!continuous_coordinate) { // backward compatiblity
+if (!continuous_coordinate) { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = c10::cuda::compat::max(roi_width, (T)1.);
 roi_height = c10::cuda::compat::max(roi_height, (T)1.);

@@ -171,7 +171,7 @@ void ROIAlignRotatedForward(
 CAFFE_ENFORCE(
 roi_width >= 0 && roi_height >= 0,
 "ROIs in ROIAlign do not have non-negative size!");
-} else { // backward compatiblity
+} else { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = std::max(roi_width, (T)1.);
 roi_height = std::max(roi_height, (T)1.);

@@ -101,7 +101,7 @@ __global__ void RoIAlignRotatedForward(
 T roi_height = offset_bottom_rois[4] * spatial_scale;
 T theta = offset_bottom_rois[5] * M_PI / 180.0;
 
-if (!continuous_coordinate) { // backward compatiblity
+if (!continuous_coordinate) { // backward compatibility
 // Force malformed ROIs to be 1x1
 roi_width = c10::cuda::compat::max(roi_width, (T)1.);
 roi_height = c10::cuda::compat::max(roi_height, (T)1.);
@@ -1981,7 +1981,7 @@ This op is basically Gather and Lengths{op} fused together.
 INDICES should contain integers in range 0..N-1 where N is the first dimension
 of DATA. INDICES represent which slices of DATA need to be pulled in.
 
-LENGTHS is a vector that defines slice sizes by first dimention of DATA. Values
+LENGTHS is a vector that defines slice sizes by first dimension of DATA. Values
 belonging to the same segment are aggregated together. sum(LENGTHS) has
 to match INDICES size.
 
@@ -30,7 +30,7 @@ OPERATOR_SCHEMA(AveragePut)
 "(*boolean*): whether or not to clamp inputs to the max inputs allowed")
 .Arg(
 "default_value",
-"(*float*): Optionally provide a default value for recieving empty tensors")
+"(*float*): Optionally provide a default value for receiving empty tensors")
 .SetDoc(R"DOC(
 Consume a value and pushes it to the global stat registry as an average.
 

@@ -62,7 +62,7 @@ OPERATOR_SCHEMA(IncrementPut)
 "(*boolean*): whether or not to clamp inputs to the max inputs allowed")
 .Arg(
 "default_value",
-"(*float*): Optionally provide a default value for recieving empty tensors")
+"(*float*): Optionally provide a default value for receiving empty tensors")
 .SetDoc(R"DOC(
 Consume a value and pushes it to the global stat registry as an sum.
 

@@ -94,7 +94,7 @@ OPERATOR_SCHEMA(StdDevPut)
 "(*boolean*): whether or not to clamp inputs to the max inputs allowed")
 .Arg(
 "default_value",
-"(*float*): Optionally provide a default value for recieving empty tensors")
+"(*float*): Optionally provide a default value for receiving empty tensors")
 .SetDoc(R"DOC(
 Consume a value and pushes it to the global stat registry as an standard deviation.
 
@@ -41,7 +41,7 @@ struct TemplatePutOp : public Operator<CPUContext> {
 input = *Input(0).template data<V>();
 } else if (!has_default_) {
 CAFFE_THROW(
-"Default value must be provided when recieving empty tensors for ",
+"Default value must be provided when receiving empty tensors for ",
 given_name_);
 }
 

@@ -70,7 +70,7 @@ inline void convert(
 static_cast<std::string*>(dst)->assign(src_start, src_end);
 } break;
 case TensorProto_DataType_FLOAT: {
-// TODO(azzolini): avoid copy, use faster convertion
+// TODO(azzolini): avoid copy, use faster conversion
 std::string str_copy(src_start, src_end);
 const char* src_copy = str_copy.c_str();
 char* src_copy_end;
@@ -1180,7 +1180,7 @@ class GatherRangesOp : public Operator<Context> {
 CAFFE_ENFORCE(ranges.dim() == 3, "Ranges must be 3-D");
 CAFFE_ENFORCE(ranges.size(1) > 0, "There has to be at least one range");
 CAFFE_ENFORCE_EQ(
-ranges.size(2), 2, "Ranges last dimention should be of size 2");
+ranges.size(2), 2, "Ranges last dimension should be of size 2");
 
 auto* rawData = static_cast<const char*>(data.raw_data());
 auto* rangesData = ranges.template data<Index>();

@@ -341,7 +341,7 @@ repr::NNModule convertToNNModule(
 }
 
 /// \brief For the construction of the control flow graph we keep track
-/// of a current basic block, which we split up as we come accross control
+/// of a current basic block, which we split up as we come across control
 /// flow operations such as if and while.
 auto bbNode = cfg.createNamedFunction("main");
 
@@ -784,7 +784,7 @@ void OnnxifiTransformer::getBackendId() {
 return;
 }
 // Try to find a backend that support Caffe2 proto. Note that this is quite
-// opportunistic as we don't offcially support Caffe2 proto.
+// opportunistic as we don't officially support Caffe2 proto.
 char buf[kBufferSize];
 for (int i = 0; i < backend_ids_.size(); ++i) {
 size_t len = kBufferSize;

@@ -86,7 +86,7 @@ void fill_with_type(
 
 /*
 * @run_net: the predict net with parameter and input names
-* @input_dims: the input dimentions of all operator inputs of run_net
+* @input_dims: the input dimensions of all operator inputs of run_net
 * @input_types: the input types of all operator inputs of run_net
 */
 class DataRandomFiller : public Filler {

@@ -8,7 +8,7 @@ package caffe2;
 // (2) We do not use extension because that used to create quite some conflicts
 // in Caffe's protobuf design.
 // (3) We have not used any proto3 specific features, such as Any or Map. This
-// is mainly for backward compability purposes but we may consider using
+// is mainly for backward compatibility purposes but we may consider using
 // those in the future.
 
 // ExternalDataProto stores the pointer to the content of TensorProto
@@ -141,7 +141,7 @@ class CRFWithLoss(object):
 to
 
 input_blob: the input sequence in a format T x N x D
-where T is sequence size, N - batch size and D - input dimention
+where T is sequence size, N - batch size and D - input dimension
 ##Only supports batch-size 1##
 
 seq_lengths: blob containing sequence lengths (unused)

@@ -134,7 +134,7 @@ typedef struct {
 
 /*!
 * \brief C Tensor object, manage memory of DLTensor. This data structure is
-* intended to faciliate the borrowing of DLTensor by another framework. It is
+* intended to facilitate the borrowing of DLTensor by another framework. It is
 * not meant to transfer the tensor. When the borrowing framework doesn't need
 * the tensor, it should call the deleter to notify the host that the resource
 * is no longer needed.
@@ -1838,7 +1838,7 @@ class TestOperators(hu.HypothesisTestCase):
 # error increases dramtically when input is close to 0 or 1
 # and it will fail the test.
 # So we only run gradient test in the range of (0.01, 0.99)
-# very occationally, test may fail due to random accumulated error
+# very occasionally, test may fail due to random accumulated error
 # reduce test range to (0.02, 0.98) will improve test stability
 op = core.CreateOperator('Logit', ["X"], ["Y"], eps=eps)
 self.assertDeviceChecks(dc, op, [a], [0])

@@ -76,7 +76,7 @@ class LayerModelHelper(model_helper.ModelHelper):
 self._breakdown_map = None
 
 # Connect Schema to self.net. That particular instance of schmea will be
-# use for generation of the Layers accross the network and would be used
+# use for generation of the Layers across the network and would be used
 # for connection with Readers.
 self._input_feature_schema = schema.NewRecord(
 self.net,

@@ -212,7 +212,7 @@ class BatchLRLoss(ModelLayer):
 )
 
 prediction = self.input_record.prediction()
-# focal loss = (y(1-p) + p(1-y))^gamma * orginal LR loss
+# focal loss = (y(1-p) + p(1-y))^gamma * original LR loss
 # y(1-p) + p(1-y) = y + p - 2 * yp
 y_plus_p = net.Add(
 [prediction, label],
@@ -150,7 +150,7 @@ class FeatureSparseToDense(ModelLayer):
 self.output_schema = schema.Struct(*outputs)
 
 # TODO(amalevich): Consider moving this data to schema, instead
-# Structs doens't support attaching metadata to them and clonning
+# Structs doesn't support attaching metadata to them and clonning
 # will break things badly, but this is the most elegant way to pass
 # this info around. Should we change it or it'll be too much work and
 # not worse it?

@@ -225,7 +225,7 @@ def Caffe2LSTM(args):
 "Max usage differs from current total usage: {} > {}".
 format(stats['max_total'], stats['total'])
 )
-log.warning("This means that costly deallocations occured.")
+log.warning("This means that costly deallocations occurred.")
 
 return time.time() - start_time
 

@@ -604,7 +604,7 @@ def compute_assignments_dp(ranges_sorted, init_assignment, counter=None):
 Return -1 if not found.
 '''
 def is_compatible_all(candidate_range, assignments):
-''' return true if compatiable for all assignments in assignments '''
+''' return true if compatible for all assignments in assignments '''
 return all([is_compatible(candidate_range[1], x, []) for x in assignments])
 
 ii = cur_idx - 1
@@ -170,7 +170,7 @@ class ModelHelper(object):
 be created in the CurrentNameScope with the respect of all parameter
 sharing logic, i.e. 'resolved_name_scope/param_name'.
 
-Parameter sharing logic is going to override CurrentNameScope accoring
+Parameter sharing logic is going to override CurrentNameScope according
 to the rules that are specified through ParameterSharing contexts,
 all ParameterSharing contexts are applied recursively until there are no
 extra overrides present, where on each step the best match will be

@@ -90,7 +90,7 @@ class GradientClipping(NetModifier):
 
 for param, grad in final_param_map.items():
 # currently sparse gradients won't be clipped
-# futher implementation is needed to enable it
+# further implementation is needed to enable it
 if isinstance(grad, core.GradientSlice):
 continue
 

@@ -84,7 +84,7 @@ class ResNetBuilder():
 return self.prev_blob
 
 '''
-Add a "bottleneck" component as decribed in He et. al. Figure 3 (right)
+Add a "bottleneck" component as described in He et. al. Figure 3 (right)
 '''
 
 def add_bottleneck(
@@ -43,7 +43,7 @@ class ModifierContext(object):
 class UseModifierBase(object):
 '''
 context class to allow setting the current context.
-Example useage with layer:
+Example usage with layer:
 modifiers = {'modifier1': modifier1, 'modifier2': modifier2}
 with Modifiers(modifiers):
 modifier = ModifierContext.current().get_modifier('modifier1')

@@ -28,7 +28,7 @@ class NormalizerContext(ModifierContext):
 class UseNormalizer(UseModifierBase):
 '''
 context class to allow setting the current context.
-Example useage with layer:
+Example usage with layer:
 normalizers = {'norm1': norm1, 'norm2': norm2}
 with UseNormalizer(normalizers):
 norm = NormalizerContext.current().get_normalizer('norm1')

@@ -915,7 +915,7 @@ class Caffe2Backend(Backend):
 
 return init_net, pred_net
 
-# wrapper for backwards compatability
+# wrapper for backwards compatibility
 @classmethod
 def onnx_graph_to_caffe2_net(cls, model, device="CPU", opset_version=_known_opset_version):
 return cls._onnx_model_to_caffe2_net(model, device=device, opset_version=opset_version, include_initializers=True)
@@ -239,7 +239,7 @@ class TestDatasetOps(TestCase):
 """
 This is what the flattened fields for this schema look like, along
 with its type. Each one of these fields will be stored, read and
-writen as a tensor.
+written as a tensor.
 """
 expected_fields = [
 ('dense', (np.float32, 3)),

@@ -12,7 +12,7 @@ import hypothesis.strategies as st
 import hypothesis.extra.numpy as hnp
 
 # Basic implementation of gather for axis == 0, shich is lookup of indices
-# in the outer dimention. Keeping it for reference here, although is similar
+# in the outer dimension. Keeping it for reference here, although is similar
 # to more general funciton below.
 def ref_gather_axis0():
 def inner(data, ind):

@@ -165,7 +165,7 @@ class TestPooling(hu.HypothesisTestCase):
 # Currently MIOpen Pooling only supports 2d pooling
 if hiputl.run_in_hip(gc, dc):
 assume(engine != "CUDNN")
-# pad and stride ignored because they will be infered in global_pooling
+# pad and stride ignored because they will be inferred in global_pooling
 op = core.CreateOperator(
 op_type,
 ["X"],
@@ -44,7 +44,7 @@ class UseOptimizer(UseModifierBase):
 with UseOptimizer(optim2):
 brew.func
 
-Example useage with layer:
+Example usage with layer:
 optimizers = {'optim1': optim1, 'optim2': optim2}
 with Optimizers(optimizers):
 optim = OptimizerContext.current().get_optimizer('optim1')

@@ -231,7 +231,7 @@ class LRModificationTestBase(object):
 optimizer.set_lr_injection(0)
 self.assertEqual(optimizer.get_lr_injection(), 0)
 
-# Test that setting the lr_injector properly propogates to the
+# Test that setting the lr_injector properly propagates to the
 # lr_multiplier. Here, we have both lr_injector and norm_ratio that
 # affect the lr_multiplier
 workspace.RunNet(model.net.Proto().name)

@@ -111,9 +111,9 @@ def pipe(
 until a stop is signaled either by the reader or the
 writer.
 output: either a Writer, a Queue or a DataStream that will be
-writen to as long as neither reader nor writer signal
+written to as long as neither reader nor writer signal
 a stop condition. If output is not provided or is None,
-a Queue is created with given `capacity` and writen to.
+a Queue is created with given `capacity` and written to.
 num_threads: number of concurrent threads used for processing and
 piping. If set to 0, no Task is created, and a
 reader is returned instead -- the reader returned will
@@ -348,7 +348,7 @@ class LogBarrier(Regularizer):
 **self.discount_options
 )
 # TODO(xlwang): param might still be negative at the initialization time or
-# slighly negative due to the distributed training. Enforce it's non-negativity
+# slightly negative due to the distributed training. Enforce it's non-negativity
 # for now (at least above machine epsilon)
 param_non_neg = net.NextScopedBlob(param + "_non_neg")
 net.Clip([param], [param_non_neg], min=self.kEpsilon)

@@ -28,7 +28,7 @@ class RegularizerContext(ModifierContext):
 class UseRegularizer(UseModifierBase):
 '''
 context class to allow setting the current context.
-Example useage with layer:
+Example usage with layer:
 regularizers = {'reg1': reg1, 'reg2': reg2}
 with UseRegularizer(regularizers):
 reg = RegularizerContext.current().get_regularizer('reg1')

@@ -126,7 +126,7 @@ class Field(object):
 def field_blobs(self):
 """Return the list of blobs with contents for this Field.
 Values can either be all numpy.ndarray or BlobReference.
-If any of the fields doens't have a blob, throws.
+If any of the fields doesn't have a blob, throws.
 """
 raise NotImplementedError('Field is an abstract class.')
 
@@ -57,7 +57,7 @@ class Session(object):
 
 
 Global Workspace:
-At the beggining of the session, a global workspace is created and kept
+At the beginning of the session, a global workspace is created and kept
 alive for the duration of the session.
 
 

@@ -174,7 +174,7 @@ class TaskGroup(object):
 will finish execution when the last task of the group finishes.
 
 Example:
-# supose that s1 ... s5 are execution steps or nets.
+# suppose that s1 ... s5 are execution steps or nets.
 with TaskGroup() as tg:
 # these tasks go to default node 'local'
 Task(step=s1)

@@ -384,7 +384,7 @@ def run_conv_or_fc(
 # do caching so exercises different code paths from the subsequent
 # runs
 
-# self.ws.run re-creates operator everytime so this test covers
+# self.ws.run re-creates operator every time so this test covers
 # cases when we have multiple nets sharing the same workspace
 test_case.ws.create_blob("X").feed(X, device_option=gc)
 test_case.ws.create_blob("W").feed(W, device_option=gc)
@@ -50,7 +50,7 @@ typedef struct mz_zip_archive mz_zip_archive;
 // the reader can still read files that were compressed.
 // 2. It provides a getRecordOffset function which returns the offset into the
 // raw file where file data lives. If the file was written with PyTorchStreamWriter
-// it is guarenteed to be 64 byte aligned.
+// it is guaranteed to be 64 byte aligned.
 
 // PyTorchReader/Writer handle checking the version number on the archive format
 // and ensure that all files are written to a archive_name directory so they

@@ -24,7 +24,7 @@ OPERATOR_SCHEMA(ClipTensorByScaling)
 .Input(
 2,
 "additional_threshold",
-"An optional additonal threshold to scale the orignal threshold")
+"An optional additional threshold to scale the orignal threshold")
 .Arg("threshold", "Threshold to determine whether to scale down the tensor")
 .Output(
 0,

@@ -437,7 +437,7 @@ void VideoDecoder::decodeLoop(
 // the decoder is still giving us frames.
 int ipacket = 0;
 while ((!eof || gotPicture) &&
-/* either you must decode all frames or decode upto maxFrames
+/* either you must decode all frames or decode up to maxFrames
 * based on status of the mustDecodeAll flag */
 (mustDecodeAll ||
 ((!mustDecodeAll) && (selectiveDecodedFrames < maxFrames))) &&
@@ -283,7 +283,7 @@ Frequently asked questions
 - **How can I contribute as a reviewer?** There is lots of value if
 community developer reproduce issues, try out new functionality, or
 otherwise help us identify or troubleshoot issues. Commenting on
-tasks or pull requests with your enviroment details is helpful and
+tasks or pull requests with your environment details is helpful and
 appreciated.
 - **CI tests failed, what does it mean?** Maybe you need to merge with
 master or rebase with latest changes. Pushing your changes should

@@ -223,7 +223,7 @@ distributed package and ``group_name`` is deprecated as well.
 In other words, if the file is not removed/cleaned up and you call
 :func:`init_process_group` again on that file, failures are expected.
 The rule of thumb here is that, make sure that the file is non-existent or
-empty everytime :func:`init_process_group` is called.
+empty every time :func:`init_process_group` is called.
 
 ::
 

@@ -103,7 +103,7 @@ A suggested workflow is
 - ``dir(model)`` to see all avaialble methods of the model.
 - ``help(model.foo)`` to check what arguments ``model.foo`` takes to run
 
-To help users explore without refering to documentation back and forth, we strongly
+To help users explore without referring to documentation back and forth, we strongly
 recommend repo owners make function help messages clear and succinct. It's also helpful
 to include a minimal working example.
 
@@ -389,7 +389,7 @@ For ``torch.mm(tensor, other)``:
 ('N', 'out')
 
 Inherently, a matrix multiplication performs a dot product over two dimensions,
-collapsing them. When two tensors are matrix-multipled, the contracted dimensions
+collapsing them. When two tensors are matrix-multiplied, the contracted dimensions
 disappear and do not show up in the output tensor.
 
 :func:`torch.mv`, :func:`torch.dot` work in a similar way: name inference does not

@@ -33,7 +33,7 @@ transition between L2 loss and L1 loss is adjustable by a hyper-parameter beta:
 SmoothL1(x) = 0.5 * x^2 / beta if |x| < beta
 |x| - 0.5 * beta otherwise.
 
-SmoothL1 is used in Fast R-CNN and decendants as the loss function for bounding
+SmoothL1 is used in Fast R-CNN and descendants as the loss function for bounding
 box regression.
 
 The loss computed by this op has a flexible form:

@@ -3,7 +3,7 @@
 # This script helps developers set up the ONNX Caffe2 and PyTorch develop environment on devgpu.
 # It creates an virtualenv instance, and installs all the dependencies in this environment.
 # The script will creates a folder called onnx-dev folder under the $HOME directory.
-# onnx, pytorch and caffe2 are installed seperately.
+# onnx, pytorch and caffe2 are installed separately.
 # Please source $HOME/onnx-dev/.onnx_env_init to initialize the development before starting developing.
 
 
@@ -754,7 +754,7 @@ class TestCase(expecttest.TestCase):
 raise TypeError("Was expecting both tensors to be bool type.")
 else:
 if a.dtype == torch.bool and b.dtype == torch.bool:
-# we want to respect precision but as bool doesn't support substraction,
+# we want to respect precision but as bool doesn't support subtraction,
 # boolean tensor has to be converted to int
 a = a.to(torch.int)
 b = b.to(torch.int)

@@ -1983,7 +1983,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) {
 // output, hence verify the logic. In this test, the cache size is
 // configured to be the same as chunk size and batch size. So the
 // chunk data is written to the cache one by one. Only the current
-// batch is retrieved, the next chunk is writen. Now in iteration 0,
+// batch is retrieved, the next chunk is written. Now in iteration 0,
 // after the first batch is retrieved, when we save the dataset
 // statues, there are three possible scenarios for the writer thread:
 // 1. it hasn't started loading the next chunk data yet, so the

@@ -67,7 +67,7 @@ TEST_F(NNUtilsTest, ClipGradNorm) {
 auto scaled = compare_scaling(grads);
 ASSERT_NEAR(0, scaled.std().item().toFloat(), 1e-7);
 }
-// Small gradients should be lefted unchanged
+// Small gradients should be left unchanged
 grads = {
 torch::rand({10, 10}).div(10000),
 torch::ones(10).div(500),
@@ -35,7 +35,7 @@ at::Tensor t_def(at::Tensor x);
 
 // given the difference of output vs expected tensor, check whether the
 // difference is within a relative tolerance range. This is a standard way of
-// matching tensor values upto certain precision
+// matching tensor values up to certain precision
 bool checkRtol(const at::Tensor& diff, const std::vector<at::Tensor> inputs);
 bool almostEqual(const at::Tensor& a, const at::Tensor& b);
 

@@ -299,7 +299,7 @@ class DistAutogradTest(RpcAgentTestFixture):
 # nested rpc call to next dst. In return route, receive result tensor t3
 # from next dst and forwarding t3 back to previous calls.
 # For this context in this rank, it expects graph like this:
-# send and recv functions for receving and forwarding t1 and t2:
+# send and recv functions for receiving and forwarding t1 and t2:
 # rpcSendBackward
 # / \
 # t1.recvRpcBackward t2.recvRpcBackward

@@ -355,7 +355,7 @@ def find_test_index(test, selected_tests, find_last_index=False):
 occurrence (first is default)
 
 Returns:
-index of the first or last occurance of the given test
+index of the first or last occurrence of the given test
 """
 idx = 0
 found_idx = -1
@@ -273,7 +273,7 @@ class TestCppApiParity(common.TestCase):
 example_inputs = self._prepare_tensors_for_module_input_or_target(test_params, example_inputs)
 
 # We set all inputs to torch.nn module to requires grad, so that the backward test can always be run.
-# However, we skip embedding layers for now, becuase they only accept LongTensor as inputs,
+# However, we skip embedding layers for now, because they only accept LongTensor as inputs,
 # And LongTensor cannot require grad.
 if test_params.module_name not in ["Embedding", "Embedding_sparse", "EmbeddingBag", "EmbeddingBag_sparse"]:
 example_inputs = [x.requires_grad_() for x in example_inputs]

@@ -1937,7 +1937,7 @@ class TestNN(NNTestCase):
 amount requested by the user the moment the pruning method
 is initialized. This test checks that the expected errors are
 raised whenever the amount is invalid.
-The orginal function runs basic type checking + value range checks.
+The original function runs basic type checking + value range checks.
 It doesn't check the validity of the pruning amount with
 respect to the size of the tensor to prune. That's left to
 `_validate_pruning_amount`, tested below.

@@ -788,7 +788,7 @@ class GraphModePostTrainingQuantTest(QuantizationTestCase):
 self.assertEqual(result_eager, result_script)
 
 def test_observer_with_ignored_function(self):
-r"""Test observers with ignored fucntion and make sure it works in
+r"""Test observers with ignored function and make sure it works in
 graph mode
 """
 # eager mode
Some files were not shown because too many files have changed in this diff.