Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
fix comparison of narrow type with wide type in loop condition (#53951)
Summary:
Fix the Semmle warning "Comparison of narrow type with wide type in loop condition".

For example, consider the following piece of code:

    for (int i = 0; i < array.size(); ++i) {}

The problem is that array.size() returns size_t, which, depending on the implementation, can be a wider type than int. For a very large array whose size exceeds the range of int, i overflows before reaching the bound, so the loop never terminates.
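The fix applied throughout this commit is to iterate with c10::irange, which deduces the loop variable's type from the bound, so the narrow-versus-wide comparison disappears. A minimal sketch of the before/after pattern (the vector and function name here are illustrative, not taken from the diff):

    #include <c10/util/irange.h>

    #include <vector>

    void sum_example(const std::vector<int>& array) {
      long long total = 0;
      // Before: 'int i' is compared with the wider 'size_t' returned by size(),
      // and can overflow for arrays with more than INT_MAX elements:
      //   for (int i = 0; i < array.size(); ++i) { total += array[i]; }

      // After: 'i' takes the type of array.size() (size_t), so the comparison
      // is between identical types and i cannot overflow.
      for (const auto i : c10::irange(array.size())) {
        total += array[i];
      }
      (void)total; // placeholder use
    }

The two-argument overload, c10::irange(begin, end), covers the loops below that start at a nonzero index, e.g. c10::irange(1, forward_args.size()) or c10::irange(l_diff, l_inp).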
Pull Request resolved: https://github.com/pytorch/pytorch/pull/53951
Reviewed By: zou3519
Differential Revision: D27181495
Pulled By: malfet
fbshipit-source-id: 0612c5cedcdc656c193085e7fbb87dd163f20688
This commit is contained in:
parent edfc787df4
commit 92770d25cd
|
|
@ -8,6 +8,8 @@
|
||||||
#include <ATen/native/Resize.h>
|
#include <ATen/native/Resize.h>
|
||||||
#include <ATen/TensorOperators.h>
|
#include <ATen/TensorOperators.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
|
|
||||||
using DimMask = TensorIteratorBase::DimMask;
|
using DimMask = TensorIteratorBase::DimMask;
|
||||||
|
|
@ -1392,7 +1394,7 @@ DimCounter::DimCounter(IntArrayRef shape, Range range)
|
||||||
, offset(range.begin) {
|
, offset(range.begin) {
|
||||||
int64_t linear_offset = range.begin;
|
int64_t linear_offset = range.begin;
|
||||||
int64_t ndim = values.size();
|
int64_t ndim = values.size();
|
||||||
for (int dim = 0; dim < ndim; dim++) {
|
for (const auto dim : c10::irange(ndim)) {
|
||||||
int64_t size = shape[dim];
|
int64_t size = shape[dim];
|
||||||
if (size > 0) {
|
if (size > 0) {
|
||||||
values[dim] = linear_offset % size;
|
values[dim] = linear_offset % size;
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@
|
||||||
#include <c10/util/accumulate.h>
|
#include <c10/util/accumulate.h>
|
||||||
#include <c10/util/ArrayRef.h>
|
#include <c10/util/ArrayRef.h>
|
||||||
#include <c10/util/Exception.h>
|
#include <c10/util/Exception.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
@ -51,7 +52,7 @@ static inline TensorImpl* checked_dense_tensor_unwrap(const Tensor& expr, const
|
||||||
static inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(ArrayRef<Tensor> tensors, const char * name, int pos, DeviceType device_type, ScalarType scalar_type) {
|
static inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(ArrayRef<Tensor> tensors, const char * name, int pos, DeviceType device_type, ScalarType scalar_type) {
|
||||||
std::vector<TensorImpl*> unwrapped;
|
std::vector<TensorImpl*> unwrapped;
|
||||||
unwrapped.reserve(tensors.size());
|
unwrapped.reserve(tensors.size());
|
||||||
for (unsigned int i = 0; i < tensors.size(); ++i) {
|
for (const auto i : c10::irange(tensors.size())) {
|
||||||
const auto& expr = tensors[i];
|
const auto& expr = tensors[i];
|
||||||
if (expr.layout() != Layout::Strided) {
|
if (expr.layout() != Layout::Strided) {
|
||||||
AT_ERROR("Expected dense tensor but got ", expr.layout(),
|
AT_ERROR("Expected dense tensor but got ", expr.layout(),
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
#include <ATen/core/function_schema.h>
|
#include <ATen/core/function_schema.h>
|
||||||
#include <ATen/core/jit_type.h>
|
#include <ATen/core/jit_type.h>
|
||||||
#include <c10/macros/Macros.h>
|
#include <c10/macros/Macros.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
#include <ATen/core/grad_mode.h>
|
#include <ATen/core/grad_mode.h>
|
||||||
#include <ATen/core/function.h>
|
#include <ATen/core/function.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
@ -1107,7 +1108,7 @@ torch::jit::Function* ClassType::findForwardHook(const std::string& name) const
|
||||||
std::string getSchemaInputTypesString(const FunctionSchema& schema) {
|
std::string getSchemaInputTypesString(const FunctionSchema& schema) {
|
||||||
std::stringstream input_types;
|
std::stringstream input_types;
|
||||||
const std::vector<Argument>& forward_args = schema.arguments();
|
const std::vector<Argument>& forward_args = schema.arguments();
|
||||||
for (int i = 1; i < forward_args.size(); ++i) {
|
for (const auto i : c10::irange(1, forward_args.size())) {
|
||||||
input_types << forward_args[i].type()->annotation_str();
|
input_types << forward_args[i].type()->annotation_str();
|
||||||
if (forward_args.size() - 1 != i) {
|
if (forward_args.size() - 1 != i) {
|
||||||
input_types << ", ";
|
input_types << ", ";
|
||||||
|
|
@ -1213,7 +1214,7 @@ void checkForwardHookInputArguments(
|
||||||
hook_err_msg
|
hook_err_msg
|
||||||
);
|
);
|
||||||
|
|
||||||
for (int i = 1; i < forward_args.size(); ++i) {
|
for (const auto i : c10::irange(1, forward_args.size())) {
|
||||||
if (*forward_args[i].type() != *input_tuple_types[i - 1]) {
|
if (*forward_args[i].type() != *input_tuple_types[i - 1]) {
|
||||||
TORCH_CHECK(
|
TORCH_CHECK(
|
||||||
false,
|
false,
|
||||||
|
|
@ -1313,7 +1314,7 @@ void ClassType::checkForwardPreHookSchema(
|
||||||
pre_hook_err_msg
|
pre_hook_err_msg
|
||||||
);
|
);
|
||||||
// check that contained types match forward types
|
// check that contained types match forward types
|
||||||
for (int i = 1; i < forward_args.size(); ++i) {
|
for (const auto i : c10::irange(1, forward_args.size())) {
|
||||||
if (*forward_args[i].type() != *return_tuple_types[i - 1]) {
|
if (*forward_args[i].type() != *return_tuple_types[i - 1]) {
|
||||||
TORCH_CHECK(
|
TORCH_CHECK(
|
||||||
false,
|
false,
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,8 @@
|
||||||
#include <ATen/Parallel.h>
|
#include <ATen/Parallel.h>
|
||||||
#include <ATen/core/DistributionsHelper.h>
|
#include <ATen/core/DistributionsHelper.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at { namespace native {
|
namespace at { namespace native {
|
||||||
|
|
||||||
static const double SELU_ALPHA = 1.6732632423543772848170429916717;
|
static const double SELU_ALPHA = 1.6732632423543772848170429916717;
|
||||||
|
|
@ -453,12 +455,12 @@ void inline prelu_cpu_kernel_multi_weights(
|
||||||
scalar_t* weight_data = weight.data_ptr<scalar_t>();
|
scalar_t* weight_data = weight.data_ptr<scalar_t>();
|
||||||
|
|
||||||
auto loop = [&](int64_t start, int64_t end) {
|
auto loop = [&](int64_t start, int64_t end) {
|
||||||
for (auto i = start; i < end; ++i) {
|
for (const auto i : c10::irange(start, end)) {
|
||||||
int64_t offset = i * channel_size * input_stride1;
|
int64_t offset = i * channel_size * input_stride1;
|
||||||
scalar_t* n_input_data = input_data + offset;
|
scalar_t* n_input_data = input_data + offset;
|
||||||
scalar_t* n_result_data = result_data + offset;
|
scalar_t* n_result_data = result_data + offset;
|
||||||
for (auto j = 0; j < channel_size; ++j) {
|
for (const auto j : c10::irange(channel_size)) {
|
||||||
for (auto k = 0; k < input_stride1; ++k) {
|
for (const auto k : c10::irange(input_stride1)) {
|
||||||
// to allow for compiler optimization, here splitting into two lines:
|
// to allow for compiler optimization, here splitting into two lines:
|
||||||
scalar_t w = (n_input_data[k] > 0) ? scalar_t(1) : weight_data[j];
|
scalar_t w = (n_input_data[k] > 0) ? scalar_t(1) : weight_data[j];
|
||||||
n_result_data[k] = w * n_input_data[k];
|
n_result_data[k] = w * n_input_data[k];
|
||||||
|
|
@ -578,9 +580,9 @@ void inline prelu_cpu_backward_kernel_multi_weights(
|
||||||
auto weight_grad_collector_data = weight_grad_collector.data_ptr<scalar_t>();
|
auto weight_grad_collector_data = weight_grad_collector.data_ptr<scalar_t>();
|
||||||
|
|
||||||
auto loop = [&](int64_t start, int64_t end) {
|
auto loop = [&](int64_t start, int64_t end) {
|
||||||
for (auto i = start; i < end; i++) {
|
for (const auto i : c10::irange(start, end)) {
|
||||||
for (auto j = 0; j < channel_size; j++) {
|
for (const auto j : c10::irange(channel_size)) {
|
||||||
for (auto k = 0; k < input_stride1; k++) {
|
for (const auto k : c10::irange(input_stride1)) {
|
||||||
int64_t pos = i * input_stride0 + j * input_stride1 + k;
|
int64_t pos = i * input_stride0 + j * input_stride1 + k;
|
||||||
scalar_t weight_data_val = weight_data[j];
|
scalar_t weight_data_val = weight_data[j];
|
||||||
scalar_t input_data_val = input_data[pos];
|
scalar_t input_data_val = input_data[pos];
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
#include <ATen/ATen.h>
|
#include <ATen/ATen.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at { namespace native {
|
namespace at { namespace native {
|
||||||
|
|
||||||
Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value) {
|
Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value) {
|
||||||
|
|
@ -20,7 +22,7 @@ Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value)
|
||||||
bool all_pads_non_positive = true;
|
bool all_pads_non_positive = true;
|
||||||
|
|
||||||
auto c_input = self;
|
auto c_input = self;
|
||||||
for (int i = l_diff; i < l_inp; i++) {
|
for (const auto i : c10::irange(l_diff, l_inp)) {
|
||||||
auto pad_idx = 2 * (l_inp - i - 1);
|
auto pad_idx = 2 * (l_inp - i - 1);
|
||||||
if (pad[pad_idx] < 0) {
|
if (pad[pad_idx] < 0) {
|
||||||
c_input = c_input.narrow(i, -pad[pad_idx], c_input.size(i) + pad[pad_idx]);
|
c_input = c_input.narrow(i, -pad[pad_idx], c_input.size(i) + pad[pad_idx]);
|
||||||
|
|
@ -69,7 +71,7 @@ Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value)
|
||||||
output.fill_(value);
|
output.fill_(value);
|
||||||
|
|
||||||
auto c_output = output;
|
auto c_output = output;
|
||||||
for (int i = l_diff; i < l_inp; i++) {
|
for (const auto i : c10::irange(l_diff, l_inp)) {
|
||||||
auto pad_idx = 2 * (l_inp - i - 1);
|
auto pad_idx = 2 * (l_inp - i - 1);
|
||||||
if (pad[pad_idx] > 0) {
|
if (pad[pad_idx] > 0) {
|
||||||
c_output = c_output.narrow(i, pad[pad_idx], c_output.size(i) - pad[pad_idx]);
|
c_output = c_output.narrow(i, pad[pad_idx], c_output.size(i) - pad[pad_idx]);
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@
|
||||||
#include <ATen/native/xnnpack/Engine.h>
|
#include <ATen/native/xnnpack/Engine.h>
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
#include <c10/util/accumulate.h>
|
#include <c10/util/accumulate.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <ATen/Config.h>
|
#include <ATen/Config.h>
|
||||||
#include <c10/macros/Macros.h>
|
#include <c10/macros/Macros.h>
|
||||||
|
|
@ -489,7 +490,7 @@ static void check_shape_forward(const at::Tensor& input,
|
||||||
", expected bias to be 1-dimensional with ", weight_sizes[0], " elements",
|
", expected bias to be 1-dimensional with ", weight_sizes[0], " elements",
|
||||||
", but got bias of size ", bias.sizes(), " instead");
|
", but got bias of size ", bias.sizes(), " instead");
|
||||||
|
|
||||||
for (int i = 2; i < k; ++i) {
|
for (const auto i : c10::irange(2, k)) {
|
||||||
input_shape.push_back(input.size(i) + 2 * padding[i-2]);
|
input_shape.push_back(input.size(i) + 2 * padding[i-2]);
|
||||||
// log new kernel size considering dilation
|
// log new kernel size considering dilation
|
||||||
kernel_shape.push_back(dilation[i-2] * (weight_sizes[i]-1) + 1);
|
kernel_shape.push_back(dilation[i-2] * (weight_sizes[i]-1) + 1);
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@
|
||||||
#include <ATen/TensorUtils.h>
|
#include <ATen/TensorUtils.h>
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
@ -97,10 +99,10 @@ Tensor embedding_dense_backward_cpu(
|
||||||
std::unique_ptr<index_t[]> counts;
|
std::unique_ptr<index_t[]> counts;
|
||||||
if (scale_grad_by_freq) {
|
if (scale_grad_by_freq) {
|
||||||
counts.reset(new index_t[num_weights]);
|
counts.reset(new index_t[num_weights]);
|
||||||
for (int i = 0; i < numel; i++) {
|
for (const auto i : c10::irange(numel)) {
|
||||||
counts[indices_data[i]] = 0;
|
counts[indices_data[i]] = 0;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < numel; i++) {
|
for (const auto i : c10::irange(numel)) {
|
||||||
counts[indices_data[i]]++;
|
counts[indices_data[i]]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,8 @@
|
||||||
|
|
||||||
#include <ATen/native/CPUBlas.h>
|
#include <ATen/native/CPUBlas.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#ifdef USE_FBGEMM
|
#ifdef USE_FBGEMM
|
||||||
#include <fbgemm/Fbgemm.h>
|
#include <fbgemm/Fbgemm.h>
|
||||||
#else
|
#else
|
||||||
|
|
@ -535,11 +537,11 @@ void embedding_bag_cpu_max_out(
|
||||||
auto weight_stride1 = weight.strides()[1];
|
auto weight_stride1 = weight.strides()[1];
|
||||||
auto output_stride = output.strides()[0];
|
auto output_stride = output.strides()[0];
|
||||||
|
|
||||||
for (int i = 0; i < numIndices; ++i) {
|
for (const auto i : c10::irange(numIndices)) {
|
||||||
auto bag = offset2bag_data[i];
|
auto bag = offset2bag_data[i];
|
||||||
auto word_idx = indices_data[i];
|
auto word_idx = indices_data[i];
|
||||||
|
|
||||||
for (int dim = 0; dim < featureSize; dim++) {
|
for (const auto dim : c10::irange(featureSize)) {
|
||||||
auto& current_item = output_data[output_stride * bag + dim];
|
auto& current_item = output_data[output_stride * bag + dim];
|
||||||
auto weight_item =
|
auto weight_item =
|
||||||
weight_data[weight_stride0 * word_idx + dim * weight_stride1];
|
weight_data[weight_stride0 * word_idx + dim * weight_stride1];
|
||||||
|
|
@ -751,7 +753,7 @@ static std::vector<index_t> compute_counts(
|
||||||
index_t* indices_data,
|
index_t* indices_data,
|
||||||
int64_t indices_length) {
|
int64_t indices_length) {
|
||||||
std::vector<index_t> counts(num_weights, 0);
|
std::vector<index_t> counts(num_weights, 0);
|
||||||
for (int i = 0; i < indices_length; i++) {
|
for (const auto i : c10::irange(indices_length)) {
|
||||||
counts[indices_data[i]]++;
|
counts[indices_data[i]]++;
|
||||||
}
|
}
|
||||||
return counts;
|
return counts;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <ATen/ATen.h>
|
#include <ATen/ATen.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
@ -29,7 +31,7 @@ void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2) {
|
||||||
|
|
||||||
auto expected_dtype = tensors1[0].dtype();
|
auto expected_dtype = tensors1[0].dtype();
|
||||||
|
|
||||||
for (int i = 0; i < tensors1.size(); i++) {
|
for (const auto i : c10::irange(tensors1.size())) {
|
||||||
TORCH_CHECK(tensors1[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
TORCH_CHECK(tensors1[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
||||||
TORCH_CHECK(tensors2[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
TORCH_CHECK(tensors2[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
||||||
TORCH_CHECK(tensors1[i].sizes() == tensors2[i].sizes(), "Corresponding tensors in lists must have the same size, got ", tensors1[i].sizes(), " and ", tensors2[i].sizes());
|
TORCH_CHECK(tensors1[i].sizes() == tensors2[i].sizes(), "Corresponding tensors in lists must have the same size, got ", tensors1[i].sizes(), " and ", tensors2[i].sizes());
|
||||||
|
|
@ -45,7 +47,7 @@ void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2, Te
|
||||||
|
|
||||||
auto expected_dtype = tensors1[0].dtype();
|
auto expected_dtype = tensors1[0].dtype();
|
||||||
|
|
||||||
for (int i = 0; i < tensors1.size(); i++) {
|
for (const auto i : c10::irange(tensors1.size())) {
|
||||||
TORCH_CHECK(tensors1[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
TORCH_CHECK(tensors1[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
||||||
TORCH_CHECK(tensors2[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
TORCH_CHECK(tensors2[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
||||||
TORCH_CHECK(tensors1[i].sizes() == tensors2[i].sizes(), "Corresponding tensors in lists must have the same size, got ", tensors1[i].sizes(), " and ", tensors2[i].sizes());
|
TORCH_CHECK(tensors1[i].sizes() == tensors2[i].sizes(), "Corresponding tensors in lists must have the same size, got ", tensors1[i].sizes(), " and ", tensors2[i].sizes());
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
#include <ATen/Parallel.h>
|
#include <ATen/Parallel.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|
@ -20,7 +22,7 @@ static std::vector<int> generate_intervals(
|
||||||
scalar_t alpha = static_cast<scalar_t>(inputSize - poolSize) /
|
scalar_t alpha = static_cast<scalar_t>(inputSize - poolSize) /
|
||||||
static_cast<scalar_t>(outputSize - 1);
|
static_cast<scalar_t>(outputSize - 1);
|
||||||
|
|
||||||
for (int i = 0; i < outputSize - 1; ++i) {
|
for (const auto i : c10::irange(outputSize - 1)) {
|
||||||
sequence[i] =
|
sequence[i] =
|
||||||
static_cast<int>((i + sample) * alpha) - static_cast<int>(sample * alpha);
|
static_cast<int>((i + sample) * alpha) - static_cast<int>(sample * alpha);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
#include <ATen/native/CPUBlas.h>
|
#include <ATen/native/CPUBlas.h>
|
||||||
#include <ATen/native/im2col.h>
|
#include <ATen/native/im2col.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
|
|
||||||
|
|
@ -253,7 +255,7 @@ void slow_conv_transpose2d_out_cpu_template(
|
||||||
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Long,
|
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Long,
|
||||||
input.scalar_type(), "slow_conv_transpose2d_out_cpu", [&] {
|
input.scalar_type(), "slow_conv_transpose2d_out_cpu", [&] {
|
||||||
// For each elt in batch, do:
|
// For each elt in batch, do:
|
||||||
for (int elt = 0; elt < batch_size; elt++) {
|
for (const auto elt : c10::irange(batch_size)) {
|
||||||
// Helpers
|
// Helpers
|
||||||
Tensor input_n;
|
Tensor input_n;
|
||||||
Tensor output_n;
|
Tensor output_n;
|
||||||
|
|
@ -448,7 +450,7 @@ static void slow_conv_transpose2d_backward_out_cpu_template(
|
||||||
Tensor grad_output_n = Tensor();
|
Tensor grad_output_n = Tensor();
|
||||||
|
|
||||||
// For each elt in batch, do:
|
// For each elt in batch, do:
|
||||||
for (int elt = 0; elt < batch_size; elt++) {
|
for (const auto elt : c10::irange(batch_size)) {
|
||||||
// Matrix mulitply per sample:
|
// Matrix mulitply per sample:
|
||||||
grad_input_n = grad_input.select(0, elt);
|
grad_input_n = grad_input.select(0, elt);
|
||||||
grad_output_n = grad_output.select(0, elt);
|
grad_output_n = grad_output.select(0, elt);
|
||||||
|
|
@ -639,7 +641,7 @@ void slow_conv_transpose2d_acc_grad_parameters_cpu(
|
||||||
scalar_t scale = static_cast<scalar_t>(scale_);
|
scalar_t scale = static_cast<scalar_t>(scale_);
|
||||||
|
|
||||||
// For each elt in batch, do:
|
// For each elt in batch, do:
|
||||||
for (int elt = 0; elt < batch_size; elt++) {
|
for (const auto elt : c10::irange(batch_size)) {
|
||||||
// Matrix mulitply per output:
|
// Matrix mulitply per output:
|
||||||
grad_output_n = grad_output.select(0, elt);
|
grad_output_n = grad_output.select(0, elt);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@
|
||||||
#include <ATen/native/vol2col.h>
|
#include <ATen/native/vol2col.h>
|
||||||
#include <ATen/Utils.h>
|
#include <ATen/Utils.h>
|
||||||
#include <c10/util/accumulate.h>
|
#include <c10/util/accumulate.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
|
|
@ -204,7 +205,7 @@ void slow_conv_dilated_all_cpu_template(
|
||||||
|
|
||||||
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Long, input.scalar_type(), "slow_conv_dilated<>", [&] {
|
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Long, input.scalar_type(), "slow_conv_dilated<>", [&] {
|
||||||
// For each elt in batch, do:
|
// For each elt in batch, do:
|
||||||
for (int elt = 0; elt < batchSize; elt++) {
|
for (const auto elt : c10::irange(batchSize)) {
|
||||||
// Matrix multiply per output:
|
// Matrix multiply per output:
|
||||||
Tensor input_n = input.select(0, elt);
|
Tensor input_n = input.select(0, elt);
|
||||||
|
|
||||||
|
|
@ -234,7 +235,7 @@ void slow_conv_dilated_all_cpu_template(
|
||||||
*/
|
*/
|
||||||
// The following for-loop is equivalent to the above
|
// The following for-loop is equivalent to the above
|
||||||
// gemm setup but avoids allocation of ones tensor:
|
// gemm setup but avoids allocation of ones tensor:
|
||||||
for (int n = 0; n < nOutputPlane; n++) {
|
for (const auto n : c10::irange(nOutputPlane)) {
|
||||||
output_n.select(0, n).fill_(bias[n]);
|
output_n.select(0, n).fill_(bias[n]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@
|
||||||
|
|
||||||
#include <ATen/NamedTensorUtils.h>
|
#include <ATen/NamedTensorUtils.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
|
||||||
namespace at { namespace native {
|
namespace at { namespace native {
|
||||||
|
|
@ -143,7 +145,7 @@ static Tensor align(const Tensor& tensor, DimnameList names, bool is_aligning_tw
|
||||||
|
|
||||||
static int64_t countUnset(std::bitset<kMaxNamedTensorDim> set, int64_t up_to_idx) {
|
static int64_t countUnset(std::bitset<kMaxNamedTensorDim> set, int64_t up_to_idx) {
|
||||||
int64_t result = 0;
|
int64_t result = 0;
|
||||||
for (auto i = 0; i < up_to_idx; ++i) {
|
for (const auto i : c10::irange(up_to_idx)) {
|
||||||
if (!set.test(i)) result++;
|
if (!set.test(i)) result++;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
|
@ -188,7 +190,7 @@ Tensor align_to(const Tensor& tensor, DimnameList order, int64_t ellipsis_idx) {
|
||||||
// appears in the jth element of tensor.
|
// appears in the jth element of tensor.
|
||||||
std::vector<int64_t> tensor_idx_for(order.size(), not_found);
|
std::vector<int64_t> tensor_idx_for(order.size(), not_found);
|
||||||
|
|
||||||
for (auto order_idx = 0U; order_idx < order.size(); ++order_idx) {
|
for (const auto order_idx : c10::irange(order.size())) {
|
||||||
const auto name = order[order_idx];
|
const auto name = order[order_idx];
|
||||||
TORCH_CHECK(name.isBasic(),
|
TORCH_CHECK(name.isBasic(),
|
||||||
"align_to: the desired order of dimensions cannot contain a None name, got ",
|
"align_to: the desired order of dimensions cannot contain a None name, got ",
|
||||||
|
|
@ -233,7 +235,7 @@ Tensor align_to(const Tensor& tensor, DimnameList order, int64_t ellipsis_idx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fill in the ellipsis dimensions
|
// Fill in the ellipsis dimensions
|
||||||
for (auto tensor_idx = 0U; tensor_idx < tensor_dim; ++tensor_idx) {
|
for (const auto tensor_idx : c10::irange(tensor_dim)) {
|
||||||
if (order_has_tensor_name.test(tensor_idx)) {
|
if (order_has_tensor_name.test(tensor_idx)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -259,7 +261,7 @@ Tensor align_to(const Tensor& tensor, DimnameList names) {
|
||||||
std::vector<int64_t> new_sizes(names.size(), 1);
|
std::vector<int64_t> new_sizes(names.size(), 1);
|
||||||
std::vector<int64_t> new_strides(names.size(), 0);
|
std::vector<int64_t> new_strides(names.size(), 0);
|
||||||
|
|
||||||
for (auto idx = 0U; idx < tensor_names.size(); ++idx) {
|
for (const auto idx : c10::irange(tensor_names.size())) {
|
||||||
const auto& dim = tensor_names[idx];
|
const auto& dim = tensor_names[idx];
|
||||||
TORCH_CHECK(dim.isBasic(),
|
TORCH_CHECK(dim.isBasic(),
|
||||||
"align_to: All input dims must be named. Found unnamed dim at index ",
|
"align_to: All input dims must be named. Found unnamed dim at index ",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
#include <ATen/ATen.h>
|
#include <ATen/ATen.h>
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at { namespace native {
|
namespace at { namespace native {
|
||||||
|
|
||||||
void checkLongTensor(const Tensor& tensor) {
|
void checkLongTensor(const Tensor& tensor) {
|
||||||
|
|
@ -28,7 +30,7 @@ std::tuple<Tensor, Tensor> _pack_padded_sequence(const Tensor& _input, const Ten
|
||||||
TORCH_CHECK(lengths[batch_size - 1] > 0,
|
TORCH_CHECK(lengths[batch_size - 1] > 0,
|
||||||
"Length of all samples has to be greater than 0, but found an element "
|
"Length of all samples has to be greater than 0, but found an element "
|
||||||
"in 'lengths' that is <= 0");
|
"in 'lengths' that is <= 0");
|
||||||
for(auto i = 0; i < batch_size - 1; i++) {
|
for (const auto i : c10::irange(batch_size - 1)) {
|
||||||
if (lengths[batch_size - 1 - i] > lengths[batch_size - 2 - i]) {
|
if (lengths[batch_size - 1 - i] > lengths[batch_size - 2 - i]) {
|
||||||
// NB: enforce_sorted is implemented at a Python level, but the sortedness
|
// NB: enforce_sorted is implemented at a Python level, but the sortedness
|
||||||
// check lives here. If enforce_sorted=False then this error should never
|
// check lives here. If enforce_sorted=False then this error should never
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,8 @@
|
||||||
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
||||||
#include <ATen/native/quantized/cpu/packed_params.h>
|
#include <ATen/native/quantized/cpu/packed_params.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#ifdef USE_FBGEMM
|
#ifdef USE_FBGEMM
|
||||||
#include <fbgemm/Fbgemm.h>
|
#include <fbgemm/Fbgemm.h>
|
||||||
#include <fbgemm/FbgemmFP16.h>
|
#include <fbgemm/FbgemmFP16.h>
|
||||||
|
|
@ -134,7 +136,7 @@ Tensor fbgemm_linear_int8_weight_fp32_activation(
|
||||||
|
|
||||||
// This is the end of the pipeline, pass the resulting matrix through
|
// This is the end of the pipeline, pass the resulting matrix through
|
||||||
fbgemm::DoNothing<float, float> kDoNothingObj{};
|
fbgemm::DoNothing<float, float> kDoNothingObj{};
|
||||||
for (int task_id = begin; task_id < end; ++task_id) {
|
for (const auto task_id : c10::irange(begin, end)) {
|
||||||
// After the uint8 * int8 matrix multiplication is performed, this
|
// After the uint8 * int8 matrix multiplication is performed, this
|
||||||
// operation does:
|
// operation does:
|
||||||
// 1) Add in row and column offsets to the rows and columns, respectively
|
// 1) Add in row and column offsets to the rows and columns, respectively
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,8 @@
|
||||||
#include <ATen/native/SharedReduceOps.h>
|
#include <ATen/native/SharedReduceOps.h>
|
||||||
#include <ATen/core/grad_mode.h>
|
#include <ATen/core/grad_mode.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
@ -371,7 +373,7 @@ Tensor cumprod_backward(const Tensor& grad, const Tensor& input, int64_t dim, co
|
||||||
const Tensor ones = at::ones({1}, grad.options()).expand(ones_size);
|
const Tensor ones = at::ones({1}, grad.options()).expand(ones_size);
|
||||||
Tensor prods_from_k_plus_1;
|
Tensor prods_from_k_plus_1;
|
||||||
Tensor omitted_products;
|
Tensor omitted_products;
|
||||||
for (int k = 0; k < dim_size; ++k) {
|
for (const auto k : c10::irange(dim_size)) {
|
||||||
if (k == 0) {
|
if (k == 0) {
|
||||||
prods_from_k_plus_1 = at::cumprod(input_conj.slice(dim, k + 1), dim);
|
prods_from_k_plus_1 = at::cumprod(input_conj.slice(dim, k + 1), dim);
|
||||||
omitted_products = at::cat({ones, prods_from_k_plus_1}, dim);
|
omitted_products = at::cat({ones, prods_from_k_plus_1}, dim);
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,8 @@
|
||||||
#include <ATen/native/Copy.h>
|
#include <ATen/native/Copy.h>
|
||||||
#include <ATen/Parallel.h>
|
#include <ATen/Parallel.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
|
|
@ -506,7 +508,7 @@ static void check_indexarray_range(
|
||||||
const IndexType* indices,
|
const IndexType* indices,
|
||||||
int64_t n,
|
int64_t n,
|
||||||
IndexType indexing_axis_dim) {
|
IndexType indexing_axis_dim) {
|
||||||
for (auto i = 0; i < n; ++i) {
|
for (const auto i : c10::irange(n)) {
|
||||||
auto idx = indices[i];
|
auto idx = indices[i];
|
||||||
TORCH_CHECK(
|
TORCH_CHECK(
|
||||||
0 <= idx && idx < indexing_axis_dim,
|
0 <= idx && idx < indexing_axis_dim,
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
#include <ATen/Functions.h>
|
#include <ATen/Functions.h>
|
||||||
#include <ATen/TensorOperators.h>
|
#include <ATen/TensorOperators.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
/// Contains the implementation of parallel reductions in TensorIterator.
|
/// Contains the implementation of parallel reductions in TensorIterator.
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
|
|
@ -136,7 +138,7 @@ void TensorIteratorBase::foreach_reduced_elt(loop_subiter_t loop, bool paralleli
|
||||||
auto non_reduced_shape = shape.slice(reduce_dims, shape.size() - reduce_dims);
|
auto non_reduced_shape = shape.slice(reduce_dims, shape.size() - reduce_dims);
|
||||||
|
|
||||||
int64_t non_reduced_numel = 1;
|
int64_t non_reduced_numel = 1;
|
||||||
for (int i = 0; i < non_reduced_shape.size(); ++i) {
|
for (const auto i : c10::irange(non_reduced_shape.size())) {
|
||||||
non_reduced_numel *= non_reduced_shape[i];
|
non_reduced_numel *= non_reduced_shape[i];
|
||||||
}
|
}
|
||||||
DimCounter dims {non_reduced_shape, {0, non_reduced_numel}};
|
DimCounter dims {non_reduced_shape, {0, non_reduced_numel}};
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
#include <ATen/ScalarOps.h>
|
#include <ATen/ScalarOps.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
|
|
||||||
|
|
@ -19,7 +21,7 @@ Tensor _test_optional_intlist(
|
||||||
Tensor output = at::empty_like(values);
|
Tensor output = at::empty_like(values);
|
||||||
auto inp = values.accessor<int,1>();
|
auto inp = values.accessor<int,1>();
|
||||||
auto out = output.accessor<int,1>();
|
auto out = output.accessor<int,1>();
|
||||||
for(int i = 0; i < values.size(0); ++i) {
|
for (const auto i : c10::irange(values.size(0))) {
|
||||||
out[i] = inp[i] + addends->at(i);
|
out[i] = inp[i] + addends->at(i);
|
||||||
}
|
}
|
||||||
return output;
|
return output;
|
||||||
|
|
@ -37,7 +39,7 @@ Tensor _test_optional_floatlist(
|
||||||
Tensor output = at::empty_like(values);
|
Tensor output = at::empty_like(values);
|
||||||
auto inp = values.accessor<float,1>();
|
auto inp = values.accessor<float,1>();
|
||||||
auto out = output.accessor<float,1>();
|
auto out = output.accessor<float,1>();
|
||||||
for(int i = 0; i < values.size(0); ++i) {
|
for (const auto i : c10::irange(values.size(0))) {
|
||||||
out[i] = inp[i] + addends->at(i);
|
out[i] = inp[i] + addends->at(i);
|
||||||
}
|
}
|
||||||
return output;
|
return output;
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,8 @@
|
||||||
#include <ATen/quantized/QTensorImpl.h>
|
#include <ATen/quantized/QTensorImpl.h>
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
|
|
||||||
|
|
@ -24,7 +26,7 @@ std::vector<Tensor> quantize_per_tensor_list_cpu(
|
||||||
const Tensor& zero_points,
|
const Tensor& zero_points,
|
||||||
ScalarType dtype) {
|
ScalarType dtype) {
|
||||||
std::vector<Tensor> quantized_tensors;
|
std::vector<Tensor> quantized_tensors;
|
||||||
for (auto i = 0; i < tensors.size(); ++i) {
|
for (const auto i : c10::irange(tensors.size())) {
|
||||||
quantized_tensors.push_back(at::quantize_per_tensor(
|
quantized_tensors.push_back(at::quantize_per_tensor(
|
||||||
tensors[i],
|
tensors[i],
|
||||||
scales[i].item<double>(),
|
scales[i].item<double>(),
|
||||||
|
|
@ -54,7 +56,7 @@ Tensor dequantize_quantized_cpu(const Tensor& self) {
|
||||||
|
|
||||||
std::vector<Tensor> dequantize_tensors_quantized_cpu(TensorList tensors) {
|
std::vector<Tensor> dequantize_tensors_quantized_cpu(TensorList tensors) {
|
||||||
std::vector<Tensor> dequantized_tensors;
|
std::vector<Tensor> dequantized_tensors;
|
||||||
for (auto i = 0; i < tensors.size(); ++i) {
|
for (const auto i : c10::irange(tensors.size())) {
|
||||||
dequantized_tensors.push_back(tensors[i].dequantize());
|
dequantized_tensors.push_back(tensors[i].dequantize());
|
||||||
}
|
}
|
||||||
return dequantized_tensors;
|
return dequantized_tensors;
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,8 @@
|
||||||
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
|
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
// To have a sanity check for maximum matrix size.
|
// To have a sanity check for maximum matrix size.
|
||||||
constexpr int64_t kReasonableMaxDim = 1000000;
|
constexpr int64_t kReasonableMaxDim = 1000000;
|
||||||
|
|
@ -453,7 +455,7 @@ at::Tensor PackedConvWeight<kSpatialDim>::apply_impl(
|
||||||
const int num_tasks = at::get_num_threads();
|
const int num_tasks = at::get_num_threads();
|
||||||
at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
|
at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
|
||||||
fbgemm::DoNothing<> kNoOpObj{};
|
fbgemm::DoNothing<> kNoOpObj{};
|
||||||
for (int task_id = begin; task_id < end; ++task_id) {
|
for (const auto task_id : c10::irange(begin, end)) {
|
||||||
if (q_scheme == c10::kPerTensorAffine) {
|
if (q_scheme == c10::kPerTensorAffine) {
|
||||||
fbgemm::ReQuantizeOutput<
|
fbgemm::ReQuantizeOutput<
|
||||||
kReluFused,
|
kReluFused,
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#ifdef USE_FBGEMM
|
#ifdef USE_FBGEMM
|
||||||
template <int kSpatialDim>
|
template <int kSpatialDim>
|
||||||
c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeight<
|
c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeight<
|
||||||
|
|
@ -114,7 +116,7 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeight<
|
||||||
const int output_channels_per_group = output_channels / groups;
|
const int output_channels_per_group = output_channels / groups;
|
||||||
const int inner_size =
|
const int inner_size =
|
||||||
kernel_d * kernel_h * kernel_w * input_channels_per_group;
|
kernel_d * kernel_h * kernel_w * input_channels_per_group;
|
||||||
for (int g = 0; g < groups; ++g) {
|
for (const auto g : c10::irange(groups)) {
|
||||||
for (int i = 0; i < output_channels_per_group; ++i) {
|
for (int i = 0; i < output_channels_per_group; ++i) {
|
||||||
const int c = g * output_channels_per_group + i;
|
const int c = g * output_channels_per_group + i;
|
||||||
int32_t sum = 0;
|
int32_t sum = 0;
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
torch::class_<EmbeddingPackedParamsBase> register_embedding_params();
|
torch::class_<EmbeddingPackedParamsBase> register_embedding_params();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -271,7 +273,7 @@ Tensor _qembeddingbag_nbit_prepack_helper(
|
||||||
output_row_scale_zp[1] = Xmin;
|
output_row_scale_zp[1] = Xmin;
|
||||||
|
|
||||||
// Pack the weight values.
|
// Pack the weight values.
|
||||||
for (int col = 0; col < embedding_cols; ++col) {
|
for (const auto col : c10::irange(embedding_cols)) {
|
||||||
float X = input_row[col];
|
float X = input_row[col];
|
||||||
std::uint8_t quantized = std::max(
|
std::uint8_t quantized = std::max(
|
||||||
0,
|
0,
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,8 @@
|
||||||
#include <torch/custom_class.h>
|
#include <torch/custom_class.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|
@ -65,7 +67,7 @@ at::Tensor PackedLinearWeight::apply_impl(
|
||||||
// Process the per channel quantization.
|
// Process the per channel quantization.
|
||||||
output_multiplier_float.resize(N, 0.0);
|
output_multiplier_float.resize(N, 0.0);
|
||||||
act_times_w_scale.resize(N, 1.0f);
|
act_times_w_scale.resize(N, 1.0f);
|
||||||
for (int i = 0; i < N; ++i) {
|
for (const auto i : c10::irange(N)) {
|
||||||
act_times_w_scale[i] = (input_scale_float * w_scale[i]);
|
act_times_w_scale[i] = (input_scale_float * w_scale[i]);
|
||||||
output_multiplier_float[i] =
|
output_multiplier_float[i] =
|
||||||
act_times_w_scale[i] / static_cast<float>(output_scale);
|
act_times_w_scale[i] / static_cast<float>(output_scale);
|
||||||
|
|
@ -101,7 +103,7 @@ at::Tensor PackedLinearWeight::apply_impl(
|
||||||
|
|
||||||
int num_tasks = at::get_num_threads();
|
int num_tasks = at::get_num_threads();
|
||||||
at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
|
at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
|
||||||
for (int task_id = begin; task_id < end; ++task_id) {
|
for (const auto task_id : c10::irange(begin, end)) {
|
||||||
// This operation does the following:
|
// This operation does the following:
|
||||||
// 1) Creates a "row buffer" vector with offset values that must be
|
// 1) Creates a "row buffer" vector with offset values that must be
|
||||||
// added to the integer matrix multiplication operation to ensure
|
// added to the integer matrix multiplication operation to ensure
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@
|
||||||
|
|
||||||
#include <torch/custom_class.h>
|
#include <torch/custom_class.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|
@ -135,7 +137,7 @@ at::Tensor PackedLinearWeight::apply_dynamic_impl(at::Tensor input, bool reduce_
|
||||||
// This is the end of the pipeline, pass the resulting matrix through.
|
// This is the end of the pipeline, pass the resulting matrix through.
|
||||||
fbgemm::DoNothing<float, float> doNothingObj{};
|
fbgemm::DoNothing<float, float> doNothingObj{};
|
||||||
|
|
||||||
for (int task_id = begin; task_id < end; ++task_id) {
|
for (const auto task_id : c10::irange(begin, end)) {
|
||||||
if (q_scheme == c10::kPerTensorAffine) {
|
if (q_scheme == c10::kPerTensorAffine) {
|
||||||
// Process the per tensor quantization.
|
// Process the per tensor quantization.
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,9 @@
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
#include <torch/custom_class.h>
|
#include <torch/custom_class.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|
@ -59,7 +62,7 @@ c10::intrusive_ptr<LinearPackedParamsBase> PackedLinearWeight::prepack(
|
||||||
weight_zero_points_int32[0] = weight.q_zero_point();
|
weight_zero_points_int32[0] = weight.q_zero_point();
|
||||||
} else if (qtype == c10::kPerChannelAffine) {
|
} else if (qtype == c10::kPerChannelAffine) {
|
||||||
weight_zero_points_int32.resize(N, 0);
|
weight_zero_points_int32.resize(N, 0);
|
||||||
for (int i = 0; i < N; ++i) {
|
for (const auto i : c10::irange(N)) {
|
||||||
weight_zero_points_int32[i] =
|
weight_zero_points_int32[i] =
|
||||||
weight.q_per_channel_zero_points()[i].item<int32_t>();
|
weight.q_per_channel_zero_points()[i].item<int32_t>();
|
||||||
}
|
}
|
||||||
|
|
@ -69,7 +72,7 @@ c10::intrusive_ptr<LinearPackedParamsBase> PackedLinearWeight::prepack(
|
||||||
weight_scales_float[0] = weight.q_scale();
|
weight_scales_float[0] = weight.q_scale();
|
||||||
} else if (qtype == c10::kPerChannelAffine) {
|
} else if (qtype == c10::kPerChannelAffine) {
|
||||||
weight_scales_float.resize(N, 0.0);
|
weight_scales_float.resize(N, 0.0);
|
||||||
for (int i = 0; i < N; ++i) {
|
for (const auto i : c10::irange(N)) {
|
||||||
weight_scales_float[i] = weight.q_per_channel_scales()[i].item<float>();
|
weight_scales_float[i] = weight.q_per_channel_scales()[i].item<float>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@
|
||||||
#include <ATen/native/cpu/Loops.h>
|
#include <ATen/native/cpu/Loops.h>
|
||||||
#include <ATen/native/quantized/cpu/quantized_ops.h>
|
#include <ATen/native/quantized/cpu/quantized_ops.h>
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -70,7 +73,7 @@ static void upsample_nearest2d_out_frame_nhwc(
|
||||||
float height_scale = compute_scales_value<float>(scales_h, input_height, output_height);
|
float height_scale = compute_scales_value<float>(scales_h, input_height, output_height);
|
||||||
float width_scale = compute_scales_value<float>(scales_w, input_width, output_width);
|
float width_scale = compute_scales_value<float>(scales_w, input_width, output_width);
|
||||||
|
|
||||||
for (int b = 0; b < nbatch; b++) {
|
for (const auto b : c10::irange(nbatch)) {
|
||||||
auto* i_p = reinterpret_cast<typename scalar_t::underlying*>(idata + b * input_height * input_width * channels);
|
auto* i_p = reinterpret_cast<typename scalar_t::underlying*>(idata + b * input_height * input_width * channels);
|
||||||
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(odata + b * output_height * output_width * channels);
|
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(odata + b * output_height * output_width * channels);
|
||||||
// special case: just copy
|
// special case: just copy
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@
|
||||||
#include <ATen/native/cpu/Loops.h>
|
#include <ATen/native/cpu/Loops.h>
|
||||||
#include <ATen/native/quantized/cpu/quantized_ops.h>
|
#include <ATen/native/quantized/cpu/quantized_ops.h>
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -83,7 +86,7 @@ static void upsample_nearest3d_out_frame_nhwc(
|
||||||
float height_scale = compute_scales_value<float>(scales_h, input_height, output_height);
|
float height_scale = compute_scales_value<float>(scales_h, input_height, output_height);
|
||||||
float width_scale = compute_scales_value<float>(scales_w, input_width, output_width);
|
float width_scale = compute_scales_value<float>(scales_w, input_width, output_width);
|
||||||
|
|
||||||
for (int b = 0; b < nbatch; b++) {
|
for (const auto b : c10::irange(nbatch)) {
|
||||||
auto* i_p = reinterpret_cast<typename scalar_t::underlying*>(idata + b * input_depth * input_height * input_width * channels);
|
auto* i_p = reinterpret_cast<typename scalar_t::underlying*>(idata + b * input_depth * input_height * input_width * channels);
|
||||||
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(odata + b * output_depth * output_height * output_width * channels);
|
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(odata + b * output_depth * output_height * output_width * channels);
|
||||||
// special case: just copy
|
// special case: just copy
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
#include <ATen/native/cpu/Loops.h>
|
#include <ATen/native/cpu/Loops.h>
|
||||||
#include <ATen/native/quantized/fake_quant_affine.h>
|
#include <ATen/native/quantized/fake_quant_affine.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
// FakeQuantize Op for PerChannelAffine quantization scheme.
|
// FakeQuantize Op for PerChannelAffine quantization scheme.
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
|
|
@ -243,10 +245,10 @@ std::tuple<Tensor, Tensor, Tensor> _fake_quantize_learnable_per_channel_affine_b
|
||||||
// Create a collection of axes that include all but the channel axis for
|
// Create a collection of axes that include all but the channel axis for
|
||||||
// reduction when summing over the dScale and dZeroPoint tensors.
|
// reduction when summing over the dScale and dZeroPoint tensors.
|
||||||
int64_t* axis_for_reduction = (int64_t*) calloc(numElements, sizeof(int64_t));
|
int64_t* axis_for_reduction = (int64_t*) calloc(numElements, sizeof(int64_t));
|
||||||
for (int i = 0; i < axis; ++i) {
|
for (const auto i : c10::irange(axis)) {
|
||||||
axis_for_reduction[i] = i;
|
axis_for_reduction[i] = i;
|
||||||
}
|
}
|
||||||
for (int i = axis; i < numElements; ++i) {
|
for (const auto i : c10::irange(axis, numElements)) {
|
||||||
axis_for_reduction[i] = i + 1;
|
axis_for_reduction[i] = i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
#include <ATen/TensorNames.h>
|
#include <ATen/TensorNames.h>
|
||||||
#include <c10/util/Exception.h>
|
#include <c10/util/Exception.h>
|
||||||
#include <c10/util/C++17.h>
|
#include <c10/util/C++17.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
using at::Dimname;
|
using at::Dimname;
|
||||||
using at::DimnameList;
|
using at::DimnameList;
|
||||||
|
|
@ -38,7 +39,7 @@ static bool dimnames_equal(at::DimnameList names, at::DimnameList other) {
|
||||||
if (names.size() != other.size()) {
|
if (names.size() != other.size()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (auto i = 0; i < names.size(); i++) {
|
for (const auto i : c10::irange(names.size())) {
|
||||||
const auto& name = names[i];
|
const auto& name = names[i];
|
||||||
const auto& other_name = other[i];
|
const auto& other_name = other[i];
|
||||||
if (name.type() != other_name.type() || name.symbol() != other_name.symbol()) {
|
if (name.type() != other_name.type() || name.symbol() != other_name.symbol()) {
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
#include <c10/util/intrusive_ptr.h>
|
#include <c10/util/intrusive_ptr.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <benchmark/benchmark.h>
|
#include <benchmark/benchmark.h>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
@ -45,10 +46,10 @@ static void BM_IntrusivePtrArray(benchmark::State& state) {
|
||||||
const size_t kLength = state.range(0);
|
const size_t kLength = state.range(0);
|
||||||
std::vector<intrusive_ptr<Foo> > vararray(kLength);
|
std::vector<intrusive_ptr<Foo> > vararray(kLength);
|
||||||
while (state.KeepRunning()) {
|
while (state.KeepRunning()) {
|
||||||
for (int i = 0; i < kLength; ++i) {
|
for (const auto i : c10::irange(kLength)) {
|
||||||
vararray[i] = var;
|
vararray[i] = var;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < kLength; ++i) {
|
for (const auto i : c10::irange(kLength)) {
|
||||||
vararray[i].reset();
|
vararray[i].reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -60,10 +61,10 @@ static void BM_SharedPtrArray(benchmark::State& state) {
|
||||||
const size_t kLength = state.range(0);
|
const size_t kLength = state.range(0);
|
||||||
std::vector<std::shared_ptr<Bar> > vararray(kLength);
|
std::vector<std::shared_ptr<Bar> > vararray(kLength);
|
||||||
while (state.KeepRunning()) {
|
while (state.KeepRunning()) {
|
||||||
for (int i = 0; i < kLength; ++i) {
|
for (const auto i : c10::irange(kLength)) {
|
||||||
vararray[i] = var;
|
vararray[i] = var;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < kLength; ++i) {
|
for (const auto i : c10::irange(kLength)) {
|
||||||
vararray[i].reset();
|
vararray[i].reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
#include <climits>
|
#include <climits>
|
||||||
|
|
||||||
#include <c10/mobile/CPUProfilingAllocator.h>
|
#include <c10/mobile/CPUProfilingAllocator.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace c10 {
|
namespace c10 {
|
||||||
|
|
||||||
|
|
@ -304,7 +305,7 @@ void AllocationPlanner::formulate_plan() {
|
||||||
formulate_greedy_allocation_plan(
|
formulate_greedy_allocation_plan(
|
||||||
allocation_plan_->allocation_sizes, allocation_plan_->allocation_lifetimes);
|
allocation_plan_->allocation_sizes, allocation_plan_->allocation_lifetimes);
|
||||||
allocation_plan_->total_size = 0;
|
allocation_plan_->total_size = 0;
|
||||||
for (auto i = 0; i < allocation_plan_->allocation_sizes.size(); ++i) {
|
for (const auto i : c10::irange(allocation_plan_->allocation_sizes.size())) {
|
||||||
if (allocation_plan_->allocation_lifetimes[i] ==
|
if (allocation_plan_->allocation_lifetimes[i] ==
|
||||||
std::numeric_limits<uint64_t>::max()) {
|
std::numeric_limits<uint64_t>::max()) {
|
||||||
continue;
|
continue;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#include <c10/core/impl/SizesAndStrides.h>
|
#include <c10/core/impl/SizesAndStrides.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
using namespace c10;
|
using namespace c10;
|
||||||
using namespace c10::impl;
|
using namespace c10::impl;
|
||||||
|
|
@ -55,7 +56,7 @@ TEST(SizesAndStridesTest, Resize) {
|
||||||
sz.resize(5);
|
sz.resize(5);
|
||||||
checkData(sz, {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0});
|
checkData(sz, {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0});
|
||||||
|
|
||||||
for (int ii = 0; ii < sz.size(); ++ii) {
|
for (const auto ii : c10::irange(sz.size())) {
|
||||||
sz.size_at_unchecked(ii) = ii + 1;
|
sz.size_at_unchecked(ii) = ii + 1;
|
||||||
sz.stride_at_unchecked(ii) = 2 * (ii + 1);
|
sz.stride_at_unchecked(ii) = 2 * (ii + 1);
|
||||||
}
|
}
|
||||||
|
|
@ -113,7 +114,7 @@ TEST(SizesAndStridesTest, Resize) {
|
||||||
// Give it different data than it had when it was small to avoid
|
// Give it different data than it had when it was small to avoid
|
||||||
// getting it right by accident (i.e., because of leftover inline
|
// getting it right by accident (i.e., because of leftover inline
|
||||||
// storage when going small to big).
|
// storage when going small to big).
|
||||||
for (int ii = 0; ii < sz.size(); ++ii) {
|
for (const auto ii : c10::irange(sz.size())) {
|
||||||
sz.size_at_unchecked(ii) = ii - 1;
|
sz.size_at_unchecked(ii) = ii - 1;
|
||||||
sz.stride_at_unchecked(ii) = 2 * (ii - 1);
|
sz.stride_at_unchecked(ii) = 2 * (ii - 1);
|
||||||
}
|
}
|
||||||
|
|
@ -175,7 +176,7 @@ TEST(SizesAndStridesTest, SetViaData) {
|
||||||
static SizesAndStrides makeSmall(int offset = 0) {
|
static SizesAndStrides makeSmall(int offset = 0) {
|
||||||
SizesAndStrides small;
|
SizesAndStrides small;
|
||||||
small.resize(3);
|
small.resize(3);
|
||||||
for (int ii = 0; ii < small.size(); ++ii) {
|
for (const auto ii : c10::irange(small.size())) {
|
||||||
small.size_at_unchecked(ii) = ii + 1 + offset;
|
small.size_at_unchecked(ii) = ii + 1 + offset;
|
||||||
small.stride_at_unchecked(ii) = 2 * (ii + 1 + offset);
|
small.stride_at_unchecked(ii) = 2 * (ii + 1 + offset);
|
||||||
}
|
}
|
||||||
|
|
@ -186,7 +187,7 @@ static SizesAndStrides makeSmall(int offset = 0) {
|
||||||
static SizesAndStrides makeBig(int offset = 0) {
|
static SizesAndStrides makeBig(int offset = 0) {
|
||||||
SizesAndStrides big;
|
SizesAndStrides big;
|
||||||
big.resize(8);
|
big.resize(8);
|
||||||
for (int ii = 0; ii < big.size(); ++ii) {
|
for (const auto ii : c10::irange(big.size())) {
|
||||||
big.size_at_unchecked(ii) = ii - 1 + offset;
|
big.size_at_unchecked(ii) = ii - 1 + offset;
|
||||||
big.stride_at_unchecked(ii) = 2 * (ii - 1 + offset);
|
big.stride_at_unchecked(ii) = 2 * (ii - 1 + offset);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -4,6 +4,7 @@
 #include <c10/util/Exception.h>

+#include <algorithm>
 #include <iterator>
 #include <limits>
 #include <type_traits>
@@ -12,7 +13,7 @@ namespace c10 {

 namespace detail {

-template <typename I, std::enable_if_t<std::is_integral<I>{}, int> = 0>
+template <typename I, typename std::enable_if<std::is_integral<I>::value, int>::type = 0>
 struct integer_iterator : std::iterator<std::input_iterator_tag, I> {
   explicit integer_iterator(I value) : value(value) {}

@@ -45,7 +46,7 @@ struct integer_iterator : std::iterator<std::input_iterator_tag, I> {
 } // namespace detail

-template <typename I, std::enable_if_t<std::is_integral<I>{}, bool> = true>
+template <typename I, typename std::enable_if<std::is_integral<I>::value, bool>::type = true>
 struct integer_range {
  public:
   integer_range(I begin, I end) : begin_(begin), end_(end) {}

@@ -64,8 +65,8 @@ struct integer_range {
 template <
     typename Integer1,
     typename Integer2,
-    std::enable_if_t<std::is_integral<Integer1>::value, bool> = true,
-    std::enable_if_t<std::is_integral<Integer2>::value, bool> = true
+    typename std::enable_if<std::is_integral<Integer1>::value, bool>::type = true,
+    typename std::enable_if<std::is_integral<Integer2>::value, bool>::type = true
     >
 integer_range<Integer2> irange(Integer1 begin, Integer2 end) {
   //If end<=begin then the range is empty; we can achieve this effect by

@@ -75,10 +76,11 @@ integer_range<Integer2> irange(Integer1 begin, Integer2 end) {

 /// Creates an integer range for the half-open interval [0, end)
 /// If end<=begin, then the range is empty
-template <typename Integer, std::enable_if_t<std::is_integral<Integer>::value, bool> = true>
+template <typename Integer, typename std::enable_if<std::is_integral<Integer>::value, bool>::type = true>
 integer_range<Integer> irange(Integer end) {
   //If end<=begin then the range is empty; we can achieve this effect by
   //choosing the larger of {0, end} as the loop terminator
+  //Handles the case where end<0. irange only works for ranges >=0
   return {Integer(), std::max(Integer(), end)};
 }

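A minimal sketch of the hazard this commit addresses and of the irange replacement. Everything except c10::irange itself (the vector, the bounds) is illustrative, and the snippet assumes the c10 headers are on the include path:

#include <cstdint>
#include <vector>

#include <c10/util/irange.h>

int main() {
  std::vector<float> array(10, 1.f);

  // Hazardous form: array.size() is size_t, so `i < array.size()` compares
  // a narrow signed int with a wide unsigned type (this draws -Wsign-compare).
  // For a container with more than INT_MAX elements the counter overflows
  // before the bound is reached and the loop never terminates.
  for (int i = 0; i < array.size(); ++i) {
  }

  // irange deduces the index type from the bound (size_t here), so the
  // comparison is homogeneous and no overflow is possible.
  for (const auto i : c10::irange(array.size())) {
    array[i] += 1.f;
  }

  // A negative bound yields an empty range, per the std::max clamp in the
  // hunk above.
  for (const auto i : c10::irange(-5)) {
    (void)i; // never executes
  }
  return 0;
}
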
@@ -4,6 +4,7 @@
 #include <sstream>
 #include <utility>

+#include <c10/util/irange.h>
 #include <c10/util/string_view.h>

 #include "caffe2/core/blob.h"
@@ -249,7 +250,7 @@ void TensorSerializer::SerializeWithOptions(
   std::vector<std::future<void>> futures;
   if (tensor.numel() > chunk_size) {
     futures.reserve(FLAGS_caffe2_max_tensor_serializer_threads);
-    for (int i = 0; i < FLAGS_caffe2_max_tensor_serializer_threads; ++i) {
+    for (const auto i : c10::irange(FLAGS_caffe2_max_tensor_serializer_threads)) {
       futures.emplace_back(std::async(std::launch::async, task));
     }
   }
@@ -449,7 +450,7 @@ void TensorSerializer::Serialize(
   proto.mutable_segment()->set_begin(chunkBegin);
   proto.mutable_segment()->set_end(chunkBegin + chunkSize);

-  for (int i = 0; i < input.dim(); ++i) {
+  for (const auto i : c10::irange(input.dim())) {
     proto.add_dims(input.size(i));
   }
   StoreDeviceDetail(input, &proto);
@@ -479,7 +480,7 @@ void TensorSerializer::Serialize(
   proto.mutable_string_data()->Reserve(chunkSize);
   if (chunkSize > 0) {
     const char* raw_data = static_cast<const char*>(input.raw_data());
-    for (int i = chunkBegin; i < chunkBegin + chunkSize; ++i) {
+    for (const auto i : c10::irange(chunkBegin, chunkBegin + chunkSize)) {
       proto.add_string_data(SerializeBlob(
           raw_data + i * input.itemsize(), input.dtype(), ""));
     }
@@ -803,7 +804,7 @@ DESERIALIZE_IMPL(std::string, FMT_PROTOBUF) {
       params.dest.size(),
       " != ",
       params.tensor_proto.string_data().size());
-  for (int i = 0; i < params.dest.size(); ++i) {
+  for (const auto i : c10::irange(params.dest.size())) {
     params.dest[i] = params.tensor_proto.string_data(i);
   }
 }
@@ -910,7 +911,7 @@ void DeserializeTensor(
     case TensorProto_DataType_UNDEFINED: {
       Blob temp_blob;
      void* raw_ptr = nullptr;
-      for (int i = 0; i < chunkSize; ++i) {
+      for (const auto i : c10::irange(chunkSize)) {
        DeserializeBlob(tensor_proto.string_data(i), &temp_blob);
        if (i == 0) {
          raw_ptr = tensor->raw_mutable_data(temp_blob.meta());

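The two-argument overload covers loops that do not start at zero, such as the chunked serialization above. A hedged sketch (the function name and parameters are illustrative, not from the diff):

#include <cstdint>

#include <c10/util/irange.h>

// Sketch: iterate the half-open interval [chunkBegin, chunkBegin + chunkSize).
// If the upper bound is <= the lower bound, the range is simply empty.
void process_chunk(int64_t chunkBegin, int64_t chunkSize) {
  for (const auto i : c10::irange(chunkBegin, chunkBegin + chunkSize)) {
    (void)i; // the index type is deduced from the bounds, int64_t here
  }
}
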
@@ -1,6 +1,8 @@
 #include "caffe2/core/operator_schema.h"
 #include "caffe2/core/logging.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 OpSchema::OpSchema(const string& type, const string& file, const int line)
@@ -256,7 +258,7 @@ OpSchema& OpSchema::IdenticalTypeAndShapeOfMultipleInputs(
   return TensorInferenceFunction(
       [indices](const OperatorDef&, const vector<TensorShape>& input_types) {
         vector<TensorShape> out(indices.size());
-        for (int i = 0; i < indices.size(); i++) {
+        for (const auto i : c10::irange(indices.size())) {
           out[i] = input_types[indices.at(i)];
         }
         return out;

@@ -4,6 +4,8 @@
 #include <ostream>
 #include <sstream>

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 ProfDAGCounters::ProfDAGCounters(const std::shared_ptr<const NetDef>& net_def) {
@@ -81,7 +83,7 @@ void ProfDAGCounters::ReportRunEnd() {
   CaffeMap<std::string, float> cum_per_type_time_run;
   CaffeMap<std::string, float> cum_per_type_invocations_run;
   std::vector<float> per_op_time_run(report_.op_types_.size(), 0.0);
-  for (auto op_id = 0U; op_id < report_.op_types_.size(); ++op_id) {
+  for (const auto op_id : c10::irange(report_.op_types_.size())) {
     // check that we have valid times, otherwise return;
     // times might not be valid if network execution ended prematurely
     // because of operator errors
@@ -109,7 +111,7 @@ void ProfDAGCounters::ReportRunEnd() {
   // all operator times are valid, update report stats
   report_.runtime_stats_ += ProfDAGStats(runtime);

-  for (auto op_id = 0U; op_id < report_.op_types_.size(); ++op_id) {
+  for (const auto op_id : c10::irange(report_.op_types_.size())) {
     report_.time_per_op_total_[op_id] += ProfDAGStats(per_op_time_run[op_id]);
   }

@@ -159,7 +161,7 @@ ProfDAGProtos ProfDAGReport::GetPerOperatorCost() const {
   ProfDAGProtos prof_dag_protos;
   prof_dag_protos.set_net_name(net_name_);
   if (hasStats()) {
-    for (auto op_id = 0U; op_id < op_types_.size(); op_id++) {
+    for (const auto op_id : c10::irange(op_types_.size())) {
       const string& op_type = op_types_[op_id];
       auto buf = prof_dag_protos.add_stats();
       std::string op_output_name =
@@ -208,7 +210,7 @@ ProfDAGReport& ProfDAGReport::operator+=(const ProfDAGReport& rhs) {
       op_types_.size(),
       rhs.op_types_.size(),
       "Incompatible nets to add counters");
-  for (auto idx = 0U; idx < op_types_.size(); ++idx) {
+  for (const auto idx : c10::irange(op_types_.size())) {
     CAFFE_ENFORCE_EQ(
         op_types_[idx],
         rhs.op_types_[idx],
@@ -229,7 +231,7 @@ ProfDAGReport& ProfDAGReport::operator+=(const ProfDAGReport& rhs) {
   }

   // Do the addition
-  for (auto idx = 0U; idx < time_per_op_total_.size(); ++idx) {
+  for (const auto idx : c10::irange(time_per_op_total_.size())) {
     time_per_op_total_[idx] += rhs.time_per_op_total_.at(idx);
   }
   for (auto& item : time_per_op_type_total_) {

@@ -6,6 +6,8 @@

 #include "caffe2/operators/generate_proposals_op_util_boxes.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 static void AddConstInput(
@@ -719,7 +721,7 @@ TEST(GenerateProposalsTest, TestRealDownSampledRotated) {
   // Verify that the resulting angles are correct
   auto rois_data =
       Eigen::Map<const ERMatXf>(rois.data<float>(), rois.size(0), rois.size(1));
-  for (int i = 0; i < rois.size(0); ++i) {
+  for (const auto i : c10::irange(rois.size(0))) {
     EXPECT_LE(std::abs(rois_data(i, 5) - expected_angle), 1e-4);
   }
 }

@@ -4,6 +4,8 @@
 #include "caffe2/utils/eigen_utils.h"
 #include "caffe2/utils/math.h"

+#include <c10/util/irange.h>
+
 // Bounding box utils for generate_proposals_op
 // Reference: facebookresearch/Detectron/detectron/utils/boxes.py

@@ -148,7 +150,7 @@ EArrXXt<typename Derived1::Scalar> bbox_transform_rotated(
     const int period = angle_bound_hi - angle_bound_lo;
     CAFFE_ENFORCE(period > 0 && period % 180 == 0);
     auto angles = pred_boxes.col(4);
-    for (int i = 0; i < angles.size(); ++i) {
+    for (const auto i : c10::irange(angles.size())) {
      if (angles[i] < angle_bound_lo) {
        angles[i] += T(period);
      } else if (angles[i] > angle_bound_hi) {

@@ -8,6 +8,8 @@
 #include "caffe2/utils/eigen_utils.h"
 #include "caffe2/utils/math.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {
 namespace utils {

@@ -148,7 +150,7 @@ std::vector<int> soft_nms_cpu_upright(
     EArrX ovr = inter / (areas[i] + GetSubArray(areas, rest_indices) - inter);

     // Update scores based on computed IoU, overlap threshold and NMS method
-    for (int j = 0; j < rest_indices.size(); ++j) {
+    for (const auto j : c10::irange(rest_indices.size())) {
       typename Derived2::Scalar weight;
       switch (method) {
         case 1: // Linear
@@ -569,7 +571,7 @@ std::vector<int> nms_cpu_rotated(
         order.data() + 1, order.size() - 1);

     EArrX inter(rest_indices.size());
-    for (int j = 0; j < rest_indices.size(); ++j) {
+    for (const auto j : c10::irange(rest_indices.size())) {
       inter[j] = rotated_rect_intersection(
           rotated_rects[i], rotated_rects[rest_indices[j]]);
     }
@@ -638,7 +640,7 @@ std::vector<int> soft_nms_cpu_rotated(
     std::swap(pending(0), pending(max_pos));
     const auto& rest_indices = pending.tail(pending.size() - 1);
     EArrX inter(rest_indices.size());
-    for (int j = 0; j < rest_indices.size(); ++j) {
+    for (const auto j : c10::irange(rest_indices.size())) {
       inter[j] = rotated_rect_intersection(
           rotated_rects[i], rotated_rects[rest_indices[j]]);
     }
@@ -646,7 +648,7 @@ std::vector<int> soft_nms_cpu_rotated(

     // Update scores based on computed IoU, overlap threshold and NMS method
     // TODO (viswanath): Should angle info be included as well while filtering?
-    for (int j = 0; j < rest_indices.size(); ++j) {
+    for (const auto j : c10::irange(rest_indices.size())) {
       typename Derived2::Scalar weight;
       switch (method) {
         case 1: // Linear

@@ -3,6 +3,8 @@

 #include <gtest/gtest.h>

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 TEST(UtilsNMSTest, TestNMS) {
@@ -18,7 +20,7 @@ TEST(UtilsNMSTest, TestNMS) {
   // test utils::nms_cpu without indices input
   auto proposals = input.block(0, 0, input.rows(), 4);
   auto scores = input.col(4);
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals, scores, input_thresh[i], true /* legacy_plus_one */);
     EXPECT_EQ(output_gt[i], cur_out);
@@ -31,7 +33,7 @@ TEST(UtilsNMSTest, TestNMS) {
       indices.data(),
       indices.data() + indices.size(),
       [&scores](int lhs, int rhs) { return scores(lhs) > scores(rhs); });
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals,
         scores,
@@ -45,7 +47,7 @@ TEST(UtilsNMSTest, TestNMS) {
   // test utils::nms_cpu with topN
   std::vector<int> top_n = {1, 1, 2, 2, 3};
   auto gt_out = output_gt;
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals,
         scores,
@@ -149,7 +151,7 @@ TEST(UtilsNMSTest, TestSoftNMS) {
       9.99834776e-01, 9.99737203e-01;

   Eigen::ArrayXf out_scores;
-  for (int i = 0; i < method.size(); ++i) {
+  for (const auto i : c10::irange(method.size())) {
     LOG(INFO) << "Testing SoftNMS with method=" << method[i]
               << ", overlap_thresh=" << overlap_thresh[i];
     const auto& expected_scores = scores_gt.col(i);
@@ -254,7 +256,7 @@ TEST(UtilsNMSTest, TestNMSRotatedAngle0) {
   proposals.col(3) = input.col(3) - input.col(1) + 1.0; // h = y2 - y1 + 1

   auto scores = input.col(4);
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals, scores, input_thresh[i], true /* legacy_plus_one */);
     EXPECT_EQ(output_gt[i], cur_out);
@@ -267,7 +269,7 @@ TEST(UtilsNMSTest, TestNMSRotatedAngle0) {
       indices.data(),
       indices.data() + indices.size(),
       [&scores](int lhs, int rhs) { return scores(lhs) > scores(rhs); });
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals,
         scores,
@@ -281,7 +283,7 @@ TEST(UtilsNMSTest, TestNMSRotatedAngle0) {
   // test utils::nms_cpu with topN
   std::vector<int> top_n = {1, 1, 2, 2, 3};
   auto gt_out = output_gt;
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals,
         scores,
@@ -342,7 +344,7 @@ TEST(UtilsNMSTest, TestSoftNMSRotatedAngle0) {
       9.99834776e-01, 9.99737203e-01;

   Eigen::ArrayXf out_scores;
-  for (int i = 0; i < method.size(); ++i) {
+  for (const auto i : c10::irange(method.size())) {
     LOG(INFO) << "Testing SoftNMS with method=" << method[i]
               << ", overlap_thresh=" << overlap_thresh[i];
     const auto& expected_scores = scores_gt.col(i);

@@ -5,6 +5,8 @@
 #include "caffe2/operators/half_float_ops.h"
 #include "caffe2/utils/conversions.h"

+#include <c10/util/irange.h>
+
 #include <gtest/gtest.h>
 C10_DECLARE_string(caffe_test_root);

@@ -57,7 +59,7 @@ TEST(Float16, SimpleTest) {
   const TensorCPU& resultTensor = resultBlob->Get<Tensor>();
   EXPECT_EQ(resultTensor.numel(), 5);

-  for (auto i = 0; i < data.size(); ++i) {
+  for (const auto i : c10::irange(data.size())) {
     EXPECT_NEAR(resultTensor.data<float>()[i], data[i], 0.01);
   }
 }
@@ -71,7 +73,7 @@ TEST(Float16, UniformDistributionTest) {
   int64_t size = 5000000L;
   std::vector<int64_t> shape = {size, 32};
   long tot_size = shape[0];
-  for (int i = 1; i < shape.size(); i++) {
+  for (const auto i : c10::irange(1, shape.size())) {
     tot_size *= shape[i];
   }
   caffe2::AddArgument<std::vector<int64_t>>("shape", shape, &def);

@@ -3,6 +3,8 @@

 #include "caffe2/operators/string_ops.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 class StringJoinOpTest : public testing::Test {
@@ -43,7 +45,7 @@ TEST_F(StringJoinOpTest, testString1DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size());
   auto* data = tensor->template mutable_data<std::string>();
-  for (int i = 0; i < input.size(); ++i) {
+  for (const auto i : c10::irange(input.size())) {
     *data++ = input[i];
   }

@@ -63,8 +65,8 @@ TEST_F(StringJoinOpTest, testString2DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size(), input[0].size());
   auto* data = tensor->template mutable_data<std::string>();
-  for (int i = 0; i < input.size(); ++i) {
-    for (int j = 0; j < input[0].size(); ++j) {
+  for (const auto i : c10::irange(input.size())) {
+    for (const auto j : c10::irange(input[0].size())) {
       *data++ = input[i][j];
     }
   }
@@ -83,7 +85,7 @@ TEST_F(StringJoinOpTest, testFloat1DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size());
   auto* data = tensor->template mutable_data<float>();
-  for (int i = 0; i < input.size(); ++i) {
+  for (const auto i : c10::irange(input.size())) {
     *data++ = input[i];
   }

@@ -103,8 +105,8 @@ TEST_F(StringJoinOpTest, testFloat2DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size(), input[0].size());
   auto* data = tensor->template mutable_data<float>();
-  for (int i = 0; i < input.size(); ++i) {
-    for (int j = 0; j < input[0].size(); ++j) {
+  for (const auto i : c10::irange(input.size())) {
+    for (const auto j : c10::irange(input[0].size())) {
       *data++ = input[i][j];
     }
   }
@@ -123,8 +125,8 @@ TEST_F(StringJoinOpTest, testLong2DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size(), input[0].size());
   auto* data = tensor->template mutable_data<int64_t>();
-  for (int i = 0; i < input.size(); ++i) {
-    for (int j = 0; j < input[0].size(); ++j) {
+  for (const auto i : c10::irange(input.size())) {
+    for (const auto j : c10::irange(input[0].size())) {
       *data++ = input[i][j];
     }
   }

@@ -5,13 +5,15 @@
 #include <cstring>
 #include <sstream>

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 Tokenizer::Tokenizer(const std::vector<char>& delims, char escape)
     : escape_(escape) {
   reset();
   std::memset(delimTable_, 0, sizeof(delimTable_));
-  for (int i = 0; i < delims.size(); ++i) {
+  for (const auto i : c10::irange(delims.size())) {
     delimTable_[(unsigned char)delims.at(i)] = i + 1;
   }
 }

@@ -9,6 +9,8 @@
 #include "caffe2/operators/text_file_reader_utils.h"
 #include "caffe2/utils/string_utils.h"

+#include <c10/util/irange.h>
+
 #include <cstdio>
 #include <cstdlib>

@@ -31,20 +33,20 @@ TEST(TextFileReaderUtilsTest, TokenizeTest) {
       {1, "Second"}};

   EXPECT_EQ(expected.size(), tokenized.tokens().size());
-  for (int i = 0; i < expected.size(); ++i) {
+  for (const auto i : c10::irange(expected.size())) {
     const auto& token = tokenized.tokens().at(i);
     EXPECT_EQ(expected.at(i).first, token.startDelimId);
     EXPECT_EQ(expected.at(i).second, std::string(token.start, token.end));
   }

   // try each of the subsplits
-  for (int i = 0; i < ch.size() - 1; ++i) {
+  for (const auto i : c10::irange(ch.size() - 1)) {
     tokenizer.reset();
     char* mid = &ch.front() + i;

     tokenizer.next(&ch.front(), mid, tokenized);
     EXPECT_GE(expected.size(), tokenized.tokens().size());
-    for (int j = 0; j < tokenized.tokens().size(); ++j) {
+    for (const auto j : c10::irange(tokenized.tokens().size())) {
       const auto& token = tokenized.tokens().at(j);
       EXPECT_EQ(expected.at(j).first, token.startDelimId);
       EXPECT_EQ(expected.at(j).second, std::string(token.start, token.end));
@@ -53,7 +55,7 @@ TEST(TextFileReaderUtilsTest, TokenizeTest) {

     tokenizer.next(mid, &ch.back() + 1, tokenized);
     EXPECT_EQ(expected.size(), s1 + tokenized.tokens().size());
-    for (int j = 0; j < tokenized.tokens().size(); ++j) {
+    for (const auto j : c10::irange(tokenized.tokens().size())) {
       const auto& token = tokenized.tokens().at(j);
       EXPECT_EQ(expected.at(j + s1).first, token.startDelimId);
       EXPECT_EQ(

@@ -3,6 +3,8 @@
 #include "caffe2/core/types.h"
 #include "caffe2/perfkernels/common.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 /**
@@ -27,7 +29,7 @@ static bool EmbeddingLookupGenericSlow(
     bool normalize_by_lengths,
     OutType* out) {
   int64_t current = 0;
-  for (int m = 0; m < output_size; ++m) {
+  for (const auto m : c10::irange(output_size)) {
     memset(out, 0, sizeof(OutType) * block_size);
     if (current + lengths[m] > index_size) {
       return false;
@@ -52,7 +54,7 @@ static bool EmbeddingLookupGenericSlow(
         w = w * scale_bias[2 * indices[current]];
       }

-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] += w * input[block_size * indices[current] + j] + b;
       }

@@ -60,7 +62,7 @@ static bool EmbeddingLookupGenericSlow(
     }
     if (normalize_by_lengths && lengths[m]) {
       float scale = 1.f / lengths[m];
-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] *= scale;
       }
     }

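One behavioral detail worth noting for loops like the ones above: the irange index adopts the bound's type. A hedged sketch (the 64-bit output_size parameter mirrors the perfkernels signatures, but is an assumption here):

#include <cstdint>
#include <type_traits>

#include <c10/util/irange.h>

// Sketch: with a 64-bit bound the deduced loop index is also 64-bit, so
// index arithmetic in the body cannot truncate the way an `int m` counter
// could.
void walk(int64_t output_size) {
  for (const auto m : c10::irange(output_size)) {
    static_assert(
        std::is_same<decltype(m), const int64_t>::value,
        "index type follows the bound");
    (void)m;
  }
}
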
@@ -1,6 +1,7 @@
 #include "caffe2/perfkernels/embedding_lookup_idx.h"

 #include <c10/util/Half.h>
+#include <c10/util/irange.h>
 #include "caffe2/core/common.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/perfkernels/common.h"
@@ -29,7 +30,7 @@ static bool EmbeddingLookupGenericSlowIdx(
     bool normalize_by_lengths,
     OutType* out) {
   int64_t current = 0;
-  for (int m = 0; m < output_size; ++m) {
+  for (const auto m : c10::irange(output_size)) {
     memset(out, 0, sizeof(OutType) * block_size);
     if (current != offsets[m] - offsets[0]) {
       return false;
@@ -37,7 +38,7 @@ static bool EmbeddingLookupGenericSlowIdx(
     int64_t start_offset = offsets[m];
     int64_t end_offset = offsets[m + 1];
     int64_t length = end_offset - start_offset;
-    for (int i = start_offset; i < end_offset; ++i) {
+    for (const auto i : c10::irange(start_offset, end_offset)) {
       int64_t idx = indices[current];
       if (idx < 0 || idx >= data_size) {
         return false;
@@ -57,7 +58,7 @@ static bool EmbeddingLookupGenericSlowIdx(
         w = w * scale_bias[2 * indices[current]];
       }

-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] += w * input[block_size * indices[current] + j] + b;
       }

@@ -65,7 +66,7 @@ static bool EmbeddingLookupGenericSlowIdx(
     }
     if (normalize_by_lengths && length) {
       float scale = 1.f / length;
-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] *= scale;
       }
     }

@@ -4,6 +4,8 @@
 #include "caffe2/perfkernels/common.h"
 #include "caffe2/utils/cpuid.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 /**
@@ -31,7 +33,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlow(
   const auto scale_bias_offset = 8 / sizeof(InType);
   const int64_t fused_block_size = block_size + scale_bias_offset;
   int64_t current = 0;
-  for (int m = 0; m < output_size; ++m) {
+  for (const auto m : c10::irange(output_size)) {
     memset(out, 0, sizeof(OutType) * block_size);
     if (current + lengths[m] > index_size) {
       return false;
@@ -58,7 +60,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlow(
       const float scale = weight * scale_bias[0];
       const float bias = weight * scale_bias[1];

-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] += scale * input[fused_block_size * indices[current] + j] + bias;
       }

@@ -66,7 +68,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlow(
     }
     if (normalize_by_lengths && lengths[m]) {
       float scale = 1.f / lengths[m];
-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] *= scale;
       }
     }

@@ -4,6 +4,8 @@
 #include "caffe2/perfkernels/common.h"
 #include "caffe2/utils/cpuid.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 /**
@@ -31,7 +33,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlowIdx(
   const auto scale_bias_offset = 8 / sizeof(InType);
   const int64_t fused_block_size = block_size + scale_bias_offset;
   int64_t current = 0;
-  for (int m = 0; m < output_size; ++m) {
+  for (const auto m : c10::irange(output_size)) {
     memset(out, 0, sizeof(OutType) * block_size);
     if (current != offsets[m] - offsets[0]) {
       return false;
@@ -39,7 +41,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlowIdx(
     int64_t start_offset = offsets[m];
     int64_t end_offset = offsets[m + 1];
     int64_t length = end_offset - start_offset;
-    for (int i = start_offset; i < end_offset; ++i) {
+    for (const auto i : c10::irange(start_offset, end_offset)) {
       int64_t idx = indices[current];
       if (idx < 0 || idx >= data_size) {
         return false;
@@ -61,7 +63,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlowIdx(
       const float scale = weight * scale_bias[0];
       const float bias = weight * scale_bias[1];

-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] += scale * input[fused_block_size * indices[current] + j] + bias;
       }

@@ -69,7 +71,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlowIdx(
     }
     if (normalize_by_lengths && length) {
       float scale = 1.f / length;
-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] *= scale;
       }
     }

@@ -6,6 +6,8 @@
 #include <cmath>
 #include <cstdint>

+#include <c10/util/irange.h>
+
 using std::uint64_t;
 using std::uint8_t;

@@ -65,7 +67,7 @@ void quantize_and_compress__avx2(

     // basic info
     float minimum_element = INFINITY, maximum_element = -INFINITY;
-    for (auto i = 0; i < input_size; ++i) {
+    for (const auto i : c10::irange(input_size)) {
       minimum_element =
           (input_data[i] < minimum_element) ? input_data[i] : minimum_element;
       maximum_element =

@@ -9,6 +9,8 @@
 #include "common.h"
 #include "math.h"

+#include <c10/util/irange.h>
+
 using std::uint64_t;
 using std::uint8_t;

@@ -32,7 +34,7 @@ void quantize_and_compress__base(

     // basic info
     float minimum_element = INFINITY, maximum_element = -INFINITY;
-    for (auto i = 0; i < input_size; ++i) {
+    for (const auto i : c10::irange(input_size)) {
       minimum_element =
           input_data[i] < minimum_element ? input_data[i] : minimum_element;
       maximum_element =

@@ -1,5 +1,7 @@
 #include <immintrin.h>

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 namespace {
@@ -80,7 +82,7 @@ void fp32_to_bfp14(const float* source, size_t size, float* dest) {

 void fp32_to_bfp16_scalar(const float* source, size_t size, float* dest) {
   constexpr int mask = 0xFFFF0000;
-  for (auto i = 0; i < size; i++) {
+  for (const auto i : c10::irange(size)) {
     *(int*)(dest + i) = *(int*)(source + i) & mask;
   }
 }

@@ -8,6 +8,8 @@

 #include <immintrin.h>

+#include <c10/util/irange.h>
+
 using namespace std;

 namespace dnnlowp {
@@ -306,7 +308,7 @@ TensorQuantizationParams NormMinimization::ChooseQuantizationParams(
   }

   float total_sum = 0;
-  for (int i = 0; i < bins_f.size(); ++i) {
+  for (const auto i : c10::irange(bins_f.size())) {
     total_sum += bins_f[i];
   }
   float selected_sum = 0;

@@ -13,6 +13,8 @@
 #include "caffe2/core/timer.h"
 #include "caffe2/core/workspace.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 // Constants for user tracepoints
@@ -88,7 +90,7 @@ bool BlobsQueue::blockingRead(
   DCHECK(canRead());
   auto& result = queue_[reader_ % queue_.size()];
   CAFFE_ENFORCE(inputs.size() >= result.size());
-  for (auto i = 0; i < result.size(); ++i) {
+  for (const auto i : c10::irange(result.size())) {
     auto bytes = BlobStat::sizeBytes(*result[i]);
     CAFFE_EVENT(stats_, queue_dequeued_bytes, bytes, i);
     using std::swap;
@@ -160,7 +162,7 @@ void BlobsQueue::doWrite(const std::vector<Blob*>& inputs) {
   auto& result = queue_[writer_ % queue_.size()];
   CAFFE_ENFORCE(inputs.size() >= result.size());
   const auto& name = name_.c_str();
-  for (auto i = 0; i < result.size(); ++i) {
+  for (const auto i : c10::irange(result.size())) {
     using std::swap;
     swap(*(inputs[i]), *(result[i]));
   }

@@ -4,6 +4,8 @@
 #include "caffe2/core/net.h"
 #include "caffe2/proto/caffe2_pb.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 using transform::Graph;
@@ -101,7 +103,7 @@ bool CommonSubexpressionEliminationTransform::ReplaceRule(
       g.node(parent_idx).children[new_idx] = new_op_parents.at(parent_idx);

     // Make the parents disown all our outdated siblings.
-    for (int i = 0; i < subgraph.size(); i++) {
+    for (const auto i : c10::irange(subgraph.size())) {
       g.node(parent_idx).children.erase(subgraph[i]);
     }
   }

@@ -5,6 +5,8 @@
 #include "caffe2/core/net.h"
 #include "caffe2/proto/caffe2_pb.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 // First, single source traverse through the netdef.
@@ -150,7 +152,7 @@ bool PatternNetTransform::ReplaceRule(
   std::unordered_map<string, string> external_renaming;

   // Figure out blob renamings
-  for (auto i = 0U; i < match.size(); i++) {
+  for (const auto i : c10::irange(match.size())) {
     int g_idx = match[i];
     int p_idx = ordered_ops_[i];
     for (int j = 0; j < p_.node(p_idx).op.input().size(); j++) {
@@ -179,7 +181,7 @@ bool PatternNetTransform::ReplaceRule(
   g.resize_nodes(offset + r_.size());

   // Append all the new operators.
-  for (auto i = 0U; i < r_.size(); i++) {
+  for (const auto i : c10::irange(r_.size())) {
     int new_node_idx = offset + i;

     OperatorDef new_op = r_.node(i).op;

@@ -8,6 +8,8 @@

 #include "caffe2/core/logging.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 // Common Eigen types that we will often use
@@ -146,7 +148,7 @@ void GetSubArray(
   // using T = typename Derived::Scalar;

   out_array->derived().resize(indices.size());
-  for (int i = 0; i < indices.size(); i++) {
+  for (const auto i : c10::irange(indices.size())) {
     DCHECK_LT(indices[i], array.size());
     (*out_array)[i] = array[indices[i]];
   }
@@ -179,7 +181,7 @@ void GetSubArrayRows(
     Eigen::ArrayBase<Derived2>* out_array) {
   out_array->derived().resize(row_indices.size(), array2d.cols());

-  for (int i = 0; i < row_indices.size(); i++) {
+  for (const auto i : c10::irange(row_indices.size())) {
     DCHECK_LT(row_indices[i], array2d.size());
     out_array->row(i) =
         array2d.row(row_indices[i]).template cast<typename Derived2::Scalar>();
@@ -190,7 +192,7 @@ void GetSubArrayRows(
 template <class Derived>
 std::vector<int> GetArrayIndices(const Eigen::ArrayBase<Derived>& array) {
   std::vector<int> ret;
-  for (int i = 0; i < array.size(); i++) {
+  for (const auto i : c10::irange(array.size())) {
     if (array[i]) {
       ret.push_back(i);
     }

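Eigen containers are a slightly different case from std::vector: size() returns a signed Eigen::Index rather than an unsigned size_t, and irange accepts either. A hedged sketch modeled on GetArrayIndices above (the function and array are illustrative):

#include <vector>

#include <Eigen/Core>
#include <c10/util/irange.h>

// Collects the positions of nonzero entries; the loop index is deduced as
// Eigen::Index, matching array.size() with no sign conversion.
std::vector<int> nonzero_indices(const Eigen::ArrayXf& array) {
  std::vector<int> ret;
  for (const auto i : c10::irange(array.size())) {
    if (array[i] != 0.f) {
      ret.push_back(static_cast<int>(i));
    }
  }
  return ret;
}
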
@@ -11,6 +11,8 @@
 #include "caffe2/utils/conversions.h"
 #include "caffe2/utils/math.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 TEST(MathTest, GemmNoTransNoTrans) {
@@ -456,7 +458,7 @@ class BroadcastTest : public testing::Test {
         Y_.mutable_data<float>(),
         cpu_context_.get());
     ASSERT_EQ(Y_data.size(), Y_.numel());
-    for (int i = 0; i < Y_data.size(); ++i) {
+    for (const auto i : c10::irange(Y_data.size())) {
       EXPECT_FLOAT_EQ(Y_data[i], Y_.data<float>()[i]);
     }
   }

@@ -5,6 +5,8 @@
 #include "caffe2/core/init.h"
 #include "observers/observer_config.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 const std::string NetObserverReporterPrint::IDENTIFIER = "Caffe2Observer ";
@@ -97,7 +99,7 @@ static std::string get_tensor_shapes(PerformanceInformation p) {
   std::stringstream shape_stream;
   if (!p.tensor_shapes.empty()) {
     shape_stream << "[";
-    for (int i = 0; i < p.tensor_shapes.size(); i++) {
+    for (const auto i : c10::irange(p.tensor_shapes.size())) {
       shape_stream << "[";
       for (int j = 0; j < p.tensor_shapes[i].dims_size(); j++) {
         shape_stream << p.tensor_shapes[i].dims(j) << ", ";
@@ -117,7 +119,7 @@ static std::string get_op_args(PerformanceInformation p) {
   if (!p.args.empty()) {
     std::stringstream args;
     args << "[";
-    for (int i = 0; i < p.args.size(); i++) {
+    for (const auto i : c10::irange(p.args.size())) {
       args << "{" << p.args[i].name() << ": ";
       if (p.args[i].has_i()) {
         args << p.args[i].i();