Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
fix comparison of narrow type with wide type in loop condition (#53951)
Summary:
Fix the Semmle warning "Comparison of narrow type with wide type in loop condition".

For example, consider the following piece of code:

    for (int i = 0; i < array.size(); ++i) {}

The problem is that array.size() returns size_t, which, depending on the implementation, can be a wider type than int. For a very large array whose size exceeds the range of int, i overflows before reaching the bound, so the loop never terminates.
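The fix applied throughout this commit is to iterate with c10::irange, which deduces the loop variable's type from the bound, so the narrow-versus-wide comparison disappears. A minimal sketch of the before/after pattern (the vector and function name here are illustrative, not taken from the diff):

    #include <c10/util/irange.h>

    #include <vector>

    void sum_example(const std::vector<int>& array) {
      long long total = 0;
      // Before: 'int i' is compared with the wider 'size_t' returned by size(),
      // and can overflow for arrays with more than INT_MAX elements:
      //   for (int i = 0; i < array.size(); ++i) { total += array[i]; }

      // After: 'i' takes the type of array.size() (size_t), so the comparison
      // is between identical types and i cannot overflow.
      for (const auto i : c10::irange(array.size())) {
        total += array[i];
      }
      (void)total; // placeholder use
    }

The two-argument overload, c10::irange(begin, end), covers the loops below that start at a nonzero index, e.g. c10::irange(1, forward_args.size()) or c10::irange(l_diff, l_inp).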
Pull Request resolved: https://github.com/pytorch/pytorch/pull/53951
Reviewed By: zou3519
Differential Revision: D27181495
Pulled By: malfet
fbshipit-source-id: 0612c5cedcdc656c193085e7fbb87dd163f20688
This commit is contained in:
parent edfc787df4
commit 92770d25cd
|
|
@ -8,6 +8,8 @@
|
||||||
#include <ATen/native/Resize.h>
|
#include <ATen/native/Resize.h>
|
||||||
#include <ATen/TensorOperators.h>
|
#include <ATen/TensorOperators.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
|
|
||||||
using DimMask = TensorIteratorBase::DimMask;
|
using DimMask = TensorIteratorBase::DimMask;
|
||||||
|
|
@ -1392,7 +1394,7 @@ DimCounter::DimCounter(IntArrayRef shape, Range range)
|
||||||
, offset(range.begin) {
|
, offset(range.begin) {
|
||||||
int64_t linear_offset = range.begin;
|
int64_t linear_offset = range.begin;
|
||||||
int64_t ndim = values.size();
|
int64_t ndim = values.size();
|
||||||
for (int dim = 0; dim < ndim; dim++) {
|
for (const auto dim : c10::irange(ndim)) {
|
||||||
int64_t size = shape[dim];
|
int64_t size = shape[dim];
|
||||||
if (size > 0) {
|
if (size > 0) {
|
||||||
values[dim] = linear_offset % size;
|
values[dim] = linear_offset % size;
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@
|
||||||
#include <c10/util/accumulate.h>
|
#include <c10/util/accumulate.h>
|
||||||
#include <c10/util/ArrayRef.h>
|
#include <c10/util/ArrayRef.h>
|
||||||
#include <c10/util/Exception.h>
|
#include <c10/util/Exception.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
@ -51,7 +52,7 @@ static inline TensorImpl* checked_dense_tensor_unwrap(const Tensor& expr, const
|
||||||
static inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(ArrayRef<Tensor> tensors, const char * name, int pos, DeviceType device_type, ScalarType scalar_type) {
|
static inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(ArrayRef<Tensor> tensors, const char * name, int pos, DeviceType device_type, ScalarType scalar_type) {
|
||||||
std::vector<TensorImpl*> unwrapped;
|
std::vector<TensorImpl*> unwrapped;
|
||||||
unwrapped.reserve(tensors.size());
|
unwrapped.reserve(tensors.size());
|
||||||
for (unsigned int i = 0; i < tensors.size(); ++i) {
|
for (const auto i : c10::irange(tensors.size())) {
|
||||||
const auto& expr = tensors[i];
|
const auto& expr = tensors[i];
|
||||||
if (expr.layout() != Layout::Strided) {
|
if (expr.layout() != Layout::Strided) {
|
||||||
AT_ERROR("Expected dense tensor but got ", expr.layout(),
|
AT_ERROR("Expected dense tensor but got ", expr.layout(),
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
#include <ATen/core/function_schema.h>
|
#include <ATen/core/function_schema.h>
|
||||||
#include <ATen/core/jit_type.h>
|
#include <ATen/core/jit_type.h>
|
||||||
#include <c10/macros/Macros.h>
|
#include <c10/macros/Macros.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
#include <ATen/core/grad_mode.h>
|
#include <ATen/core/grad_mode.h>
|
||||||
#include <ATen/core/function.h>
|
#include <ATen/core/function.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
@ -1107,7 +1108,7 @@ torch::jit::Function* ClassType::findForwardHook(const std::string& name) const
|
||||||
std::string getSchemaInputTypesString(const FunctionSchema& schema) {
|
std::string getSchemaInputTypesString(const FunctionSchema& schema) {
|
||||||
std::stringstream input_types;
|
std::stringstream input_types;
|
||||||
const std::vector<Argument>& forward_args = schema.arguments();
|
const std::vector<Argument>& forward_args = schema.arguments();
|
||||||
for (int i = 1; i < forward_args.size(); ++i) {
|
for (const auto i : c10::irange(1, forward_args.size())) {
|
||||||
input_types << forward_args[i].type()->annotation_str();
|
input_types << forward_args[i].type()->annotation_str();
|
||||||
if (forward_args.size() - 1 != i) {
|
if (forward_args.size() - 1 != i) {
|
||||||
input_types << ", ";
|
input_types << ", ";
|
||||||
|
|
@ -1213,7 +1214,7 @@ void checkForwardHookInputArguments(
|
||||||
hook_err_msg
|
hook_err_msg
|
||||||
);
|
);
|
||||||
|
|
||||||
for (int i = 1; i < forward_args.size(); ++i) {
|
for (const auto i : c10::irange(1, forward_args.size())) {
|
||||||
if (*forward_args[i].type() != *input_tuple_types[i - 1]) {
|
if (*forward_args[i].type() != *input_tuple_types[i - 1]) {
|
||||||
TORCH_CHECK(
|
TORCH_CHECK(
|
||||||
false,
|
false,
|
||||||
|
|
@ -1313,7 +1314,7 @@ void ClassType::checkForwardPreHookSchema(
|
||||||
pre_hook_err_msg
|
pre_hook_err_msg
|
||||||
);
|
);
|
||||||
// check that contained types match forward types
|
// check that contained types match forward types
|
||||||
for (int i = 1; i < forward_args.size(); ++i) {
|
for (const auto i : c10::irange(1, forward_args.size())) {
|
||||||
if (*forward_args[i].type() != *return_tuple_types[i - 1]) {
|
if (*forward_args[i].type() != *return_tuple_types[i - 1]) {
|
||||||
TORCH_CHECK(
|
TORCH_CHECK(
|
||||||
false,
|
false,
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,8 @@
|
||||||
#include <ATen/Parallel.h>
|
#include <ATen/Parallel.h>
|
||||||
#include <ATen/core/DistributionsHelper.h>
|
#include <ATen/core/DistributionsHelper.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at { namespace native {
|
namespace at { namespace native {
|
||||||
|
|
||||||
static const double SELU_ALPHA = 1.6732632423543772848170429916717;
|
static const double SELU_ALPHA = 1.6732632423543772848170429916717;
|
||||||
|
|
@ -453,12 +455,12 @@ void inline prelu_cpu_kernel_multi_weights(
|
||||||
scalar_t* weight_data = weight.data_ptr<scalar_t>();
|
scalar_t* weight_data = weight.data_ptr<scalar_t>();
|
||||||
|
|
||||||
auto loop = [&](int64_t start, int64_t end) {
|
auto loop = [&](int64_t start, int64_t end) {
|
||||||
for (auto i = start; i < end; ++i) {
|
for (const auto i : c10::irange(start, end)) {
|
||||||
int64_t offset = i * channel_size * input_stride1;
|
int64_t offset = i * channel_size * input_stride1;
|
||||||
scalar_t* n_input_data = input_data + offset;
|
scalar_t* n_input_data = input_data + offset;
|
||||||
scalar_t* n_result_data = result_data + offset;
|
scalar_t* n_result_data = result_data + offset;
|
||||||
for (auto j = 0; j < channel_size; ++j) {
|
for (const auto j : c10::irange(channel_size)) {
|
||||||
for (auto k = 0; k < input_stride1; ++k) {
|
for (const auto k : c10::irange(input_stride1)) {
|
||||||
// to allow for compiler optimization, here splitting into two lines:
|
// to allow for compiler optimization, here splitting into two lines:
|
||||||
scalar_t w = (n_input_data[k] > 0) ? scalar_t(1) : weight_data[j];
|
scalar_t w = (n_input_data[k] > 0) ? scalar_t(1) : weight_data[j];
|
||||||
n_result_data[k] = w * n_input_data[k];
|
n_result_data[k] = w * n_input_data[k];
|
||||||
|
|
@ -578,9 +580,9 @@ void inline prelu_cpu_backward_kernel_multi_weights(
|
||||||
auto weight_grad_collector_data = weight_grad_collector.data_ptr<scalar_t>();
|
auto weight_grad_collector_data = weight_grad_collector.data_ptr<scalar_t>();
|
||||||
|
|
||||||
auto loop = [&](int64_t start, int64_t end) {
|
auto loop = [&](int64_t start, int64_t end) {
|
||||||
for (auto i = start; i < end; i++) {
|
for (const auto i : c10::irange(start, end)) {
|
||||||
for (auto j = 0; j < channel_size; j++) {
|
for (const auto j : c10::irange(channel_size)) {
|
||||||
for (auto k = 0; k < input_stride1; k++) {
|
for (const auto k : c10::irange(input_stride1)) {
|
||||||
int64_t pos = i * input_stride0 + j * input_stride1 + k;
|
int64_t pos = i * input_stride0 + j * input_stride1 + k;
|
||||||
scalar_t weight_data_val = weight_data[j];
|
scalar_t weight_data_val = weight_data[j];
|
||||||
scalar_t input_data_val = input_data[pos];
|
scalar_t input_data_val = input_data[pos];
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
#include <ATen/ATen.h>
|
#include <ATen/ATen.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at { namespace native {
|
namespace at { namespace native {
|
||||||
|
|
||||||
Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value) {
|
Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value) {
|
||||||
|
|
@ -20,7 +22,7 @@ Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value)
|
||||||
bool all_pads_non_positive = true;
|
bool all_pads_non_positive = true;
|
||||||
|
|
||||||
auto c_input = self;
|
auto c_input = self;
|
||||||
for (int i = l_diff; i < l_inp; i++) {
|
for (const auto i : c10::irange(l_diff, l_inp)) {
|
||||||
auto pad_idx = 2 * (l_inp - i - 1);
|
auto pad_idx = 2 * (l_inp - i - 1);
|
||||||
if (pad[pad_idx] < 0) {
|
if (pad[pad_idx] < 0) {
|
||||||
c_input = c_input.narrow(i, -pad[pad_idx], c_input.size(i) + pad[pad_idx]);
|
c_input = c_input.narrow(i, -pad[pad_idx], c_input.size(i) + pad[pad_idx]);
|
||||||
|
|
@ -69,7 +71,7 @@ Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value)
|
||||||
output.fill_(value);
|
output.fill_(value);
|
||||||
|
|
||||||
auto c_output = output;
|
auto c_output = output;
|
||||||
for (int i = l_diff; i < l_inp; i++) {
|
for (const auto i : c10::irange(l_diff, l_inp)) {
|
||||||
auto pad_idx = 2 * (l_inp - i - 1);
|
auto pad_idx = 2 * (l_inp - i - 1);
|
||||||
if (pad[pad_idx] > 0) {
|
if (pad[pad_idx] > 0) {
|
||||||
c_output = c_output.narrow(i, pad[pad_idx], c_output.size(i) - pad[pad_idx]);
|
c_output = c_output.narrow(i, pad[pad_idx], c_output.size(i) - pad[pad_idx]);
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@
|
||||||
#include <ATen/native/xnnpack/Engine.h>
|
#include <ATen/native/xnnpack/Engine.h>
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
#include <c10/util/accumulate.h>
|
#include <c10/util/accumulate.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <ATen/Config.h>
|
#include <ATen/Config.h>
|
||||||
#include <c10/macros/Macros.h>
|
#include <c10/macros/Macros.h>
|
||||||
|
|
@ -489,7 +490,7 @@ static void check_shape_forward(const at::Tensor& input,
|
||||||
", expected bias to be 1-dimensional with ", weight_sizes[0], " elements",
|
", expected bias to be 1-dimensional with ", weight_sizes[0], " elements",
|
||||||
", but got bias of size ", bias.sizes(), " instead");
|
", but got bias of size ", bias.sizes(), " instead");
|
||||||
|
|
||||||
for (int i = 2; i < k; ++i) {
|
for (const auto i : c10::irange(2, k)) {
|
||||||
input_shape.push_back(input.size(i) + 2 * padding[i-2]);
|
input_shape.push_back(input.size(i) + 2 * padding[i-2]);
|
||||||
// log new kernel size considering dilation
|
// log new kernel size considering dilation
|
||||||
kernel_shape.push_back(dilation[i-2] * (weight_sizes[i]-1) + 1);
|
kernel_shape.push_back(dilation[i-2] * (weight_sizes[i]-1) + 1);
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@
|
||||||
#include <ATen/TensorUtils.h>
|
#include <ATen/TensorUtils.h>
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
@ -97,10 +99,10 @@ Tensor embedding_dense_backward_cpu(
|
||||||
std::unique_ptr<index_t[]> counts;
|
std::unique_ptr<index_t[]> counts;
|
||||||
if (scale_grad_by_freq) {
|
if (scale_grad_by_freq) {
|
||||||
counts.reset(new index_t[num_weights]);
|
counts.reset(new index_t[num_weights]);
|
||||||
for (int i = 0; i < numel; i++) {
|
for (const auto i : c10::irange(numel)) {
|
||||||
counts[indices_data[i]] = 0;
|
counts[indices_data[i]] = 0;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < numel; i++) {
|
for (const auto i : c10::irange(numel)) {
|
||||||
counts[indices_data[i]]++;
|
counts[indices_data[i]]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,8 @@
|
||||||
|
|
||||||
#include <ATen/native/CPUBlas.h>
|
#include <ATen/native/CPUBlas.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#ifdef USE_FBGEMM
|
#ifdef USE_FBGEMM
|
||||||
#include <fbgemm/Fbgemm.h>
|
#include <fbgemm/Fbgemm.h>
|
||||||
#else
|
#else
|
||||||
|
|
@ -535,11 +537,11 @@ void embedding_bag_cpu_max_out(
|
||||||
auto weight_stride1 = weight.strides()[1];
|
auto weight_stride1 = weight.strides()[1];
|
||||||
auto output_stride = output.strides()[0];
|
auto output_stride = output.strides()[0];
|
||||||
|
|
||||||
for (int i = 0; i < numIndices; ++i) {
|
for (const auto i : c10::irange(numIndices)) {
|
||||||
auto bag = offset2bag_data[i];
|
auto bag = offset2bag_data[i];
|
||||||
auto word_idx = indices_data[i];
|
auto word_idx = indices_data[i];
|
||||||
|
|
||||||
for (int dim = 0; dim < featureSize; dim++) {
|
for (const auto dim : c10::irange(featureSize)) {
|
||||||
auto& current_item = output_data[output_stride * bag + dim];
|
auto& current_item = output_data[output_stride * bag + dim];
|
||||||
auto weight_item =
|
auto weight_item =
|
||||||
weight_data[weight_stride0 * word_idx + dim * weight_stride1];
|
weight_data[weight_stride0 * word_idx + dim * weight_stride1];
|
||||||
|
|
@ -751,7 +753,7 @@ static std::vector<index_t> compute_counts(
|
||||||
index_t* indices_data,
|
index_t* indices_data,
|
||||||
int64_t indices_length) {
|
int64_t indices_length) {
|
||||||
std::vector<index_t> counts(num_weights, 0);
|
std::vector<index_t> counts(num_weights, 0);
|
||||||
for (int i = 0; i < indices_length; i++) {
|
for (const auto i : c10::irange(indices_length)) {
|
||||||
counts[indices_data[i]]++;
|
counts[indices_data[i]]++;
|
||||||
}
|
}
|
||||||
return counts;
|
return counts;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <ATen/ATen.h>
|
#include <ATen/ATen.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
@ -29,7 +31,7 @@ void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2) {
|
||||||
|
|
||||||
auto expected_dtype = tensors1[0].dtype();
|
auto expected_dtype = tensors1[0].dtype();
|
||||||
|
|
||||||
for (int i = 0; i < tensors1.size(); i++) {
|
for (const auto i : c10::irange(tensors1.size())) {
|
||||||
TORCH_CHECK(tensors1[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
TORCH_CHECK(tensors1[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
||||||
TORCH_CHECK(tensors2[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
TORCH_CHECK(tensors2[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
||||||
TORCH_CHECK(tensors1[i].sizes() == tensors2[i].sizes(), "Corresponding tensors in lists must have the same size, got ", tensors1[i].sizes(), " and ", tensors2[i].sizes());
|
TORCH_CHECK(tensors1[i].sizes() == tensors2[i].sizes(), "Corresponding tensors in lists must have the same size, got ", tensors1[i].sizes(), " and ", tensors2[i].sizes());
|
||||||
|
|
@ -45,7 +47,7 @@ void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2, Te
|
||||||
|
|
||||||
auto expected_dtype = tensors1[0].dtype();
|
auto expected_dtype = tensors1[0].dtype();
|
||||||
|
|
||||||
for (int i = 0; i < tensors1.size(); i++) {
|
for (const auto i : c10::irange(tensors1.size())) {
|
||||||
TORCH_CHECK(tensors1[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
TORCH_CHECK(tensors1[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
||||||
TORCH_CHECK(tensors2[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
TORCH_CHECK(tensors2[i].dtype() == expected_dtype, "All tensors in the tensor list must have the same dtype.");
|
||||||
TORCH_CHECK(tensors1[i].sizes() == tensors2[i].sizes(), "Corresponding tensors in lists must have the same size, got ", tensors1[i].sizes(), " and ", tensors2[i].sizes());
|
TORCH_CHECK(tensors1[i].sizes() == tensors2[i].sizes(), "Corresponding tensors in lists must have the same size, got ", tensors1[i].sizes(), " and ", tensors2[i].sizes());
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
#include <ATen/Parallel.h>
|
#include <ATen/Parallel.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|
@ -20,7 +22,7 @@ static std::vector<int> generate_intervals(
|
||||||
scalar_t alpha = static_cast<scalar_t>(inputSize - poolSize) /
|
scalar_t alpha = static_cast<scalar_t>(inputSize - poolSize) /
|
||||||
static_cast<scalar_t>(outputSize - 1);
|
static_cast<scalar_t>(outputSize - 1);
|
||||||
|
|
||||||
for (int i = 0; i < outputSize - 1; ++i) {
|
for (const auto i : c10::irange(outputSize - 1)) {
|
||||||
sequence[i] =
|
sequence[i] =
|
||||||
static_cast<int>((i + sample) * alpha) - static_cast<int>(sample * alpha);
|
static_cast<int>((i + sample) * alpha) - static_cast<int>(sample * alpha);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
#include <ATen/native/CPUBlas.h>
|
#include <ATen/native/CPUBlas.h>
|
||||||
#include <ATen/native/im2col.h>
|
#include <ATen/native/im2col.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
|
|
||||||
|
|
@ -253,7 +255,7 @@ void slow_conv_transpose2d_out_cpu_template(
|
||||||
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Long,
|
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Long,
|
||||||
input.scalar_type(), "slow_conv_transpose2d_out_cpu", [&] {
|
input.scalar_type(), "slow_conv_transpose2d_out_cpu", [&] {
|
||||||
// For each elt in batch, do:
|
// For each elt in batch, do:
|
||||||
for (int elt = 0; elt < batch_size; elt++) {
|
for (const auto elt : c10::irange(batch_size)) {
|
||||||
// Helpers
|
// Helpers
|
||||||
Tensor input_n;
|
Tensor input_n;
|
||||||
Tensor output_n;
|
Tensor output_n;
|
||||||
|
|
@ -448,7 +450,7 @@ static void slow_conv_transpose2d_backward_out_cpu_template(
|
||||||
Tensor grad_output_n = Tensor();
|
Tensor grad_output_n = Tensor();
|
||||||
|
|
||||||
// For each elt in batch, do:
|
// For each elt in batch, do:
|
||||||
for (int elt = 0; elt < batch_size; elt++) {
|
for (const auto elt : c10::irange(batch_size)) {
|
||||||
// Matrix mulitply per sample:
|
// Matrix mulitply per sample:
|
||||||
grad_input_n = grad_input.select(0, elt);
|
grad_input_n = grad_input.select(0, elt);
|
||||||
grad_output_n = grad_output.select(0, elt);
|
grad_output_n = grad_output.select(0, elt);
|
||||||
|
|
@ -639,7 +641,7 @@ void slow_conv_transpose2d_acc_grad_parameters_cpu(
|
||||||
scalar_t scale = static_cast<scalar_t>(scale_);
|
scalar_t scale = static_cast<scalar_t>(scale_);
|
||||||
|
|
||||||
// For each elt in batch, do:
|
// For each elt in batch, do:
|
||||||
for (int elt = 0; elt < batch_size; elt++) {
|
for (const auto elt : c10::irange(batch_size)) {
|
||||||
// Matrix mulitply per output:
|
// Matrix mulitply per output:
|
||||||
grad_output_n = grad_output.select(0, elt);
|
grad_output_n = grad_output.select(0, elt);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@
|
||||||
#include <ATen/native/vol2col.h>
|
#include <ATen/native/vol2col.h>
|
||||||
#include <ATen/Utils.h>
|
#include <ATen/Utils.h>
|
||||||
#include <c10/util/accumulate.h>
|
#include <c10/util/accumulate.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
|
|
@ -204,7 +205,7 @@ void slow_conv_dilated_all_cpu_template(
|
||||||
|
|
||||||
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Long, input.scalar_type(), "slow_conv_dilated<>", [&] {
|
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Long, input.scalar_type(), "slow_conv_dilated<>", [&] {
|
||||||
// For each elt in batch, do:
|
// For each elt in batch, do:
|
||||||
for (int elt = 0; elt < batchSize; elt++) {
|
for (const auto elt : c10::irange(batchSize)) {
|
||||||
// Matrix multiply per output:
|
// Matrix multiply per output:
|
||||||
Tensor input_n = input.select(0, elt);
|
Tensor input_n = input.select(0, elt);
|
||||||
|
|
||||||
|
|
@ -234,7 +235,7 @@ void slow_conv_dilated_all_cpu_template(
|
||||||
*/
|
*/
|
||||||
// The following for-loop is equivalent to the above
|
// The following for-loop is equivalent to the above
|
||||||
// gemm setup but avoids allocation of ones tensor:
|
// gemm setup but avoids allocation of ones tensor:
|
||||||
for (int n = 0; n < nOutputPlane; n++) {
|
for (const auto n : c10::irange(nOutputPlane)) {
|
||||||
output_n.select(0, n).fill_(bias[n]);
|
output_n.select(0, n).fill_(bias[n]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@
|
||||||
|
|
||||||
#include <ATen/NamedTensorUtils.h>
|
#include <ATen/NamedTensorUtils.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
|
||||||
namespace at { namespace native {
|
namespace at { namespace native {
|
||||||
|
|
@ -143,7 +145,7 @@ static Tensor align(const Tensor& tensor, DimnameList names, bool is_aligning_tw
|
||||||
|
|
||||||
static int64_t countUnset(std::bitset<kMaxNamedTensorDim> set, int64_t up_to_idx) {
|
static int64_t countUnset(std::bitset<kMaxNamedTensorDim> set, int64_t up_to_idx) {
|
||||||
int64_t result = 0;
|
int64_t result = 0;
|
||||||
for (auto i = 0; i < up_to_idx; ++i) {
|
for (const auto i : c10::irange(up_to_idx)) {
|
||||||
if (!set.test(i)) result++;
|
if (!set.test(i)) result++;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
|
@ -188,7 +190,7 @@ Tensor align_to(const Tensor& tensor, DimnameList order, int64_t ellipsis_idx) {
|
||||||
// appears in the jth element of tensor.
|
// appears in the jth element of tensor.
|
||||||
std::vector<int64_t> tensor_idx_for(order.size(), not_found);
|
std::vector<int64_t> tensor_idx_for(order.size(), not_found);
|
||||||
|
|
||||||
for (auto order_idx = 0U; order_idx < order.size(); ++order_idx) {
|
for (const auto order_idx : c10::irange(order.size())) {
|
||||||
const auto name = order[order_idx];
|
const auto name = order[order_idx];
|
||||||
TORCH_CHECK(name.isBasic(),
|
TORCH_CHECK(name.isBasic(),
|
||||||
"align_to: the desired order of dimensions cannot contain a None name, got ",
|
"align_to: the desired order of dimensions cannot contain a None name, got ",
|
||||||
|
|
@ -233,7 +235,7 @@ Tensor align_to(const Tensor& tensor, DimnameList order, int64_t ellipsis_idx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fill in the ellipsis dimensions
|
// Fill in the ellipsis dimensions
|
||||||
for (auto tensor_idx = 0U; tensor_idx < tensor_dim; ++tensor_idx) {
|
for (const auto tensor_idx : c10::irange(tensor_dim)) {
|
||||||
if (order_has_tensor_name.test(tensor_idx)) {
|
if (order_has_tensor_name.test(tensor_idx)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -259,7 +261,7 @@ Tensor align_to(const Tensor& tensor, DimnameList names) {
|
||||||
std::vector<int64_t> new_sizes(names.size(), 1);
|
std::vector<int64_t> new_sizes(names.size(), 1);
|
||||||
std::vector<int64_t> new_strides(names.size(), 0);
|
std::vector<int64_t> new_strides(names.size(), 0);
|
||||||
|
|
||||||
for (auto idx = 0U; idx < tensor_names.size(); ++idx) {
|
for (const auto idx : c10::irange(tensor_names.size())) {
|
||||||
const auto& dim = tensor_names[idx];
|
const auto& dim = tensor_names[idx];
|
||||||
TORCH_CHECK(dim.isBasic(),
|
TORCH_CHECK(dim.isBasic(),
|
||||||
"align_to: All input dims must be named. Found unnamed dim at index ",
|
"align_to: All input dims must be named. Found unnamed dim at index ",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
#include <ATen/ATen.h>
|
#include <ATen/ATen.h>
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at { namespace native {
|
namespace at { namespace native {
|
||||||
|
|
||||||
void checkLongTensor(const Tensor& tensor) {
|
void checkLongTensor(const Tensor& tensor) {
|
||||||
|
|
@ -28,7 +30,7 @@ std::tuple<Tensor, Tensor> _pack_padded_sequence(const Tensor& _input, const Ten
|
||||||
TORCH_CHECK(lengths[batch_size - 1] > 0,
|
TORCH_CHECK(lengths[batch_size - 1] > 0,
|
||||||
"Length of all samples has to be greater than 0, but found an element "
|
"Length of all samples has to be greater than 0, but found an element "
|
||||||
"in 'lengths' that is <= 0");
|
"in 'lengths' that is <= 0");
|
||||||
for(auto i = 0; i < batch_size - 1; i++) {
|
for (const auto i : c10::irange(batch_size - 1)) {
|
||||||
if (lengths[batch_size - 1 - i] > lengths[batch_size - 2 - i]) {
|
if (lengths[batch_size - 1 - i] > lengths[batch_size - 2 - i]) {
|
||||||
// NB: enforce_sorted is implemented at a Python level, but the sortedness
|
// NB: enforce_sorted is implemented at a Python level, but the sortedness
|
||||||
// check lives here. If enforce_sorted=False then this error should never
|
// check lives here. If enforce_sorted=False then this error should never
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,8 @@
|
||||||
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
||||||
#include <ATen/native/quantized/cpu/packed_params.h>
|
#include <ATen/native/quantized/cpu/packed_params.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#ifdef USE_FBGEMM
|
#ifdef USE_FBGEMM
|
||||||
#include <fbgemm/Fbgemm.h>
|
#include <fbgemm/Fbgemm.h>
|
||||||
#include <fbgemm/FbgemmFP16.h>
|
#include <fbgemm/FbgemmFP16.h>
|
||||||
|
|
@ -134,7 +136,7 @@ Tensor fbgemm_linear_int8_weight_fp32_activation(
|
||||||
|
|
||||||
// This is the end of the pipeline, pass the resulting matrix through
|
// This is the end of the pipeline, pass the resulting matrix through
|
||||||
fbgemm::DoNothing<float, float> kDoNothingObj{};
|
fbgemm::DoNothing<float, float> kDoNothingObj{};
|
||||||
for (int task_id = begin; task_id < end; ++task_id) {
|
for (const auto task_id : c10::irange(begin, end)) {
|
||||||
// After the uint8 * int8 matrix multiplication is performed, this
|
// After the uint8 * int8 matrix multiplication is performed, this
|
||||||
// operation does:
|
// operation does:
|
||||||
// 1) Add in row and column offsets to the rows and columns, respectively
|
// 1) Add in row and column offsets to the rows and columns, respectively
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,8 @@
|
||||||
#include <ATen/native/SharedReduceOps.h>
|
#include <ATen/native/SharedReduceOps.h>
|
||||||
#include <ATen/core/grad_mode.h>
|
#include <ATen/core/grad_mode.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
@ -371,7 +373,7 @@ Tensor cumprod_backward(const Tensor& grad, const Tensor& input, int64_t dim, co
|
||||||
const Tensor ones = at::ones({1}, grad.options()).expand(ones_size);
|
const Tensor ones = at::ones({1}, grad.options()).expand(ones_size);
|
||||||
Tensor prods_from_k_plus_1;
|
Tensor prods_from_k_plus_1;
|
||||||
Tensor omitted_products;
|
Tensor omitted_products;
|
||||||
for (int k = 0; k < dim_size; ++k) {
|
for (const auto k : c10::irange(dim_size)) {
|
||||||
if (k == 0) {
|
if (k == 0) {
|
||||||
prods_from_k_plus_1 = at::cumprod(input_conj.slice(dim, k + 1), dim);
|
prods_from_k_plus_1 = at::cumprod(input_conj.slice(dim, k + 1), dim);
|
||||||
omitted_products = at::cat({ones, prods_from_k_plus_1}, dim);
|
omitted_products = at::cat({ones, prods_from_k_plus_1}, dim);
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,8 @@
|
||||||
#include <ATen/native/Copy.h>
|
#include <ATen/native/Copy.h>
|
||||||
#include <ATen/Parallel.h>
|
#include <ATen/Parallel.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
|
|
@ -506,7 +508,7 @@ static void check_indexarray_range(
|
||||||
const IndexType* indices,
|
const IndexType* indices,
|
||||||
int64_t n,
|
int64_t n,
|
||||||
IndexType indexing_axis_dim) {
|
IndexType indexing_axis_dim) {
|
||||||
for (auto i = 0; i < n; ++i) {
|
for (const auto i : c10::irange(n)) {
|
||||||
auto idx = indices[i];
|
auto idx = indices[i];
|
||||||
TORCH_CHECK(
|
TORCH_CHECK(
|
||||||
0 <= idx && idx < indexing_axis_dim,
|
0 <= idx && idx < indexing_axis_dim,
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
#include <ATen/Functions.h>
|
#include <ATen/Functions.h>
|
||||||
#include <ATen/TensorOperators.h>
|
#include <ATen/TensorOperators.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
/// Contains the implementation of parallel reductions in TensorIterator.
|
/// Contains the implementation of parallel reductions in TensorIterator.
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
|
|
@ -136,7 +138,7 @@ void TensorIteratorBase::foreach_reduced_elt(loop_subiter_t loop, bool paralleli
|
||||||
auto non_reduced_shape = shape.slice(reduce_dims, shape.size() - reduce_dims);
|
auto non_reduced_shape = shape.slice(reduce_dims, shape.size() - reduce_dims);
|
||||||
|
|
||||||
int64_t non_reduced_numel = 1;
|
int64_t non_reduced_numel = 1;
|
||||||
for (int i = 0; i < non_reduced_shape.size(); ++i) {
|
for (const auto i : c10::irange(non_reduced_shape.size())) {
|
||||||
non_reduced_numel *= non_reduced_shape[i];
|
non_reduced_numel *= non_reduced_shape[i];
|
||||||
}
|
}
|
||||||
DimCounter dims {non_reduced_shape, {0, non_reduced_numel}};
|
DimCounter dims {non_reduced_shape, {0, non_reduced_numel}};
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
#include <ATen/ScalarOps.h>
|
#include <ATen/ScalarOps.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
|
|
||||||
|
|
@ -19,7 +21,7 @@ Tensor _test_optional_intlist(
|
||||||
Tensor output = at::empty_like(values);
|
Tensor output = at::empty_like(values);
|
||||||
auto inp = values.accessor<int,1>();
|
auto inp = values.accessor<int,1>();
|
||||||
auto out = output.accessor<int,1>();
|
auto out = output.accessor<int,1>();
|
||||||
for(int i = 0; i < values.size(0); ++i) {
|
for (const auto i : c10::irange(values.size(0))) {
|
||||||
out[i] = inp[i] + addends->at(i);
|
out[i] = inp[i] + addends->at(i);
|
||||||
}
|
}
|
||||||
return output;
|
return output;
|
||||||
|
|
@ -37,7 +39,7 @@ Tensor _test_optional_floatlist(
|
||||||
Tensor output = at::empty_like(values);
|
Tensor output = at::empty_like(values);
|
||||||
auto inp = values.accessor<float,1>();
|
auto inp = values.accessor<float,1>();
|
||||||
auto out = output.accessor<float,1>();
|
auto out = output.accessor<float,1>();
|
||||||
for(int i = 0; i < values.size(0); ++i) {
|
for (const auto i : c10::irange(values.size(0))) {
|
||||||
out[i] = inp[i] + addends->at(i);
|
out[i] = inp[i] + addends->at(i);
|
||||||
}
|
}
|
||||||
return output;
|
return output;
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,8 @@
|
||||||
#include <ATen/quantized/QTensorImpl.h>
|
#include <ATen/quantized/QTensorImpl.h>
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
|
|
||||||
|
|
@ -24,7 +26,7 @@ std::vector<Tensor> quantize_per_tensor_list_cpu(
|
||||||
const Tensor& zero_points,
|
const Tensor& zero_points,
|
||||||
ScalarType dtype) {
|
ScalarType dtype) {
|
||||||
std::vector<Tensor> quantized_tensors;
|
std::vector<Tensor> quantized_tensors;
|
||||||
for (auto i = 0; i < tensors.size(); ++i) {
|
for (const auto i : c10::irange(tensors.size())) {
|
||||||
quantized_tensors.push_back(at::quantize_per_tensor(
|
quantized_tensors.push_back(at::quantize_per_tensor(
|
||||||
tensors[i],
|
tensors[i],
|
||||||
scales[i].item<double>(),
|
scales[i].item<double>(),
|
||||||
|
|
@ -54,7 +56,7 @@ Tensor dequantize_quantized_cpu(const Tensor& self) {
|
||||||
|
|
||||||
std::vector<Tensor> dequantize_tensors_quantized_cpu(TensorList tensors) {
|
std::vector<Tensor> dequantize_tensors_quantized_cpu(TensorList tensors) {
|
||||||
std::vector<Tensor> dequantized_tensors;
|
std::vector<Tensor> dequantized_tensors;
|
||||||
for (auto i = 0; i < tensors.size(); ++i) {
|
for (const auto i : c10::irange(tensors.size())) {
|
||||||
dequantized_tensors.push_back(tensors[i].dequantize());
|
dequantized_tensors.push_back(tensors[i].dequantize());
|
||||||
}
|
}
|
||||||
return dequantized_tensors;
|
return dequantized_tensors;
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,8 @@
|
||||||
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
|
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
// To have a sanity check for maximum matrix size.
|
// To have a sanity check for maximum matrix size.
|
||||||
constexpr int64_t kReasonableMaxDim = 1000000;
|
constexpr int64_t kReasonableMaxDim = 1000000;
|
||||||
|
|
@ -453,7 +455,7 @@ at::Tensor PackedConvWeight<kSpatialDim>::apply_impl(
|
||||||
const int num_tasks = at::get_num_threads();
|
const int num_tasks = at::get_num_threads();
|
||||||
at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
|
at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
|
||||||
fbgemm::DoNothing<> kNoOpObj{};
|
fbgemm::DoNothing<> kNoOpObj{};
|
||||||
for (int task_id = begin; task_id < end; ++task_id) {
|
for (const auto task_id : c10::irange(begin, end)) {
|
||||||
if (q_scheme == c10::kPerTensorAffine) {
|
if (q_scheme == c10::kPerTensorAffine) {
|
||||||
fbgemm::ReQuantizeOutput<
|
fbgemm::ReQuantizeOutput<
|
||||||
kReluFused,
|
kReluFused,
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#ifdef USE_FBGEMM
|
#ifdef USE_FBGEMM
|
||||||
template <int kSpatialDim>
|
template <int kSpatialDim>
|
||||||
c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeight<
|
c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeight<
|
||||||
|
|
@ -114,7 +116,7 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeight<
|
||||||
const int output_channels_per_group = output_channels / groups;
|
const int output_channels_per_group = output_channels / groups;
|
||||||
const int inner_size =
|
const int inner_size =
|
||||||
kernel_d * kernel_h * kernel_w * input_channels_per_group;
|
kernel_d * kernel_h * kernel_w * input_channels_per_group;
|
||||||
for (int g = 0; g < groups; ++g) {
|
for (const auto g : c10::irange(groups)) {
|
||||||
for (int i = 0; i < output_channels_per_group; ++i) {
|
for (int i = 0; i < output_channels_per_group; ++i) {
|
||||||
const int c = g * output_channels_per_group + i;
|
const int c = g * output_channels_per_group + i;
|
||||||
int32_t sum = 0;
|
int32_t sum = 0;
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
torch::class_<EmbeddingPackedParamsBase> register_embedding_params();
|
torch::class_<EmbeddingPackedParamsBase> register_embedding_params();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -271,7 +273,7 @@ Tensor _qembeddingbag_nbit_prepack_helper(
|
||||||
output_row_scale_zp[1] = Xmin;
|
output_row_scale_zp[1] = Xmin;
|
||||||
|
|
||||||
// Pack the weight values.
|
// Pack the weight values.
|
||||||
for (int col = 0; col < embedding_cols; ++col) {
|
for (const auto col : c10::irange(embedding_cols)) {
|
||||||
float X = input_row[col];
|
float X = input_row[col];
|
||||||
std::uint8_t quantized = std::max(
|
std::uint8_t quantized = std::max(
|
||||||
0,
|
0,
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,8 @@
|
||||||
#include <torch/custom_class.h>
|
#include <torch/custom_class.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|
@ -65,7 +67,7 @@ at::Tensor PackedLinearWeight::apply_impl(
|
||||||
// Process the per channel quantization.
|
// Process the per channel quantization.
|
||||||
output_multiplier_float.resize(N, 0.0);
|
output_multiplier_float.resize(N, 0.0);
|
||||||
act_times_w_scale.resize(N, 1.0f);
|
act_times_w_scale.resize(N, 1.0f);
|
||||||
for (int i = 0; i < N; ++i) {
|
for (const auto i : c10::irange(N)) {
|
||||||
act_times_w_scale[i] = (input_scale_float * w_scale[i]);
|
act_times_w_scale[i] = (input_scale_float * w_scale[i]);
|
||||||
output_multiplier_float[i] =
|
output_multiplier_float[i] =
|
||||||
act_times_w_scale[i] / static_cast<float>(output_scale);
|
act_times_w_scale[i] / static_cast<float>(output_scale);
|
||||||
|
|
@ -101,7 +103,7 @@ at::Tensor PackedLinearWeight::apply_impl(
|
||||||
|
|
||||||
int num_tasks = at::get_num_threads();
|
int num_tasks = at::get_num_threads();
|
||||||
at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
|
at::parallel_for(0, num_tasks, 1, [&](int64_t begin, int64_t end) {
|
||||||
for (int task_id = begin; task_id < end; ++task_id) {
|
for (const auto task_id : c10::irange(begin, end)) {
|
||||||
// This operation does the following:
|
// This operation does the following:
|
||||||
// 1) Creates a "row buffer" vector with offset values that must be
|
// 1) Creates a "row buffer" vector with offset values that must be
|
||||||
// added to the integer matrix multiplication operation to ensure
|
// added to the integer matrix multiplication operation to ensure
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@
|
||||||
|
|
||||||
#include <torch/custom_class.h>
|
#include <torch/custom_class.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|
@ -135,7 +137,7 @@ at::Tensor PackedLinearWeight::apply_dynamic_impl(at::Tensor input, bool reduce_
|
||||||
// This is the end of the pipeline, pass the resulting matrix through.
|
// This is the end of the pipeline, pass the resulting matrix through.
|
||||||
fbgemm::DoNothing<float, float> doNothingObj{};
|
fbgemm::DoNothing<float, float> doNothingObj{};
|
||||||
|
|
||||||
for (int task_id = begin; task_id < end; ++task_id) {
|
for (const auto task_id : c10::irange(begin, end)) {
|
||||||
if (q_scheme == c10::kPerTensorAffine) {
|
if (q_scheme == c10::kPerTensorAffine) {
|
||||||
// Process the per tensor quantization.
|
// Process the per tensor quantization.
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,9 @@
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
#include <torch/custom_class.h>
|
#include <torch/custom_class.h>
|
||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|
@ -59,7 +62,7 @@ c10::intrusive_ptr<LinearPackedParamsBase> PackedLinearWeight::prepack(
|
||||||
weight_zero_points_int32[0] = weight.q_zero_point();
|
weight_zero_points_int32[0] = weight.q_zero_point();
|
||||||
} else if (qtype == c10::kPerChannelAffine) {
|
} else if (qtype == c10::kPerChannelAffine) {
|
||||||
weight_zero_points_int32.resize(N, 0);
|
weight_zero_points_int32.resize(N, 0);
|
||||||
for (int i = 0; i < N; ++i) {
|
for (const auto i : c10::irange(N)) {
|
||||||
weight_zero_points_int32[i] =
|
weight_zero_points_int32[i] =
|
||||||
weight.q_per_channel_zero_points()[i].item<int32_t>();
|
weight.q_per_channel_zero_points()[i].item<int32_t>();
|
||||||
}
|
}
|
||||||
|
|
@ -69,7 +72,7 @@ c10::intrusive_ptr<LinearPackedParamsBase> PackedLinearWeight::prepack(
|
||||||
weight_scales_float[0] = weight.q_scale();
|
weight_scales_float[0] = weight.q_scale();
|
||||||
} else if (qtype == c10::kPerChannelAffine) {
|
} else if (qtype == c10::kPerChannelAffine) {
|
||||||
weight_scales_float.resize(N, 0.0);
|
weight_scales_float.resize(N, 0.0);
|
||||||
for (int i = 0; i < N; ++i) {
|
for (const auto i : c10::irange(N)) {
|
||||||
weight_scales_float[i] = weight.q_per_channel_scales()[i].item<float>();
|
weight_scales_float[i] = weight.q_per_channel_scales()[i].item<float>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@
|
||||||
#include <ATen/native/cpu/Loops.h>
|
#include <ATen/native/cpu/Loops.h>
|
||||||
#include <ATen/native/quantized/cpu/quantized_ops.h>
|
#include <ATen/native/quantized/cpu/quantized_ops.h>
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -70,7 +73,7 @@ static void upsample_nearest2d_out_frame_nhwc(
|
||||||
float height_scale = compute_scales_value<float>(scales_h, input_height, output_height);
|
float height_scale = compute_scales_value<float>(scales_h, input_height, output_height);
|
||||||
float width_scale = compute_scales_value<float>(scales_w, input_width, output_width);
|
float width_scale = compute_scales_value<float>(scales_w, input_width, output_width);
|
||||||
|
|
||||||
for (int b = 0; b < nbatch; b++) {
|
for (const auto b : c10::irange(nbatch)) {
|
||||||
auto* i_p = reinterpret_cast<typename scalar_t::underlying*>(idata + b * input_height * input_width * channels);
|
auto* i_p = reinterpret_cast<typename scalar_t::underlying*>(idata + b * input_height * input_width * channels);
|
||||||
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(odata + b * output_height * output_width * channels);
|
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(odata + b * output_height * output_width * channels);
|
||||||
// special case: just copy
|
// special case: just copy
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@
|
||||||
#include <ATen/native/cpu/Loops.h>
|
#include <ATen/native/cpu/Loops.h>
|
||||||
#include <ATen/native/quantized/cpu/quantized_ops.h>
|
#include <ATen/native/quantized/cpu/quantized_ops.h>
|
||||||
#include <ATen/quantized/Quantizer.h>
|
#include <ATen/quantized/Quantizer.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -83,7 +86,7 @@ static void upsample_nearest3d_out_frame_nhwc(
|
||||||
float height_scale = compute_scales_value<float>(scales_h, input_height, output_height);
|
float height_scale = compute_scales_value<float>(scales_h, input_height, output_height);
|
||||||
float width_scale = compute_scales_value<float>(scales_w, input_width, output_width);
|
float width_scale = compute_scales_value<float>(scales_w, input_width, output_width);
|
||||||
|
|
||||||
for (int b = 0; b < nbatch; b++) {
|
for (const auto b : c10::irange(nbatch)) {
|
||||||
auto* i_p = reinterpret_cast<typename scalar_t::underlying*>(idata + b * input_depth * input_height * input_width * channels);
|
auto* i_p = reinterpret_cast<typename scalar_t::underlying*>(idata + b * input_depth * input_height * input_width * channels);
|
||||||
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(odata + b * output_depth * output_height * output_width * channels);
|
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(odata + b * output_depth * output_height * output_width * channels);
|
||||||
// special case: just copy
|
// special case: just copy
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
#include <ATen/native/cpu/Loops.h>
|
#include <ATen/native/cpu/Loops.h>
|
||||||
#include <ATen/native/quantized/fake_quant_affine.h>
|
#include <ATen/native/quantized/fake_quant_affine.h>
|
||||||
|
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
// FakeQuantize Op for PerChannelAffine quantization scheme.
|
// FakeQuantize Op for PerChannelAffine quantization scheme.
|
||||||
namespace at {
|
namespace at {
|
||||||
namespace native {
|
namespace native {
|
||||||
|
|
@ -243,10 +245,10 @@ std::tuple<Tensor, Tensor, Tensor> _fake_quantize_learnable_per_channel_affine_b
|
||||||
// Create a collection of axes that include all but the channel axis for
|
// Create a collection of axes that include all but the channel axis for
|
||||||
// reduction when summing over the dScale and dZeroPoint tensors.
|
// reduction when summing over the dScale and dZeroPoint tensors.
|
||||||
int64_t* axis_for_reduction = (int64_t*) calloc(numElements, sizeof(int64_t));
|
int64_t* axis_for_reduction = (int64_t*) calloc(numElements, sizeof(int64_t));
|
||||||
for (int i = 0; i < axis; ++i) {
|
for (const auto i : c10::irange(axis)) {
|
||||||
axis_for_reduction[i] = i;
|
axis_for_reduction[i] = i;
|
||||||
}
|
}
|
||||||
for (int i = axis; i < numElements; ++i) {
|
for (const auto i : c10::irange(axis, numElements)) {
|
||||||
axis_for_reduction[i] = i + 1;
|
axis_for_reduction[i] = i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
#include <ATen/TensorNames.h>
|
#include <ATen/TensorNames.h>
|
||||||
#include <c10/util/Exception.h>
|
#include <c10/util/Exception.h>
|
||||||
#include <c10/util/C++17.h>
|
#include <c10/util/C++17.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
using at::Dimname;
|
using at::Dimname;
|
||||||
using at::DimnameList;
|
using at::DimnameList;
|
||||||
|
|
@ -38,7 +39,7 @@ static bool dimnames_equal(at::DimnameList names, at::DimnameList other) {
|
||||||
if (names.size() != other.size()) {
|
if (names.size() != other.size()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (auto i = 0; i < names.size(); i++) {
|
for (const auto i : c10::irange(names.size())) {
|
||||||
const auto& name = names[i];
|
const auto& name = names[i];
|
||||||
const auto& other_name = other[i];
|
const auto& other_name = other[i];
|
||||||
if (name.type() != other_name.type() || name.symbol() != other_name.symbol()) {
|
if (name.type() != other_name.type() || name.symbol() != other_name.symbol()) {
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
#include <c10/util/intrusive_ptr.h>
|
#include <c10/util/intrusive_ptr.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
#include <benchmark/benchmark.h>
|
#include <benchmark/benchmark.h>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
@ -45,10 +46,10 @@ static void BM_IntrusivePtrArray(benchmark::State& state) {
|
||||||
const size_t kLength = state.range(0);
|
const size_t kLength = state.range(0);
|
||||||
std::vector<intrusive_ptr<Foo> > vararray(kLength);
|
std::vector<intrusive_ptr<Foo> > vararray(kLength);
|
||||||
while (state.KeepRunning()) {
|
while (state.KeepRunning()) {
|
||||||
for (int i = 0; i < kLength; ++i) {
|
for (const auto i : c10::irange(kLength)) {
|
||||||
vararray[i] = var;
|
vararray[i] = var;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < kLength; ++i) {
|
for (const auto i : c10::irange(kLength)) {
|
||||||
vararray[i].reset();
|
vararray[i].reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -60,10 +61,10 @@ static void BM_SharedPtrArray(benchmark::State& state) {
|
||||||
const size_t kLength = state.range(0);
|
const size_t kLength = state.range(0);
|
||||||
std::vector<std::shared_ptr<Bar> > vararray(kLength);
|
std::vector<std::shared_ptr<Bar> > vararray(kLength);
|
||||||
while (state.KeepRunning()) {
|
while (state.KeepRunning()) {
|
||||||
for (int i = 0; i < kLength; ++i) {
|
for (const auto i : c10::irange(kLength)) {
|
||||||
vararray[i] = var;
|
vararray[i] = var;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < kLength; ++i) {
|
for (const auto i : c10::irange(kLength)) {
|
||||||
vararray[i].reset();
|
vararray[i].reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
#include <climits>
|
#include <climits>
|
||||||
|
|
||||||
#include <c10/mobile/CPUProfilingAllocator.h>
|
#include <c10/mobile/CPUProfilingAllocator.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
namespace c10 {
|
namespace c10 {
|
||||||
|
|
||||||
|
|
@ -304,7 +305,7 @@ void AllocationPlanner::formulate_plan() {
|
||||||
formulate_greedy_allocation_plan(
|
formulate_greedy_allocation_plan(
|
||||||
allocation_plan_->allocation_sizes, allocation_plan_->allocation_lifetimes);
|
allocation_plan_->allocation_sizes, allocation_plan_->allocation_lifetimes);
|
||||||
allocation_plan_->total_size = 0;
|
allocation_plan_->total_size = 0;
|
||||||
for (auto i = 0; i < allocation_plan_->allocation_sizes.size(); ++i) {
|
for (const auto i : c10::irange(allocation_plan_->allocation_sizes.size())) {
|
||||||
if (allocation_plan_->allocation_lifetimes[i] ==
|
if (allocation_plan_->allocation_lifetimes[i] ==
|
||||||
std::numeric_limits<uint64_t>::max()) {
|
std::numeric_limits<uint64_t>::max()) {
|
||||||
continue;
|
continue;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#include <c10/core/impl/SizesAndStrides.h>
|
#include <c10/core/impl/SizesAndStrides.h>
|
||||||
|
#include <c10/util/irange.h>
|
||||||
|
|
||||||
using namespace c10;
|
using namespace c10;
|
||||||
using namespace c10::impl;
|
using namespace c10::impl;
|
||||||
|
|
@ -55,7 +56,7 @@ TEST(SizesAndStridesTest, Resize) {
|
||||||
sz.resize(5);
|
sz.resize(5);
|
||||||
checkData(sz, {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0});
|
checkData(sz, {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0});
|
||||||
|
|
||||||
for (int ii = 0; ii < sz.size(); ++ii) {
|
for (const auto ii : c10::irange(sz.size())) {
|
||||||
sz.size_at_unchecked(ii) = ii + 1;
|
sz.size_at_unchecked(ii) = ii + 1;
|
||||||
sz.stride_at_unchecked(ii) = 2 * (ii + 1);
|
sz.stride_at_unchecked(ii) = 2 * (ii + 1);
|
||||||
}
|
}
|
||||||
|
|
@ -113,7 +114,7 @@ TEST(SizesAndStridesTest, Resize) {
|
||||||
// Give it different data than it had when it was small to avoid
|
// Give it different data than it had when it was small to avoid
|
||||||
// getting it right by accident (i.e., because of leftover inline
|
// getting it right by accident (i.e., because of leftover inline
|
||||||
// storage when going small to big).
|
// storage when going small to big).
|
||||||
for (int ii = 0; ii < sz.size(); ++ii) {
|
for (const auto ii : c10::irange(sz.size())) {
|
||||||
sz.size_at_unchecked(ii) = ii - 1;
|
sz.size_at_unchecked(ii) = ii - 1;
|
||||||
sz.stride_at_unchecked(ii) = 2 * (ii - 1);
|
sz.stride_at_unchecked(ii) = 2 * (ii - 1);
|
||||||
}
|
}
|
||||||
|
|
@ -175,7 +176,7 @@ TEST(SizesAndStridesTest, SetViaData) {
|
||||||
static SizesAndStrides makeSmall(int offset = 0) {
|
static SizesAndStrides makeSmall(int offset = 0) {
|
||||||
SizesAndStrides small;
|
SizesAndStrides small;
|
||||||
small.resize(3);
|
small.resize(3);
|
||||||
for (int ii = 0; ii < small.size(); ++ii) {
|
for (const auto ii : c10::irange(small.size())) {
|
||||||
small.size_at_unchecked(ii) = ii + 1 + offset;
|
small.size_at_unchecked(ii) = ii + 1 + offset;
|
||||||
small.stride_at_unchecked(ii) = 2 * (ii + 1 + offset);
|
small.stride_at_unchecked(ii) = 2 * (ii + 1 + offset);
|
||||||
}
|
}
|
||||||
|
|
@ -186,7 +187,7 @@ static SizesAndStrides makeSmall(int offset = 0) {
|
||||||
static SizesAndStrides makeBig(int offset = 0) {
|
static SizesAndStrides makeBig(int offset = 0) {
|
||||||
SizesAndStrides big;
|
SizesAndStrides big;
|
||||||
big.resize(8);
|
big.resize(8);
|
||||||
for (int ii = 0; ii < big.size(); ++ii) {
|
for (const auto ii : c10::irange(big.size())) {
|
||||||
big.size_at_unchecked(ii) = ii - 1 + offset;
|
big.size_at_unchecked(ii) = ii - 1 + offset;
|
||||||
big.stride_at_unchecked(ii) = 2 * (ii - 1 + offset);
|
big.stride_at_unchecked(ii) = 2 * (ii - 1 + offset);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -4,6 +4,7 @@
 #include <c10/util/Exception.h>

+#include <algorithm>
 #include <iterator>
 #include <limits>
 #include <type_traits>
@@ -12,7 +13,7 @@ namespace c10 {

 namespace detail {

-template <typename I, std::enable_if_t<std::is_integral<I>{}, int> = 0>
+template <typename I, typename std::enable_if<std::is_integral<I>::value, int>::type = 0>
 struct integer_iterator : std::iterator<std::input_iterator_tag, I> {
   explicit integer_iterator(I value) : value(value) {}

@@ -45,7 +46,7 @@ struct integer_iterator : std::iterator<std::input_iterator_tag, I> {
 } // namespace detail

-template <typename I, std::enable_if_t<std::is_integral<I>{}, bool> = true>
+template <typename I, typename std::enable_if<std::is_integral<I>::value, bool>::type = true>
 struct integer_range {
  public:
   integer_range(I begin, I end) : begin_(begin), end_(end) {}

@@ -64,8 +65,8 @@ struct integer_range {
 template <
     typename Integer1,
     typename Integer2,
-    std::enable_if_t<std::is_integral<Integer1>::value, bool> = true,
-    std::enable_if_t<std::is_integral<Integer2>::value, bool> = true
+    typename std::enable_if<std::is_integral<Integer1>::value, bool>::type = true,
+    typename std::enable_if<std::is_integral<Integer2>::value, bool>::type = true
     >
 integer_range<Integer2> irange(Integer1 begin, Integer2 end) {
   //If end<=begin then the range is empty; we can achieve this effect by

@@ -75,10 +76,11 @@ integer_range<Integer2> irange(Integer1 begin, Integer2 end) {

 /// Creates an integer range for the half-open interval [0, end)
 /// If end<=begin, then the range is empty
-template <typename Integer, std::enable_if_t<std::is_integral<Integer>::value, bool> = true>
+template <typename Integer, typename std::enable_if<std::is_integral<Integer>::value, bool>::type = true>
 integer_range<Integer> irange(Integer end) {
   //If end<=begin then the range is empty; we can achieve this effect by
   //choosing the larger of {0, end} as the loop terminator
+  //Handles the case where end<0. irange only works for ranges >=0
   return {Integer(), std::max(Integer(), end)};
 }

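A minimal sketch of the hazard this commit addresses and of the irange replacement. Everything except c10::irange itself (the vector, the bounds) is illustrative, and the snippet assumes the c10 headers are on the include path:

#include <cstdint>
#include <vector>

#include <c10/util/irange.h>

int main() {
  std::vector<float> array(10, 1.f);

  // Hazardous form: array.size() is size_t, so `i < array.size()` compares
  // a narrow signed int with a wide unsigned type (this draws -Wsign-compare).
  // For a container with more than INT_MAX elements the counter overflows
  // before the bound is reached and the loop never terminates.
  for (int i = 0; i < array.size(); ++i) {
  }

  // irange deduces the index type from the bound (size_t here), so the
  // comparison is homogeneous and no overflow is possible.
  for (const auto i : c10::irange(array.size())) {
    array[i] += 1.f;
  }

  // A negative bound yields an empty range, per the std::max clamp in the
  // hunk above.
  for (const auto i : c10::irange(-5)) {
    (void)i; // never executes
  }
  return 0;
}
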
@@ -4,6 +4,7 @@
 #include <sstream>
 #include <utility>

+#include <c10/util/irange.h>
 #include <c10/util/string_view.h>

 #include "caffe2/core/blob.h"
@@ -249,7 +250,7 @@ void TensorSerializer::SerializeWithOptions(
   std::vector<std::future<void>> futures;
   if (tensor.numel() > chunk_size) {
     futures.reserve(FLAGS_caffe2_max_tensor_serializer_threads);
-    for (int i = 0; i < FLAGS_caffe2_max_tensor_serializer_threads; ++i) {
+    for (const auto i : c10::irange(FLAGS_caffe2_max_tensor_serializer_threads)) {
       futures.emplace_back(std::async(std::launch::async, task));
     }
   }
@@ -449,7 +450,7 @@ void TensorSerializer::Serialize(
   proto.mutable_segment()->set_begin(chunkBegin);
   proto.mutable_segment()->set_end(chunkBegin + chunkSize);

-  for (int i = 0; i < input.dim(); ++i) {
+  for (const auto i : c10::irange(input.dim())) {
     proto.add_dims(input.size(i));
   }
   StoreDeviceDetail(input, &proto);
@@ -479,7 +480,7 @@ void TensorSerializer::Serialize(
   proto.mutable_string_data()->Reserve(chunkSize);
   if (chunkSize > 0) {
     const char* raw_data = static_cast<const char*>(input.raw_data());
-    for (int i = chunkBegin; i < chunkBegin + chunkSize; ++i) {
+    for (const auto i : c10::irange(chunkBegin, chunkBegin + chunkSize)) {
       proto.add_string_data(SerializeBlob(
           raw_data + i * input.itemsize(), input.dtype(), ""));
     }
@@ -803,7 +804,7 @@ DESERIALIZE_IMPL(std::string, FMT_PROTOBUF) {
       params.dest.size(),
       " != ",
       params.tensor_proto.string_data().size());
-  for (int i = 0; i < params.dest.size(); ++i) {
+  for (const auto i : c10::irange(params.dest.size())) {
     params.dest[i] = params.tensor_proto.string_data(i);
   }
 }
@@ -910,7 +911,7 @@ void DeserializeTensor(
     case TensorProto_DataType_UNDEFINED: {
       Blob temp_blob;
      void* raw_ptr = nullptr;
-      for (int i = 0; i < chunkSize; ++i) {
+      for (const auto i : c10::irange(chunkSize)) {
        DeserializeBlob(tensor_proto.string_data(i), &temp_blob);
        if (i == 0) {
          raw_ptr = tensor->raw_mutable_data(temp_blob.meta());

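The two-argument overload covers loops that do not start at zero, such as the chunked serialization above. A hedged sketch (the function name and parameters are illustrative, not from the diff):

#include <cstdint>

#include <c10/util/irange.h>

// Sketch: iterate the half-open interval [chunkBegin, chunkBegin + chunkSize).
// If the upper bound is <= the lower bound, the range is simply empty.
void process_chunk(int64_t chunkBegin, int64_t chunkSize) {
  for (const auto i : c10::irange(chunkBegin, chunkBegin + chunkSize)) {
    (void)i; // the index type is deduced from the bounds, int64_t here
  }
}
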
@@ -1,6 +1,8 @@
 #include "caffe2/core/operator_schema.h"
 #include "caffe2/core/logging.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 OpSchema::OpSchema(const string& type, const string& file, const int line)
@@ -256,7 +258,7 @@ OpSchema& OpSchema::IdenticalTypeAndShapeOfMultipleInputs(
   return TensorInferenceFunction(
       [indices](const OperatorDef&, const vector<TensorShape>& input_types) {
         vector<TensorShape> out(indices.size());
-        for (int i = 0; i < indices.size(); i++) {
+        for (const auto i : c10::irange(indices.size())) {
           out[i] = input_types[indices.at(i)];
         }
         return out;

@@ -4,6 +4,8 @@
 #include <ostream>
 #include <sstream>

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 ProfDAGCounters::ProfDAGCounters(const std::shared_ptr<const NetDef>& net_def) {
@@ -81,7 +83,7 @@ void ProfDAGCounters::ReportRunEnd() {
   CaffeMap<std::string, float> cum_per_type_time_run;
   CaffeMap<std::string, float> cum_per_type_invocations_run;
   std::vector<float> per_op_time_run(report_.op_types_.size(), 0.0);
-  for (auto op_id = 0U; op_id < report_.op_types_.size(); ++op_id) {
+  for (const auto op_id : c10::irange(report_.op_types_.size())) {
     // check that we have valid times, otherwise return;
     // times might not be valid if network execution ended prematurely
     // because of operator errors
@@ -109,7 +111,7 @@ void ProfDAGCounters::ReportRunEnd() {
   // all operator times are valid, update report stats
   report_.runtime_stats_ += ProfDAGStats(runtime);

-  for (auto op_id = 0U; op_id < report_.op_types_.size(); ++op_id) {
+  for (const auto op_id : c10::irange(report_.op_types_.size())) {
     report_.time_per_op_total_[op_id] += ProfDAGStats(per_op_time_run[op_id]);
   }

@@ -159,7 +161,7 @@ ProfDAGProtos ProfDAGReport::GetPerOperatorCost() const {
   ProfDAGProtos prof_dag_protos;
   prof_dag_protos.set_net_name(net_name_);
   if (hasStats()) {
-    for (auto op_id = 0U; op_id < op_types_.size(); op_id++) {
+    for (const auto op_id : c10::irange(op_types_.size())) {
       const string& op_type = op_types_[op_id];
       auto buf = prof_dag_protos.add_stats();
       std::string op_output_name =
@@ -208,7 +210,7 @@ ProfDAGReport& ProfDAGReport::operator+=(const ProfDAGReport& rhs) {
       op_types_.size(),
       rhs.op_types_.size(),
       "Incompatible nets to add counters");
-  for (auto idx = 0U; idx < op_types_.size(); ++idx) {
+  for (const auto idx : c10::irange(op_types_.size())) {
     CAFFE_ENFORCE_EQ(
         op_types_[idx],
         rhs.op_types_[idx],
@@ -229,7 +231,7 @@ ProfDAGReport& ProfDAGReport::operator+=(const ProfDAGReport& rhs) {
   }

   // Do the addition
-  for (auto idx = 0U; idx < time_per_op_total_.size(); ++idx) {
+  for (const auto idx : c10::irange(time_per_op_total_.size())) {
     time_per_op_total_[idx] += rhs.time_per_op_total_.at(idx);
   }
   for (auto& item : time_per_op_type_total_) {

@@ -6,6 +6,8 @@

 #include "caffe2/operators/generate_proposals_op_util_boxes.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 static void AddConstInput(
@@ -719,7 +721,7 @@ TEST(GenerateProposalsTest, TestRealDownSampledRotated) {
   // Verify that the resulting angles are correct
   auto rois_data =
       Eigen::Map<const ERMatXf>(rois.data<float>(), rois.size(0), rois.size(1));
-  for (int i = 0; i < rois.size(0); ++i) {
+  for (const auto i : c10::irange(rois.size(0))) {
     EXPECT_LE(std::abs(rois_data(i, 5) - expected_angle), 1e-4);
   }
 }

@@ -4,6 +4,8 @@
 #include "caffe2/utils/eigen_utils.h"
 #include "caffe2/utils/math.h"

+#include <c10/util/irange.h>
+
 // Bounding box utils for generate_proposals_op
 // Reference: facebookresearch/Detectron/detectron/utils/boxes.py

@@ -148,7 +150,7 @@ EArrXXt<typename Derived1::Scalar> bbox_transform_rotated(
     const int period = angle_bound_hi - angle_bound_lo;
     CAFFE_ENFORCE(period > 0 && period % 180 == 0);
     auto angles = pred_boxes.col(4);
-    for (int i = 0; i < angles.size(); ++i) {
+    for (const auto i : c10::irange(angles.size())) {
      if (angles[i] < angle_bound_lo) {
        angles[i] += T(period);
      } else if (angles[i] > angle_bound_hi) {

@@ -8,6 +8,8 @@
 #include "caffe2/utils/eigen_utils.h"
 #include "caffe2/utils/math.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {
 namespace utils {

@@ -148,7 +150,7 @@ std::vector<int> soft_nms_cpu_upright(
     EArrX ovr = inter / (areas[i] + GetSubArray(areas, rest_indices) - inter);

     // Update scores based on computed IoU, overlap threshold and NMS method
-    for (int j = 0; j < rest_indices.size(); ++j) {
+    for (const auto j : c10::irange(rest_indices.size())) {
       typename Derived2::Scalar weight;
       switch (method) {
         case 1: // Linear
@@ -569,7 +571,7 @@ std::vector<int> nms_cpu_rotated(
         order.data() + 1, order.size() - 1);

     EArrX inter(rest_indices.size());
-    for (int j = 0; j < rest_indices.size(); ++j) {
+    for (const auto j : c10::irange(rest_indices.size())) {
       inter[j] = rotated_rect_intersection(
           rotated_rects[i], rotated_rects[rest_indices[j]]);
     }
@@ -638,7 +640,7 @@ std::vector<int> soft_nms_cpu_rotated(
     std::swap(pending(0), pending(max_pos));
     const auto& rest_indices = pending.tail(pending.size() - 1);
     EArrX inter(rest_indices.size());
-    for (int j = 0; j < rest_indices.size(); ++j) {
+    for (const auto j : c10::irange(rest_indices.size())) {
       inter[j] = rotated_rect_intersection(
           rotated_rects[i], rotated_rects[rest_indices[j]]);
     }
@@ -646,7 +648,7 @@ std::vector<int> soft_nms_cpu_rotated(

     // Update scores based on computed IoU, overlap threshold and NMS method
     // TODO (viswanath): Should angle info be included as well while filtering?
-    for (int j = 0; j < rest_indices.size(); ++j) {
+    for (const auto j : c10::irange(rest_indices.size())) {
       typename Derived2::Scalar weight;
       switch (method) {
         case 1: // Linear

@@ -3,6 +3,8 @@

 #include <gtest/gtest.h>

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 TEST(UtilsNMSTest, TestNMS) {
@@ -18,7 +20,7 @@ TEST(UtilsNMSTest, TestNMS) {
   // test utils::nms_cpu without indices input
   auto proposals = input.block(0, 0, input.rows(), 4);
   auto scores = input.col(4);
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals, scores, input_thresh[i], true /* legacy_plus_one */);
     EXPECT_EQ(output_gt[i], cur_out);
@@ -31,7 +33,7 @@ TEST(UtilsNMSTest, TestNMS) {
       indices.data(),
       indices.data() + indices.size(),
       [&scores](int lhs, int rhs) { return scores(lhs) > scores(rhs); });
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals,
         scores,
@@ -45,7 +47,7 @@ TEST(UtilsNMSTest, TestNMS) {
   // test utils::nms_cpu with topN
   std::vector<int> top_n = {1, 1, 2, 2, 3};
   auto gt_out = output_gt;
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals,
         scores,
@@ -149,7 +151,7 @@ TEST(UtilsNMSTest, TestSoftNMS) {
       9.99834776e-01, 9.99737203e-01;

   Eigen::ArrayXf out_scores;
-  for (int i = 0; i < method.size(); ++i) {
+  for (const auto i : c10::irange(method.size())) {
     LOG(INFO) << "Testing SoftNMS with method=" << method[i]
               << ", overlap_thresh=" << overlap_thresh[i];
     const auto& expected_scores = scores_gt.col(i);
@@ -254,7 +256,7 @@ TEST(UtilsNMSTest, TestNMSRotatedAngle0) {
   proposals.col(3) = input.col(3) - input.col(1) + 1.0; // h = y2 - y1 + 1

   auto scores = input.col(4);
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals, scores, input_thresh[i], true /* legacy_plus_one */);
     EXPECT_EQ(output_gt[i], cur_out);
@@ -267,7 +269,7 @@ TEST(UtilsNMSTest, TestNMSRotatedAngle0) {
       indices.data(),
       indices.data() + indices.size(),
       [&scores](int lhs, int rhs) { return scores(lhs) > scores(rhs); });
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals,
         scores,
@@ -281,7 +283,7 @@ TEST(UtilsNMSTest, TestNMSRotatedAngle0) {
   // test utils::nms_cpu with topN
   std::vector<int> top_n = {1, 1, 2, 2, 3};
   auto gt_out = output_gt;
-  for (int i = 0; i < input_thresh.size(); i++) {
+  for (const auto i : c10::irange(input_thresh.size())) {
     auto cur_out = utils::nms_cpu(
         proposals,
         scores,
@@ -342,7 +344,7 @@ TEST(UtilsNMSTest, TestSoftNMSRotatedAngle0) {
       9.99834776e-01, 9.99737203e-01;

   Eigen::ArrayXf out_scores;
-  for (int i = 0; i < method.size(); ++i) {
+  for (const auto i : c10::irange(method.size())) {
     LOG(INFO) << "Testing SoftNMS with method=" << method[i]
               << ", overlap_thresh=" << overlap_thresh[i];
     const auto& expected_scores = scores_gt.col(i);

@@ -5,6 +5,8 @@
 #include "caffe2/operators/half_float_ops.h"
 #include "caffe2/utils/conversions.h"

+#include <c10/util/irange.h>
+
 #include <gtest/gtest.h>
 C10_DECLARE_string(caffe_test_root);

@@ -57,7 +59,7 @@ TEST(Float16, SimpleTest) {
   const TensorCPU& resultTensor = resultBlob->Get<Tensor>();
   EXPECT_EQ(resultTensor.numel(), 5);

-  for (auto i = 0; i < data.size(); ++i) {
+  for (const auto i : c10::irange(data.size())) {
     EXPECT_NEAR(resultTensor.data<float>()[i], data[i], 0.01);
   }
 }
@@ -71,7 +73,7 @@ TEST(Float16, UniformDistributionTest) {
   int64_t size = 5000000L;
   std::vector<int64_t> shape = {size, 32};
   long tot_size = shape[0];
-  for (int i = 1; i < shape.size(); i++) {
+  for (const auto i : c10::irange(1, shape.size())) {
     tot_size *= shape[i];
   }
   caffe2::AddArgument<std::vector<int64_t>>("shape", shape, &def);

@@ -3,6 +3,8 @@

 #include "caffe2/operators/string_ops.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 class StringJoinOpTest : public testing::Test {
@@ -43,7 +45,7 @@ TEST_F(StringJoinOpTest, testString1DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size());
   auto* data = tensor->template mutable_data<std::string>();
-  for (int i = 0; i < input.size(); ++i) {
+  for (const auto i : c10::irange(input.size())) {
     *data++ = input[i];
   }

@@ -63,8 +65,8 @@ TEST_F(StringJoinOpTest, testString2DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size(), input[0].size());
   auto* data = tensor->template mutable_data<std::string>();
-  for (int i = 0; i < input.size(); ++i) {
-    for (int j = 0; j < input[0].size(); ++j) {
+  for (const auto i : c10::irange(input.size())) {
+    for (const auto j : c10::irange(input[0].size())) {
       *data++ = input[i][j];
     }
   }
@@ -83,7 +85,7 @@ TEST_F(StringJoinOpTest, testFloat1DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size());
   auto* data = tensor->template mutable_data<float>();
-  for (int i = 0; i < input.size(); ++i) {
+  for (const auto i : c10::irange(input.size())) {
     *data++ = input[i];
   }

@@ -103,8 +105,8 @@ TEST_F(StringJoinOpTest, testFloat2DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size(), input[0].size());
   auto* data = tensor->template mutable_data<float>();
-  for (int i = 0; i < input.size(); ++i) {
-    for (int j = 0; j < input[0].size(); ++j) {
+  for (const auto i : c10::irange(input.size())) {
+    for (const auto j : c10::irange(input[0].size())) {
       *data++ = input[i][j];
     }
   }
@@ -123,8 +125,8 @@ TEST_F(StringJoinOpTest, testLong2DJoin) {
   auto* tensor = BlobGetMutableTensor(blob.get(), CPU);
   tensor->Resize(input.size(), input[0].size());
   auto* data = tensor->template mutable_data<int64_t>();
-  for (int i = 0; i < input.size(); ++i) {
-    for (int j = 0; j < input[0].size(); ++j) {
+  for (const auto i : c10::irange(input.size())) {
+    for (const auto j : c10::irange(input[0].size())) {
       *data++ = input[i][j];
     }
   }

@@ -5,13 +5,15 @@
 #include <cstring>
 #include <sstream>

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 Tokenizer::Tokenizer(const std::vector<char>& delims, char escape)
     : escape_(escape) {
   reset();
   std::memset(delimTable_, 0, sizeof(delimTable_));
-  for (int i = 0; i < delims.size(); ++i) {
+  for (const auto i : c10::irange(delims.size())) {
     delimTable_[(unsigned char)delims.at(i)] = i + 1;
   }
 }

@@ -9,6 +9,8 @@
 #include "caffe2/operators/text_file_reader_utils.h"
 #include "caffe2/utils/string_utils.h"

+#include <c10/util/irange.h>
+
 #include <cstdio>
 #include <cstdlib>

@@ -31,20 +33,20 @@ TEST(TextFileReaderUtilsTest, TokenizeTest) {
       {1, "Second"}};

   EXPECT_EQ(expected.size(), tokenized.tokens().size());
-  for (int i = 0; i < expected.size(); ++i) {
+  for (const auto i : c10::irange(expected.size())) {
     const auto& token = tokenized.tokens().at(i);
     EXPECT_EQ(expected.at(i).first, token.startDelimId);
     EXPECT_EQ(expected.at(i).second, std::string(token.start, token.end));
   }

   // try each of the subsplits
-  for (int i = 0; i < ch.size() - 1; ++i) {
+  for (const auto i : c10::irange(ch.size() - 1)) {
     tokenizer.reset();
     char* mid = &ch.front() + i;

     tokenizer.next(&ch.front(), mid, tokenized);
     EXPECT_GE(expected.size(), tokenized.tokens().size());
-    for (int j = 0; j < tokenized.tokens().size(); ++j) {
+    for (const auto j : c10::irange(tokenized.tokens().size())) {
       const auto& token = tokenized.tokens().at(j);
       EXPECT_EQ(expected.at(j).first, token.startDelimId);
       EXPECT_EQ(expected.at(j).second, std::string(token.start, token.end));
@@ -53,7 +55,7 @@ TEST(TextFileReaderUtilsTest, TokenizeTest) {

     tokenizer.next(mid, &ch.back() + 1, tokenized);
     EXPECT_EQ(expected.size(), s1 + tokenized.tokens().size());
-    for (int j = 0; j < tokenized.tokens().size(); ++j) {
+    for (const auto j : c10::irange(tokenized.tokens().size())) {
       const auto& token = tokenized.tokens().at(j);
       EXPECT_EQ(expected.at(j + s1).first, token.startDelimId);
       EXPECT_EQ(

@@ -3,6 +3,8 @@
 #include "caffe2/core/types.h"
 #include "caffe2/perfkernels/common.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 /**
@@ -27,7 +29,7 @@ static bool EmbeddingLookupGenericSlow(
     bool normalize_by_lengths,
     OutType* out) {
   int64_t current = 0;
-  for (int m = 0; m < output_size; ++m) {
+  for (const auto m : c10::irange(output_size)) {
     memset(out, 0, sizeof(OutType) * block_size);
     if (current + lengths[m] > index_size) {
       return false;
@@ -52,7 +54,7 @@ static bool EmbeddingLookupGenericSlow(
         w = w * scale_bias[2 * indices[current]];
       }

-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] += w * input[block_size * indices[current] + j] + b;
       }

@@ -60,7 +62,7 @@ static bool EmbeddingLookupGenericSlow(
     }
     if (normalize_by_lengths && lengths[m]) {
       float scale = 1.f / lengths[m];
-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] *= scale;
       }
     }

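One behavioral detail worth noting for loops like the ones above: the irange index adopts the bound's type. A hedged sketch (the 64-bit output_size parameter mirrors the perfkernels signatures, but is an assumption here):

#include <cstdint>
#include <type_traits>

#include <c10/util/irange.h>

// Sketch: with a 64-bit bound the deduced loop index is also 64-bit, so
// index arithmetic in the body cannot truncate the way an `int m` counter
// could.
void walk(int64_t output_size) {
  for (const auto m : c10::irange(output_size)) {
    static_assert(
        std::is_same<decltype(m), const int64_t>::value,
        "index type follows the bound");
    (void)m;
  }
}
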
@@ -1,6 +1,7 @@
 #include "caffe2/perfkernels/embedding_lookup_idx.h"

 #include <c10/util/Half.h>
+#include <c10/util/irange.h>
 #include "caffe2/core/common.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/perfkernels/common.h"
@@ -29,7 +30,7 @@ static bool EmbeddingLookupGenericSlowIdx(
     bool normalize_by_lengths,
     OutType* out) {
   int64_t current = 0;
-  for (int m = 0; m < output_size; ++m) {
+  for (const auto m : c10::irange(output_size)) {
     memset(out, 0, sizeof(OutType) * block_size);
     if (current != offsets[m] - offsets[0]) {
       return false;
@@ -37,7 +38,7 @@ static bool EmbeddingLookupGenericSlowIdx(
     int64_t start_offset = offsets[m];
     int64_t end_offset = offsets[m + 1];
     int64_t length = end_offset - start_offset;
-    for (int i = start_offset; i < end_offset; ++i) {
+    for (const auto i : c10::irange(start_offset, end_offset)) {
       int64_t idx = indices[current];
       if (idx < 0 || idx >= data_size) {
         return false;
@@ -57,7 +58,7 @@ static bool EmbeddingLookupGenericSlowIdx(
         w = w * scale_bias[2 * indices[current]];
       }

-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] += w * input[block_size * indices[current] + j] + b;
       }

@@ -65,7 +66,7 @@ static bool EmbeddingLookupGenericSlowIdx(
     }
     if (normalize_by_lengths && length) {
       float scale = 1.f / length;
-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] *= scale;
       }
     }

@@ -4,6 +4,8 @@
 #include "caffe2/perfkernels/common.h"
 #include "caffe2/utils/cpuid.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 /**
@@ -31,7 +33,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlow(
   const auto scale_bias_offset = 8 / sizeof(InType);
   const int64_t fused_block_size = block_size + scale_bias_offset;
   int64_t current = 0;
-  for (int m = 0; m < output_size; ++m) {
+  for (const auto m : c10::irange(output_size)) {
     memset(out, 0, sizeof(OutType) * block_size);
     if (current + lengths[m] > index_size) {
       return false;
@@ -58,7 +60,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlow(
       const float scale = weight * scale_bias[0];
       const float bias = weight * scale_bias[1];

-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] += scale * input[fused_block_size * indices[current] + j] + bias;
       }

@@ -66,7 +68,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlow(
     }
     if (normalize_by_lengths && lengths[m]) {
       float scale = 1.f / lengths[m];
-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] *= scale;
       }
     }

@@ -4,6 +4,8 @@
 #include "caffe2/perfkernels/common.h"
 #include "caffe2/utils/cpuid.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 /**
@@ -31,7 +33,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlowIdx(
   const auto scale_bias_offset = 8 / sizeof(InType);
   const int64_t fused_block_size = block_size + scale_bias_offset;
   int64_t current = 0;
-  for (int m = 0; m < output_size; ++m) {
+  for (const auto m : c10::irange(output_size)) {
     memset(out, 0, sizeof(OutType) * block_size);
     if (current != offsets[m] - offsets[0]) {
       return false;
@@ -39,7 +41,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlowIdx(
     int64_t start_offset = offsets[m];
     int64_t end_offset = offsets[m + 1];
     int64_t length = end_offset - start_offset;
-    for (int i = start_offset; i < end_offset; ++i) {
+    for (const auto i : c10::irange(start_offset, end_offset)) {
       int64_t idx = indices[current];
       if (idx < 0 || idx >= data_size) {
         return false;
@@ -61,7 +63,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlowIdx(
       const float scale = weight * scale_bias[0];
       const float bias = weight * scale_bias[1];

-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] += scale * input[fused_block_size * indices[current] + j] + bias;
       }

@@ -69,7 +71,7 @@ static bool Fused8BitRowwiseEmbeddingLookupGenericSlowIdx(
     }
     if (normalize_by_lengths && length) {
       float scale = 1.f / length;
-      for (int j = 0; j < block_size; ++j) {
+      for (const auto j : c10::irange(block_size)) {
         out[j] *= scale;
       }
     }

@@ -6,6 +6,8 @@
 #include <cmath>
 #include <cstdint>

+#include <c10/util/irange.h>
+
 using std::uint64_t;
 using std::uint8_t;

@@ -65,7 +67,7 @@ void quantize_and_compress__avx2(

     // basic info
     float minimum_element = INFINITY, maximum_element = -INFINITY;
-    for (auto i = 0; i < input_size; ++i) {
+    for (const auto i : c10::irange(input_size)) {
       minimum_element =
           (input_data[i] < minimum_element) ? input_data[i] : minimum_element;
       maximum_element =

@@ -9,6 +9,8 @@
 #include "common.h"
 #include "math.h"

+#include <c10/util/irange.h>
+
 using std::uint64_t;
 using std::uint8_t;

@@ -32,7 +34,7 @@ void quantize_and_compress__base(

     // basic info
     float minimum_element = INFINITY, maximum_element = -INFINITY;
-    for (auto i = 0; i < input_size; ++i) {
+    for (const auto i : c10::irange(input_size)) {
       minimum_element =
           input_data[i] < minimum_element ? input_data[i] : minimum_element;
       maximum_element =

@@ -1,5 +1,7 @@
 #include <immintrin.h>

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 namespace {
@@ -80,7 +82,7 @@ void fp32_to_bfp14(const float* source, size_t size, float* dest) {

 void fp32_to_bfp16_scalar(const float* source, size_t size, float* dest) {
   constexpr int mask = 0xFFFF0000;
-  for (auto i = 0; i < size; i++) {
+  for (const auto i : c10::irange(size)) {
     *(int*)(dest + i) = *(int*)(source + i) & mask;
   }
 }

@@ -8,6 +8,8 @@

 #include <immintrin.h>

+#include <c10/util/irange.h>
+
 using namespace std;

 namespace dnnlowp {
@@ -306,7 +308,7 @@ TensorQuantizationParams NormMinimization::ChooseQuantizationParams(
   }

   float total_sum = 0;
-  for (int i = 0; i < bins_f.size(); ++i) {
+  for (const auto i : c10::irange(bins_f.size())) {
     total_sum += bins_f[i];
   }
   float selected_sum = 0;

@@ -13,6 +13,8 @@
 #include "caffe2/core/timer.h"
 #include "caffe2/core/workspace.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 // Constants for user tracepoints
@@ -88,7 +90,7 @@ bool BlobsQueue::blockingRead(
   DCHECK(canRead());
   auto& result = queue_[reader_ % queue_.size()];
   CAFFE_ENFORCE(inputs.size() >= result.size());
-  for (auto i = 0; i < result.size(); ++i) {
+  for (const auto i : c10::irange(result.size())) {
     auto bytes = BlobStat::sizeBytes(*result[i]);
     CAFFE_EVENT(stats_, queue_dequeued_bytes, bytes, i);
     using std::swap;
@@ -160,7 +162,7 @@ void BlobsQueue::doWrite(const std::vector<Blob*>& inputs) {
   auto& result = queue_[writer_ % queue_.size()];
   CAFFE_ENFORCE(inputs.size() >= result.size());
   const auto& name = name_.c_str();
-  for (auto i = 0; i < result.size(); ++i) {
+  for (const auto i : c10::irange(result.size())) {
     using std::swap;
     swap(*(inputs[i]), *(result[i]));
   }

@@ -4,6 +4,8 @@
 #include "caffe2/core/net.h"
 #include "caffe2/proto/caffe2_pb.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 using transform::Graph;
@@ -101,7 +103,7 @@ bool CommonSubexpressionEliminationTransform::ReplaceRule(
       g.node(parent_idx).children[new_idx] = new_op_parents.at(parent_idx);

     // Make the parents disown all our outdated siblings.
-    for (int i = 0; i < subgraph.size(); i++) {
+    for (const auto i : c10::irange(subgraph.size())) {
       g.node(parent_idx).children.erase(subgraph[i]);
     }
   }

@@ -5,6 +5,8 @@
 #include "caffe2/core/net.h"
 #include "caffe2/proto/caffe2_pb.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 // First, single source traverse through the netdef.
@@ -150,7 +152,7 @@ bool PatternNetTransform::ReplaceRule(
   std::unordered_map<string, string> external_renaming;

   // Figure out blob renamings
-  for (auto i = 0U; i < match.size(); i++) {
+  for (const auto i : c10::irange(match.size())) {
     int g_idx = match[i];
     int p_idx = ordered_ops_[i];
     for (int j = 0; j < p_.node(p_idx).op.input().size(); j++) {
@@ -179,7 +181,7 @@ bool PatternNetTransform::ReplaceRule(
   g.resize_nodes(offset + r_.size());

   // Append all the new operators.
-  for (auto i = 0U; i < r_.size(); i++) {
+  for (const auto i : c10::irange(r_.size())) {
     int new_node_idx = offset + i;

     OperatorDef new_op = r_.node(i).op;

@@ -8,6 +8,8 @@

 #include "caffe2/core/logging.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 // Common Eigen types that we will often use
@@ -146,7 +148,7 @@ void GetSubArray(
   // using T = typename Derived::Scalar;

   out_array->derived().resize(indices.size());
-  for (int i = 0; i < indices.size(); i++) {
+  for (const auto i : c10::irange(indices.size())) {
     DCHECK_LT(indices[i], array.size());
     (*out_array)[i] = array[indices[i]];
   }
@@ -179,7 +181,7 @@ void GetSubArrayRows(
     Eigen::ArrayBase<Derived2>* out_array) {
   out_array->derived().resize(row_indices.size(), array2d.cols());

-  for (int i = 0; i < row_indices.size(); i++) {
+  for (const auto i : c10::irange(row_indices.size())) {
     DCHECK_LT(row_indices[i], array2d.size());
     out_array->row(i) =
         array2d.row(row_indices[i]).template cast<typename Derived2::Scalar>();
@@ -190,7 +192,7 @@ void GetSubArrayRows(
 template <class Derived>
 std::vector<int> GetArrayIndices(const Eigen::ArrayBase<Derived>& array) {
   std::vector<int> ret;
-  for (int i = 0; i < array.size(); i++) {
+  for (const auto i : c10::irange(array.size())) {
     if (array[i]) {
       ret.push_back(i);
     }

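Eigen containers are a slightly different case from std::vector: size() returns a signed Eigen::Index rather than an unsigned size_t, and irange accepts either. A hedged sketch modeled on GetArrayIndices above (the function and array are illustrative):

#include <vector>

#include <Eigen/Core>
#include <c10/util/irange.h>

// Collects the positions of nonzero entries; the loop index is deduced as
// Eigen::Index, matching array.size() with no sign conversion.
std::vector<int> nonzero_indices(const Eigen::ArrayXf& array) {
  std::vector<int> ret;
  for (const auto i : c10::irange(array.size())) {
    if (array[i] != 0.f) {
      ret.push_back(static_cast<int>(i));
    }
  }
  return ret;
}
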
@@ -11,6 +11,8 @@
 #include "caffe2/utils/conversions.h"
 #include "caffe2/utils/math.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 TEST(MathTest, GemmNoTransNoTrans) {
@@ -456,7 +458,7 @@ class BroadcastTest : public testing::Test {
         Y_.mutable_data<float>(),
         cpu_context_.get());
     ASSERT_EQ(Y_data.size(), Y_.numel());
-    for (int i = 0; i < Y_data.size(); ++i) {
+    for (const auto i : c10::irange(Y_data.size())) {
       EXPECT_FLOAT_EQ(Y_data[i], Y_.data<float>()[i]);
     }
   }

@@ -5,6 +5,8 @@
 #include "caffe2/core/init.h"
 #include "observers/observer_config.h"

+#include <c10/util/irange.h>
+
 namespace caffe2 {

 const std::string NetObserverReporterPrint::IDENTIFIER = "Caffe2Observer ";
@@ -97,7 +99,7 @@ static std::string get_tensor_shapes(PerformanceInformation p) {
   std::stringstream shape_stream;
   if (!p.tensor_shapes.empty()) {
     shape_stream << "[";
-    for (int i = 0; i < p.tensor_shapes.size(); i++) {
+    for (const auto i : c10::irange(p.tensor_shapes.size())) {
       shape_stream << "[";
       for (int j = 0; j < p.tensor_shapes[i].dims_size(); j++) {
         shape_stream << p.tensor_shapes[i].dims(j) << ", ";
@@ -117,7 +119,7 @@ static std::string get_op_args(PerformanceInformation p) {
   if (!p.args.empty()) {
     std::stringstream args;
     args << "[";
-    for (int i = 0; i < p.args.size(); i++) {
+    for (const auto i : c10::irange(p.args.size())) {
       args << "{" << p.args[i].name() << ": ";
       if (p.args[i].has_i()) {
         args << p.args[i].i();