mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/15084 Codemod generated with clangr shard mode, 25 files per diff, motivation: https://github.com/pytorch/pytorch/pull/12407 Reviewed By: ezyang Differential Revision: D13419711 fbshipit-source-id: dd2b740c3f13d8087085bafc5571aaf908d1af42
154 lines
4.9 KiB
C++
154 lines
4.9 KiB
C++
#include "integral_image_op.h"
|
|
#include "caffe2/utils/eigen_utils.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
namespace {
// Views a raw float buffer as a 2D row-major Eigen matrix so the
// integral-image passes below can use (row, col) indexing without copies.
template <typename T>
using EigenMatrixMapRowMajor = Eigen::Map<
    Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;

// Read-only variant of EigenMatrixMapRowMajor for input tensors.
template <typename T>
using ConstEigenMatrixMapRowMajor = Eigen::Map<
    const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;
} // namespace
|
|
|
|
template <>
|
|
bool IntegralImageOp<float, CPUContext>::RunOnDevice() {
|
|
const auto& X = Input(0);
|
|
|
|
CAFFE_ENFORCE_EQ(X.dim(), 4, "Only supports 4D tensors for the momement");
|
|
|
|
vector<int64_t> out_shape(X.sizes().vec());
|
|
out_shape[2] += 1; // H + 1 output size
|
|
out_shape[3] += 1; // W + 1 output size
|
|
auto* Y = Output(0, out_shape, at::dtype<float>());
|
|
const int ind = X.dim32(0);
|
|
const int chans = X.dim32(1);
|
|
const int rows_in = X.dim32(2);
|
|
const int cols_in = X.dim32(3);
|
|
const int rows_out = Y->dim32(2);
|
|
const int cols_out = Y->dim32(3);
|
|
|
|
const float* input_data = X.template data<float>();
|
|
float* output_data = Y->template mutable_data<float>();
|
|
|
|
const int row_out_pass_size = ind * chans * rows_out;
|
|
const int row_in_pass_size = ind * chans * rows_in;
|
|
EigenMatrixMapRowMajor<float> Y_arr(output_data, row_out_pass_size, cols_out);
|
|
ConstEigenMatrixMapRowMajor<float> X_arr(
|
|
input_data, row_in_pass_size, cols_in);
|
|
|
|
// Row Pass
|
|
for (int i = 0; i < row_out_pass_size; i++) {
|
|
int row = i % rows_out;
|
|
int diff = i / rows_out + 1;
|
|
Y_arr(i, 0) = 0.;
|
|
if (row == 0) {
|
|
for (int j = 1; j < cols_out; ++j) {
|
|
Y_arr(i, j) = 0.;
|
|
}
|
|
} else {
|
|
for (int j = 1; j < cols_out; ++j) {
|
|
Y_arr(i, j) = Y_arr(i, j - 1) + X_arr(i - diff, j - 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Col Pass
|
|
const int col_out_pass_size = X.dim32(0) * chans * cols_out;
|
|
for (int i = 0; i < col_out_pass_size; i++) {
|
|
int col = i % cols_out;
|
|
int row = i / cols_out;
|
|
for (int j = row * rows_out + 1; j < (row + 1) * rows_out; ++j) {
|
|
Y_arr(j, col) += Y_arr(j - 1, col);
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Backward pass for IntegralImage: maps dY of shape (N, C, H+1, W+1) back
// to dX of shape (N, C, H, W) via two cumulative-sum passes (rows, then
// columns) over dY.
// NOTE(review): both passes accumulate *prefix* sums of dY, while the
// adjoint of the forward prefix-sum would be a *suffix* sum — confirm
// against the CUDA implementation / gradient checker before relying on it.
template <>
bool IntegralImageGradientOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0); // Original input to "forward" op
  auto& dY = Input(1); // Gradient of net w.r.t. output of "forward" op
  // (aka "gradOutput")
  auto* dX = Output(
      0, X.sizes(), at::dtype<float>()); // Gradient of net w.r.t. input to
  // "forward" op (aka "gradInput")

  const int ind = X.dim32(0); // batch size N
  const int chans = X.dim32(1); // channels C
  const int rows_in = dY.dim32(2); // H + 1 (gradient of padded output)
  const int cols_in = dY.dim32(3); // W + 1
  const int rows_out = dX->dim32(2); // H
  const int cols_out = dX->dim32(3); // W

  const float* input_data = dY.template data<float>();
  float* output_data = dX->template mutable_data<float>();

  // Flatten (N, C, rows) into a single matrix row index, as in the
  // forward pass.
  const int row_out_pass_size = ind * chans * rows_out;
  const int row_in_pass_size = ind * chans * rows_in;
  EigenMatrixMapRowMajor<float> dX_arr(
      output_data, row_out_pass_size, cols_out);
  ConstEigenMatrixMapRowMajor<float> dY_arr(
      input_data, row_in_pass_size, cols_in);
  // Intermediate buffer holding the row-pass result: (N*C*(H+1), W).
  Eigen::MatrixXf tmp(row_in_pass_size, cols_out);

  // Row Pass dY(N, C, H+1, W+1) => tmp(N, C, H+1, W)
  // tmp(i, j) = sum of dY(i, 0..j); the last dY column is dropped.
  for (int i = 0; i < row_in_pass_size; i++) {
    tmp(i, 0) = dY_arr(i, 0);
    for (int j = 1; j < cols_out; ++j) {
      tmp(i, j) = tmp(i, j - 1) + dY_arr(i, j);
    }
  }

  // Col Pass tmp(N, C, H+1, W)=>dX(N, C, H, W)
  // Running sum down each column of tmp; per (n, c) plane, tmp has
  // rows_in = rows_out + 1 rows while dX has rows_out, hence the two
  // separate start offsets below.
  const int col_out_pass_size = X.dim32(0) * chans * cols_out;
  for (int i = 0; i < col_out_pass_size; i++) {
    int col = i % cols_out; // column within the plane
    int row_out_start = (i / cols_out) * rows_out; // plane base in dX
    int row_in_start = (i / cols_out) * rows_in; // plane base in tmp
    dX_arr(row_out_start, col) = tmp(row_in_start, col);
    for (int j = 1; j < rows_out; ++j) {
      dX_arr(row_out_start + j, col) =
          dX_arr(row_out_start + j - 1, col) + tmp(row_in_start + j, col);
    }
  }
  return true;
}
|
|
|
|
// Register the CPU implementations of the forward and gradient operators.
REGISTER_CPU_OPERATOR(IntegralImage, IntegralImageOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    IntegralImageGradient,
    IntegralImageGradientOp<float, CPUContext>);

// Input: X; Output: Y
OPERATOR_SCHEMA(IntegralImage)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes an integral image, which contains the sum of pixel values within
an image vertically and horizontally. This integral image can then be used
with other detection and tracking techniques.
)DOC")
    .Input(0, "X", "Images tensor of the form (N, C, H, W)")
    .Output(0, "Y", "Integrated image of the form (N, C, H+1, W+1)");

// Input: X, dY (aka "gradOutput"); Output: dX (aka "gradInput")
OPERATOR_SCHEMA(IntegralImageGradient).NumInputs(2).NumOutputs(1);
|
|
|
|
// Gradient maker: wires IntegralImageGradient as the backward op of
// IntegralImage, passing it the forward input X and the output gradient.
class GetIntegralImageGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "IntegralImageGradient",
        "",
        vector<string>{I(0), GO(0)}, // inputs: forward input X, dY
        vector<string>{GI(0)}); // output: dX
  }
};
|
|
|
|
// Hook the gradient maker above into the registry for IntegralImage.
REGISTER_GRADIENT(IntegralImage, GetIntegralImageGradient);

} // namespace caffe2
|