#include "caffe2/operators/pad_op.h"
|
|
|
|
#include <algorithm>
|
|
|
|
namespace caffe2 {
|
|
|
|
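// Maps the operator's "mode" string argument onto the PadMode enum consumed
// by the kernels below, e.g. StringToPadMode("reflect") returns
// PadMode::REFLECT; an unrecognized string aborts via CAFFE_THROW.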
PadMode StringToPadMode(const string& mode) {
  if (mode == "constant") {
    return PadMode::CONSTANT;
  } else if (mode == "reflect") {
    return PadMode::REFLECT;
  } else if (mode == "edge") {
    return PadMode::EDGE;
  } else {
    CAFFE_THROW("Unknown padding mode: " + mode);
  }
}

using std::min;
using std::max;

template <>
bool PadImageOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(0);
  auto* Y = Output(0);
  int channels = X.dim32(1);
  int height = X.dim32(2);
  int width = X.dim32(3);
  ConvPoolOpBase::SetOutputSize(X, Y, channels);

  const float* Xdata = X.data<float>();
  float* Ydata = Y->template mutable_data<float>();
  // The main loop
  int padded_height = Y->dim32(2);
  int padded_width = Y->dim32(3);

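  // In NCHW layout each (n, c) pair owns a contiguous height x width plane,
  // so the loops below advance Xdata/Ydata one plane at a time instead of
  // recomputing a full 4-D offset for every element.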
  switch (mode_) {
    case PadMode::CONSTANT:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              int h = ph - pad_t();
              int w = pw - pad_l();
              Ydata[ph * padded_width + pw] =
                  // NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
                  (h < 0 || w < 0 || h >= height || w >= width)
                  ? value_
                  : Xdata[h * width + w];
            }
          }
          // Do offset.
          Xdata += height * width;
          Ydata += padded_height * padded_width;
        }
      }
      break;
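    // For REFLECT, non-negative pads take a fast path: bulk-copy the valid
    // interior with a single strided CopyMatrix, then fix up only the border
    // rows and columns. Negative pads (i.e. cropping) fall back to the naive
    // per-element loop in the else branch below.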
    case PadMode::REFLECT:
      if (pad_r() >= 0 && pad_t() >= 0 && pad_l() >= 0 && pad_b() >= 0) {
        for (int n = 0; n < X.dim32(0); ++n) {
          for (int c = 0; c < channels; ++c) {
            // Handle the valid region:
            // i.e. Y[n][c][pad_t:pad_t+h][pad_l:pad_l+w]
            auto* Ystart = Ydata + pad_t() * padded_width + pad_l();
            math::CopyMatrix<CPUContext>(
                sizeof(float),
                height,
                width,
                Xdata,
                width,
                Ystart,
                padded_width,
                &context_);

// Fixup areas where we need to reflect
#define X(ph, pw)                 \
  int h = ph - pad_t();           \
  int w = pw - pad_l();           \
  h = max(h, -h);                 \
  h = min(h, 2 * height - h - 2); \
  w = max(w, -w);                 \
  w = min(w, 2 * width - w - 2);  \
  Ydata[ph * padded_width + pw] = Xdata[h * width + w]
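// The two clamps implement reflection without repeating the edge sample:
// max(h, -h) mirrors indices below zero (e.g. h == -2 -> 2), and
// min(h, 2 * height - h - 2) mirrors indices past height - 1 (with
// height == 4, h == 5 -> 1). The function-like macro X(ph, pw) does not
// collide with the tensor X, and it is #undef'd right after the border
// fixup below.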

            // Top part
            for (int ph = 0; ph < pad_t(); ++ph) {
              for (int pw = 0; pw < padded_width; ++pw) {
                X(ph, pw);
              }
            }

            // Bottom part
            for (int ph = padded_height - pad_b(); ph < padded_height; ++ph) {
              for (int pw = 0; pw < padded_width; ++pw) {
                X(ph, pw);
              }
            }

            // Interior
            for (int ph = pad_t(); ph < padded_height - pad_b(); ++ph) {
              // Left
              for (int pw = 0; pw < pad_l(); ++pw) {
                X(ph, pw);
              }
              // Right
              for (int pw = padded_width - pad_r(); pw < padded_width; ++pw) {
                X(ph, pw);
              }
            }
#undef X

            // Do offset.
            Xdata += height * width;
            Ydata += padded_height * padded_width;
          }
        }
      } else {
        for (int n = 0; n < X.dim32(0); ++n) {
          for (int c = 0; c < channels; ++c) {
            for (int ph = 0; ph < padded_height; ++ph) {
              for (int pw = 0; pw < padded_width; ++pw) {
                int h = ph - pad_t();
                int w = pw - pad_l();
                // max(h, -h) does reflection over 0
                h = max(h, -h);
                // min(h, 2 * height - h - 2) does reflection over height.
                h = min(h, 2 * height - h - 2);
                w = max(w, -w);
                w = min(w, 2 * width - w - 2);
                Ydata[ph * padded_width + pw] = Xdata[h * width + w];
              }
            }
            // Do offset.
            Xdata += height * width;
            Ydata += padded_height * padded_width;
          }
        }
      }
      break;
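    // EDGE replicates the nearest boundary sample by clamping the source
    // index into [0, height - 1] x [0, width - 1]; e.g. with pad_t() == 2,
    // padded rows ph == 0 and ph == 1 both read input row h == 0.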
    case PadMode::EDGE:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              // Bounds to the right range.
              int h = min(height - 1, max(ph - pad_t(), 0));
              int w = min(width - 1, max(pw - pad_l(), 0));
              Ydata[ph * padded_width + pw] = Xdata[h * width + w];
            }
          }
          // Do offset.
          Xdata += height * width;
          Ydata += padded_height * padded_width;
        }
      }
      break;
  }
  return true;
}

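// NHWC variant: channels are interleaved per pixel, so the spatial source
// index is computed once per (ph, pw) and the innermost loop handles all
// channels of that pixel; per-image offsets are therefore numel() / N rather
// than one height x width plane per channel.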
template <>
bool PadImageOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
  auto& X = Input(0);
  auto* Y = Output(0);
  int height = X.dim32(1);
  int width = X.dim32(2);
  int channels = X.dim32(3);
  ConvPoolOpBase::SetOutputSize(X, Y, channels);
  const float* Xdata = X.data<float>();
  float* Ydata = Y->template mutable_data<float>();

  // The main loop
  int padded_height = Y->dim32(1);
  int padded_width = Y->dim32(2);

  switch (mode_) {
    case PadMode::CONSTANT:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            int h = ph - pad_t();
            int w = pw - pad_l();
            const int pad_index = (ph * padded_width + pw) * channels;
            if (h < 0 || w < 0 || h >= height || w >= width) {
              for (int c = 0; c < channels; ++c) {
                Ydata[pad_index + c] = value_;
              }
            } else {
              const int input_index = (h * width + w) * channels;
              for (int c = 0; c < channels; ++c) {
                Ydata[pad_index + c] = Xdata[input_index + c];
              }
            }
          }
        }
        // Do offset.
        Xdata += X.numel() / X.dim32(0);
        Ydata += Y->numel() / Y->dim32(0);
      }
      break;
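    // Unlike the NCHW path, NHWC REFLECT has no bulk-copy fast path; every
    // padded pixel recomputes its reflected source index.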
    case PadMode::REFLECT:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            const int pad_index = (ph * padded_width + pw) * channels;
            int h = ph - pad_t();
            int w = pw - pad_l();
            // max(h, -h) does reflection over 0
            h = max(h, -h);
            // min(h, 2 * height - h - 2) does reflection over height.
            h = min(h, 2 * height - h - 2);
            w = max(w, -w);
            w = min(w, 2 * width - w - 2);
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              Ydata[pad_index + c] = Xdata[input_index + c];
            }
          }
        }
        // Do offset.
        Xdata += X.numel() / X.dim32(0);
        Ydata += Y->numel() / Y->dim32(0);
      }
      break;
    case PadMode::EDGE:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            const int pad_index = (ph * padded_width + pw) * channels;
            int h = min(height - 1, max(ph - pad_t(), 0));
            int w = min(width - 1, max(pw - pad_l(), 0));
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              Ydata[pad_index + c] = Xdata[input_index + c];
            }
          }
        }
        // Do offset.
        Xdata += X.numel() / X.dim32(0);
        Ydata += Y->numel() / Y->dim32(0);
      }
      break;
  }
  return true;
}

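// Gradient kernels: padding's backward pass maps each padded output position
// back to the input element it was read from and scatter-adds the incoming
// gradient there.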
template <>
bool PadImageGradientOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
  auto& dY = Input(0);

  auto* dX = Output(
      0,
      {dY.dim32(0),
       dY.dim32(1),
       dY.dim32(2) - pad_t() - pad_b(),
       dY.dim32(3) - pad_l() - pad_r()},
      at::dtype<float>());
  int padded_height = dY.dim32(2);
  int padded_width = dY.dim32(3);
  int channels = dX->dim32(1);
  int height = dX->dim32(2);
  int width = dX->dim32(3);

  const float* dYdata = dY.data<float>();
  float* dXdata = dX->template mutable_data<float>();
  math::Set<float, CPUContext>(dX->numel(), 0, dXdata, &context_);
  // The main loop
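  // dX is zero-filled above because the loops below accumulate with +=: in
  // REFLECT and EDGE modes several padded positions map to the same input
  // element, so their gradients must sum.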
  switch (mode_) {
    case PadMode::CONSTANT:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              int h = ph - pad_t();
              int w = pw - pad_l();
              if (!(h < 0 || w < 0 || h >= height || w >= width)) {
                dXdata[h * width + w] += dYdata[ph * padded_width + pw];
              }
            }
          }
          // Do offset.
          dXdata += height * width;
          dYdata += padded_height * padded_width;
        }
      }
      break;
    case PadMode::REFLECT:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              int h = ph - pad_t();
              int w = pw - pad_l();
              // max(h, -h) does reflection over 0
              h = max(h, -h);
              // min(h, 2 * height - h - 2) does reflection over height.
              h = min(h, 2 * height - h - 2);
              w = max(w, -w);
              w = min(w, 2 * width - w - 2);
              dXdata[h * width + w] += dYdata[ph * padded_width + pw];
            }
          }
          // Do offset.
          dXdata += height * width;
          dYdata += padded_height * padded_width;
        }
      }
      break;
    case PadMode::EDGE:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              int h = min(height - 1, max(ph - pad_t(), 0));
              int w = min(width - 1, max(pw - pad_l(), 0));
              dXdata[h * width + w] += dYdata[ph * padded_width + pw];
            }
          }
          // Do offset.
          dXdata += height * width;
          dYdata += padded_height * padded_width;
        }
      }
      break;
  }
  return true;
}

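// NHWC gradient: same scatter-add scheme, with the channel loop innermost so
// each padded pixel's gradient vector is accumulated into its source pixel
// in one contiguous run.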
template <>
bool PadImageGradientOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
  auto& dY = Input(0);

  auto* dX = Output(
      0,
      {dY.dim32(0),
       dY.dim32(1) - pad_t() - pad_b(),
       dY.dim32(2) - pad_l() - pad_r(),
       dY.dim32(3)},
      at::dtype<float>());
  int padded_height = dY.dim32(1);
  int padded_width = dY.dim32(2);
  int channels = dY.dim32(3);
  int height = dX->dim32(1);
  int width = dX->dim32(2);

  const float* dYdata = dY.data<float>();
  float* dXdata = dX->template mutable_data<float>();
  math::Set<float, CPUContext>(dX->numel(), 0, dXdata, &context_);

  switch (mode_) {
    case PadMode::CONSTANT:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            int h = ph - pad_t();
            int w = pw - pad_l();
            const int pad_index = (ph * padded_width + pw) * channels;
            if (!(h < 0 || w < 0 || h >= height || w >= width)) {
              const int input_index = (h * width + w) * channels;
              for (int c = 0; c < channels; ++c) {
                dXdata[input_index + c] += dYdata[pad_index + c];
              }
            }
          }
        }
        // Do offset.
        dXdata += dX->numel() / dX->dim32(0);
        dYdata += dY.numel() / dY.dim32(0);
      }
      break;
    case PadMode::REFLECT:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            const int pad_index = (ph * padded_width + pw) * channels;
            int h = ph - pad_t();
            int w = pw - pad_l();
            // max(h, -h) does reflection over 0
            h = max(h, -h);
            // min(h, 2 * height - h - 2) does reflection over height.
            h = min(h, 2 * height - h - 2);
            w = max(w, -w);
            w = min(w, 2 * width - w - 2);
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              dXdata[input_index + c] += dYdata[pad_index + c];
            }
          }
        }
        // Do offset.
        dXdata += dX->numel() / dX->dim32(0);
        dYdata += dY.numel() / dY.dim32(0);
      }
      break;
    case PadMode::EDGE:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            const int pad_index = (ph * padded_width + pw) * channels;
            // Bounds to the right range.
            int h = min(height - 1, max(ph - pad_t(), 0));
            int w = min(width - 1, max(pw - pad_l(), 0));
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              dXdata[input_index + c] += dYdata[pad_index + c];
            }
          }
        }
        // Do offset.
        dXdata += dX->numel() / dX->dim32(0);
        dYdata += dY.numel() / dY.dim32(0);
      }
      break;
  }
  return true;
}

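// Shape inference delegates to the pooling inference helper from
// ConvPoolOpBase, which already accounts for the pad attributes when
// computing the output H and W.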
template <>
std::vector<TensorShape> PadImageOp<float, CPUContext>::PadTensorInference(
    const OperatorDef& def,
    const vector<TensorShape>& in) {
  return ConvPoolOpBase::TensorInferenceForPool(def, in);
}

REGISTER_CPU_OPERATOR(PadImage, PadImageOp<float, CPUContext>);
REGISTER_CPU_GRADIENT_OPERATOR(
    PadImageGradient,
    PadImageGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(PadImage)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(PadImageOp<float, CPUContext>::PadTensorInference)
    .SetDoc(R"DOC(
PadImage pads values around the boundary of an image according to the pad
values and stride sizes defined by the ConvPoolOpBase operator.
)DOC")
    .Input(
        0,
        "X",
        "Input data tensor from the previous operator; dimensions "
        "depend on whether the NCHW or NHWC operators are being used. For "
        "example, in the former, the input has size (N x C x H x W), where N "
        "is the batch size, C is the number of channels, and H and W are the "
        "height and the width of the data. The corresponding permutation of "
        "dimensions is used in the latter case.")
    .Output(
        0,
        "Y",
        "Output data tensor produced by padding the H and W dimensions of "
        "the input tensor. Dimensions will vary based on the pad and stride "
        "sizes.");

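// A minimal usage sketch (assuming the Caffe2 Python frontend; "mode" and
// "value" are PadImageOp arguments, "pad" and "order" come from
// ConvPoolOpBase):
//
//   op = core.CreateOperator(
//       "PadImage", ["X"], ["Y"], mode="reflect", pad=2, order="NCHW")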
GRADIENT_OPERATOR_SCHEMA(PadImageGradient).NumInputs(1).NumOutputs(1);

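// The gradient of PadImage is a single PadImageGradient op that consumes the
// output gradient GO(0) and produces the input gradient GI(0); the pad
// arguments carry over from the forward definition.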
class GetPadImageGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "PadImageGradient", "", vector<string>{GO(0)}, vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(PadImage, GetPadImageGradient);

} // namespace caffe2