Add count_include_pad to average_pool_gradient_op (#15997)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15997

Add count_include_pad to average_pool_gradient_op

Reviewed By: houseroad

Differential Revision: D13648339

fbshipit-source-id: 205cb2acb32dc24a85256b628298b1a11f0ffa2c
Authored by Xiaomeng Yang on 2019-01-15 16:44:33 -08:00; committed by Facebook Github Bot
parent b2eb98f6c3
commit 13f38ab79d
4 changed files with 1444 additions and 944 deletions
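
Background: count_include_pad controls whether the implicit zero padding is counted in the averaging divisor, which in the backward pass decides how the incoming gradient dY is scaled before being scattered over each pooling window. A minimal 1-D NumPy sketch of that scaling (illustrative only, not the Caffe2 kernel; the function name and signature are made up):

import numpy as np

# Illustrative only: dY is spread over each pooling window, divided either by the
# full kernel size (count_include_pad=True) or by the number of in-bounds elements
# in that window (count_include_pad=False).
def average_pool_grad_1d(dY, in_size, kernel, stride, pad, count_include_pad):
    dX = np.zeros(in_size, dtype=np.float32)
    out_size = (in_size + 2 * pad - kernel) // stride + 1
    for o in range(out_size):
        start = o * stride - pad
        end = start + kernel
        lo, hi = max(start, 0), min(end, in_size)
        divisor = kernel if count_include_pad else (hi - lo)
        dX[lo:hi] += dY[o] / divisor
    return dX

With count_include_pad=True every window divides by the full kernel size; with False, windows overlapping the padding divide only by their in-bounds element count, so the gradient op has to mirror whichever divisor the forward pass used.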


@@ -617,6 +617,48 @@ void RunMaxPoolGradient3D(
 } // namespace
+template <>
+template <>
+bool AveragePoolFunctor<CPUContext>::
+    GlobalPoolingBackward<float, StorageOrder::NCHW>(
+        const int N,
+        const int C,
+        const int HxW,
+        const float* dY,
+        const float* /* X */,
+        const float* /* Y */,
+        float* dX,
+        CPUContext* /* context */) const {
+  const int NxC = N * C;
+  EigenArrayMap<float> dX_arr(dX, HxW, NxC);
+  const float scale = 1.0f / static_cast<float>(HxW);
+  for (int i = 0; i < NxC; ++i) {
+    dX_arr.col(i).setConstant(dY[i] * scale);
+  }
+  return true;
+}
+template <>
+template <>
+bool AveragePoolFunctor<CPUContext>::
+    GlobalPoolingBackward<float, StorageOrder::NHWC>(
+        const int N,
+        const int C,
+        const int HxW,
+        const float* dY,
+        const float* /* X */,
+        const float* /* Y */,
+        float* dX,
+        CPUContext* /* context */) const {
+  ConstEigenArrayMap<float> dY_arr(dY, C, N);
+  const float scale = 1.0f / static_cast<float>(HxW);
+  for (int i = 0; i < N; ++i) {
+    EigenArrayMap<float>(dX + i * HxW * C, C, HxW).colwise() =
+        dY_arr.col(i) * scale;
+  }
+  return true;
+}
 template <>
 template <typename T, StorageOrder kOrder>
 bool AveragePoolFunctor<CPUContext>::Backward(
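
For reference, an illustrative NumPy equivalent (NCHW layout, names assumed) of the global average-pooling backward pass added above: since the forward output is the mean over all HxW positions, every spatial position of dX receives dY / HxW.

import numpy as np

# Illustrative NumPy counterpart of AveragePoolFunctor::GlobalPoolingBackward (NCHW):
# dY has shape (N, C); each of the HxW positions gets an equal share dY / HxW.
def global_average_pool_grad_nchw(dY, HxW):
    N, C = dY.shape
    return np.broadcast_to(dY[:, :, None] / HxW, (N, C, HxW)).copy()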
@@ -699,6 +741,52 @@ bool AveragePoolFunctor<CPUContext>::Backward(
   }
 }
+template <>
+template <>
+bool MaxPoolFunctor<CPUContext>::
+    GlobalPoolingBackward<float, StorageOrder::NCHW>(
+        const int N,
+        const int C,
+        const int HxW,
+        const float* dY,
+        const float* X,
+        const float* Y,
+        float* dX,
+        CPUContext* /* context */) const {
+  const int NxC = N * C;
+  ConstEigenArrayMap<float> X_arr(X, HxW, NxC);
+  EigenArrayMap<float> dX_arr(dX, HxW, NxC);
+  for (int i = 0; i < NxC; ++i) {
+    dX_arr.col(i) = (X_arr.col(i) == Y[i]).template cast<float>() * dY[i];
+  }
+  return true;
+}
+template <>
+template <>
+bool MaxPoolFunctor<CPUContext>::
+    GlobalPoolingBackward<float, StorageOrder::NHWC>(
+        const int N,
+        const int C,
+        const int HxW,
+        const float* dY,
+        const float* X,
+        const float* Y,
+        float* dX,
+        CPUContext* /* context */) const {
+  ConstEigenArrayMap<float> Y_arr(Y, C, N);
+  ConstEigenArrayMap<float> dY_arr(dY, C, N);
+  for (int i = 0; i < N; ++i) {
+    ConstEigenArrayMap<float> X_arr(X + i * HxW * C, C, HxW);
+    EigenArrayMap<float> dX_arr(dX + i * HxW * C, C, HxW);
+    for (int j = 0; j < HxW; ++j) {
+      dX_arr.col(j) =
+          (X_arr.col(j) == Y_arr.col(i)).template cast<float>() * dY_arr.col(i);
+    }
+  }
+  return true;
+}
 template <>
 template <typename T, StorageOrder kOrder>
 bool MaxPoolFunctor<CPUContext>::Backward(
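
Likewise, an illustrative NumPy equivalent (NCHW layout) of the MaxPool GlobalPoolingBackward above: the gradient is routed to every position whose value equals the pooled maximum, so tied maxima each receive the full dY, matching the Eigen expression (X == Y) * dY.

import numpy as np

# Illustrative NumPy counterpart of MaxPoolFunctor::GlobalPoolingBackward (NCHW):
# X is (N, C, HxW), Y is (N, C) pooled maxima, dY is (N, C).
def global_max_pool_grad_nchw(dY, X, Y):
    mask = (X == Y[:, :, None]).astype(X.dtype)  # 1 wherever X attains the maximum
    return mask * dY[:, :, None]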

File diff suppressed because it is too large.


@@ -115,7 +115,16 @@ class PoolGradientOp final : public ConvPoolOpBase<Context> {
     const int C = X.dim32(1);
     const std::vector<int> X_HW_dims = GetDims(X);
     const std::vector<int> Y_HW_dims = GetDims(Y);
-    ConvPoolOpBase<CPUContext>::ComputePads(X_HW_dims);
+    ConvPoolOpBase<Context>::ComputePads(X_HW_dims);
+    const T* dY_data = dY.template data<T>();
+    const T* X_data = X.template data<T>();
+    const T* Y_data = Y.template data<T>();
+    T* dX_data = dX->template mutable_data<T>();
+    if (global_pooling_) {
+      const int HxW = X.numel() / (N * C);
+      return functor_.template GlobalPoolingBackward<T, StorageOrder::NCHW>(
+          N, C, HxW, dY_data, X_data, Y_data, dX_data, &context_);
+    }
     return functor_.template Backward<T, StorageOrder::NCHW>(
         N,
         C,
@@ -125,10 +134,10 @@ class PoolGradientOp final : public ConvPoolOpBase<Context> {
         dilation_,
         stride_,
         pads_,
-        dY.template data<T>(),
-        X.template data<T>(),
-        Y.template data<T>(),
-        dX->template mutable_data<T>(),
+        dY_data,
+        X_data,
+        Y_data,
+        dX_data,
         &context_);
   }
@@ -142,7 +151,16 @@ class PoolGradientOp final : public ConvPoolOpBase<Context> {
     const int C = X.dim32(ndim - 1);
     const std::vector<int> X_HW_dims = GetDims(X);
     const std::vector<int> Y_HW_dims = GetDims(Y);
-    ConvPoolOpBase<CPUContext>::ComputePads(X_HW_dims);
+    ConvPoolOpBase<Context>::ComputePads(X_HW_dims);
+    const T* dY_data = dY.template data<T>();
+    const T* X_data = X.template data<T>();
+    const T* Y_data = Y.template data<T>();
+    T* dX_data = dX->template mutable_data<T>();
+    if (global_pooling_) {
+      const int HxW = X.numel() / (N * C);
+      return functor_.template GlobalPoolingBackward<T, StorageOrder::NHWC>(
+          N, C, HxW, dY_data, X_data, Y_data, dX_data, &context_);
+    }
     return functor_.template Backward<T, StorageOrder::NHWC>(
         N,
         C,
@@ -152,10 +170,10 @@ class PoolGradientOp final : public ConvPoolOpBase<Context> {
         dilation_,
         stride_,
         pads_,
-        dY.template data<T>(),
-        X.template data<T>(),
-        Y.template data<T>(),
-        dX->template mutable_data<T>(),
+        dY_data,
+        X_data,
+        Y_data,
+        dX_data,
         &context_);
   }
@@ -192,6 +210,17 @@ struct AveragePoolFunctor {
       T* Y,
       Context* context) const;
+  template <typename T, StorageOrder kOrder>
+  bool GlobalPoolingBackward(
+      int N,
+      int C,
+      int HxW,
+      const T* dY,
+      const T* X,
+      const T* Y,
+      T* dX,
+      Context* context) const;
   template <typename T, StorageOrder kOrder>
   bool Backward(
       int N,
@@ -238,6 +267,17 @@ struct MaxPoolFunctor {
       T* Y,
       Context* context) const;
+  template <typename T, StorageOrder kOrder>
+  bool GlobalPoolingBackward(
+      int N,
+      int C,
+      int HxW,
+      const T* dY,
+      const T* X,
+      const T* Y,
+      T* dX,
+      Context* context) const;
   template <typename T, StorageOrder kOrder>
   bool Backward(
       int N,


@@ -335,6 +335,69 @@ class TestPooling(hu.HypothesisTestCase):
         if 'MaxPool' not in op_type:
             self.assertGradientChecks(gc, op, [X], 0, [0])
+    @given(op_type=st.sampled_from(["MaxPool", "MaxPoolND"]),
+           dim=st.integers(1, 3),
+           N=st.integers(1, 3),
+           C=st.integers(1, 3),
+           D=st.integers(3, 5),
+           H=st.integers(3, 5),
+           W=st.integers(3, 5),
+           kernel=st.integers(1, 3),
+           stride=st.integers(1, 3),
+           pad=st.integers(0, 2),
+           order=st.sampled_from(["NCHW", "NHWC"]),
+           engine=st.sampled_from(["", "CUDNN"]),
+           **hu.gcs)
+    def test_max_pool_grad(
+            self, op_type, dim, N, C, D, H, W, kernel, stride, pad, order,
+            engine, gc, dc):
+        assume(pad < kernel)
+        assume(dim > 1 or engine == "")
+        if hiputl.run_in_hip(gc, dc):
+            if dim != 2:
+                assume(engine != "CUDNN")
+        elif engine == "CUDNN":
+            assume(order == "NCHW")
+        if op_type.endswith("ND"):
+            op_type = op_type.replace("N", str(dim))
+        op = core.CreateOperator(
+            op_type,
+            ["X"],
+            ["Y"],
+            kernels=[kernel] * dim,
+            strides=[stride] * dim,
+            pads=[pad] * dim * 2,
+            order=order,
+            engine=engine,
+        )
+        if dim == 1:
+            size = W
+            dims = [N, C, W]
+            axes = [0, 2, 1]
+        elif dim == 2:
+            size = H * W
+            dims = [N, C, H, W]
+            axes = [0, 2, 3, 1]
+        else:
+            size = D * H * W
+            dims = [N, C, D, H, W]
+            axes = [0, 2, 3, 4, 1]
+        X = np.zeros((N * C, size)).astype(np.float32)
+        for i in range(N * C):
+            X[i, :] = np.arange(size, dtype=np.float32) / size
+            np.random.shuffle(X[i, :])
+        X = X.reshape(dims)
+        if order == "NHWC":
+            X = np.transpose(X, axes)
+        self.assertDeviceChecks(dc, op, [X], [0])
+        self.assertGradientChecks(
+            gc, op, [X], 0, [0], threshold=5e-2, stepsize=1e-3)
 if __name__ == "__main__":
     import unittest
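
A note on the test above: each (n, c) slice of X is a shuffled permutation of distinct values, which keeps every pooled maximum unique. With tied maxima the max-pool gradient is ambiguous, and a finite-difference check would disagree with the analytic (X == Y) * dY rule used in the kernels above, as this small sketch (illustrative, not part of the test) shows:

import numpy as np

# Two tied maxima in one pooling window.
x = np.array([1.0, 1.0], dtype=np.float32)
y = x.max()
dy = 1.0

# Analytic gradient as computed by the kernel: dY is routed to every tied position.
analytic = (x == y).astype(np.float32) * dy          # [1., 1.]

# Central-difference estimate for the first element: d max(x) / d x[0] is ~0.5 at a tie.
h = 1e-3
numeric = (max(x[0] + h, x[1]) - max(x[0] - h, x[1])) / (2 * h)

print(analytic, numeric)  # [1. 1.] vs. ~0.5 -- hence the distinct, shuffled inputs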