mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Codemod generated with clangr shard mode, 50 files per diff, clangr code(ndim()->dim()): diffusion/FBS/browse/master/fbcode/caffe2/caffe2/fb/codemods/TensorMethodRename.cpp Reviewed By: dzhulgakov Differential Revision: D12935748 fbshipit-source-id: fccec04e28ec049789f772e70d691382cb8927e0
83 lines
2.9 KiB
C++
83 lines
2.9 KiB
C++
#include "caffe2/operators/channel_backprop_stats_op.h"
|
|
#include "caffe2/utils/eigen_utils.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
template <>
|
|
bool ChannelBackpropStatsOp<CPUContext>::RunOnDevice() {
|
|
const auto& X = Input(INPUT);
|
|
const auto& dY = Input(OUTPUT_GRAD);
|
|
CAFFE_ENFORCE(X.dim() >= 3 && X.dim() <= 5);
|
|
const int N = X.dim32(0);
|
|
const int C = X.dim32(1);
|
|
const int H = X.dim32(2);
|
|
const int W = X.dim() > 3 ? X.dim32(3) : 1;
|
|
const int D = X.dim() > 4 ? X.dim32(4) : 1;
|
|
|
|
const int sampleSize = H * W * D;
|
|
|
|
Output(SCALE_GRAD)->Resize(C);
|
|
Output(BIAS_GRAD)->Resize(C);
|
|
auto* dScale = Output(SCALE_GRAD);
|
|
auto* dBias = Output(BIAS_GRAD);
|
|
|
|
ConstEigenArrayMap<float> X_arr(X.data<float>(), sampleSize, N * C);
|
|
ConstEigenArrayMap<float> dY_arr(dY.data<float>(), sampleSize, N * C);
|
|
ConstEigenVectorArrayMap<float> mean_arr(Input(SAVED_MEAN).data<float>(), C);
|
|
ConstEigenVectorArrayMap<float> inv_stddev_arr(
|
|
Input(SAVED_INV_STDDEV).data<float>(), C);
|
|
EigenVectorArrayMap<float> dBias_arr(
|
|
dBias->template mutable_data<float>(), C);
|
|
EigenVectorArrayMap<float> dScale_arr(
|
|
dScale->template mutable_data<float>(), C);
|
|
|
|
dBias_arr.setZero();
|
|
dScale_arr.setZero();
|
|
|
|
for (int nc = 0; nc < N * C; ++nc) {
|
|
int c = nc % C;
|
|
dBias_arr(c) += dY_arr.col(nc).sum();
|
|
dScale_arr(c) +=
|
|
((X_arr.col(nc) - mean_arr(c)) * inv_stddev_arr(c) * dY_arr.col(nc))
|
|
.sum();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Binds the ChannelBackpropStats operator name to its CPU implementation.
REGISTER_CPU_OPERATOR(ChannelBackpropStats, ChannelBackpropStatsOp<CPUContext>);

// Operator schema: four inputs (X, mean, inv_std, output_grad) and two outputs
// (scale_grad, bias_grad). The input description matches what the CPU kernel
// enforces: X may have 3, 4 or 5 dimensions.
OPERATOR_SCHEMA(ChannelBackpropStats)
    .NumInputs(4)
    .NumOutputs(2)
    .SetDoc(R"DOC(
Given an input tensor in NCHW format, the gradient for the output of SpatialBN
and the per-channel mean and inverse standard deviation vectors for the input,
computes the per-channel bias and scale gradient to be used during the backward
pass for subsequent spatial batch normalization gradient calculation. Typically,
the results of this op are subsequently reduced over multiple devices to obtain
statistics over a larger batch size in cases where the batch size for a single
model copy is too low to yield the full benefit of batch normalization. The
resulting bias and scale can then be plugged back into SpatialBNGradient to get
results over the larger batch size.
)DOC")
    .Input(
        0,
        "X",
        "The input tensor of shape N x C x H, N x C x H x W or "
        "N x C x H x W x D (3 to 5 dimensions)")
    .Input(
        1,
        "mean",
        "The mean saved from the forward pass as a 1-dimensional "
        "tensor of size C.")
    .Input(
        2,
        "inv_std",
        "The saved inverse standard deviation as a 1-dimensional tensor "
        "of size C.")
    .Input(
        3,
        "output_grad",
        "Gradient for the output layer of SpatialBN, here used as input "
        "because we are on the backward pass")
    .Output(0, "scale_grad", "Gradient for the scale vector")
    .Output(1, "bias_grad", "Gradient for the bias vector");

// This operator is itself part of a backward pass; no gradient is registered
// for it.
SHOULD_NOT_DO_GRADIENT(ChannelBackpropStats);
|
|
|
|
} // namespace caffe2
|