#include "caffe2/operators/reduction_ops.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
REGISTER_CPU_OPERATOR(SumElements, SumElementsOp<float, CPUContext>);
|
|
REGISTER_CPU_OPERATOR(SumElementsInt, SumElementsIntOp<int, CPUContext>);
|
|
REGISTER_CPU_OPERATOR(SumSqrElements, SumSqrElementsOp<CPUContext>);
|
|
|
|
REGISTER_CPU_OPERATOR(
|
|
SumElementsGradient,
|
|
SumElementsGradientOp<float, CPUContext>);
|
|
|
|
REGISTER_CPU_OPERATOR(RowwiseMax, MaxReductionOp<float, CPUContext, true>);
|
|
REGISTER_CPU_OPERATOR(
|
|
RowwiseMaxGradient,
|
|
MaxReductionGradientOp<float, CPUContext, true>);
|
|
REGISTER_CPU_OPERATOR(
|
|
ColwiseMaxGradient,
|
|
MaxReductionGradientOp<float, CPUContext, false>);
|
|
REGISTER_CPU_OPERATOR(ColwiseMax, MaxReductionOp<float, CPUContext, false>);
|
|
|
|
OPERATOR_SCHEMA(SumElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc(R"DOC(
Sums the elements of the input tensor. Tensor type must be float32.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

sum_op = core.CreateOperator(
    "SumElements",
    ["X"],
    ["Y"]
)

avg_op = core.CreateOperator(
    "SumElements",
    ["X"],
    ["Y"],
    average=True
)

workspace.FeedBlob("X", np.random.randint(10, size=(3,3)).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(sum_op)
print("Y (sum_op):", workspace.FetchBlob("Y"))
workspace.RunOperatorOnce(avg_op)
print("Y (avg_op):", workspace.FetchBlob("Y"))

```

**Result**

```

X:
 [[7. 2. 5.]
 [9. 4. 2.]
 [1. 2. 5.]]
Y (sum_op): 37.0
Y (avg_op): 4.111111

```

</details>

)DOC")
    .Arg(
        "average",
        "(*bool*): set to True to compute the average of the elements rather than the sum")
    .Input(
        0,
        "X",
        "(*Tensor`<float>`*): blob containing the input tensor to be summed")
    .Output(
        0,
        "sum",
        "(*Tensor`<float>`*): Scalar tensor containing the sum (or average)");

OPERATOR_SCHEMA(SumElementsInt)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::INT32)
    .SetDoc("Sums the integer elements of the input tensor.")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum");
SHOULD_NOT_DO_GRADIENT(SumElementsInt);
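
// Editorial usage sketch (hypothetical, not part of the registered docs),
// assuming the usual caffe2.python imports (core, workspace) and numpy as np:
//
//   op = core.CreateOperator("SumElementsInt", ["X"], ["Y"])
//   workspace.FeedBlob("X", np.arange(6, dtype=np.int32).reshape(2, 3))
//   workspace.RunOperatorOnce(op)
//   print(workspace.FetchBlob("Y"))  # 0 + 1 + ... + 5 = 15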

OPERATOR_SCHEMA(SumSqrElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc("Sums the squared elements of the input tensor.")
    .Arg("average", "whether to average the squared elements rather than sum them")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum of squares");

OPERATOR_SCHEMA(SumElementsGradient).NumInputs(2).NumOutputs(1);

class GetSumElementsGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // Inputs to the gradient op: the forward input X and the upstream
    // gradient dY; output: the gradient with respect to X.
    return SingleGradientDef(
        "SumElementsGradient",
        "",
        vector<string>{I(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(SumElements, GetSumElementsGradient);
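
// Editorial note: for Y = sum_i X_i, dY/dX_i = 1, so dX_i = dY for every
// element; with average=True, Y = (1/n) * sum_i X_i with n = X.numel(), and
// dX_i = dY / n. SumElementsGradientOp::RunOnDevice, defined later in this
// file, broadcasts exactly that scalar.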

OPERATOR_SCHEMA(RowwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Compute row-wise max reduction of the input tensor. This op takes one input, $X$, of shape $BxMxN$, where $B$ is the batch size, $M$ is the number of rows, and $N$ is the number of columns. The output of this op, $Y$, is a matrix of shape $BxM$: entry $(b, m)$ holds the maximum of row $m$ of batch element $b$.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "RowwiseMax",
    ["X"],
    ["Y"]
)

# Create X, simulating a batch of two 4x4 matrices
X = np.random.randint(0, high=20, size=(2, 4, 4))
print("X:\n", X)

# Feed X into workspace
workspace.FeedBlob("X", X.astype(np.float32))

# Run op
workspace.RunOperatorOnce(op)

# Collect output
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

X:
 [[[ 5 12 10  1]
  [ 4 16  2 15]
  [ 5 11 12 15]
  [15  4 17 19]]

 [[16  5  5 13]
  [17  2  1 17]
  [18  3 19  5]
  [14 16 10 16]]]
Y:
 [[12. 16. 15. 19.]
 [16. 17. 19. 16.]]

```

</details>

)DOC")
    .Input(
        0,
        "X",
        "A tensor of dimensions $B x M x N$ to compute rowwise-max. Here, $B$ is the batch size, and $M$ and $N$ are the number of rows and columns of each element of the batch, respectively.")
    .Output(
        0,
        "Y",
        "The output tensor of shape $B x M$, where each row represents the row-wise maximums for that element of the input batch.");

OPERATOR_SCHEMA(RowwiseMaxGradient).NumInputs(3).NumOutputs(1);
class GetRowwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // Inputs: forward input X, forward output Y, and upstream gradient dY.
    return SingleGradientDef(
        "RowwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(RowwiseMax, GetRowwiseMaxGradient);
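
// Editorial note on ties: MaxReductionGradientOp (defined later in this file)
// propagates dY to every position equal to the row maximum, not to a single
// argmax. E.g. for the row X = [3, 7, 7] with upstream gradient dY = 1, the
// resulting gradient is dX = [0, 1, 1].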

OPERATOR_SCHEMA(ColwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Compute column-wise max reduction of the input tensor. This op takes one input, $X$, of shape $BxMxN$, where $B$ is the batch size, $M$ is the number of rows, and $N$ is the number of columns. The output of this op, $Y$, is a matrix of shape $BxN$: entry $(b, n)$ holds the maximum of column $n$ of batch element $b$.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "ColwiseMax",
    ["X"],
    ["Y"]
)

# Create X, simulating a batch of two 4x4 matrices
X = np.random.randint(0, high=20, size=(2, 4, 4))
print("X:\n", X)

# Feed X into workspace
workspace.FeedBlob("X", X.astype(np.float32))

# Run op
workspace.RunOperatorOnce(op)

# Collect output
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

X:
 [[[17 15  2  6]
  [ 8 12  6  0]
  [ 6  9  7  3]
  [ 4 13 16 13]]

 [[ 0  3  4 12]
  [18  1 17 12]
  [ 7 17 13 14]
  [12 17  2  1]]]
Y:
 [[17. 15. 16. 13.]
 [18. 17. 17. 14.]]

```

</details>

)DOC")
    .Input(
        0,
        "X",
        "A tensor of dimensions $B x M x N$ to compute columnwise-max. Here, $B$ is the batch size, and $M$ and $N$ are the number of rows and columns of each element of the batch, respectively.")
    .Output(
        0,
        "Y",
        "The output tensor of shape $B x N$, where each row represents the column-wise maximums for that element of the input batch.");

OPERATOR_SCHEMA(ColwiseMaxGradient).NumInputs(3).NumOutputs(1);
class GetColwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "ColwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(ColwiseMax, GetColwiseMaxGradient);
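
// Editorial end-to-end sketch (hypothetical, assuming the usual
// caffe2.python imports and that net.AddGradientOperators seeds the listed
// outputs with all-one gradients):
//
//   net = core.Net("colwise_max_demo")
//   net.ColwiseMax(["X"], ["Y"])
//   grad_map = net.AddGradientOperators(["Y"])
//   workspace.FeedBlob("X", np.array([[[3., 7.], [7., 5.]]], dtype=np.float32))
//   workspace.RunNetOnce(net)
//   print(workspace.FetchBlob(grad_map["X"]))  # [[[0., 1.], [1., 0.]]]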

template <typename T, class Context>
bool SumElementsGradientOp<T, Context>::RunOnDevice()
// TODO: T21635077 fix float-divide-by-zero undefined behavior
#if defined(__has_feature)
#if __has_feature(__address_sanitizer__)
    __attribute__((__no_sanitize__("float-divide-by-zero")))
#endif
#endif
{
  auto& X = Input(0);
  // Copy the scalar upstream gradient to the CPU so it can be read below.
  Tensor sum_grad(Input(1), CPU);

  auto* dX = Output(0, X.sizes(), at::dtype<T>());
  DCHECK_EQ(sum_grad.numel(), 1);
  // Broadcast dY (scaled by 1/numel when averaging) to every element of dX.
  math::Set<T, Context>(
      dX->numel(),
      static_cast<T>(
          sum_grad.template data<T>()[0] * (average_ ? 1.0 / X.numel() : 1)),
      dX->template mutable_data<T>(),
      &context_);
  return true;
}
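
// Editorial check (hypothetical direct invocation of the gradient op, with
// the same assumed imports): with average=True on a 3x3 input and dY = 9,
// every entry of dX is 9 / 9 = 1.
//
//   op = core.CreateOperator(
//       "SumElementsGradient", ["X", "dY"], ["dX"], average=True)
//   workspace.FeedBlob("X", np.ones((3, 3), dtype=np.float32))
//   workspace.FeedBlob("dY", np.array([9.0], dtype=np.float32))
//   workspace.RunOperatorOnce(op)
//   print(workspace.FetchBlob("dX"))  # 3x3 array of ones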

template <typename T, class Context, bool ROWWISE>
bool MaxReductionGradientOp<T, Context, ROWWISE>::RunOnDevice() {
  auto& X = Input(0);
  auto& Y = Input(1);
  auto& dY = Input(2);

  auto* dX = Output(0, X.sizes(), at::dtype<T>());

  CAFFE_ENFORCE_EQ(X.dim(), 3);

  const int batch_size = X.dim32(0);
  const int M = X.dim32(1);
  const int N = X.dim32(2);

  const T* Xdata = X.template data<T>();
  const T* Ydata = Y.template data<T>();
  const T* dYdata = dY.template data<T>();
  T* dXdata = dX->template mutable_data<T>();

  const int input_size = M * N;
  for (int i = 0; i < batch_size; ++i) {
    const T* Xdata_i = Xdata + i * input_size;
    T* dXdata_i = dXdata + i * input_size;
    if (ROWWISE) {
      // Y and dY hold one value per row; pass dY through wherever X equals
      // the row maximum (every tied maximum receives the gradient).
      const T* Ydata_i = Ydata + i * M;
      const T* dYdata_i = dYdata + i * M;
      for (int m = 0; m < M; ++m) {
        const T* Xdata_m = Xdata_i + m * N;
        T* dXdata_m = dXdata_i + m * N;
        for (int n = 0; n < N; ++n) {
          if (Xdata_m[n] == Ydata_i[m]) {
            dXdata_m[n] = dYdata_i[m];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    } else {
      // Colwise case: Y and dY hold one value per column.
      const T* Ydata_i = Ydata + i * N;
      const T* dYdata_i = dYdata + i * N;
      for (int n = 0; n < N; ++n) {
        for (int m = 0; m < M; ++m) {
          const T* Xdata_m = Xdata_i + m * N;
          T* dXdata_m = dXdata_i + m * N;
          if (Xdata_m[n] == Ydata_i[n]) {
            dXdata_m[n] = dYdata_i[n];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    }
  }

  return true;
}

} // namespace caffe2