Summary: The second input (`lengths`) is not supported.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/16727
Differential Revision: D14054105
Pulled By: houseroad
fbshipit-source-id: 36b8d00460f9623696439e1bd2a6bc60b7bb263c
#include "caffe2/operators/reduce_ops.h"
|
|
|
|
#include <algorithm>
|
|
#include <functional>
|
|
#include <vector>
|
|
|
|
#include "caffe2/utils/math.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
namespace {
|
|
|
|
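// Shared backward pass for ReduceMin/ReduceMax: iterates over every element
// of dX and copies the corresponding dY value only where the input element
// equals the reduced result Y; every other element receives a zero gradient.
// When several elements tie for the min/max, each of them receives the full
// dY value.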
template <typename T>
void ComputeReduceMinMaxGradient(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* Y_data,
    T* dX_data) {
  const int dX_size = std::accumulate(
      dX_dims.cbegin(), dX_dims.cend(), 1, std::multiplies<int>());
  const int ndim = dX_dims.size();
  std::vector<int> index(ndim, 0);
  for (int dX_index = 0; dX_index < dX_size; ++dX_index) {
    const int dY_index =
        math::utils::GetIndexFromDims(ndim, dY_dims.data(), index.data());
    dX_data[dX_index] =
        Y_data[dY_index] == X_data[dX_index] ? dY_data[dY_index] : T(0);
    math::utils::IncreaseIndexInDims(ndim, dX_dims.data(), index.data());
  }
}

} // namespace

template <>
template <typename T>
bool MinReducer<CPUContext>::Backward(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* Y_data,
    T* dX_data,
    CPUContext* /* context */) const {
  ComputeReduceMinMaxGradient(
      dY_dims, dX_dims, dY_data, X_data, Y_data, dX_data);
  return true;
}

template <>
template <typename T>
bool MaxReducer<CPUContext>::Backward(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* Y_data,
    T* dX_data,
    CPUContext* /* context */) const {
  ComputeReduceMinMaxGradient(
      dY_dims, dX_dims, dY_data, X_data, Y_data, dX_data);
  return true;
}

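// Each operator below is an instantiation of the generic ReduceOp /
// ReduceGradientOp templates, parameterized by (1) the set of supported
// element types, (2) the device context, and (3) the reducer functor that
// implements the actual reduction and its backward pass.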
REGISTER_CPU_OPERATOR(
    ReduceMin,
    ReduceOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        MinReducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceMinGradient,
    ReduceGradientOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        MinReducer<CPUContext>>);

OPERATOR_SCHEMA(ReduceMin)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the min of the input tensor's elements along the provided axes.
The resulting tensor has the same rank as the input if `keepdims` equals
True. If `keepdims` equals False, the reduced dimensions are pruned.
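A minimal usage sketch (hand-worked, not from a recorded run; it assumes the
same `workspace`/`core`/`np` setup as the `ReduceSum` example later in this
file):

```
op = core.CreateOperator(
    "ReduceMin",
    ["X"],
    ["Y"],
    axes=(0,),
    keepdims=0
)

workspace.FeedBlob("X", np.array([[1., 5.], [3., 2.]], dtype=np.float32))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))  # column-wise minima: [1. 2.]
```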
)DOC")
    .Arg("axes", "A list of integers, along which to reduce.")
    .Arg(
        "keepdims",
        "Keep the reduced dimension(s) or not, default True keeps the reduced "
        "dimension(s).")
    .Input(0, "data", "An input tensor.")
    .Output(0, "reduced", "Reduced output tensor.");

OPERATOR_SCHEMA(ReduceMinGradient).NumInputs(3).NumOutputs(1);
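// The *Gradient schemas in this file take three inputs, wired up by
// GetReduceGradient at the bottom of the file: the output gradient dY
// (GO(0)), the original input X (I(0)), and the forward output Y (O(0)).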

REGISTER_CPU_OPERATOR(
    ReduceMax,
    ReduceOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        MaxReducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceMaxGradient,
    ReduceGradientOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        MaxReducer<CPUContext>>);

OPERATOR_SCHEMA(ReduceMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the max of the input tensor's elements along the provided axes.
The resulting tensor has the same rank as the input if `keepdims` equals
True. If `keepdims` equals False, the reduced dimensions are pruned.
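A minimal usage sketch (hand-worked, not from a recorded run; same assumed
`workspace`/`core`/`np` setup as the `ReduceSum` example later in this file):

```
op = core.CreateOperator(
    "ReduceMax",
    ["X"],
    ["Y"],
    axes=(0,),
    keepdims=0
)

workspace.FeedBlob("X", np.array([[1., 5.], [3., 2.]], dtype=np.float32))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))  # column-wise maxima: [3. 5.]
```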
)DOC")
    .Arg("axes", "A list of integers, along which to reduce.")
    .Arg(
        "keepdims",
        "Keep the reduced dimension(s) or not, default True keeps the reduced "
        "dimension(s).")
    .Input(0, "data", "An input tensor.")
    .Output(0, "reduced", "Reduced output tensor.");

OPERATOR_SCHEMA(ReduceMaxGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    ReduceSum,
    ReduceOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        SumReducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceSumGradient,
    ReduceGradientOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        SumReducer<CPUContext>>);

OPERATOR_SCHEMA(ReduceSum)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the **sum** of the input tensor's elements along the provided `axes`. The resulting tensor has the same rank as the input if the `keepdims` argument equals 1 (default). If `keepdims` is set to 0, then the `axes` dimensions are pruned.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduce_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "ReduceSum",
    ["X"],
    ["Y"],
    axes=(0,1),
    keepdims=0
)

workspace.FeedBlob("X", np.random.randint(10, size=(1,2,5,5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))

```

**Result**

```

X:
[[[[5. 3. 7. 9. 5.]
   [4. 5. 1. 8. 3.]
   [1. 0. 9. 7. 6.]
   [7. 5. 0. 3. 1.]
   [6. 4. 4. 8. 3.]]

  [[8. 9. 6. 7. 7.]
   [5. 5. 4. 7. 0.]
   [9. 7. 6. 6. 7.]
   [7. 5. 2. 4. 2.]
   [4. 5. 1. 9. 4.]]]]
Y:
[[13. 12. 13. 16. 12.]
 [ 9. 10. 5. 15. 3.]
 [10. 7. 15. 13. 13.]
 [14. 10. 2. 7. 3.]
 [10. 9. 5. 17. 7.]]

```

</details>

)DOC")
    .Arg("axes", "(*Tuple(int)*): list of axes to reduce")
    .Arg(
        "keepdims",
        "(*int*): set to 1 to keep the reduced dimension(s) (default=1), else set to 0 to not keep the reduced dimension(s)")
    .Input(0, "X", "(*Tensor`<float>`*): input tensor")
    .Output(0, "Y", "(*Tensor`<float>`*): reduced tensor");

OPERATOR_SCHEMA(ReduceSumGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    ReduceMean,
    ReduceOp<TensorTypes<float>, CPUContext, MeanReducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceMeanGradient,
    ReduceGradientOp<TensorTypes<float>, CPUContext, MeanReducer<CPUContext>>);

OPERATOR_SCHEMA(ReduceMean)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the **mean** of the input tensor's elements along the provided `axes`. The resulting tensor has the same rank as the input if the `keepdims` argument equals 1 (default). If `keepdims` is set to 0, then the `axes` dimensions are pruned.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduce_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "ReduceMean",
    ["X"],
    ["Y"],
    axes=(0,1),
    keepdims=0
)

workspace.FeedBlob("X", np.random.randint(10, size=(1,2,5,5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))

```

**Result**

```

X:
[[[[9. 0. 3. 6. 0.]
   [3. 4. 5. 0. 9.]
   [6. 9. 1. 1. 5.]
   [6. 2. 3. 7. 7.]
   [3. 1. 1. 0. 1.]]

  [[4. 3. 9. 8. 1.]
   [8. 2. 0. 4. 0.]
   [8. 9. 9. 0. 2.]
   [7. 2. 5. 8. 9.]
   [5. 9. 1. 9. 0.]]]]
Y:
[[6.5 1.5 6. 7. 0.5]
 [5.5 3. 2.5 2. 4.5]
 [7. 9. 5. 0.5 3.5]
 [6.5 2. 4. 7.5 8. ]
 [4. 5. 1. 4.5 0.5]]

```

</details>

)DOC")
    .Arg("axes", "(*Tuple(int)*): list of axes to reduce")
    .Arg(
        "keepdims",
        "(*int*): set to 1 to keep the reduced dimension(s) (default=1), else set to 0 to not keep the reduced dimension(s)")
    .Input(0, "X", "(*Tensor`<float>`*): input tensor")
    .Output(0, "Y", "(*Tensor`<float>`*): reduced tensor");

OPERATOR_SCHEMA(ReduceMeanGradient).NumInputs(3).NumOutputs(1);

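// Backward pass for the L1 reduction: since d|x|/dx = sign(x), each input
// element receives +/- the incoming gradient according to its sign, with a
// dead zone of width kEps around zero where the subgradient is taken as 0.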
template <>
template <typename T>
bool L1Reducer<CPUContext>::Backward(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* /* Y_data */,
    T* dX_data,
    CPUContext* /* context */) const {
  const float kEps = 1e-12f;
  const int dX_size = std::accumulate(
      dX_dims.cbegin(), dX_dims.cend(), 1, std::multiplies<int>());
  const int ndim = dX_dims.size();
  std::vector<int> index(ndim, 0);
  for (int dX_index = 0; dX_index < dX_size; ++dX_index) {
    const int dY_index =
        math::utils::GetIndexFromDims(ndim, dY_dims.data(), index.data());
    float temp = X_data[dX_index];
    if (temp < -kEps) {
      dX_data[dX_index] = -dY_data[dY_index];
    } else if (temp > kEps) {
      dX_data[dX_index] = dY_data[dY_index];
    } else {
      dX_data[dX_index] = T(0);
    }
    math::utils::IncreaseIndexInDims(ndim, dX_dims.data(), index.data());
  }
  return true;
}

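// Backward pass for the L2 reduction: for Y = ||X||_2 the gradient is
// dX_i = dY * X_i / Y. When the norm is (near) zero the quotient is
// ill-defined, so the incoming gradient is passed through unscaled.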
template <>
template <typename T>
bool L2Reducer<CPUContext>::Backward(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* Y_data,
    T* dX_data,
    CPUContext* /* context */) const {
  const float kEps = 1e-12f;
  const int dX_size = std::accumulate(
      dX_dims.cbegin(), dX_dims.cend(), 1, std::multiplies<int>());
  const int ndim = dX_dims.size();
  std::vector<int> index(ndim, 0);
  for (int dX_index = 0; dX_index < dX_size; ++dX_index) {
    const int dY_index =
        math::utils::GetIndexFromDims(ndim, dY_dims.data(), index.data());
    T norm = Y_data[dY_index];
    if (norm < kEps) {
      dX_data[dX_index] = dY_data[dY_index];
    } else {
      dX_data[dX_index] = dY_data[dY_index] * X_data[dX_index] / norm;
    }
    math::utils::IncreaseIndexInDims(ndim, dX_dims.data(), index.data());
  }
  return true;
}

REGISTER_CPU_OPERATOR(
    ReduceL1,
    ReduceOp<TensorTypes<float>, CPUContext, L1Reducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceL1Gradient,
    ReduceGradientOp<TensorTypes<float>, CPUContext, L1Reducer<CPUContext>>);

OPERATOR_SCHEMA(ReduceL1)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the **L1 norm** of the input tensor's elements along the provided `axes`. The resulting tensor has the same rank as the input if the `keepdims` argument equals 1 (default). If `keepdims` is set to 0, then the `axes` dimensions are pruned.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduce_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "ReduceL1",
    ["X"],
    ["Y"],
    axes=(0,1),
    keepdims=0
)

workspace.FeedBlob("X", np.random.randint(10, size=(1,2,5,5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))

```

**Result**

```

X:
[[[[ 2. 7. 6. 4. 5.]
   [ 2. 1. 9. 8. 7.]
   [ 4. 9. 1. 0. 0.]
   [ 6. 4. 0. 8. 1.]
   [ 1. 7. 1. 0. 2.]]

  [[ 5. 8. 1. 7. 7.]
   [ 4. 5. 6. 5. 4.]
   [ 1. 9. 6. 6. 3.]
   [ 6. 6. 8. 8. 4.]
   [ 2. 3. 5. 8. 1.]]]]

Y:
[[ 7. 15. 7. 11. 12.]
 [ 6. 6. 15. 13. 11.]
 [ 5. 18. 7. 6. 3.]
 [ 12. 10. 8. 16. 5.]
 [ 3. 10. 6. 8. 3.]]

```

</details>

)DOC")
    .Arg("axes", "(*Tuple(int)*): list of axes to reduce")
    .Arg(
        "keepdims",
        "(*int*): set to 1 to keep the reduced dimension(s) (default=1), else set to 0 to not keep the reduced dimension(s)")
    .Input(0, "X", "(*Tensor`<float>`*): input tensor")
    .Output(0, "Y", "(*Tensor`<float>`*): reduced tensor");

OPERATOR_SCHEMA(ReduceL1Gradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    ReduceL2,
    ReduceOp<TensorTypes<float>, CPUContext, L2Reducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceL2Gradient,
    ReduceGradientOp<TensorTypes<float>, CPUContext, L2Reducer<CPUContext>>);

OPERATOR_SCHEMA(ReduceL2)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the **L2 norm** of the input tensor's elements along the provided `axes`. The resulting tensor has the same rank as the input if the `keepdims` argument equals 1 (default). If `keepdims` is set to 0, then the `axes` dimensions are pruned.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduce_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "ReduceL2",
    ["X"],
    ["Y"],
    axes=(0,1),
    keepdims=0
)

workspace.FeedBlob("X", np.random.randint(10, size=(1,2,5,5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))

```

**Result**

```

X:
[[[[ 8. 0. 2. 5. 1.]
   [ 1. 3. 0. 4. 0.]
   [ 1. 3. 6. 7. 7.]
   [ 6. 9. 8. 4. 6.]
   [ 6. 1. 5. 7. 3.]]

  [[ 2. 4. 6. 2. 8.]
   [ 1. 1. 8. 0. 8.]
   [ 5. 9. 0. 3. 2.]
   [ 1. 7. 3. 7. 3.]
   [ 6. 8. 9. 8. 7.]]]]

Y:
[[ 8.24621105 4. 6.3245554 5.38516474 8.06225777]
 [ 1.41421354 3.1622777 8. 4. 8. ]
 [ 5.09901953 9.48683262 6. 7.6157732 7.28010988]
 [ 6.08276272 11.40175438 8.54400349 8.06225777 6.70820379]
 [ 8.48528099 8.06225777 10.29563046 10.63014603 7.6157732 ]]

```

</details>

)DOC")
    .Arg("axes", "(*Tuple(int)*): list of axes to reduce")
    .Arg(
        "keepdims",
        "(*int*): set to 1 to keep the reduced dimension(s) (default=1), else set to 0 to not keep the reduced dimension(s)")
    .Input(0, "X", "(*Tensor`<float>`*): input tensor")
    .Output(0, "Y", "(*Tensor`<float>`*): reduced tensor")
    .InheritOnnxSchema("ReduceMean");

OPERATOR_SCHEMA(ReduceL2Gradient).NumInputs(3).NumOutputs(1);

namespace {

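// Gradient maker shared by every reduce op in this file: for an op named
// "ReduceX" it emits a single "ReduceXGradient" op whose inputs are the
// output gradient GO(0), the forward input I(0), and the forward output
// O(0), and whose only output is the input gradient GI(0).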
class GetReduceGradient final : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;

  std::vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        std::vector<string>{GO(0), I(0), O(0)},
        std::vector<string>{GI(0)});
  }
};

} // namespace

REGISTER_GRADIENT(ReduceMin, GetReduceGradient);
REGISTER_GRADIENT(ReduceMax, GetReduceGradient);
REGISTER_GRADIENT(ReduceSum, GetReduceGradient);
REGISTER_GRADIENT(ReduceMean, GetReduceGradient);
REGISTER_GRADIENT(ReduceL1, GetReduceGradient);
REGISTER_GRADIENT(ReduceL2, GetReduceGradient);

} // namespace caffe2