#include "caffe2/operators/elu_op.h"
|
|
|
|
#include <algorithm>
|
|
#include <functional>
|
|
#include <string>
|
|
|
|
#include "caffe2/utils/eigen_utils.h"
|
|
|
|
namespace caffe2 {

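// CPU specialization of the ELU forward functor: elementwise,
// Y = X for X >= 0 and Y = alpha * (exp(X) - 1) for X < 0.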
template <>
template <typename T>
bool EluFunctor<CPUContext>::
operator()(const int N, const T* X, T* Y, CPUContext* /* context */) const {
  ConstEigenVectorArrayMap<T> X_arr(X, N);
  EigenVectorMap<T>(Y, N) =
      (X_arr < 0).select(alpha * (X_arr.exp() - T(1)), X_arr);
  return true;
}

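// CPU specialization of the ELU backward pass: given the forward output Y and
// the incoming gradient dY, dX = dY * (Y + alpha) where Y < 0 (there
// Y = alpha * (exp(X) - 1), so dY/dX = alpha * exp(X) = Y + alpha) and
// dX = dY elsewhere.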
template <>
template <typename T>
bool EluGradientFunctor<CPUContext>::Forward(
    const std::vector<int>& Y_dims,
    const std::vector<int>& /* dY_dims */,
    const T* Y,
    const T* dY,
    T* dX,
    CPUContext* /* context */) const {
  const int size = std::accumulate(
      Y_dims.cbegin(), Y_dims.cend(), 1, std::multiplies<int>());
  ConstEigenVectorArrayMap<T> Y_arr(Y, size);
  ConstEigenVectorArrayMap<T> dY_arr(dY, size);
  EigenVectorArrayMap<T>(dX, size) =
      (Y_arr < 0).select(dY_arr * (Y_arr + alpha), dY_arr);
  return true;
}

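// Register the CPU implementations: the forward Elu op is a unary elementwise
// op and EluGradient is a binary elementwise op over (Y, dY).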
REGISTER_CPU_OPERATOR(
    Elu,
    UnaryElementwiseWithArgsOp<
        TensorTypes<float>,
        CPUContext,
        EluFunctor<CPUContext>>);
REGISTER_CPU_GRADIENT_OPERATOR(
    EluGradient,
    BinaryElementwiseWithArgsOp<
        TensorTypes<float>,
        CPUContext,
        EluGradientFunctor<CPUContext>>);

// Input: X, output: Y
OPERATOR_SCHEMA(Elu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(

This op implements the exponential linear unit (ELU) activation function as described in [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)](https://arxiv.org/abs/1511.07289). The op takes an input tensor $X$ of arbitrary shape, computes the ELU operation elementwise, and returns a tensor $Y$ of the same shape as the input. The alpha parameter may be passed as an argument, but defaults to 1. The ELU operation is defined as

$$y = f(x) = \begin{cases}\alpha(e^x-1) & x < 0 \\ x & otherwise\end{cases}$$

Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elu_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elu_op.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Elu",
    ["X"],
    ["Y"],
    alpha=1.1
)

workspace.FeedBlob("X", np.random.randn(3, 3).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"), "\n")

workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

X:
[[ 0.35339102 1.1860217 -0.10710736]
[-3.1173866 -0.1889988 -0.20330353]
[ 1.8525308 -0.368949 0.506277 ]]

Y:
[[ 0.35339102 1.1860217 -0.11172786]
[-1.0513 -0.18943374 -0.20236646]
[ 1.8525308 -0.33939326 0.506277 ]]

```

</details>

)DOC")
.Input(0, "X", "1D input tensor of data to be operated on.")
|
|
.Output(0, "Y", "1D input tensor, calculated as described above.")
|
|
.Arg(
|
|
"alpha",
|
|
"*(type: float; default: 1.0)* Defines alpha parameter used in calculation.")
|
|
.InheritOnnxSchema();
|
|
|
|
// Input: Y, dY, output: dX
GRADIENT_OPERATOR_SCHEMA(EluGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
EluGradient takes both Y and dY and uses this to update dX according to the
chain rule and the derivative of the exponential linear function.
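
Concretely, since $y = \alpha(e^x-1)$ for $x < 0$ (so $dy/dx = y + \alpha$ there) and $y = x$ otherwise, the elementwise update is

$$dX = \begin{cases}dY \cdot (Y + \alpha) & Y < 0 \\ dY & otherwise\end{cases}$$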
)DOC");
|
|
|
|
namespace {

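// Gradient maker for Elu: emits a single EluGradient op that consumes the
// forward output O(0) and the output gradient GO(0), and writes the input
// gradient GI(0).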
class GetEluGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  std::vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        std::vector<std::string>{O(0), GO(0)},
        std::vector<std::string>{GI(0)});
  }
};

} // namespace

REGISTER_GRADIENT(Elu, GetEluGradient);

} // namespace caffe2