pytorch/caffe2/utils/math/broadcast.cc
Xiaomeng Yang 2db847b3a7 Separate elementwise level2 math functions (#16753)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/16753

Separate elementwise level2 math functions

i-am-not-moving-c2-to-c10

Reviewed By: houseroad

Differential Revision: D13954928

fbshipit-source-id: 1ca7a5d3da96e32510f502e5e4e79168854bee67
2019-02-07 18:38:26 -08:00

56 lines
3.1 KiB
C++

#include "caffe2/utils/math/broadcast.h"

#include <cstddef>

#include "caffe2/core/context.h"
#include "caffe2/utils/eigen_utils.h"
namespace caffe2 {
namespace math {
// Defines the CPUContext specializations of math::AffineChannel for element
// type T, one per storage order. Both compute, for every element belonging to
// channel c,
//
//   Y = X * scale[c] + bias[c]
//
// Parameters (identical for both specializations):
//   N      outer (batch) count.
//   C      number of channels; scale and bias are each read as C values.
//   HxW    number of elements per channel within one batch entry.
//   X      input buffer, read as N * C * HxW elements.
//   scale  per-channel multiplier.
//   bias   per-channel offset.
//   Y      output buffer, written as N * C * HxW elements.
//   The trailing CPUContext* argument is unused.
//
// Element counts that are products of two arguments (C * HxW, N * HxW) are
// evaluated in std::ptrdiff_t instead of int, so a large tensor does not
// trigger signed int overflow before the value reaches pointer arithmetic or
// an Eigen map dimension.
//
// NOTE(review): the row/column reasoning in the comments below assumes the
// EigenArrayMap aliases from eigen_utils.h use Eigen's default column-major
// layout -- confirm against that header.
#define CAFFE2_SPECIALIZED_AFFINE_CHANNEL(T)                               \
  template <>                                                              \
  C10_EXPORT void AffineChannel<T, CPUContext, StorageOrder::NCHW>(        \
      const int N,                                                         \
      const int C,                                                         \
      const int HxW,                                                       \
      const T* X,                                                          \
      const T* scale,                                                      \
      const T* bias,                                                       \
      T* Y,                                                                \
      CPUContext* /* context */) {                                         \
    ConstEigenVectorArrayMap<T> scale_arr(scale, C);                       \
    ConstEigenVectorArrayMap<T> bias_arr(bias, C);                         \
    /* Widen before multiplying: C * HxW may exceed the range of int. */   \
    const std::ptrdiff_t stride = static_cast<std::ptrdiff_t>(C) * HxW;    \
    const T* X_ptr = X;                                                    \
    T* Y_ptr = Y;                                                          \
    for (int i = 0; i < N; ++i) {                                          \
      /* View one batch entry as an HxW-by-C array (one column per     */  \
      /* channel) and broadcast scale/bias as row vectors across rows. */  \
      EigenArrayMap<T>(Y_ptr, HxW, C) =                                    \
          (ConstEigenArrayMap<T>(X_ptr, HxW, C).rowwise() *                \
           scale_arr.transpose())                                          \
              .rowwise() +                                                 \
          bias_arr.transpose();                                            \
      X_ptr += stride;                                                     \
      Y_ptr += stride;                                                     \
    }                                                                      \
  }                                                                        \
  template <>                                                              \
  C10_EXPORT void AffineChannel<T, CPUContext, StorageOrder::NHWC>(        \
      const int N,                                                         \
      const int C,                                                         \
      const int HxW,                                                       \
      const T* X,                                                          \
      const T* scale,                                                      \
      const T* bias,                                                       \
      T* Y,                                                                \
      CPUContext* /* context */) {                                         \
    /* Widen before multiplying: N * HxW may exceed the range of int. */   \
    const std::ptrdiff_t cols = static_cast<std::ptrdiff_t>(N) * HxW;      \
    /* View the whole buffer as a C-by-(N * HxW) array (C channel      */  \
    /* values per column) and broadcast scale/bias down every column.  */  \
    EigenArrayMap<T>(Y, C, cols) =                                         \
        (ConstEigenArrayMap<T>(X, C, cols).colwise() *                     \
         ConstEigenVectorArrayMap<T>(scale, C))                            \
            .colwise() +                                                   \
        ConstEigenVectorArrayMap<T>(bias, C);                              \
  }
CAFFE2_SPECIALIZED_AFFINE_CHANNEL(float)
#undef CAFFE2_SPECIALIZED_AFFINE_CHANNEL
} // namespace math
} // namespace caffe2