/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "elementwise_linear_dnnlowp_op.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
using namespace dnnlowp;
|
|
|
|
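
// DNNLOWP (quantized) implementation of the ElementwiseLinear operator:
// Y[n][d] = X[n][d] * a[d] + b[d], where X is viewed as an N x D matrix
// (dimensions before `axis` collapse into N, the rest into D) and a and b
// are length-D vectors.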
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
template <typename T>
ElementwiseLinearDNNLowPOp<T>::ElementwiseLinearDNNLowPOp(
    const OperatorDef& operator_def,
    Workspace* ws)
    : BaseType(operator_def, ws),
      axis_(this->template GetSingleArgument<int>("axis", 1)) {}
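
// Quantized forward pass: quantize X if it arrives as fp32, quantize b to
// 32-bit at the accumulator scale, run the integer multiply-add, then
// requantize the int32 result down to T. Throughout, the usual fbgemm
// affine-quantization convention applies: real ~= scale * (q - zero_point).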
template <typename T>
bool ElementwiseLinearDNNLowPOp<T>::RunOnDevice() {
  if (!GetQuantizationParameters_()) {
    return false;
  }

  const auto& X = InputTensorCPU_(0);
  const auto& a = InputTensorCPU_(1);
  const auto& b = InputTensorCPU_(2);
  auto* Y = OutputTensorCPU_(0);

  const auto canonical_axis = X.canonical_axis_index(axis_);
  const int N = X.size_to_dim(canonical_axis);
  const int D = X.size_from_dim(canonical_axis);

  CAFFE_ENFORCE_EQ(a.ndim(), 1, a.ndim());
  CAFFE_ENFORCE_EQ(a.size(0), D, a.ndim());
  CAFFE_ENFORCE_EQ(b.ndim(), 1, b.ndim());
  CAFFE_ENFORCE_EQ(b.size(0), D, b.ndim());

  Y->ResizeLike(X);
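
  // Note: X_temp is only used as backing storage when input 0 arrives as
  // fp32 and has to be quantized here; if it is already quantized,
  // QuantizeInputIfNeeded should just return the existing data.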
  // Quantize X
  vector<T> X_temp;
  const T* X_quantized =
      QuantizeInputIfNeeded<T>(this, 0, in_qparams_[0], X_temp);
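
  // b is quantized with zero_point 0 and scale s_X * s_a, i.e. the scale of
  // the int32 accumulator, so it can be added directly to the product
  // (q_X - z_X) * (q_a - z_a) below; 32-bit signed leaves headroom for the
  // accumulation.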
  // Quantize b
  vector<int32_t> b_quantized(b.numel());
  const float* b_data = b.template data<float>();
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < b.numel(); ++i) {
    b_quantized[i] = fbgemm::Quantize<int32_t>(
        b_data[i],
        0,
        in_qparams_[0].scale * in_qparams_[1].scale,
        32,
        true /* signed */);
  }
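
  // With X ~= s_X * (q_X - z_X), a ~= s_a * (q_a - z_a), and
  // b ~= s_X * s_a * q_b, the real-valued result is
  //   Y = X * a + b = s_X * s_a * ((q_X - z_X) * (q_a - z_a) + q_b),
  // so `raw` holds Y at scale s_X * s_a and Requantize rescales it to
  // out_qparams_ via the multiplier chosen in GetQuantizationParameters_().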
  T* Y_quantized = GetQuantizedOutputData_();
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int n = 0; n < N; ++n) {
    for (int d = 0; d < D; ++d) {
      int32_t raw = (X_quantized[n * D + d] - in_qparams_[0].zero_point) *
              (a_quantized_[d] - in_qparams_[1].zero_point) +
          b_quantized[d];
      Y_quantized[n * D + d] =
          fbgemm::Requantize<T>(raw, requantization_params_);
    }
  }

  RunOnDeviceEpilogue_();

  return true;
}
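
// Chooses and caches quantization parameters: qparams for X come from input
// 0 (presumably measured from its range when it is fp32), a is quantized
// once and cached in a_quantized_, and the requantization multiplier from
// the s_X * s_a accumulator scale to the output scale is precomputed.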
template <typename T>
bool ElementwiseLinearDNNLowPOp<T>::GetQuantizationParameters_() {
  using namespace dnnlowp;

  // Choose quantization for X
  in_qparams_[0] = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());

  // Quantize a
  if (a_quantized_.empty()) {
    const auto& a = InputTensorCPU_(1);
    in_qparams_[1] = qfactory_->ChooseQuantizationParams(
        a.template data<float>(), a.numel(), true /*weight*/);

    a_quantized_.resize(a.numel());
    fbgemm::Quantize<T>(
        a.template data<float>(),
        a_quantized_.data(),
        a_quantized_.size(),
        in_qparams_[1]);
  }
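
  // The accumulator lives at scale s_X * s_a, so requantizing to the output
  // scale multiplies by s_X * s_a / s_out; ChooseRequantizationMultiplier
  // encodes this real-valued multiplier in requantization_params_.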
  GetOutputQuantizationParams_();

  float real_multiplier =
      in_qparams_[0].scale * in_qparams_[1].scale / out_qparams_.scale;
  requantization_params_ =
      qfactory_->ChooseRequantizationMultiplier(real_multiplier, out_qparams_);

  return true;
}
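
// Both the fp32-named operator (under the DNNLOWP engine) and the explicit
// Int8 variant dispatch to the same uint8 implementation.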
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    ElementwiseLinear,
    DNNLOWP,
    ElementwiseLinearDNNLowPOp<uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8ElementwiseLinear,
    DNNLOWP,
    ElementwiseLinearDNNLowPOp<uint8_t>);

} // namespace caffe2