mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: The GoogleTest `TEST` macro, as well as `DEFINE_DISPATCH`, is non-compliant with the `cppcoreguidelines-avoid-non-const-global-variables` check. All changes except the ones to `.clang-tidy` were generated using the following script: ``` for i in `find . -type f -iname "*.c*" -or -iname "*.h"|xargs grep cppcoreguidelines-avoid-non-const-global-variables|cut -f1 -d:|sort|uniq`; do sed -i "/\/\/ NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)/d" $i; done ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/62008 Reviewed By: driazati, r-barnes Differential Revision: D29838584 Pulled By: malfet fbshipit-source-id: 1b2f8602c945bd4ce50a9bfdd204755556e31d13
107 lines
2.7 KiB
C++
107 lines
2.7 KiB
C++
#include "quantize_dnnlowp_op.h"
|
|
#include "dnnlowp_op.h"
|
|
|
|
#include "caffe2/core/tensor_int8.h"
|
|
#include "caffe2/quantization/server/int8_gen_quant_params.h"
|
|
#include "caffe2_dnnlowp_utils.h"
|
|
#include "dnnlowp_partition.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
using namespace std;
|
|
|
|
template <typename T>
|
|
QuantizeDNNLowPOp<T>::QuantizeDNNLowPOp(
|
|
const OperatorDef& operator_def,
|
|
Workspace* ws)
|
|
: Operator<CPUContext>(operator_def, ws),
|
|
qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {}
|
|
|
|
template <typename T>
|
|
bool QuantizeDNNLowPOp<T>::RunOnDevice() {
|
|
using namespace dnnlowp;
|
|
|
|
if (!arguments_parsed_) {
|
|
dnnlowp::ParseDNNLowPOperatorArguments(this);
|
|
arguments_parsed_ = true;
|
|
}
|
|
|
|
CAFFE_ENFORCE(InputSize() <= 2);
|
|
CAFFE_ENFORCE(Input(0).template IsType<float>());
|
|
|
|
bool use_input_qparam = false;
|
|
float in_scale = 0;
|
|
int in_zero_point = 0;
|
|
if (InputSize() == 2) {
|
|
use_input_qparam = true;
|
|
|
|
const auto* input_qparam_blob =
|
|
Input<caffe2::unique_ptr<Int8QuantParamsBlob>>(1).get();
|
|
CAFFE_ENFORCE(input_qparam_blob);
|
|
in_scale = input_qparam_blob->qparam.scale;
|
|
in_zero_point = input_qparam_blob->qparam.zero_point;
|
|
}
|
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
|
TensorQuantizationParams in_qparams;
|
|
|
|
if (use_input_qparam) {
|
|
in_qparams.scale = in_scale;
|
|
in_qparams.zero_point = in_zero_point;
|
|
in_qparams.precision = qfactory_->GetActivationPrecision();
|
|
} else {
|
|
if (HasStaticQuantization(this)) {
|
|
in_qparams = GetStaticQuantizationParamsOf(this, 0);
|
|
} else {
|
|
in_qparams = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
|
|
}
|
|
}
|
|
|
|
int8::Int8TensorCPU* output =
|
|
Outputs()[0]->template GetMutable<int8::Int8TensorCPU>();
|
|
output->t.ResizeLike(Input(0));
|
|
|
|
const float* in_data = Input(0).template data<float>();
|
|
T* out_data = output->t.template mutable_data<T>();
|
|
|
|
fbgemm::Quantize<T>(in_data, out_data, Input(0).numel(), in_qparams);
|
|
|
|
PropagateOutputTensorQuantizationParams(this, 0, in_qparams);
|
|
|
|
return true;
|
|
}
|
|
|
|
// Schema for the "Quantize" operator: one or two inputs, exactly one output,
// and the output declared as having the identical type and shape as input 0.
// NOTE(review): RunOnDevice requires float input and writes T-typed data, so
// the "identical type" part presumably only matters for shape inference --
// confirm.
OPERATOR_SCHEMA(Quantize)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .IdenticalTypeAndShapeOfInput(0);
|
|
|
|
// "Quantize" with 8-bit output: the DNNLOWP and DNNLOWP_ROWWISE engines both
// use the uint8_t instantiation.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Quantize, DNNLOWP, QuantizeDNNLowPOp<uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Quantize, DNNLOWP_ROWWISE, QuantizeDNNLowPOp<uint8_t>);

// "Quantize" with 16-bit output: the DNNLOWP_16 and DNNLOWP_ROWWISE_16
// engines both use the uint16_t instantiation.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Quantize, DNNLOWP_16, QuantizeDNNLowPOp<uint16_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Quantize, DNNLOWP_ROWWISE_16, QuantizeDNNLowPOp<uint16_t>);

// "Int8Quantize" is registered for the 8-bit engines only, using the same
// uint8_t instantiation as "Quantize".
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8Quantize, DNNLOWP, QuantizeDNNLowPOp<uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8Quantize, DNNLOWP_ROWWISE, QuantizeDNNLowPOp<uint8_t>);
|
|
|
|
} // namespace caffe2
|