mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: The GoogleTest `TEST` macro, as well as `DEFINE_DISPATCH`, is non-compliant with the `cppcoreguidelines-avoid-non-const-global-variables` check. All changes except the ones to `.clang-tidy` are generated using the following script: ``` for i in `find . -type f -iname "*.c*" -or -iname "*.h"|xargs grep cppcoreguidelines-avoid-non-const-global-variables|cut -f1 -d:|sort|uniq`; do sed -i "/\/\/ NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)/d" $i; done ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/62008 Reviewed By: driazati, r-barnes Differential Revision: D29838584 Pulled By: malfet fbshipit-source-id: 1b2f8602c945bd4ce50a9bfdd204755556e31d13
81 lines
2.1 KiB
C++
81 lines
2.1 KiB
C++
#include "dequantize_dnnlowp_op.h"
|
|
|
|
#include "caffe2/core/tensor_int8.h"
|
|
#include "caffe2_dnnlowp_utils.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
template <typename T>
|
|
DequantizeDNNLowPOp<T>::DequantizeDNNLowPOp(
|
|
const OperatorDef& operator_def,
|
|
Workspace* ws)
|
|
: Operator<CPUContext>(operator_def, ws),
|
|
qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {
|
|
if (this->debug_def().engine() == "DNNLOWP_16" ||
|
|
this->debug_def().engine() == "DNNLOWP_ROWWISE_16") {
|
|
LOG(WARNING)
|
|
<< this->debug_def().engine()
|
|
<< " is an experimental feature mostly for testing accuracy with "
|
|
"fixed-point precision higher than 8 and performance is very slow";
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
bool DequantizeDNNLowPOp<T>::RunOnDevice() {
|
|
using namespace dnnlowp;
|
|
TensorQuantizationParams in_qparams =
|
|
GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
|
|
|
|
const TensorCPU& input = InputIsType<int8::Int8TensorCPU>(0)
|
|
? this->template Input<int8::Int8TensorCPU>(0).t
|
|
: Input(0);
|
|
|
|
CAFFE_ENFORCE(input.template IsType<T>());
|
|
Output(0)->ResizeLike(input);
|
|
fbgemm::Dequantize<T>(
|
|
input.template data<T>(),
|
|
Output(0)->template mutable_data<float>(),
|
|
input.numel(),
|
|
in_qparams);
|
|
|
|
return true;
|
|
}
|
|
|
|
// Schema for the Dequantize operator: one input, one output, with the output
// declared as having the same type and shape as input 0.
// NOTE(review): RunOnDevice in this file writes the output as float while the
// input is enforced to be of type T, so the "identical type" part of the
// inference rule looks inaccurate -- confirm whether this is intentional.
OPERATOR_SCHEMA(Dequantize)
    .NumInputs(1)
    .NumOutputs(1)
    .IdenticalTypeAndShapeOfInput(0);
|
|
|
|
// Dequantize, 8-bit engines: backed by the std::uint8_t instantiation.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Dequantize, DNNLOWP, DequantizeDNNLowPOp<std::uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Dequantize, DNNLOWP_ROWWISE, DequantizeDNNLowPOp<std::uint8_t>);

// Dequantize, 16-bit engines: backed by the std::uint16_t instantiation.
// The constructor logs a warning for these engine names (experimental, slow).
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Dequantize, DNNLOWP_16, DequantizeDNNLowPOp<std::uint16_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Dequantize, DNNLOWP_ROWWISE_16, DequantizeDNNLowPOp<std::uint16_t>);

// Int8Dequantize / Int8DequantizeRowWise: same std::uint8_t implementation
// registered under the Int8* operator names.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8Dequantize, DNNLOWP, DequantizeDNNLowPOp<std::uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8Dequantize, DNNLOWP_ROWWISE, DequantizeDNNLowPOp<std::uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8DequantizeRowWise, DNNLOWP, DequantizeDNNLowPOp<std::uint8_t>);
|
|
|
|
} // namespace caffe2
|