mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/14164 See title Reviewed By: csummersea Differential Revision: D13115470 fbshipit-source-id: d754f558cd06e5f4c1cd00315e912cdb7b50731a
111 lines
3.1 KiB
C++
111 lines
3.1 KiB
C++
#pragma once
|
|
|
|
#include "caffe2/core/operator.h"
|
|
#include "caffe2/quantization/server/dnnlowp.h"
|
|
#include "caffe2/utils/eigen_utils.h"
|
|
|
|
namespace dnnlowp {
|
|
|
|
/**
|
|
* Let consumers of op know that qparams the quantization parameter used
|
|
* for output_index'th output of op.
|
|
*/
|
|
void PropagateOutputTensorQuantizationParams(
|
|
caffe2::OperatorBase* op,
|
|
int output_index,
|
|
const TensorQuantizationParams& qparams);
|
|
|
|
/**
|
|
* If input_index'th input is already quantized, return quantization parameter
|
|
* used for the input tensor (should've been set by
|
|
* PropagateOutputTensorQuantizationParams when the producer was invoked).
|
|
* If the input tensor is not quantized, return the quantization parameter
|
|
* chosen by qfactory based on the distribution of the input tensor
|
|
*/
|
|
TensorQuantizationParams GetInputTensorQuantizationParamsOf(
|
|
caffe2::OperatorBase* op,
|
|
int input_index,
|
|
const QuantizationFactory* qfactory,
|
|
bool is_weight = false);
|
|
|
|
/**
 * Associate qparams with the output_index'th output of op as its static
 * quantization parameters.
 *
 * NOTE(review): the description above is inferred from the name and the
 * signature only; where the parameters are stored and how they interact with
 * HasStaticQuantization / GetStaticQuantizationParamsOf must be confirmed
 * against the definition.
 *
 * @param op operator to update
 * @param output_index index of the output of op
 * @param qparams quantization parameters to set
 */
void SetStaticQuantizationParams(
    caffe2::OperatorBase* op,
    int output_index,
    const TensorQuantizationParams& qparams);
|
|
|
|
/**
|
|
* @return true if op's outputs should use static quantization (i.e. op has
|
|
* Y_scale and optionally Y_zero_offset argument).
|
|
*/
|
|
bool HasStaticQuantization(
|
|
const caffe2::OperatorBase* op,
|
|
int output_index = 0);
|
|
|
|
/**
|
|
* Get output_index'th quantization parameter.
|
|
* Should be used only when UseStaticQuantization is true
|
|
*/
|
|
TensorQuantizationParams GetStaticQuantizationParamsOf(
|
|
const caffe2::OperatorBase* op,
|
|
int output_index);
|
|
|
|
/**
|
|
* Quantize input_index'th input if it's not already quantized.
|
|
* a vector temp should be passed to store quantized results.
|
|
*
|
|
* @return array of quantized values
|
|
*/
|
|
template <typename T>
|
|
const T* QuantizeInputIfNeeded(
|
|
caffe2::OperatorBase* op,
|
|
int input_index,
|
|
const TensorQuantizationParams& qparams,
|
|
std::vector<T>& temp);
|
|
|
|
template <typename T>
|
|
const T* RowWiseQuantizeInputIfNeeded(
|
|
caffe2::OperatorBase* op,
|
|
int input_index,
|
|
const std::vector<TensorQuantizationParams>& qparams,
|
|
std::vector<T>& temp);
|
|
|
|
/**
 * Accumulator for quantization error statistics. Every field starts at zero.
 * It is filled through MeasureQuantizationError and read by
 * ReportQuantizationError.
 *
 * Member order is kept as declared so positional aggregate initialization
 * keeps working.
 */
struct QuantizationErrorStats {
  // NOTE(review): presumably the running sum of squared reference values --
  // confirm against MeasureQuantizationError.
  float sum_sq{0};
  // NOTE(review): presumably the running sum of squared (actual - ref)
  // errors -- confirm against MeasureQuantizationError.
  float sum_err_sq{0};
  // largest absolute error recorded
  float max_abs_err{0};
  // actual and reference values that resulted in max_abs_err
  float max_err_actual{0};
  float max_err_ref{0};
  // NOTE(review): presumably the number of measurements folded in so far --
  // confirm against MeasureQuantizationError.
  int measure_cnt{0};
};
|
|
|
|
void MeasureQuantizationError(
|
|
const float* actual,
|
|
const float* ref,
|
|
size_t len,
|
|
QuantizationErrorStats* stat);
|
|
|
|
void ReportQuantizationError(
|
|
const caffe2::OperatorBase* op,
|
|
const QuantizationErrorStats& stat);
|
|
|
|
/**
|
|
* Get QuantizationFactory based on the arguments of op
|
|
*/
|
|
std::unique_ptr<QuantizationFactory> GetQuantizationFactoryOf(
|
|
const caffe2::OperatorBase* op);
|
|
|
|
/**
 * Adjust the output tensor quantization parameters of op according to
 * followed_by.
 *
 * NOTE(review): presumably followed_by names the operator type that consumes
 * op's output; which values are recognized and what adjustment each one
 * triggers must be confirmed against the definition.
 *
 * @param op operator whose output quantization parameters are adjusted
 * @param followed_by value that selects the adjustment
 */
void AdjustOutputTensorQuantizationParamsWithFollowedBy(
    caffe2::OperatorBase* op,
    const std::string& followed_by);
|
|
|
|
/**
 * Parse the DNNLOWP-related arguments of op.
 *
 * All three output pointers default to nullptr.
 * NOTE(review): presumably a null pointer means the caller is not interested
 * in that value and each non-null pointer receives the parsed argument --
 * confirm against the definition.
 *
 * @param op operator whose arguments are parsed
 * @param dequantize_output optional output flag
 * @param measure_quantization_error optional output flag
 * @param followed_by optional output string
 */
void ParseDNNLowPOperatorArguments(
    caffe2::OperatorBase* op,
    bool* dequantize_output = nullptr,
    bool* measure_quantization_error = nullptr,
    std::string* followed_by = nullptr);
|
|
|
|
/**
 * Take net_def by value and return a NetDef.
 *
 * NOTE(review): presumably the returned net is net_def with scale and zero
 * offset arguments added to its operators, derived from the histograms
 * stored in histogram_file_name -- confirm the file format and which
 * operators are touched against the definition.
 *
 * @param net_def network definition to start from (passed by value)
 * @param histogram_file_name name of the histogram file to read
 * @return the resulting network definition
 */
caffe2::NetDef AddScaleZeroOffsetArgumentsWithHistogram(
    caffe2::NetDef net_def,
    const std::string& histogram_file_name);
|
|
|
|
} // namespace dnnlowp
|