#include "dnnlowp.h"
|
|
|
|
#include <cmath>
|
|
#include <iostream>
|
|
#include <random>
|
|
|
|
#include <gtest/gtest.h>
|
|
#include "caffe2/core/logging.h"
|
|
|
|
using namespace std;
using namespace dnnlowp;

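// Both tests below exercise fixed-point requantization: an int32 value is
// mapped back to 8 bits as roughly
//   dst = clamp(zero_point + round(src * real_multiplier), 8),
// where fbgemm::Requantize approximates real_multiplier with the integer
// multiplier and right_shift chosen by ChooseRequantizationMultiplier.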
TEST(Requantization, BatchRequantizationUnitTest) {
  // generate input data
  default_random_engine eng;

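  // in_max_dis picks the magnitude of the int32 inputs; zero_point_dis picks
  // a random 8-bit zero point for the output.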
  uniform_int_distribution<int32_t> in_max_dis(
      10, numeric_limits<int32_t>::max());
  uniform_int_distribution<int> zero_point_dis(0, 255);

  constexpr int NITER = 1024;
  constexpr int LEN = 77;

  vector<int32_t> src(LEN);
  vector<uint8_t> expected(LEN), actual(LEN);

  QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();

  for (int i = 0; i < NITER; ++i) {
    int32_t in_max = in_max_dis(eng);
    uniform_int_distribution<int32_t> in_dis(-in_max, in_max);

    for (int j = 0; j < LEN; ++j) {
      src[j] = in_dis(eng);
    }

    // A multiplier of about 255 / (2 * in_max) would map the input range of
    // width 2 * in_max onto 8 bits; intentionally use a bigger multiplier to
    // test that saturation is handled correctly.
    float real_multiplier = 255 / (1.5 * in_max);
    TensorQuantizationParams target_qparams;
    target_qparams.zero_point = zero_point_dis(eng);
    target_qparams.precision = 8;

    RequantizationParams params = qfactory->ChooseRequantizationMultiplier(
        real_multiplier, target_qparams);

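    // Reference results: apply the exact floating-point multiplier with
    // round-to-nearest and clamp to 8 bits; fbgemm::Requantize is expected to
    // match these values exactly.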
    for (int j = 0; j < LEN; ++j) {
      expected[j] = fbgemm::clamp(
          target_qparams.zero_point +
              std::nearbyint(static_cast<double>(src[j]) * real_multiplier),
          8);
    }

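    // Time the batched requantization with rdtsc; the throughput is only
    // logged, not asserted on.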
    unsigned long long cycle_begin = __rdtsc();
    fbgemm::Requantize(src.data(), actual.data(), LEN, params);
    unsigned long long cycle_end = __rdtsc();
    double elements_per_cycle = (double)LEN / (cycle_end - cycle_begin);
    LOG(INFO) << elements_per_cycle << " elements_per_cycle";

    for (int j = 0; j < LEN; ++j) {
      EXPECT_EQ((int)expected[j], (int)actual[j])
          << "i " << i << " j " << j << " src " << src[j] << " real_multiplier "
          << real_multiplier << " multiplier " << params.multiplier
          << " right_shift " << params.right_shift << " zero_point "
          << target_qparams.zero_point;
    }
  }
}

TEST(Requantization, RequantizationUnitTest) {
  // Rescale values from a random range [min1, max1] to another random range
  // [min2, max2]. Make sure both ranges include 0 and the inputs don't have
  // any input quantization error.
  default_random_engine gen;
  QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();

  {
    // Test 31-bit to 8-bit scaling (the most common case, e.g., for the
    // results of a GEMM).
    // The destination quantization parameters are pre-determined by the
    // actual min/max of the values.
    // The source scale can vary and its zero_point is 0.
    uniform_real_distribution<float> src_scale_exponent_dist(-19, -1);
    uniform_real_distribution<float> dst_exponent_dist(0.1, 4);
    // The bits used by the source scale plus the destination precision should
    // be <= 23 so there is no input quantization error, because float has a
    // 23-bit mantissa.
    uniform_real_distribution<float> negative_proportion_dist(0, 1);

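    // Each iteration picks a random source scale and destination range,
    // requantizes, dequantizes, and checks that the round-trip error stays
    // below about half of the destination quantization step.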
    for (int i = 0; i < 256; ++i) {
      TensorQuantizationParams src_qparams;
      // scale is 2^-19 ~ 2^-1
      src_qparams.scale = powf(2, src_scale_exponent_dist(gen));
      src_qparams.zero_point = 0;
      src_qparams.precision = 31;

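      // Pick a random destination range [min, max] of width dst_extend that
      // always contains 0.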
      float dst_extend = powf(2, dst_exponent_dist(gen));
      float negative_proportion = negative_proportion_dist(gen);
      float min = -(dst_extend * negative_proportion);
      float max = dst_extend + min;
      TensorQuantizationParams dst_qparams =
          qfactory->ChooseQuantizationParams(min, max);
      // scale = dst_extend / 2^8,
      // which is between 2^0.1 / 2^8 (~2^-7.9) and 2^4 / 2^8 = 2^-4.

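      // Requantization converts values expressed in the source scale to the
      // destination scale, so the real-valued multiplier is the ratio of the
      // two scales.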
      float real_multiplier = src_qparams.scale / dst_qparams.scale;
      RequantizationParams requantization_params =
          qfactory->ChooseRequantizationMultiplier(
              real_multiplier, dst_qparams);

      uniform_real_distribution<float> value_dist(
          ceil(min / src_qparams.scale) * src_qparams.scale,
          floor(max / src_qparams.scale) * src_qparams.scale);
      // The bounds are rounded to multiples of src_qparams.scale to avoid
      // input quantization error due to clipping.
      float sum_sq = 0, max_err = 0;
      constexpr int LEN = 1111;
      vector<int32_t> src_q(LEN);
      vector<float> src(LEN);
      for (int j = 0; j < LEN; ++j) {
        float src_orig = value_dist(gen);
        src_q[j] = fbgemm::Quantize<int32_t>(
            src_orig, 0, src_qparams.scale, 32, true /* signed */);
        src[j] = fbgemm::Dequantize<int32_t>(src_q[j], src_qparams);
        // This number shouldn't have any quantization error
        EXPECT_EQ(
            fbgemm::Quantize<int32_t>(src[j], 0, src_qparams.scale, 32, true),
            src_q[j]);
      }

      vector<uint8_t> dst_q(LEN);
      fbgemm::Requantize(
          src_q.data(), dst_q.data(), LEN, requantization_params);

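      // Dequantize the requantized values and check that the round-trip error
      // stays within about half of the destination quantization step
      // (dst_qparams.scale / 1.9).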
      for (int j = 0; j < LEN; ++j) {
        float dst = fbgemm::Dequantize<uint8_t>(dst_q[j], dst_qparams);

        float err = fabsf(dst - src[j]);
        sum_sq += err * err;
        max_err = std::max(max_err, err);
        EXPECT_LE(err, dst_qparams.scale / 1.9);
      }

LOG(INFO) << "src_scale " << src_qparams.scale << " dst_extend "
|
|
<< dst_extend << " real_multiplier " << real_multiplier
|
|
<< " avg_l2_err " << std::sqrt(sum_sq) / 1024 << " max_err "
|
|
<< max_err << endl;
|
|
// We shouldn't have an error bigger than output quantization error
|
|
EXPECT_LE(max_err, dst_qparams.scale / 1.9);
|
|
}
|
|
}
|
|
}
|