mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Add cppcoreguidelines-avoid-magic-numbers exclusion to clang-tidy Remove existing nolint warnings using following script: ``` for file in `git ls-files | grep -v \.py`; do gsed '/^ *\/\/ NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)/d' -i $file; done ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/57841 Reviewed By: samestep Differential Revision: D28295045 Pulled By: malfet fbshipit-source-id: 7c6e8d1213c9593f169ed3df6a916498f1a97163
356 lines
14 KiB
C++
356 lines
14 KiB
C++
#include "caffe2/core/logging.h"
|
|
#include "caffe2/utils/cpuid.h"
|
|
#include "l2_minimization.h"
|
|
|
|
#include <cassert>
|
|
#include <cmath>
|
|
#include <limits>
|
|
|
|
#include <immintrin.h>
|
|
|
|
#include <c10/util/irange.h>
|
|
|
|
using namespace std;
|
|
|
|
namespace dnnlowp {
|
|
|
|
#undef NDEBUG
|
|
|
|
// Use fp16_min as the small scale cutoff because we don't want to use scales in fp16 subnormal range.
|
|
// This is to be consistent with Glow and FakeLowP implementation for NNPI.
|
|
// 6.1e-5 is roughly 2^-14, the smallest positive normal fp16 value. The search
// routines in this file skip their histogram search and defer to the default
// QuantizationFactory whenever the full-range scale falls below this cutoff.
constexpr float SMALL_SCALE_THRESHOLD = 6.1e-5f;
|
|
|
|
static float
|
|
GetNorm(float begin, float end, float density, NormMinimization::Kind kind) {
|
|
float norm = 0;
|
|
|
|
// assume values are uniformly distributed within each histogram bin
|
|
if (NormMinimization::L2 == kind) {
|
|
// err = density * (integral_{begin, end} x^2)
|
|
// = density * (end^3 - begin^3) / 3
|
|
norm = (end * end * end - begin * begin * begin) / 3;
|
|
// for begin = d/2 and end = -d/2, this leads to d^3/12
|
|
} else {
|
|
// err = density * (integral_{begin, end} |x|)
|
|
// = density * (end^2 - begin^2) / 2
|
|
float left_begin = std::min(0.0f, begin);
|
|
float left_end = std::min(0.0f, end);
|
|
assert(left_begin * left_begin >= left_end * left_end);
|
|
norm += (left_begin * left_begin - left_end * left_end) / 2;
|
|
|
|
float right_begin = std::max(0.0f, begin);
|
|
float right_end = std::max(0.0f, end);
|
|
assert(right_end * right_end >= right_begin * right_begin);
|
|
norm += (right_end * right_end - right_begin * right_begin) / 2;
|
|
}
|
|
|
|
return density * norm;
|
|
}
|
|
|
|
// Filter out outliers in input distributions
|
|
// Exploit the input distributions for the quick search
|
|
TensorQuantizationParams NormMinimization::NonlinearQuantizationParamsSearch(
|
|
const Histogram& hist,
|
|
bool preserve_sparsity,
|
|
int precision) {
|
|
if (preserve_sparsity) {
|
|
VLOG(2) << "l2_approx with symmetric quantization falls back to L2";
|
|
return ChooseQuantizationParams(hist, preserve_sparsity, precision);
|
|
}
|
|
VLOG(2) << "Using the nonlinear quantile search";
|
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
|
|
float min, max;
|
|
vector<float> bins_f(dnnlowp::adjust_hist_to_include_zero(hist, &min, &max));
|
|
int nbins = bins_f.size();
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
float bin_width = (max - min) / nbins;
|
|
float scale = (max - min) / float((1 << precision) - 1);
|
|
if (bin_width == 0 || scale < SMALL_SCALE_THRESHOLD) {
|
|
QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
|
|
return qfactory->ChooseQuantizationParams(
|
|
min, max, precision, preserve_sparsity);
|
|
}
|
|
int dst_nbins = 1 << precision;
|
|
|
|
float org_max = max;
|
|
float org_min = min;
|
|
// calculate the CDF
|
|
uint64_t total = 0;
|
|
for (uint64_t x : bins_f) {
|
|
total += x;
|
|
}
|
|
vector<uint64_t> CDF;
|
|
uint64_t sum = 0;
|
|
for (uint64_t x : bins_f) {
|
|
sum += x;
|
|
CDF.push_back(sum);
|
|
}
|
|
|
|
double stepsize = 0.00001; // experiment on the granularity
|
|
double alpha = 0.0f, beta = 1.0f; // lowerbound and upperbound
|
|
int start_bin = 0;
|
|
int end_bin = nbins - 1;
|
|
double norm_min = numeric_limits<double>::max();
|
|
|
|
while (alpha < beta) {
|
|
// find the next step
|
|
double next_alpha = alpha + stepsize;
|
|
double next_beta = beta - stepsize;
|
|
|
|
// find the left and right bins between the quantile bounds
|
|
int i = start_bin, j = end_bin;
|
|
while (i < end_bin && CDF[i] < next_alpha * total)
|
|
i++;
|
|
while (j > start_bin && CDF[j] > next_beta * total)
|
|
j--;
|
|
|
|
// decide the next move
|
|
// cout << i << ", " << j << endl;
|
|
int next_start_bin = start_bin, next_end_bin = end_bin;
|
|
if ((i - start_bin) > (end_bin - j)) {
|
|
// move the start_bin
|
|
next_start_bin = i;
|
|
alpha = next_alpha;
|
|
} else {
|
|
// move the end_bin
|
|
next_end_bin = j;
|
|
beta = next_beta;
|
|
}
|
|
|
|
if (next_start_bin == start_bin && next_end_bin == end_bin)
|
|
continue;
|
|
// calculate the norm
|
|
double norm = 0;
|
|
double dst_bin_width =
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
bin_width * (next_end_bin - next_start_bin + 1) / dst_nbins;
|
|
|
|
// go over each histogram bin and accumulate errors
|
|
for (int src_bin = 0; src_bin < nbins; ++src_bin) {
|
|
// distances from the beginning of first dst_bin to the beginning and
|
|
// end of src_bin
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
double src_bin_begin = (src_bin - next_start_bin) * bin_width;
|
|
double src_bin_end = src_bin_begin + bin_width;
|
|
|
|
// which dst_bins the beginning and end of src_bin belong to?
|
|
int dst_bin_of_begin = std::min(
|
|
(1 << precision) - 1.,
|
|
std::max(0., floor(src_bin_begin / dst_bin_width)));
|
|
int dst_bin_of_end = std::min(
|
|
(1 << precision) - 1.,
|
|
std::max(0., floor(src_bin_end / dst_bin_width)));
|
|
|
|
double dst_bin_of_begin_center =
|
|
dst_bin_of_begin * dst_bin_width + dst_bin_width / 2;
|
|
double density = bins_f[src_bin] / bin_width;
|
|
if (dst_bin_of_begin == dst_bin_of_end) {
|
|
// if src_bin is entirely within 1 dst_bin
|
|
double delta_begin = src_bin_begin - dst_bin_of_begin_center;
|
|
double delta_end = src_bin_end - dst_bin_of_begin_center;
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
norm += GetNorm(delta_begin, delta_end, density, kind_);
|
|
} else {
|
|
double delta_begin = src_bin_begin - dst_bin_of_begin_center;
|
|
double delta_end = dst_bin_width / 2;
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
norm += GetNorm(delta_begin, delta_end, density, kind_);
|
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
norm += (dst_bin_of_end - dst_bin_of_begin - 1) *
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
GetNorm(-dst_bin_width / 2, dst_bin_width / 2, density, kind_);
|
|
|
|
double dst_bin_of_end_center =
|
|
dst_bin_of_end * dst_bin_width + dst_bin_width / 2;
|
|
delta_begin = -dst_bin_width / 2;
|
|
delta_end = src_bin_end - dst_bin_of_end_center;
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
norm += GetNorm(delta_begin, delta_end, density, kind_);
|
|
}
|
|
}
|
|
if (norm > norm_min)
|
|
break;
|
|
norm_min = norm;
|
|
start_bin = next_start_bin;
|
|
end_bin = next_end_bin;
|
|
}
|
|
VLOG(2) << "best quantization range " << start_bin << "," << end_bin + 1
|
|
<< "," << norm_min;
|
|
|
|
double selected_sum = 0;
|
|
for (int i = start_bin; i < end_bin + 1; ++i) {
|
|
selected_sum += bins_f[i];
|
|
}
|
|
VLOG(2) << "best quantization range covers "
|
|
<< (double)selected_sum / total * 100 << " %%";
|
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
max = min + bin_width * (end_bin + 1);
|
|
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
|
|
min = min + bin_width * start_bin;
|
|
|
|
VLOG(2) << "Org min " << org_min << " org max " << org_max << " found min "
|
|
<< min << " max " << max << " with minimal norm " << norm_min;
|
|
QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
|
|
return qfactory->ChooseQuantizationParams(
|
|
min, max, precision, preserve_sparsity);
|
|
}
|
|
|
|
// Brute-force search for the clipped [min, max] range with the smallest
// quantization error (L2 or L1, depending on kind_).
//
// For every window length nbins_selected in [1, nbins] and every admissible
// window start, the histogram bins [start_bin, start_bin + nbins_selected)
// are mapped onto [0, 1 << precision) and the resulting error over the whole
// histogram is evaluated; the window with the smallest error wins.
//
// hist              histogram of the values to be quantized
// preserve_sparsity when true only one window start per length is tried:
//                   bin 0 if the range minimum is 0, otherwise the start that
//                   centers the window on the bin containing zero
// precision         number of bits of the quantized representation
//
// Returns the parameters chosen by the default QuantizationFactory for the
// selected range.
TensorQuantizationParams NormMinimization::ChooseQuantizationParams(
    const Histogram& hist,
    bool preserve_sparsity,
    int precision) {
  VLOG(2) << "Using the brute force search";

  // Out-parameters of adjust_hist_to_include_zero (presumably the histogram
  // range after it has been extended to contain zero -- confirm in its
  // definition). Named range_* so they do not shadow std::min / std::max.
  float range_min = 0.0f;
  float range_max = 0.0f;
  std::vector<float> bins_f(
      dnnlowp::adjust_hist_to_include_zero(hist, &range_min, &range_max));
  const int nbins = static_cast<int>(bins_f.size());
  const float bin_width = (range_max - range_min) / static_cast<float>(nbins);

  const float scale = (range_max - range_min) / float((1 << precision) - 1);
  if (bin_width == 0 || scale < SMALL_SCALE_THRESHOLD) {
    // Degenerate or tiny range: nothing to search, use the plain min/max.
    QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
    return qfactory->ChooseQuantizationParams(
        range_min, range_max, precision, preserve_sparsity);
  }
  const int dst_nbins = 1 << precision;

  // Index of the histogram bin that contains the value zero.
  const int zero_bin = static_cast<int>(std::round(-range_min / bin_width));

  // best_start_bins[n] = {best window start, its error} for window length n.
  // Slot 0 is unused.
  std::vector<std::pair<int, float>> best_start_bins(nbins + 1);

  // Kernel selection does not depend on the loop variables, so it is decided
  // once here instead of once per candidate window.
  caffe2::CpuId cpuid = caffe2::GetCpuId();
  const bool use_avx2_kernel =
      kind_ == NormMinimization::L2 && cpuid.avx2() && cpuid.fma();
  // Index of the last destination bin, used to clamp bin indices below.
  const float last_dst_bin = (1 << precision) - 1.0f;

  // Look at mapping [start_bin, start_bin + nbins_selected) to
  // [0, 1 << precision) for every (start_bin, nbins_selected) combination and
  // pick the one with smallest L2 quantization error
  // Each iteration writes only its own best_start_bins slot.
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
  for (int nbins_selected = 1; nbins_selected <= nbins; ++nbins_selected) {
    float norm_min = std::numeric_limits<float>::max();
    int best_start_bin = 0;

    int start_bin_begin = 0;
    int start_bin_end = nbins - nbins_selected + 1;
    if (preserve_sparsity) {
      // when preserving sparsity we only check the range
      // starting from 0 (when min is 0) or symmetric around 0.
      if (range_min == 0) {
        start_bin_begin = 0;
        start_bin_end = 1;
      } else {
        start_bin_begin = zero_bin - nbins_selected / 2;
        start_bin_end = start_bin_begin + 1;
      }
    }

    const float dst_bin_width = bin_width *
        static_cast<float>(nbins_selected) / static_cast<float>(dst_nbins);
    const float half_dst_bin_width = dst_bin_width / 2;

    for (int start_bin = start_bin_begin; start_bin < start_bin_end;
         ++start_bin) {
      float norm = 0;

      // go over each histogram bin and accumulate errors
      if (use_avx2_kernel) {
        // Vectorized kernel; presumably computes the same L2 error as the
        // scalar branch below -- see its definition to confirm.
        norm = internal::L2MinimizationKernelAVX2(
            precision,
            bins_f.data(),
            nbins,
            bin_width,
            dst_bin_width,
            start_bin);
      } else {
        for (int src_bin = 0; src_bin < nbins; ++src_bin) {
          // distances from the beginning of first dst_bin to the beginning and
          // end of src_bin
          const float src_bin_begin =
              static_cast<float>(src_bin - start_bin) * bin_width;
          const float src_bin_end = src_bin_begin + bin_width;

          // which dst_bins the beginning and end of src_bin belong to?
          // Source bins outside the window are clamped to the first/last
          // destination bin, which is how clipping error gets accounted for.
          const int dst_bin_of_begin = static_cast<int>(std::min(
              last_dst_bin,
              std::max(0.0f, std::floor(src_bin_begin / dst_bin_width))));
          const int dst_bin_of_end = static_cast<int>(std::min(
              last_dst_bin,
              std::max(0.0f, std::floor(src_bin_end / dst_bin_width))));

          const float dst_bin_of_begin_center =
              static_cast<float>(dst_bin_of_begin) * dst_bin_width +
              half_dst_bin_width;
          const float density = bins_f[src_bin] / bin_width;
          const float delta_begin = src_bin_begin - dst_bin_of_begin_center;

          if (dst_bin_of_begin == dst_bin_of_end) {
            // if src_bin is entirely within 1 dst_bin
            const float delta_end = src_bin_end - dst_bin_of_begin_center;
            norm += GetNorm(delta_begin, delta_end, density, kind_);
          } else {
            // src_bin spans several dst_bins: the tail of the first one ...
            norm += GetNorm(delta_begin, half_dst_bin_width, density, kind_);

            // ... every dst_bin that is fully covered ...
            norm += static_cast<float>(dst_bin_of_end - dst_bin_of_begin - 1) *
                GetNorm(
                        -half_dst_bin_width,
                        half_dst_bin_width,
                        density,
                        kind_);

            // ... and the head of the last one.
            const float dst_bin_of_end_center =
                static_cast<float>(dst_bin_of_end) * dst_bin_width +
                half_dst_bin_width;
            norm += GetNorm(
                -half_dst_bin_width,
                src_bin_end - dst_bin_of_end_center,
                density,
                kind_);
          }
        }
      }

      if (norm < norm_min) {
        norm_min = norm;
        best_start_bin = start_bin;
      }
    } // for each start_bin

    best_start_bins[nbins_selected] = {best_start_bin, norm_min};
  } // for each nbins_selected

  // Reduce the per-length winners to the overall best window.
  float norm_min = std::numeric_limits<float>::max();
  int best_nbins_selected = 1;
  int best_start_bin = 0;
  for (int nbins_selected = 1; nbins_selected <= nbins; ++nbins_selected) {
    const float norm = best_start_bins[nbins_selected].second;
    if (norm < norm_min) {
      norm_min = norm;
      best_start_bin = best_start_bins[nbins_selected].first;
      best_nbins_selected = nbins_selected;
    }
  }

  // Coverage statistics, used for logging only.
  float total_sum = 0;
  for (const float bin_count : bins_f) {
    total_sum += bin_count;
  }
  float selected_sum = 0;
  // The window may extend past the histogram when preserve_sparsity centers
  // it on the zero bin, so clamp the indices before reading bins_f.
  const int i_begin = std::max(0, best_start_bin);
  const int i_end = std::min(nbins, best_start_bin + best_nbins_selected);
  for (int i = i_begin; i < i_end; ++i) {
    selected_sum += bins_f[i];
  }
  // Stream output does not interpret printf escapes, so a single '%' is used.
  VLOG(2) << "best quantization range covers " << selected_sum / total_sum * 100
          << " %";

  // Both bounds are derived from the original range_min.
  const float clipped_max = range_min +
      bin_width * static_cast<float>(best_start_bin + best_nbins_selected);
  const float clipped_min =
      range_min + bin_width * static_cast<float>(best_start_bin);

  QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
  return qfactory->ChooseQuantizationParams(
      clipped_min, clipped_max, precision, preserve_sparsity);
} // ChooseQuantizationParams
|
|
|
|
} // namespace dnnlowp
|