pytorch/caffe2/quantization/server/norm_minimization.cc

#include "caffe2/core/logging.h"
#include "caffe2/utils/cpuid.h"
#include "l2_minimization.h"

#include <cassert>
#include <cmath>
#include <limits>

#include <immintrin.h>

#include <c10/util/irange.h>

using namespace std;

namespace dnnlowp {

#undef NDEBUG

// Use fp16_min as the small scale cutoff because we don't want to use scales in fp16 subnormal range.
// This is to be consistent with Glow and FakeLowP implementation for NNPI.
constexpr float SMALL_SCALE_THRESHOLD = 6.1e-5f;

static float
GetNorm(float begin, float end, float density, NormMinimization::Kind kind) {
  float norm = 0;

  // assume values are uniformly distributed within each histogram bin
  if (NormMinimization::L2 == kind) {
    // err = density * (integral_{begin, end} x^2)
    //     = density * (end^3 - begin^3) / 3
    norm = (end * end * end - begin * begin * begin) / 3;
    // for begin = d/2 and end = -d/2, this leads to d^3/12
  } else {
    // err = density * (integral_{begin, end} |x|)
    //     = density * (end^2 - begin^2) / 2
    float left_begin = std::min(0.0f, begin);
    float left_end = std::min(0.0f, end);
    assert(left_begin * left_begin >= left_end * left_end);
    norm += (left_begin * left_begin - left_end * left_end) / 2;

    float right_begin = std::max(0.0f, begin);
    float right_end = std::max(0.0f, end);
    assert(right_end * right_end >= right_begin * right_begin);
    norm += (right_end * right_end - right_begin * right_begin) / 2;
  }

  return density * norm;
}

// Filter out outliers in input distributions
// Exploit the input distributions for the quick search
TensorQuantizationParams NormMinimization::NonlinearQuantizationParamsSearch(
    const Histogram& hist,
    bool preserve_sparsity,
    int precision) {
  if (preserve_sparsity) {
    VLOG(2) << "l2_approx with symmetric quantization falls back to L2";
    return ChooseQuantizationParams(hist, preserve_sparsity, precision);
  }
  VLOG(2) << "Using the nonlinear quantile search";

  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  float min, max;
  vector<float> bins_f(dnnlowp::adjust_hist_to_include_zero(hist, &min, &max));
  int nbins = bins_f.size();
  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  float bin_width = (max - min) / nbins;
  float scale = (max - min) / float((1 << precision) - 1);
  if (bin_width == 0 || scale < SMALL_SCALE_THRESHOLD) {
    QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
    return qfactory->ChooseQuantizationParams(
        min, max, precision, preserve_sparsity);
  }
  int dst_nbins = 1 << precision;

  float org_max = max;
  float org_min = min;
  // calculate the CDF
  uint64_t total = 0;
  for (uint64_t x : bins_f) {
    total += x;
  }
  vector<uint64_t> CDF;
  uint64_t sum = 0;
  for (uint64_t x : bins_f) {
    sum += x;
    CDF.push_back(sum);
  }

  double stepsize = 0.00001; // experiment on the granularity
  double alpha = 0.0f, beta = 1.0f; // lowerbound and upperbound
  int start_bin = 0;
  int end_bin = nbins - 1;
  double norm_min = numeric_limits<double>::max();

  while (alpha < beta) {
    // find the next step
    double next_alpha = alpha + stepsize;
    double next_beta = beta - stepsize;

    // find the left and right bins between the quantile bounds
    int i = start_bin, j = end_bin;
    while (i < end_bin && CDF[i] < next_alpha * total)
      i++;
    while (j > start_bin && CDF[j] > next_beta * total)
      j--;

    // decide the next move
    // cout << i << ", " << j << endl;
    int next_start_bin = start_bin, next_end_bin = end_bin;
    if ((i - start_bin) > (end_bin - j)) {
      // move the start_bin
      next_start_bin = i;
      alpha = next_alpha;
    } else {
      // move the end_bin
      next_end_bin = j;
      beta = next_beta;
    }

    if (next_start_bin == start_bin && next_end_bin == end_bin)
      continue;
    // calculate the norm
    double norm = 0;
    double dst_bin_width =
        // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
        bin_width * (next_end_bin - next_start_bin + 1) / dst_nbins;

    // go over each histogram bin and accumulate errors
    for (int src_bin = 0; src_bin < nbins; ++src_bin) {
      // distances from the beginning of first dst_bin to the beginning and
      // end of src_bin
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
      double src_bin_begin = (src_bin - next_start_bin) * bin_width;
      double src_bin_end = src_bin_begin + bin_width;

      // which dst_bins the beginning and end of src_bin belong to?
      int dst_bin_of_begin = std::min(
          (1 << precision) - 1.,
          std::max(0., floor(src_bin_begin / dst_bin_width)));
      int dst_bin_of_end = std::min(
          (1 << precision) - 1.,
          std::max(0., floor(src_bin_end / dst_bin_width)));

      double dst_bin_of_begin_center =
          dst_bin_of_begin * dst_bin_width + dst_bin_width / 2;
      double density = bins_f[src_bin] / bin_width;
      if (dst_bin_of_begin == dst_bin_of_end) {
        // if src_bin is entirely within 1 dst_bin
        double delta_begin = src_bin_begin - dst_bin_of_begin_center;
        double delta_end = src_bin_end - dst_bin_of_begin_center;
        // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
        norm += GetNorm(delta_begin, delta_end, density, kind_);
      } else {
        double delta_begin = src_bin_begin - dst_bin_of_begin_center;
        double delta_end = dst_bin_width / 2;
        // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
        norm += GetNorm(delta_begin, delta_end, density, kind_);

        // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
        norm += (dst_bin_of_end - dst_bin_of_begin - 1) *
            // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
            GetNorm(-dst_bin_width / 2, dst_bin_width / 2, density, kind_);

        double dst_bin_of_end_center =
            dst_bin_of_end * dst_bin_width + dst_bin_width / 2;
        delta_begin = -dst_bin_width / 2;
        delta_end = src_bin_end - dst_bin_of_end_center;
        // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
        norm += GetNorm(delta_begin, delta_end, density, kind_);
      }
    }
    if (norm > norm_min)
      break;
    norm_min = norm;
    start_bin = next_start_bin;
    end_bin = next_end_bin;
  }
  VLOG(2) << "best quantization range " << start_bin << "," << end_bin + 1
          << "," << norm_min;

  double selected_sum = 0;
  for (int i = start_bin; i < end_bin + 1; ++i) {
    selected_sum += bins_f[i];
  }
  VLOG(2) << "best quantization range covers "
          << (double)selected_sum / total * 100 << " %%";

  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  max = min + bin_width * (end_bin + 1);
  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  min = min + bin_width * start_bin;

  VLOG(2) << "Org min " << org_min << " org max " << org_max << " found min "
          << min << " max " << max << " with minimal norm " << norm_min;
  QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
  return qfactory->ChooseQuantizationParams(
      min, max, precision, preserve_sparsity);
}

TensorQuantizationParams NormMinimization::ChooseQuantizationParams(
    const Histogram& hist,
    bool preserve_sparsity,
    int precision) {
  VLOG(2) << "Using the brute force search";
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  float min, max;
  vector<float> bins_f(dnnlowp::adjust_hist_to_include_zero(hist, &min, &max));
  int nbins = bins_f.size();
  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  float bin_width = (max - min) / nbins;

  float scale = (max - min) / float((1 << precision) - 1);
  if (bin_width == 0 || scale < SMALL_SCALE_THRESHOLD) {
    QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
    return qfactory->ChooseQuantizationParams(
        min, max, precision, preserve_sparsity);
  }
  int dst_nbins = 1 << precision;

  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  int zero_bin = round(-min / bin_width);

  vector<pair<int, float>> best_start_bins(nbins + 1);

  // Look at mapping [start_bin, start_bin + nbins_selected) to
  // [0, 1 << precision) for every (start_bin, nbins_selected) combination and
  // pick the one with smallest L2 quantization error
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
  for (int nbins_selected = 1; nbins_selected <= nbins; ++nbins_selected) {
    float norm_min = numeric_limits<float>::max();
    int best_start_bin = 0;

    int start_bin_begin = 0, start_bin_end = nbins - nbins_selected + 1;
    if (preserve_sparsity) {
      // when preserving sparsity we only check the range
      // starting from 0 (when min is 0) or symmetric around 0.
      if (min == 0) {
        start_bin_begin = 0;
        start_bin_end = 1;
      } else {
        start_bin_begin = zero_bin - nbins_selected / 2;
        start_bin_end = start_bin_begin + 1;
      }
    }

    // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
    float dst_bin_width = bin_width * nbins_selected / dst_nbins;

    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
    int start_bin;
    for (start_bin = start_bin_begin; start_bin < start_bin_end; ++start_bin) {
      float norm = 0;

      // go over each histogram bin and accumulate errors
      caffe2::CpuId cpuid = caffe2::GetCpuId();
      if (kind_ == NormMinimization::L2 && cpuid.avx2() && cpuid.fma()) {
        norm = internal::L2MinimizationKernelAVX2(
            precision,
            bins_f.data(),
            nbins,
            bin_width,
            dst_bin_width,
            start_bin);
      } else {
        for (int src_bin = 0; src_bin < nbins; ++src_bin) {
          // distances from the beginning of first dst_bin to the beginning and
          // end of src_bin
          // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
          float src_bin_begin = (src_bin - start_bin) * bin_width;
          float src_bin_end = src_bin_begin + bin_width;

          // which dst_bins the beginning and end of src_bin belong to?
          int dst_bin_of_begin = std::min(
              // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
              (1 << precision) - 1.0f,
              std::max(0.0f, floorf(src_bin_begin / dst_bin_width)));
          int dst_bin_of_end = std::min(
              // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
              (1 << precision) - 1.0f,
              std::max(0.0f, floorf(src_bin_end / dst_bin_width)));

          float dst_bin_of_begin_center =
              // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
              dst_bin_of_begin * dst_bin_width + dst_bin_width / 2;
          float density = bins_f[src_bin] / bin_width;
          float delta_begin = src_bin_begin - dst_bin_of_begin_center;
          if (dst_bin_of_begin == dst_bin_of_end) {
            // if src_bin is entirely within 1 dst_bin
            float delta_end = src_bin_end - dst_bin_of_begin_center;
            norm += GetNorm(delta_begin, delta_end, density, kind_);
          } else {
            float delta_end = dst_bin_width / 2;
            norm += GetNorm(delta_begin, delta_end, density, kind_);

            // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
            norm += (dst_bin_of_end - dst_bin_of_begin - 1) *
                GetNorm(-dst_bin_width / 2, dst_bin_width / 2, density, kind_);

            float dst_bin_of_end_center =
                // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
                dst_bin_of_end * dst_bin_width + dst_bin_width / 2;
            delta_begin = -dst_bin_width / 2;
            delta_end = src_bin_end - dst_bin_of_end_center;
            norm += GetNorm(delta_begin, delta_end, density, kind_);
          }
        }
      }

      if (norm < norm_min) {
        norm_min = norm;
        best_start_bin = start_bin;
      }
    } // for each start_bin

    best_start_bins[nbins_selected] = {best_start_bin, norm_min};
  } // for each nbins_selected

  float norm_min = numeric_limits<float>::max();
  int best_nbins_selected = 1, best_start_bin = 0;
  for (int nbins_selected = 1; nbins_selected <= nbins; ++nbins_selected) {
    float norm = best_start_bins[nbins_selected].second;
    if (norm < norm_min) {
      norm_min = norm;
      best_start_bin = best_start_bins[nbins_selected].first;
      best_nbins_selected = nbins_selected;
    }
  }

  float total_sum = 0;
  for (const auto i : c10::irange(bins_f.size())) {
    total_sum += bins_f[i];
  }
  float selected_sum = 0;
  int i_begin = std::max(0, best_start_bin);
  int i_end = std::min(nbins, best_start_bin + best_nbins_selected);
  for (int i = i_begin; i < i_end; ++i) {
    selected_sum += bins_f[i];
  }
  VLOG(2) << "best quantization range covers " << selected_sum / total_sum * 100
          << " %%";

  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  max = min + bin_width * (best_start_bin + best_nbins_selected);
  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  min = min + bin_width * (best_start_bin);

  QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
  return qfactory->ChooseQuantizationParams(
      min, max, precision, preserve_sparsity);
} // ChooseQuantizationParams

} // namespace dnnlowp