mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/18240 For rare cases when dst_bin_width == 0 we should just put all numbers to an arbitrary bin. Reviewed By: csummersea Differential Revision: D14544685 fbshipit-source-id: 02d04ff8bd1555d6cf7e7eeb1196a4ab3325a9e5
234 lines
6.7 KiB
C++
234 lines
6.7 KiB
C++
#include "dynamic_histogram.h"
|
|
#include "dnnlowp_op.h"
|
|
|
|
#include <cassert>
|
|
#include <cmath>
|
|
#include <limits>
|
|
|
|
#ifdef _OPENMP
|
|
#include <omp.h>
|
|
#endif
|
|
|
|
namespace dnnlowp {
|
|
|
|
using namespace std;
|
|
|
|
void Histogram::Add(float f, uint64_t cnt) {
|
|
int nbins = histogram_.size();
|
|
float bin_width = (max_ - min_) / nbins;
|
|
int bin = bin_width == 0
|
|
? 0
|
|
: std::min(static_cast<int>((f - min_) / bin_width), nbins - 1);
|
|
bin = std::max(0, bin);
|
|
assert(bin >= 0);
|
|
histogram_[bin] += cnt;
|
|
}
|
|
|
|
void Histogram::Add(const float* f, int len) {
|
|
int nbins = histogram_.size();
|
|
float bin_width = (max_ - min_) / nbins;
|
|
|
|
if (bin_width > 0.0) {
|
|
assert(per_thread_histogram_.size() % nbins == 0);
|
|
|
|
// Check if dnnlowp_get_max_threads has been reduced, and if so reduce
|
|
// per-thread histogram and clear them.
|
|
int old_nthreads = per_thread_histogram_.size() / nbins + 1;
|
|
if (caffe2::dnnlowp_get_max_threads() < old_nthreads) {
|
|
Finalize();
|
|
}
|
|
|
|
per_thread_histogram_.resize(
|
|
(caffe2::dnnlowp_get_max_threads() - 1) * nbins);
|
|
|
|
#ifdef _OPENMP
|
|
#pragma omp parallel
|
|
#endif
|
|
{
|
|
int tid = caffe2::dnnlowp_get_thread_num();
|
|
|
|
uint64_t* my_histogram = nullptr;
|
|
if (tid == 0) {
|
|
my_histogram = histogram_.data();
|
|
} else {
|
|
my_histogram = per_thread_histogram_.data() + (tid - 1) * nbins;
|
|
}
|
|
|
|
#ifdef _OPENMP
|
|
#pragma omp for
|
|
#endif
|
|
for (auto i = 0; i < len; ++i) {
|
|
int bin =
|
|
std::min(static_cast<int>((f[i] - min_) / bin_width), nbins - 1);
|
|
bin = std::max(0, bin);
|
|
++my_histogram[bin];
|
|
}
|
|
} // omp parallel
|
|
} else {
|
|
histogram_[0] += len;
|
|
}
|
|
}
|
|
|
|
void Histogram::Finalize() {
|
|
int nbins = histogram_.size();
|
|
assert(per_thread_histogram_.size() % nbins == 0);
|
|
int nthreads = per_thread_histogram_.size() / nbins + 1;
|
|
|
|
if (nthreads > 1) {
|
|
for (int bin = 0; bin < nbins; ++bin) {
|
|
for (int i = 1; i < nthreads; ++i) {
|
|
histogram_[bin] += per_thread_histogram_[(i - 1) * nbins + bin];
|
|
}
|
|
}
|
|
}
|
|
|
|
per_thread_histogram_.clear();
|
|
}
|
|
|
|
static const int OVER_BINNING_FACTOR = 4;
|
|
|
|
DynamicHistogram::DynamicHistogram(int nbins)
|
|
: nbins_(nbins),
|
|
min_(numeric_limits<float>::max()),
|
|
max_(numeric_limits<float>::lowest()) {
|
|
assert(nbins_ > 0);
|
|
}
|
|
|
|
void DynamicHistogram::Add(float f) {
|
|
min_ = std::min(min_, f);
|
|
max_ = std::max(max_, f);
|
|
|
|
if (histograms_.empty()) {
|
|
histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, f, f);
|
|
} else {
|
|
Histogram& curr_hist = histograms_.back();
|
|
if (f < curr_hist.Min() || f > curr_hist.Max()) {
|
|
float old_spread = curr_hist.Max() - curr_hist.Min();
|
|
if (f < curr_hist.Min()) {
|
|
float new_min;
|
|
if (old_spread == 0) {
|
|
new_min = f;
|
|
} else {
|
|
new_min = curr_hist.Min() -
|
|
ceil((curr_hist.Min() - f) / old_spread) * old_spread;
|
|
}
|
|
histograms_.emplace_back(
|
|
curr_hist.GetHistogram()->size(), new_min, curr_hist.Max());
|
|
} else {
|
|
float new_max;
|
|
if (old_spread == 0) {
|
|
new_max = f;
|
|
} else {
|
|
new_max = curr_hist.Max() +
|
|
ceil((f - curr_hist.Max()) / old_spread) * old_spread;
|
|
}
|
|
histograms_.emplace_back(
|
|
curr_hist.GetHistogram()->size(), curr_hist.Min(), new_max);
|
|
}
|
|
}
|
|
}
|
|
|
|
Histogram& new_hist = histograms_.back();
|
|
new_hist.Add(f);
|
|
}
|
|
|
|
void DynamicHistogram::Add(const float* f, int len) {
|
|
float minimum = min_, maximum = max_;
|
|
#ifdef _OPENMP
|
|
#pragma omp parallel for reduction(min : minimum) reduction(max : maximum)
|
|
#endif
|
|
for (int i = 0; i < len; ++i) {
|
|
minimum = std::min(f[i], minimum);
|
|
maximum = std::max(f[i], maximum);
|
|
}
|
|
min_ = minimum;
|
|
max_ = maximum;
|
|
|
|
if (histograms_.empty()) {
|
|
histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, min_, max_);
|
|
} else {
|
|
Histogram& curr_hist = histograms_.back();
|
|
if (min_ < curr_hist.Min() || max_ > curr_hist.Max()) {
|
|
float old_spread = curr_hist.Max() - curr_hist.Min();
|
|
float new_min = curr_hist.Min(), new_max = curr_hist.Max();
|
|
if (min_ < curr_hist.Min()) {
|
|
if (old_spread == 0.0f) {
|
|
new_min = min_;
|
|
} else {
|
|
new_min = curr_hist.Min() -
|
|
ceil((curr_hist.Min() - min_) / old_spread) * old_spread;
|
|
}
|
|
}
|
|
if (max_ > curr_hist.Max()) {
|
|
old_spread = curr_hist.Max() - new_min;
|
|
if (old_spread == 0.0f) {
|
|
new_max = max_;
|
|
} else {
|
|
new_max = curr_hist.Max() +
|
|
ceil((max_ - curr_hist.Max()) / old_spread) * old_spread;
|
|
}
|
|
}
|
|
histograms_.emplace_back(
|
|
curr_hist.GetHistogram()->size(), new_min, new_max);
|
|
}
|
|
}
|
|
|
|
Histogram& new_hist = histograms_.back();
|
|
new_hist.Add(f, len);
|
|
}
|
|
|
|
const Histogram* DynamicHistogram::Finalize() {
|
|
if (final_histogram_.get()) {
|
|
return final_histogram_.get();
|
|
}
|
|
|
|
final_histogram_.reset(new Histogram(nbins_, min_, max_));
|
|
float dst_bin_width = (max_ - min_) / nbins_;
|
|
|
|
for (Histogram& hist : histograms_) {
|
|
hist.Finalize();
|
|
|
|
const std::vector<uint64_t>& bins = *hist.GetHistogram();
|
|
float src_bin_width = (hist.Max() - hist.Min()) / bins.size();
|
|
|
|
for (int i = 0; i < bins.size(); ++i) {
|
|
if (bins[i] == 0) {
|
|
continue;
|
|
}
|
|
float src_bin_begin = hist.Min() + src_bin_width * i;
|
|
float src_bin_end = src_bin_begin + src_bin_width;
|
|
// dst_bin corresponds to the beginning of the src_bin
|
|
// dst_bin2 corresponds to the end of the src_bin
|
|
int dst_bin =
|
|
dst_bin_width == 0 ? 0 : (src_bin_begin - min_) / dst_bin_width;
|
|
float dst_bin_begin = min_ + dst_bin_width * dst_bin;
|
|
float dst_bin_end = dst_bin_begin + dst_bin_width;
|
|
int dst_bin2 =
|
|
dst_bin_width == 0 ? 0 : (src_bin_end - min_) / dst_bin_width;
|
|
// 1 src_bin is mapped to at most 2 dst bin
|
|
assert(dst_bin2 <= dst_bin + 2);
|
|
|
|
// dst_bin_cnt is the count from src_bin that should go to dst_bin
|
|
// The remainder should go to dst_bin2
|
|
// rint is the fastest way to round
|
|
// (https://stackoverflow.com/questions/485525/round-for-float-in-c/5849630)
|
|
uint64_t dst_bin_cnt = (src_bin_width == 0 || dst_bin_width == 0)
|
|
? bins[i]
|
|
: std::min(
|
|
static_cast<uint64_t>(rint(
|
|
(dst_bin_end - src_bin_begin) / src_bin_width * bins[i])),
|
|
bins[i]);
|
|
|
|
final_histogram_->Add(dst_bin_begin + dst_bin_width / 2, dst_bin_cnt);
|
|
if (dst_bin_cnt < bins[i]) {
|
|
final_histogram_->Add(
|
|
dst_bin_end + dst_bin_width / 2, bins[i] - dst_bin_cnt);
|
|
}
|
|
}
|
|
} // for each histogram with different scales
|
|
|
|
return final_histogram_.get();
|
|
}
|
|
|
|
} // namespace dnnlowp
|