// pytorch/caffe2/perfkernels/math.h

#pragma once

#include <cstdint>

namespace caffe2 {

namespace math {

// Returns the quantized and compressed values of floating inputs
// The "fused" representation stores the [bitwidth][tail][min][max]
// with the quantized data in one array. Since we store 8/bitwidth
// quantized data in one byte, the last buckets of some bytes may have
// unused bits. In total, *tail* buckets are unused.
// We encode *bitwidth* and *tail* at the beginning,
// followed by 32-bit floating-point data representing min and max.
// | bitwidth | tail | min | max | ... int8 data ... |
// |    1B    |  1B  |  4B |  4B | ...output_data....|
// In output_data: the b-th bucket of the i-th byte stores
// the i-th data of the b-th segment of input row

// Quantizes a run of floats and packs the result into the fused byte layout:
// a 10-byte header (bitwidth: 1B, tail: 1B, min: 4B, max: 4B) followed by
// the packed quantized values, with 8/bitwidth values stored per byte.
//
// Parameters:
//   input_data    - floating-point values to quantize.
//   output_data   - destination for the fused representation (header plus
//                   packed data). NOTE(review): the caller presumably must
//                   provide room for the header and all packed bytes --
//                   confirm the exact size against the implementation.
//   input_size    - presumably the number of floats in input_data -- TODO
//                   confirm (element count vs. byte count).
//   bitwidth      - number of bits per quantized value. NOTE(review): the
//                   "8/bitwidth values per byte" packing suggests it should
//                   divide 8 evenly; the valid set is not stated here.
//   random        - NOTE(review): looks like a switch for randomized
//                   (stochastic) rounding -- confirm in the implementation.
//   random_buffer - presumably the random values consumed when `random` is
//                   true; whether it may be null otherwise is not visible
//                   from this header -- verify against callers.
void quantize_and_compress(
    const float* input_data,
    std::uint8_t* output_data,
    std::uint64_t input_size,
    std::uint64_t bitwidth,
    bool random,
    const float* random_buffer);

// Inverse of quantize_and_compress: reads a fused buffer
// (| bitwidth 1B | tail 1B | min 4B | max 4B | packed data |) and writes the
// reconstructed floating-point values.
//
// Parameters:
//   input_data  - fused representation; bitwidth, tail, min and max are
//                 taken from its header rather than passed separately.
//   output_data - destination for the dequantized floats. NOTE(review): the
//                 required capacity depends on bitwidth and tail from the
//                 header -- confirm the exact count in the implementation.
//   input_size  - size of input_data. NOTE(review): presumably the byte
//                 length of the whole fused buffer including the header --
//                 TODO confirm against the implementation.
void decompress_and_dequantize(
    const std::uint8_t* input_data,
    float* output_data,
    std::uint64_t input_size);

} // namespace math
} // namespace caffe2