Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/69303
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/792
Follow-up to D32715453 (e60fd10659), allowing the row size to be 64-bit (input_rows is now size_t).
Test Plan:
buck test mode/opt -c fbcode.caffe2_gpu_type=v100,a100 //deeplearning/fbgemm/fbgemm_gpu:quantize_ops_test
buck test mode/opt -c fbcode.caffe2_gpu_type=none //deeplearning/fbgemm/fbgemm_gpu:quantize_ops_test
buck test mode/opt //caffe2/test:
Reviewed By: jspark1105, jianyuh
Differential Revision: D32768838
fbshipit-source-id: 9e2b01d8d23e71f8333820e725379c3fc1c0711a
#pragma once

#include <cstddef>
#include <cstdint>

namespace caffe2 {

// Row-wise quantization to 8 bits with fused fp32 scale and bias: each
// output row stores the quantized uint8 values followed by that row's
// fp32 scale and fp32 bias.
void FloatToFused8BitRowwiseQuantized(
    const float* input,
    size_t input_rows,
    int input_columns,
    std::uint8_t* output);

// Inverse of the above: dequantizes a fused 8-bit row-wise matrix back
// to float.
void Fused8BitRowwiseQuantizedToFloat(
    const std::uint8_t* input,
    size_t input_rows,
    int input_columns,
    float* output);

/**
 * Row-wise quantization with fp16 scale and bias
 *
 * @param bit_rate can be 2, 4, or 8
 */
void FloatToFusedNBitRowwiseQuantizedSBHalf(
    int bit_rate,
    const float* input,
    size_t input_rows,
    int input_columns,
    std::uint8_t* output);

// Inverse of the above: dequantizes a fused n-bit row-wise matrix
// (fp16 scale and bias) back to float.
void FusedNBitRowwiseQuantizedSBHalfToFloat(
    int bit_rate,
    const std::uint8_t* input,
    size_t input_rows,
    int input_columns,
    float* output);

} // namespace caffe2
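For reference, a minimal round-trip sketch for the 8-bit pair. It assumes the fused layout used by FBGEMM (uint8 data followed by an fp32 scale and an fp32 bias, i.e. 8 extra bytes per row) and that the dequantize call takes the fused width, matching FBGEMM's QuantUtils convention; the buffer-sizing arithmetic is illustrative, not part of this header, and linking requires the actual perfkernels implementation.

#include <cstddef>
#include <cstdint>
#include <vector>

// Declarations copied from the header above; in-tree code would #include it.
namespace caffe2 {
void FloatToFused8BitRowwiseQuantized(
    const float* input, size_t input_rows, int input_columns,
    std::uint8_t* output);
void Fused8BitRowwiseQuantizedToFloat(
    const std::uint8_t* input, size_t input_rows, int input_columns,
    float* output);
} // namespace caffe2

int main() {
  // Row count is size_t per this diff, so it may exceed 32 bits.
  const std::size_t rows = 4;
  const int cols = 16;
  std::vector<float> input(rows * cols, 0.25f);

  // Assumed fused width: `cols` uint8 values plus fp32 scale and bias.
  const int fused_cols = cols + 2 * static_cast<int>(sizeof(float));
  std::vector<std::uint8_t> quantized(rows * fused_cols);
  caffe2::FloatToFused8BitRowwiseQuantized(
      input.data(), rows, cols, quantized.data());

  // Dequantize takes the fused width and writes back rows x cols floats.
  std::vector<float> roundtrip(rows * cols);
  caffe2::Fused8BitRowwiseQuantizedToFloat(
      quantized.data(), rows, fused_cols, roundtrip.data());
  return 0;
}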
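The N-bit SBHalf variant packs several elements per byte and stores an fp16 scale and bias per row. A hedged sizing helper (not part of this header), assuming FBGEMM's convention of ceil-dividing the column count by elements-per-byte and appending 2 * sizeof(fp16) = 4 bytes:

#include <cstddef>
#include <cstdint>

// Illustrative helper: bytes per fused row produced by
// FloatToFusedNBitRowwiseQuantizedSBHalf, assuming packed n-bit values
// followed by an fp16 scale and an fp16 bias.
std::size_t FusedNBitRowWidth(int bit_rate, int input_columns) {
  const int elems_per_byte = 8 / bit_rate;  // bit_rate is 2, 4, or 8
  const std::size_t packed_bytes =
      (input_columns + elems_per_byte - 1) / elems_per_byte;  // ceil divide
  return packed_bytes + 2 * sizeof(std::uint16_t);  // + fp16 scale and bias
}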