mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/65545 Introduce a 2-bit qtensor. The new dtype added for this is c10::quint2x4. The underlying storage is still uint8_t, so four 2-bit values are packed into each byte during quantization. Kernels that use this dtype must be aware of this packing format (four 2-bit values per byte). Test Plan: `buck test mode/dev-asan caffe2/test/:quantization -- test_qtensor` Reviewed By: supriyar Differential Revision: D31148141 fbshipit-source-id: 1dc1de719e097adaf93fee47c6d1b8010a3eae6c
20 lines
366 B
C++
20 lines
366 B
C++
#pragma once
|
|
#include <cstdint>
|
|
|
|
#include <c10/macros/Macros.h>
|
|
|
|
namespace c10 {
|
|
|
|
/**
 * quint2x4 is the element type for unsigned 2-bit quantized Tensors whose
 * values are packed to a byte boundary: a single quint2x4 occupies one
 * uint8_t of storage, which holds four 2-bit values.
 *
 * The position of each 2-bit value within the byte is not defined by this
 * type; code that reads or writes val_ must follow the packing format used
 * by the quantization kernels.
 */
struct alignas(1) quint2x4 {
  /// Integer type of the raw storage behind one packed element.
  using underlying = uint8_t;

  /// The raw packed byte.
  uint8_t val_;

  /// Defaulted so the type stays trivially default-constructible; val_ is
  /// left uninitialized when an object is default-initialized.
  quint2x4() = default;

  /// Wraps an already-packed byte. Explicit so that plain integers are never
  /// silently converted into a quantized element.
  C10_HOST_DEVICE explicit quint2x4(uint8_t val) : val_(val) {}
};
|
|
|
|
} // namespace c10
|