mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/17105 To make FC with rowwise quantization faster, reduce code duplication, and make code consistent with Convolution Reviewed By: csummersea Differential Revision: D14080461 fbshipit-source-id: 2b0e67b86e7e3029c90751a8824bf80ae1223680
23 lines
537 B
C++
23 lines
537 B
C++
#pragma once

#include <cstdint>
#include <memory>

#include "fbgemm/Fbgemm.h"

namespace caffe2 {

/**
 * Returns a packed B matrix (fbgemm's pre-packed weight format) for the given
 * quantized weight, reusing a previously created one when available.
 *
 * If there's an existing packed matrix for the same matrix, reuse it.
 * Create a new one otherwise. This can save memory usage if many threads are
 * sharing the same weight.
 *
 * @tparam ACC_T accumulation type used by the fbgemm kernels (e.g. int32_t)
 * @param trans          whether the matrix should be treated as transposed
 * @param m              number of rows — NOTE(review): exact row/col semantics
 *                       not visible here; confirm against the definition
 * @param n              number of columns
 * @param orig_data      pointer to the original (unquantized) weight data;
 *                       presumably used as the identity/cache key for reuse —
 *                       TODO confirm against the .cc implementation
 * @param quantized_data the int8-quantized weight values to pack
 * @param ld             leading dimension (stride) of the matrix
 * @return shared ownership of the packed matrix, so many callers/threads can
 *         hold the same packed weight
 */
template <typename ACC_T>
std::shared_ptr<fbgemm::PackBMatrix<std::int8_t, ACC_T>>
GetOrCreateFbgemmPackBMatrix(
    fbgemm::matrix_op_t trans,
    std::int32_t m,
    std::int32_t n,
    const void* orig_data,
    const std::int8_t* quantized_data,
    std::int32_t ld);

} // namespace caffe2