This PR enables `-Winconsistent-missing-destructor-override` and `-Winconsistent-missing-override` and fixes violations.

This pull request updates various classes and operators in the `caffe2` and `aten` subdirectories to use the `override` specifier instead of the `virtual` keyword for destructors and other virtual functions that override a base-class function. This improves readability, consistency, and adherence to C++ best practices. It also modifies `./CMakeLists.txt` to enable warnings for these specifiers, but not to treat them as errors.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/104032
Approved by: https://github.com/malfet
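For context, a minimal sketch of what these clang warnings flag and what the fix looks like (hypothetical `Base`/`Before`/`After` classes, not code from this diff):

```cpp
struct Base {
  virtual ~Base();
  virtual void Run();
  virtual void Reset();
};

// Warns: once one member uses `override` (Run), the destructor trips
// -Winconsistent-missing-destructor-override and Reset trips
// -Winconsistent-missing-override.
struct Before : Base {
  virtual ~Before();
  void Run() override;
  virtual void Reset();
};

// Fixed: every overriding member, including the destructor, says so
// explicitly, and the redundant `virtual` is dropped.
struct After : Base {
  ~After() override;
  void Run() override;
  void Reset() override;
};
```

This is the pattern behind the header below: its destructor is declared `~ConvDNNLowPOp() override;` rather than `virtual ~ConvDNNLowPOp();`.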
#pragma once

#include <fbgemm/Fbgemm.h>

#include "caffe2/operators/conv_op.h"

#include "caffe2/operators/conv_pool_op_base.h"

#include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"

#include "caffe2/quantization/server/conv_pool_dnnlowp_op_base.h"

#include "caffe2/quantization/server/dnnlowp.h"

#include "caffe2/quantization/server/op_wrapper.h"

namespace caffe2 {

using ConvFp32Op = ConvOp<float, CPUContext>;

// Convolutional layer computed in integer with quantization
template <typename T, bool ReluFused = false>
class ConvDNNLowPOp : public ConvPoolDNNLowPOpBase<T, ConvFp32Op> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS(CPUContext);
  USE_CONV_POOL_DNNLOWP_OPERATOR_BASE_FUNCTIONS(T, ConvFp32Op);
  ConvDNNLowPOp(const OperatorDef& operator_def, Workspace* ws);
  ~ConvDNNLowPOp() override;

 protected:
  bool RunOnDeviceWithOrderNCHW() override;
  bool RunOnDeviceWithOrderNHWC() override;

  bool GetQuantizationParameters_();

  /**
   * @return true if the convolution is basically a point-wise GEMM (e.g., a
   * 1x1 convolution with no stride/dilation/pad)
   */
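  // Example: with 1x1 kernels, unit stride, and no padding, each output pixel
  // is just a dot product of a filter with one input pixel's channels, so per
  // group the op reduces to a single (N*H*W) x C by C x M matrix multiply.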
  bool IsConvGEMM_() const;
  bool NoIm2ColNHWC_();
  int KernelDim_();

  const T* Im2ColNHWC_(Tensor* col_buffer);
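
  // Per-group quantization accessors. Requantization maps the int32
  // accumulator back to T; the multiplier is typically
  // in_scale * filter_scale / out_scale (see dnnlowp.h for the exact scheme).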
  dnnlowp::TensorQuantizationParams& FilterQuantizationParams(int group_id);
  dnnlowp::RequantizationParams& RequantizationParams(int group_id);

  static void PartitionGroupedNHWCConv_(
      int* group_begin,
      int* group_end,
      int* i_begin,
      int* i_end,
      int num_groups,
      int m,
      int nthreads,
      int thread_id);
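
  // Whether the op accumulates in 16 bits rather than 32; this base
  // implementation always uses 32-bit accumulation, and the acc16 variant of
  // this op is expected to override it to return true.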
  virtual bool Acc16() const {
    return false;
  }

  Tensor col_buffer_{CPU};
  Tensor img_shape_device_{CPU};
  Tensor col_buffer_shape_device_{CPU};

  // Input: X, W, b
  // Output: Y
  INPUT_TAGS(INPUT, FILTER, BIAS);

  // x86 only provides SIMD instructions that multiply a signed integer with an
  // unsigned integer. We use signed for weights.
  using T_signed = typename std::make_signed<T>::type;
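  // For example, T = uint8_t gives T_signed = int8_t, matching AVX2's
  // vpmaddubsw, which multiplies unsigned 8-bit activations by signed 8-bit
  // weights.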

  // used in slow path for T != uint8_t
  std::vector<T_signed> W_quantized_;

  // pre-computed biases and offsets
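  // A sketch of why these help: sum_k (X_q[i,k] - x_zp) * (W_q[k,j] - w_zp)
  //   = sum_k X_q*W_q - x_zp * colsum_j(W_q) - w_zp * rowsum_i(X_q)
  //     + K * x_zp * w_zp,
  // so the weight column sums (column_offsets_) can be computed once at
  // packing time, and only the activation row sums (row_offsets_) vary per
  // call.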
  std::shared_ptr<std::vector<std::int32_t>> column_offsets_;
  std::vector<std::int32_t> row_offsets_;
  const std::int32_t* b_quantized_data_{nullptr};

  std::vector<std::uint8_t> X_pack_buf_;

  void RunOnDeviceEpilogueNCHW_(
      const T* col_buffer_data,
      std::int32_t* Y_int32,
      T* Y_data,
      std::size_t i_offset,
      int group_id);
  void RunOnDeviceEpilogueNHWC_(
      const T* col_buffer_data,
      std::int32_t* Y_int32);

  std::vector<std::int32_t> Y_int32_;
  std::vector<dnnlowp::TensorQuantizationParams> filter_qparams_;
  std::vector<std::int32_t> filter_zero_points_;

  std::vector<float> requantization_multipliers_;
  bool quantize_groupwise_;

 private:
  void QuantizeWeight_();
  void PreComputeRowColumnOffsets_();
  void QuantizeBias_();

  bool TakeDepthWise3x3FastPath_();
  bool TakeDepthWise3x3x3FastPath_();
  bool TakeGConvFastPath_();

  template <typename PackAMatrix, fbgemm::QuantizationGranularity Q_GRAN>
  void DispatchFBGEMM_(
      PackAMatrix& packA,
      vector<std::int32_t>* Y_int32,
      uint8_t* Y_uint8_data);

  void ConvNHWCCore_(const T* col_buffer_data, vector<std::int32_t>* Y_int32);

  fbgemm::conv_param_t<> GetConvParam_();
  fbgemm::conv_param_t<3> GetConv3DParam_();

  std::vector<dnnlowp::RequantizationParams> requantization_params_;

  // used in fast path for T == uint8_t
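  // The B (weight) matrix is reused across every forward call, so it is
  // packed once into fbgemm's blocked layout instead of on each run.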
  std::shared_ptr<fbgemm::PackBMatrix<std::int8_t>> Wq_packed_;

  // For depthwise conv
  std::shared_ptr<fbgemm::PackedDepthWiseConvMatrix> Wq_depthwise_packed_;
  // For small gconv
  std::shared_ptr<fbgemm::PackWeightMatrixForGConv<std::int8_t>>
      Wq_gconv_packed_;
  std::shared_ptr<
      fbgemm::PackWeightMatrixForGConv<std::int8_t, std::int32_t, 3>>
      Wq_gconv3d_packed_;

  // pre-computed biases and offsets
  std::shared_ptr<std::vector<std::int32_t>> b_quantized_;

  float in_qparams_scale_old_{0};
  std::int32_t in_qparams_zero_point_old_{0};
}; // class ConvDNNLowPOp

} // namespace caffe2