Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
[RELAND] Remove deprecated fbgemm operators (#112153)
These operators are not used and have been deprecated since #72690 (Feb 2022).

BC-breaking message: `TorchScript` models that were exported with the deprecated `torch.jit.quantized` API will no longer be loadable, as the required internal operators have been removed. Please re-export your models using the newer `torch.ao.quantization` API instead.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/112153
Approved by: https://github.com/jerryzh168
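For reference, a minimal sketch of the suggested migration path, assuming a model with dynamically quantizable LSTM and Linear layers; the module definition, shapes, and file name below are illustrative placeholders, not taken from the PR:

```python
# Hypothetical re-export example using torch.ao.quantization instead of the
# removed torch.jit.quantized path. Module, shapes, and file name are made up.
import torch
import torch.ao.quantization


class LSTMModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = torch.nn.LSTM(32, 64)
        self.fc = torch.nn.Linear(64, 10)

    def forward(self, x):
        out, _ = self.lstm(x)
        return self.fc(out)


model = LSTMModel().eval()

# Dynamic quantization via the torch.ao.quantization API.
quantized = torch.ao.quantization.quantize_dynamic(
    model, {torch.nn.LSTM, torch.nn.Linear}, dtype=torch.qint8
)

# Re-export as TorchScript; the new archive goes through the quantized::
# custom-class path rather than the removed internal operators.
scripted = torch.jit.script(quantized)
scripted.save("quantized_model.pt")
```

Models re-exported this way should load on builds that no longer ship the removed `aten::fbgemm_*` and `aten::quantized_*_cell` operators.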
This commit is contained in:
parent 2327879fb6
commit 19e8ba95e5
@@ -14,12 +14,9 @@
#else
#include <ATen/ops/empty.h>
#include <ATen/ops/empty_like_native.h>
#include <ATen/ops/fbgemm_linear_fp16_weight_fp32_activation_native.h>
#include <ATen/ops/fbgemm_linear_fp16_weight_native.h>
#include <ATen/ops/fbgemm_linear_int8_weight_fp32_activation_native.h>
#include <ATen/ops/fbgemm_linear_int8_weight_native.h>
#include <ATen/ops/fbgemm_linear_quantize_weight_native.h>
#include <ATen/ops/fbgemm_pack_gemm_matrix_fp16_native.h>
#include <ATen/ops/fbgemm_pack_quantized_matrix_native.h>
#endif
@@ -376,94 +373,6 @@ void HandleWeightsSaturation(int64_t N, float* weight) {

} // namespace

Tensor fbgemm_pack_gemm_matrix_fp16(const Tensor& weight) {
  TORCH_WARN_ONCE("fbgemm_pack_gemm_matrix_fp16 is deprecated "
                  "and will be removed in a future PyTorch release.")

  // We make a strong guarantee that models using these operators will have the
  // same numerics across different machines. Therefore, we do not provide a
  // fallback path and rather fail loudly if we cannot run FBGEMM.
  TORCH_CHECK(fbgemm::fbgemmSupportedCPU(), "Your CPU doesn't support FBGEMM.");

  const int64_t K = weight.size(1);
  const int64_t N = weight.size(0);
  Tensor weight_contig = weight.contiguous();
  float* weight_contig_ptr = weight_contig.data_ptr<float>();
  HandleWeightsSaturation(K * N, weight_contig_ptr);

  // TODO(mingzhe09088):
  // Consider using a functor here in PackedGemmMatrixFP16
  // Comments from (XQ): Not entirely sure this make_unique is safe. make_unique
  // is created with regular "new", and freed through TypeMetaData::deleteFn in
  // this function. This is perfectly fine if the tensors are created and freed
  // within this translation unit. It might be very problematic if that tensor
  // flows across dll boundaries.
  auto ptr = std::make_unique<fbgemm::PackedGemmMatrixFP16>(
      fbgemm::matrix_op_t::Transpose, K, N, 1, weight_contig_ptr);
  c10::intrusive_ptr<LinearPackedParamsBase> packed_weight =
      c10::make_intrusive<PackedLinearWeightFp16>(std::move(ptr), c10::nullopt);
  auto unique_ptr_wrapper =
      std::make_unique<decltype(packed_weight)>(std::move(packed_weight));
  return cpp_custom_type_hack::create(
      std::move(unique_ptr_wrapper), weight.options());
}

Tensor fbgemm_linear_fp16_weight_fp32_activation(
    const Tensor& input,
    const Tensor& packed_weight,
    const Tensor& bias) {
  TORCH_WARN_ONCE("fbgemm_linear_fp16_weight_fp32_activation is deprecated "
                  "and will be removed in a future PyTorch release.")

  // We make a strong guarantee that models using these operators will have the
  // same numerics across different machines. Therefore, we do not provide a
  // fallback path and rather fail loudly if we cannot run FBGEMM.
  TORCH_CHECK(fbgemm::fbgemmSupportedCPU(), "Your CPU doesn't support FBGEMM.");

  const Tensor input_contig = input.contiguous();
  const float* input_ptr = input_contig.data_ptr<float>();

  // Pull out the PackedGemmMatrixFP16 instance from the owning tensor
  const fbgemm::PackedGemmMatrixFP16& packed_weight_fp16 =
      *c10::dynamic_intrusive_pointer_cast<PackedLinearWeightFp16>(
           cpp_custom_type_hack::cast<
               c10::intrusive_ptr<LinearPackedParamsBase>>(packed_weight))
           ->w;

  TORCH_CHECK(input.size(input.dim() - 1) == packed_weight_fp16.numRows())
  TORCH_CHECK(input.dim() >= 2);
  TORCH_CHECK(bias.dim() == 1);

  // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
  const int64_t M = size_to_dim_(input.dim() - 1, input.sizes());
  const int64_t N = packed_weight_fp16.numCols();
  std::vector<int64_t> output_size = input.sizes().vec();
  output_size.back() = N;
  Tensor output = at::empty(output_size, input.options().dtype(at::kFloat));

  // Call the fp16 gemm interface
  fbgemm::cblas_gemm_compute(
      fbgemm::matrix_op_t::NoTranspose,
      M,
      input_ptr,
      packed_weight_fp16,
      0.0f,
      output.data_ptr<float>());

  // Add bias term
  output.add_(bias);

  return output;
}

Tensor fbgemm_linear_fp16_weight(
    const Tensor& input,
    const Tensor& packed_weight,
    const Tensor& bias) {
  return at::native::fbgemm_linear_fp16_weight_fp32_activation(
      input, packed_weight, bias);
}

#else // USE_FBGEMM

Tensor fbgemm_linear_int8_weight_fp32_activation(
@@ -539,45 +448,6 @@ Tensor fbgemm_pack_quantized_matrix(
      false, "This PyTorch installation was not built with FBGEMM operators");
}

Tensor fbgemm_pack_gemm_matrix_fp16(const Tensor& weight) {
  TORCH_WARN_ONCE("fbgemm_pack_gemm_matrix_fp16 is deprecated "
                  "and will be removed in a future PyTorch release.")

  // We make a strong guarantee that models using these operators will have the
  // same numerics across different machines. Therefore, we do not provide a
  // fallback path and rather fail loudly if we cannot run FBGEMM.
  TORCH_CHECK(
      false, "This PyTorch installation was not built with FBGEMM operators");
}

Tensor fbgemm_linear_fp16_weight_fp32_activation(
    const Tensor& input,
    const Tensor& packed_weight,
    const Tensor& bias) {
  TORCH_WARN_ONCE("fbgemm_linear_fp16_weight_fp32_activation is deprecated "
                  "and will be removed in a future PyTorch release.")

  // We make a strong guarantee that models using these operators will have the
  // same numerics across different machines. Therefore, we do not provide a
  // fallback path and rather fail loudly if we cannot run FBGEMM.
  TORCH_CHECK(
      false, "This PyTorch installation was not built with FBGEMM operators");
}

Tensor fbgemm_linear_fp16_weight(
    const Tensor& input,
    const Tensor& packed_weight,
    const Tensor& bias) {
  TORCH_WARN_ONCE("fbgemm_linear_fp16_weight is deprecated "
                  "and will be removed in a future PyTorch release.")

  // We make a strong guarantee that models using these operators will have the
  // same numerics across different machines. Therefore, we do not provide a
  // fallback path and rather fail loudly if we cannot run FBGEMM.
  TORCH_CHECK(
      false, "This PyTorch installation was not built with FBGEMM operators");
}

#endif // USE_FBGEMM

} // namespace at::native
@@ -32,19 +32,12 @@
#include <ATen/ops/cat.h>
#include <ATen/ops/cudnn_is_acceptable.h>
#include <ATen/ops/dropout.h>
#include <ATen/ops/fbgemm_linear_int8_weight_fp32_activation.h>
#include <ATen/ops/fbgemm_linear_quantize_weight_native.h>
#include <ATen/ops/fbgemm_pack_quantized_matrix_native.h>
#include <ATen/ops/gru_cell_native.h>
#include <ATen/ops/gru_native.h>
#include <ATen/ops/linear.h>
#include <ATen/ops/lstm_cell_native.h>
#include <ATen/ops/lstm_native.h>
#include <ATen/ops/matmul.h>
#include <ATen/ops/quantized_gru_cell_native.h>
#include <ATen/ops/quantized_lstm_cell_native.h>
#include <ATen/ops/quantized_rnn_relu_cell_native.h>
#include <ATen/ops/quantized_rnn_tanh_cell_native.h>
#include <ATen/ops/relu.h>
#include <ATen/ops/rnn_relu_cell_native.h>
#include <ATen/ops/rnn_relu_native.h>
@@ -214,158 +207,6 @@ struct CellParams : public CellParamsBase {
  }
};

c10::intrusive_ptr<CellParamsBase> make_quantized_cell_params(
    const at::Tensor& w_ih,
    const at::Tensor& w_hh,
    at::Tensor bias_ih,
    at::Tensor bias_hh);

struct QuantizedCellParams : public CellParamsBase {
  QuantizedCellParams(
      Tensor _w_ih,
      Tensor _w_hh,
      Tensor _b_ih,
      Tensor _b_hh,
      Tensor _packed_ih,
      Tensor _packed_hh,
      Tensor _col_offsets_ih,
      Tensor _col_offsets_hh,
      Scalar _scale_ih,
      Scalar _scale_hh,
      Scalar _zero_point_ih,
      Scalar _zero_point_hh)
      : w_ih(std::move(_w_ih)),
        w_hh(std::move(_w_hh)),
        b_ih_(std::move(_b_ih)),
        b_hh_(std::move(_b_hh)),
        packed_ih(std::move(_packed_ih)),
        packed_hh(std::move(_packed_hh)),
        col_offsets_ih(std::move(_col_offsets_ih)),
        col_offsets_hh(std::move(_col_offsets_hh)),
        scale_ih(std::move(_scale_ih)),
        scale_hh(std::move(_scale_hh)),
        zero_point_ih(std::move(_zero_point_ih)),
        zero_point_hh(std::move(_zero_point_hh)) {}

  const Tensor w_ih;
  const Tensor w_hh;
  const Tensor b_ih_;
  const Tensor b_hh_;
  const Tensor packed_ih;
  const Tensor packed_hh;
  const Tensor col_offsets_ih;
  const Tensor col_offsets_hh;
  const Scalar scale_ih;
  const Scalar scale_hh;
  const Scalar zero_point_ih;
  const Scalar zero_point_hh;

  Tensor matmul_ih(const Tensor& input) const override {
    TORCH_CHECK(false, "matmul is not supported with quantized cell params");
  }
  Tensor matmul_hh(const Tensor& h) const override {
    TORCH_CHECK(false, "matmul is not supported with quantized cell params");
  }
  Tensor linear_ih(const Tensor& input) const override {
    return at::fbgemm_linear_int8_weight_fp32_activation(
        input, w_ih, packed_ih, col_offsets_ih, scale_ih, zero_point_ih, b_ih_);
  }
  Tensor linear_hh(const Tensor& h) const override {
    return at::fbgemm_linear_int8_weight_fp32_activation(
        h, w_hh, packed_hh, col_offsets_hh, scale_hh, zero_point_hh, b_hh_);
  }
  const Tensor& b_ih() const override {
    return b_ih_;
  }
  const Tensor& b_hh() const override {
    return b_hh_;
  }
  CellParamsSerializationType __getstate__() const override {
    std::vector<at::Tensor> tensors_to_serialize = {
        w_ih, w_hh, b_ih_, b_hh_, col_offsets_ih, col_offsets_hh};
    std::vector<double> doubles_to_serialize = {scale_ih.toDouble(),
                                                scale_hh.toDouble()};
    std::vector<int64_t> longs_to_serialize = {zero_point_ih.toLong(),
                                               zero_point_hh.toLong()};
    return CellParamsSerializationType(
        "quantized",
        std::move(tensors_to_serialize),
        std::move(doubles_to_serialize),
        std::move(longs_to_serialize),
        {});
  }
  static c10::intrusive_ptr<CellParamsBase> __setstate__(
      CellParamsSerializationType state) {
    std::vector<at::Tensor> tensors;
    std::vector<double> doubles;
    std::vector<int64_t> longs;
    std::tie(std::ignore, tensors, doubles, longs, std::ignore) =
        std::move(state);
    TORCH_INTERNAL_ASSERT(tensors.size() == 6);
    TORCH_INTERNAL_ASSERT(doubles.size() == 2);
    TORCH_INTERNAL_ASSERT(longs.size() == 2);

    at::Tensor qw_ih = std::move(tensors[0]), qw_hh = std::move(tensors[1]),
               b_ih = std::move(tensors[2]), b_hh = std::move(tensors[3]),
               col_offsets_ih = std::move(tensors[4]),
               col_offsets_hh = std::move(tensors[5]);
    double scale_ih = doubles[0], scale_hh = doubles[1];
    int64_t zero_point_ih = longs[0], zero_point_hh = longs[1];

    at::Tensor packed_ih = at::native::fbgemm_pack_quantized_matrix(qw_ih);
    at::Tensor packed_hh = at::native::fbgemm_pack_quantized_matrix(qw_hh);

    return c10::make_intrusive<QuantizedCellParams>(
        /*w_ih=*/std::move(qw_ih),
        /*w_hh=*/std::move(qw_hh),
        /*b_ih_=*/std::move(b_ih),
        /*b_hh_=*/std::move(b_hh),
        /*packed_ih=*/std::move(packed_ih),
        /*packed_hh=*/std::move(packed_hh),
        /*col_offsets_ih=*/std::move(col_offsets_ih),
        /*col_offsets_hh=*/std::move(col_offsets_hh),
        /*scale_ih=*/scale_ih,
        /*scale_hh=*/scale_hh,
        /*zero_point_ih=*/zero_point_ih,
        /*zero_point_hh=*/zero_point_hh);
  }
};

c10::intrusive_ptr<CellParamsBase> make_quantized_cell_params(
    const at::Tensor& w_ih,
    const at::Tensor& w_hh,
    at::Tensor b_ih,
    at::Tensor b_hh) {
  auto make_vals = [&](const at::Tensor& W) {
    auto params = at::native::fbgemm_linear_quantize_weight(W);
    at::Tensor packed_weight =
        at::native::fbgemm_pack_quantized_matrix(std::get<0>(params));
    return std::tuple_cat(
        std::make_tuple(std::move(packed_weight)), std::move(params));
  };

  at::Tensor qw_ih, qw_hh, packed_ih, packed_hh, col_offsets_ih, col_offsets_hh;
  at::Scalar scale_ih, scale_hh, zero_point_ih, zero_point_hh;

  std::tie(packed_ih, qw_ih, col_offsets_ih, scale_ih, zero_point_ih) =
      make_vals(w_ih);
  std::tie(packed_hh, qw_hh, col_offsets_hh, scale_hh, zero_point_hh) =
      make_vals(w_hh);

  return c10::make_intrusive<QuantizedCellParams>(
      /*qw_ih=*/std::move(qw_ih),
      /*qw_hh=*/std::move(qw_hh),
      /*b_ih=*/std::move(b_ih),
      /*b_hh=*/std::move(b_hh),
      /*packed_ih=*/std::move(packed_ih),
      /*packed_hh=*/std::move(packed_hh),
      /*col_offsets_ih=*/std::move(col_offsets_ih),
      /*col_offsets_hh=*/std::move(col_offsets_hh),
      /*scale_ih=*/std::move(scale_ih),
      /*scale_hh=*/std::move(scale_hh),
      /*zero_point_ih=*/std::move(zero_point_ih),
      /*zero_point_hh=*/std::move(zero_point_hh));
}

// QuantizedCellParams vs. QuantizedCellParamsDynamic
//
@@ -542,7 +383,6 @@ static std::unordered_map<
    std::string,
    c10::intrusive_ptr<CellParamsBase> (*)(CellParamsSerializationType)>
    cell_params_deserializers = {
        {"quantized", &QuantizedCellParams::__setstate__},
        {"quantized_dynamic", &QuantizedCellParamsDynamic::__setstate__},
        {"quantized_fp16", &QuantizedCellParamsFP16::__setstate__}};
@@ -1834,38 +1674,6 @@ static std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data_legacy(
      "using the newer definitions in torch.jit.quantized");
}

#define DEFINE_QUANTIZED_RNN_CELL(name, hx_type, cell_type, return_type, prepare_hx_fn) \
return_type name( \
    const Tensor& input, \
    hx_type hx, \
    const Tensor& w_ih, \
    const Tensor& w_hh, \
    const Tensor& b_ih, \
    const Tensor& b_hh, \
    const Tensor& packed_ih, \
    const Tensor& packed_hh, \
    const Tensor& col_offsets_ih, \
    const Tensor& col_offsets_hh, \
    const Scalar& scale_ih, \
    const Scalar& scale_hh, \
    const Scalar& zero_point_ih, \
    const Scalar& zero_point_hh) { \
  QuantizedCellParams params( \
      w_ih, \
      w_hh, \
      b_ih, \
      b_hh, \
      packed_ih, \
      packed_hh, \
      col_offsets_ih, \
      col_offsets_hh, \
      scale_ih, \
      scale_hh, \
      zero_point_ih, \
      zero_point_hh); \
  return cell_type{}( \
      input, prepare_hx_fn(hx), params); \
}
// Set reduced range to be True for all RNN Cells by default. This flag is used only for FBGEMM kernels
// QNNPACK does not reduce range for activations
#define DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(name, hx_type, cell_type, return_type, prepare_hx_fn) \
@@ -1888,7 +1696,6 @@ return_type name( \
}

// Quantized LSTM cell
using quantized_lstm_cell_type = LSTMCell<QuantizedCellParams>;
using quantized_lstm_return_type = std::tuple<Tensor, Tensor>;
static std::tuple<Tensor, Tensor> prepare_quantized_lstm_hx(TensorList hx) {
  return std::make_tuple(hx[0], hx[1]);
@@ -1897,7 +1704,6 @@ static std::tuple<Tensor, Tensor> prepare_quantized_lstm_hx(TensorList hx) {
// Quantized LSTM cell
using quantized_lstm_cell_dynamic_type = LSTMCell<QuantizedCellParamsDynamic>;

DEFINE_QUANTIZED_RNN_CELL(quantized_lstm_cell, TensorList, quantized_lstm_cell_type, quantized_lstm_return_type, prepare_quantized_lstm_hx);

static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_lstm_cell_dynamic, TensorList, quantized_lstm_cell_dynamic_type, quantized_lstm_return_type, prepare_quantized_lstm_hx);
@@ -1908,22 +1714,15 @@ static simple_hx_type prepare_quantized_hx(simple_hx_type hx) {
}

// Quantized GRU cell
using quantized_gru_cell_type = GRUCell<QuantizedCellParams>;
using quantized_gru_cell_dynamic_type = GRUCell<QuantizedCellParamsDynamic>;

DEFINE_QUANTIZED_RNN_CELL(quantized_gru_cell, simple_hx_type, quantized_gru_cell_type, Tensor, prepare_quantized_hx);

static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_gru_cell_dynamic, simple_hx_type, quantized_gru_cell_dynamic_type, Tensor, prepare_quantized_hx);

// Quantized RNN w/ ReLU cell
using quantized_rnn_relu_cell_type = SimpleCell<relu_f, QuantizedCellParams>;
DEFINE_QUANTIZED_RNN_CELL(quantized_rnn_relu_cell, simple_hx_type, quantized_rnn_relu_cell_type, Tensor, prepare_quantized_hx);
using quantized_rnn_relu_cell_dynamic_type = SimpleCell<relu_f, QuantizedCellParamsDynamic>;
static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_rnn_relu_cell_dynamic, simple_hx_type, quantized_rnn_relu_cell_dynamic_type, Tensor, prepare_quantized_hx);

// Quantized RNN w/ tanh cell
using quantized_rnn_tanh_cell_type = SimpleCell<tanh_f, QuantizedCellParams>;
DEFINE_QUANTIZED_RNN_CELL(quantized_rnn_tanh_cell, simple_hx_type, quantized_rnn_tanh_cell_type, Tensor, prepare_quantized_hx);
using quantized_rnn_tanh_cell_dynamic_type = SimpleCell<tanh_f, QuantizedCellParamsDynamic>;
static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_rnn_tanh_cell_dynamic, simple_hx_type, quantized_rnn_tanh_cell_dynamic_type, Tensor, prepare_quantized_hx);
@@ -1965,7 +1764,6 @@ TORCH_LIBRARY_FRAGMENT(aten, m) {
TORCH_LIBRARY_FRAGMENT(quantized, m) {
  m.def(TORCH_SELECTIVE_SCHEMA("quantized::make_quantized_cell_params_dynamic(__torch__.torch.classes.quantized.LinearPackedParamsBase w_ih, __torch__.torch.classes.quantized.LinearPackedParamsBase w_hh, Tensor bias_ih, Tensor bias_hh, bool reduce_range=False) -> __torch__.torch.classes.rnn.CellParamsBase"));
  m.def(TORCH_SELECTIVE_SCHEMA("quantized::make_quantized_cell_params_fp16(__torch__.torch.classes.quantized.LinearPackedParamsBase w_ih, __torch__.torch.classes.quantized.LinearPackedParamsBase w_hh) -> __torch__.torch.classes.rnn.CellParamsBase"));
  m.def(TORCH_SELECTIVE_SCHEMA("quantized::make_quantized_cell_params(Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh) -> __torch__.torch.classes.rnn.CellParamsBase"));
  m.def(TORCH_SELECTIVE_SCHEMA("quantized::quantized_lstm_cell_dynamic(Tensor input, Tensor[] hx, __torch__.torch.classes.quantized.LinearPackedParamsBase w_ih, __torch__.torch.classes.quantized.LinearPackedParamsBase w_hh, Tensor bias_ih, Tensor bias_hh) -> (Tensor, Tensor)"));
  m.def(TORCH_SELECTIVE_SCHEMA("quantized::quantized_gru_cell_dynamic(Tensor input, Tensor hx, __torch__.torch.classes.quantized.LinearPackedParamsBase w_ih, __torch__.torch.classes.quantized.LinearPackedParamsBase w_hh, Tensor b_ih, Tensor b_hh) -> Tensor"));
  m.def(TORCH_SELECTIVE_SCHEMA("quantized::quantized_rnn_relu_cell_dynamic(Tensor input, Tensor hx, __torch__.torch.classes.quantized.LinearPackedParamsBase w_ih, __torch__.torch.classes.quantized.LinearPackedParamsBase w_hh, Tensor b_ih, Tensor b_hh) -> Tensor"));
@@ -1985,7 +1783,6 @@ TORCH_LIBRARY_IMPL(aten, CPU, m) {

TORCH_LIBRARY_IMPL(quantized, CPU, m) {
  m.impl(TORCH_SELECTIVE_NAME("quantized::make_quantized_cell_params_dynamic"), TORCH_FN(make_quantized_cell_params_dynamic));
  m.impl(TORCH_SELECTIVE_NAME("quantized::make_quantized_cell_params"), TORCH_FN(make_quantized_cell_params));
  m.impl(TORCH_SELECTIVE_NAME("quantized::quantized_lstm_cell_dynamic"), TORCH_FN(quantized_lstm_cell_dynamic));
  m.impl(TORCH_SELECTIVE_NAME("quantized::quantized_gru_cell_dynamic"), TORCH_FN(quantized_gru_cell_dynamic));
  m.impl(TORCH_SELECTIVE_NAME("quantized::quantized_rnn_relu_cell_dynamic"), TORCH_FN(quantized_rnn_relu_cell_dynamic));
@@ -3343,12 +3343,6 @@

- func: fbgemm_linear_quantize_weight(Tensor input) -> (Tensor, Tensor, float, int)

- func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor

- func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor

- func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor

- func: fbgemm_pack_quantized_matrix(Tensor input) -> Tensor

- func: fbgemm_pack_quantized_matrix.KN(Tensor input, int K, int N) -> Tensor
@@ -7686,15 +7680,6 @@
# - func: quantized_gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
#

# Quantized RNN cells
- func: quantized_lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> (Tensor, Tensor)

- func: quantized_gru_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor

- func: quantized_rnn_relu_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor

- func: quantized_rnn_tanh_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor

# PackedSequence utilities
- func: _pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor)
  dispatch:
@@ -142,6 +142,14 @@ ALLOW_LIST = [
    ("onednn::qconv3d_pointwise", datetime.date(2023, 12, 31)),
    ("onednn::qconv2d_pointwise.binary", datetime.date(2023, 12, 31)),
    ("onednn::qlinear_pointwise", datetime.date(2023, 12, 31)),
    ("aten::fbgemm_pack_gemm_matrix_fp16", datetime.date(2024, 12, 31)),
    ("aten::fbgemm_linear_fp16_weight_fp32_activation", datetime.date(2024, 12, 31)),
    ("aten::fbgemm_linear_fp16_weight", datetime.date(2024, 12, 31)),
    ("aten::quantized_lstm_cell", datetime.date(2024, 12, 31)),
    ("aten::quantized_gru_cell", datetime.date(2024, 12, 31)),
    ("aten::quantized_rnn_relu_cell", datetime.date(2024, 12, 31)),
    ("aten::quantized_rnn_tanh_cell", datetime.date(2024, 12, 31)),
    ("quantized::make_quantized_cell_params", datetime.date(2024, 12, 31)),
]

ALLOW_LIST_COMPILED = [
@@ -2114,25 +2114,6 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""")
        res_bf16 = F.threshold(x.to(dtype=dtype), threshold, 0).float()
        self.assertEqual(res_bf16, expected)

    @unittest.skipUnless('fbgemm' in torch.backends.quantized.supported_engines,
                         'Linear_FP16_weight requires FBGEMM. FBGEMM is only optimized for CPUs'
                         ' with instruction set support avx2 or newer.')
    def test_fb_fc_packed(self):
        X = np.random.rand(16, 16).astype(np.float32) - 0.5
        W = np.random.rand(16, 16).astype(np.float32) - 0.5
        b = np.random.rand(16).astype(np.float32) - 0.5

        def fc_op(X, W, b):
            return np.dot(X, W.T) + b

        x_tensor = torch.tensor(X)
        w_tensor = torch.tensor(W)
        b_tensor = torch.tensor(b)
        packed_w_tensor = torch.fbgemm_pack_gemm_matrix_fp16(w_tensor)
        actual_output = torch.fbgemm_linear_fp16_weight(x_tensor, packed_w_tensor, b_tensor)
        expected_output = fc_op(X, W, b)
        torch.testing.assert_close(torch.from_numpy(expected_output), actual_output.cpu(), atol=1e-3, rtol=1e-3)

    def test_pad_scalar_error(self):
        inputs = torch.tensor(0., requires_grad=True)
        self.assertRaises(RuntimeError, lambda: F.pad(inputs, (1, 1)))
@@ -584,13 +584,10 @@ def get_testing_overrides() -> Dict[Callable, Callable]:
        torch.fused_moving_avg_obs_fake_quant: (lambda x, observer_on, fake_quant_on, averaging_const, running_min,
                                                running_max, scale, zero_point, quant_min, quant_max, ch_axis,
                                                per_row_fake_quant=False, symmetric_quant=False: -1),
        torch.fbgemm_linear_fp16_weight: lambda input, packed_weight, bias: -1,
        torch.fbgemm_linear_fp16_weight_fp32_activation: lambda input, packed_weight, bias: -1,
        torch.fbgemm_linear_int8_weight: lambda input, weight, packed, col_offsets, weight_scale, weight_zero_point, bias: -1,
        torch.fbgemm_linear_int8_weight_fp32_activation: (lambda input, weight, packed, col_offsets, weight_scale,
                                                          weight_zero_point, bias: -1),
        torch.fbgemm_linear_quantize_weight: lambda input: -1,
        torch.fbgemm_pack_gemm_matrix_fp16: lambda input: -1,
        torch.fbgemm_pack_quantized_matrix: lambda input, a, b: -1,
        torch.feature_alpha_dropout: lambda input, p, train: -1,
        torch.feature_dropout: lambda input, p, train: -1,
@@ -977,21 +974,12 @@ def get_testing_overrides() -> Dict[Callable, Callable]:
        torch.quantize_per_tensor: lambda input, scale, zero_point, dtype: -1,
        torch.quantize_per_tensor_dynamic: lambda input, dtype, reduce_range: -1,
        torch.quantized_batch_norm: lambda input, weight, bias, mean, var, eps, output_scale, output_zero_point: -1,
        torch.quantized_gru_cell: (lambda input, hx, w_ih, w_hh, b_ih, b_hh, packed_ih, packed_hh, col_offsets_ih,
                                   col_offsets_hh, scale_ih, scale_hh, zero_point_ih, zero_point_hh: -1),

        torch.quantized_lstm_cell: (lambda input, hx, w_ih, w_hh, b_ih, b_hh, packed_ih, packed_hh, col_offsets_ih,
                                    col_offsets_hh, scale_ih, scale_hh, zero_point_ih, zero_point_hh: -1),
        torch.quantized_max_pool1d: (lambda input, kernel_size, stride=tuple(), padding=(0,),
                                     dilation=(1,), ceil_mode=False: -1),
        torch.quantized_max_pool2d: (lambda input, kernel_size, stride=tuple(), padding=(0, 0),
                                     dilation=(1, 1), ceil_mode=False: -1),
        torch.quantized_max_pool3d: (lambda input, kernel_size, stride=tuple(), padding=(0, 0, 0),
                                     dilation=(1, 1, 1), ceil_mode=False: -1),
        torch.quantized_rnn_relu_cell: (lambda input, hx, w_ih, w_hh, b_ih, b_hh, packed_ih, packed_hh, col_offsets_ih,
                                        col_offsets_hh, scale_ih, scale_hh, zero_point_ih, zero_point_hh: -1),
        torch.quantized_rnn_tanh_cell: (lambda input, hx, w_ih, w_hh, b_ih, b_hh, packed_ih, packed_hh, col_offsets_ih,
                                        col_offsets_hh, scale_ih, scale_hh, zero_point_ih, zero_point_hh: -1),
        torch.rad2deg: lambda input, out=None: -1,
        torch.rand_like: lambda input, dtype=None, layout=None, device=None, requires_grad=False: -1,
        torch.randint_like: lambda input, high, dtype=None, layout=torch.strided, device=None, requires_grad=False: -1,