Change wrapped_linear_prepack and wrapped_quantized_linear_prepacked to private by adding _ as prefix (#135401)
Summary: In https://github.com/pytorch/pytorch/pull/134232 we added two new ops, wrapped_linear_prepack and wrapped_quantized_linear_prepacked. Following the review comments and an offline discussion, we are making them private by adding `_` as a prefix.

Differential Revision: D62325142

Pull Request resolved: https://github.com/pytorch/pytorch/pull/135401
Approved by: https://github.com/houseroad
This commit is contained in:
parent 8334cb2fb9
commit fd494dd426
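For reference, a minimal caller-side sketch of the renamed ops. The op names and argument order come from the schemas registered in this diff; the shapes, dtypes, and scalar scale/zero-point tensors are illustrative assumptions (mirroring the updated test further down), and the ops require a PyTorch build with FBGEMM.

import torch

# Illustrative sizes only (hypothetical); the ops are CPU/FBGEMM-backed.
m, k, n = 4, 8, 16
input = torch.randn(m, k, dtype=torch.float32)
input_scale = torch.tensor(0.1)
input_zero_point = torch.tensor(0)
weight = torch.randn(n, k, dtype=torch.float32)
weight_scale = torch.tensor(0.1)
weight_zero_point = torch.tensor(0)
bias = torch.randn(n, dtype=torch.float32)
output_scale = torch.tensor(0.1)
output_zero_point = torch.tensor(0)
out_channel = n

# Previously torch.ops._quantized.wrapped_linear_prepack(...)
packed_weight = torch.ops._quantized._wrapped_linear_prepack(
    weight, weight_scale, weight_zero_point, bias
)
# Previously torch.ops._quantized.wrapped_quantized_linear_prepacked(...)
out = torch.ops._quantized._wrapped_quantized_linear_prepacked(
    input, input_scale, input_zero_point, packed_weight,
    output_scale, output_zero_point, out_channel
)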
@@ -3400,9 +3400,9 @@
 - func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor

-- func: wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
+- func: _wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor

-- func: wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
+- func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor

 - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
@@ -436,12 +436,12 @@ at::Tensor wrapped_quantized_linear_meta(
 #endif // USE_FBGEMM
 }

-at::Tensor wrapped_linear_prepack(const at::Tensor& weight,
+at::Tensor _wrapped_linear_prepack(const at::Tensor& weight,
     const at::Tensor& weight_scale,
     const at::Tensor& weight_zero_point,
     const at::Tensor& bias);

-at::Tensor wrapped_linear_prepack(const at::Tensor& weight,
+at::Tensor _wrapped_linear_prepack(const at::Tensor& weight,
     const at::Tensor& weight_scale,
     const at::Tensor& weight_zero_point,
     const at::Tensor& bias) {
@@ -474,14 +474,14 @@ at::Tensor wrapped_linear_prepack(const at::Tensor& weight,
 #endif // USE_FBGEMM
 }

-at::Tensor wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
+at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
     const at::Tensor& input_zero_point,
     const at::Tensor& packed_weight,
     const at::Tensor& output_scale,
     const at::Tensor& output_zero_point,
     [[maybe_unused]] const int64_t out_channel);

-at::Tensor wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
+at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
     const at::Tensor& input_zero_point,
     const at::Tensor& packed_weight,
     const at::Tensor& output_scale,
@@ -507,12 +507,12 @@ at::Tensor wrapped_quantized_linear_prepacked(const at::Tensor& input, const at:
 #endif // USE_FBGEMM
 }

-at::Tensor wrapped_linear_prepack_meta(const at::Tensor& weight,
+at::Tensor _wrapped_linear_prepack_meta(const at::Tensor& weight,
     [[maybe_unused]] const at::Tensor& weight_scale,
     [[maybe_unused]] const at::Tensor& weight_zero_point,
     [[maybe_unused]] const at::Tensor& bias);

-at::Tensor wrapped_linear_prepack_meta(const at::Tensor& weight,
+at::Tensor _wrapped_linear_prepack_meta(const at::Tensor& weight,
     [[maybe_unused]] const at::Tensor& weight_scale,
     [[maybe_unused]] const at::Tensor& weight_zero_point,
     [[maybe_unused]] const at::Tensor& bias) {
@@ -530,7 +530,7 @@ at::Tensor wrapped_linear_prepack_meta(const at::Tensor& weight,
 #endif // USE_FBGEMM
 }

-at::Tensor wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
+at::Tensor _wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
     [[maybe_unused]] const at::Tensor& input_scale,
     [[maybe_unused]] const at::Tensor& input_zero_point,
     [[maybe_unused]] const at::Tensor& packed_weight,
@@ -538,7 +538,7 @@ at::Tensor wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
     [[maybe_unused]] const at::Tensor& output_zero_point,
     const int64_t out_channel);

-at::Tensor wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
+at::Tensor _wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
     [[maybe_unused]] const at::Tensor& input_scale,
     [[maybe_unused]] const at::Tensor& input_zero_point,
     [[maybe_unused]] const at::Tensor& packed_weight,
@@ -695,21 +695,21 @@ TORCH_LIBRARY_IMPL(_quantized, CPU, m) {
   m.impl(TORCH_SELECTIVE_NAME("_quantized::linear_prepack_fp16_legacy"), TORCH_FN(QLinearPackWeightFp16Legacy::run));
   m.impl(TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear"), TORCH_FN(wrapped_quantized_linear));
   m.impl(
-      TORCH_SELECTIVE_NAME("_quantized::wrapped_linear_prepack"),
-      wrapped_linear_prepack);
+      TORCH_SELECTIVE_NAME("_quantized::_wrapped_linear_prepack"),
+      _wrapped_linear_prepack);
   m.impl(
-      TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear_prepacked"),
-      wrapped_quantized_linear_prepacked);
+      TORCH_SELECTIVE_NAME("_quantized::_wrapped_quantized_linear_prepacked"),
+      _wrapped_quantized_linear_prepacked);
 }

 TORCH_LIBRARY_IMPL(_quantized, Meta, m) {
   m.impl(TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear"), TORCH_FN(wrapped_quantized_linear_meta));
   m.impl(
-      TORCH_SELECTIVE_NAME("_quantized::wrapped_linear_prepack"),
-      wrapped_linear_prepack_meta);
+      TORCH_SELECTIVE_NAME("_quantized::_wrapped_linear_prepack"),
+      _wrapped_linear_prepack_meta);
   m.impl(
-      TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear_prepacked"),
-      wrapped_quantized_linear_prepacked_meta);
+      TORCH_SELECTIVE_NAME("_quantized::_wrapped_quantized_linear_prepacked"),
+      _wrapped_quantized_linear_prepacked_meta);
 }

 TORCH_LIBRARY_IMPL(onednn, CPU, m) {
@@ -251,8 +251,8 @@ TORCH_LIBRARY(_quantized, m) {
   m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_fbgemm_pack_gemm_matrix_fp16(Tensor W) -> Tensor"));
   m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_fbgemm_linear_fp16_weight(Tensor X, Tensor W, Tensor B, int out_channel) -> Tensor"));
   m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_quantized_linear(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor W, Tensor W_scale, Tensor W_zero_point, Tensor B, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor Y"));
-  m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_linear_prepack(Tensor W, Tensor W_scale, Tensor W_zero_point, Tensor B) -> Tensor"));
-  m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_quantized_linear_prepacked(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor W_prepack, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor Y"));
+  m.def(TORCH_SELECTIVE_SCHEMA("_quantized::_wrapped_linear_prepack(Tensor W, Tensor W_scale, Tensor W_zero_point, Tensor B) -> Tensor"));
+  m.def(TORCH_SELECTIVE_SCHEMA("_quantized::_wrapped_quantized_linear_prepacked(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor W_prepack, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor Y"));
 }

 TORCH_LIBRARY(onednn, m) {
@@ -145,6 +145,11 @@ ALLOW_LIST = [
     ("onednn::qlinear_pointwise.binary_tensor", datetime.date(2024, 12, 31)),
     ("aten::_scaled_mm.out", datetime.date(2024, 12, 31)),
     ("aten::_scaled_mm", datetime.date(2024, 12, 31)),
+    ("aten::wrapped_quantized_linear_prepacked", datetime.date(2024, 12, 31)),
+    ("aten::wrapped_linear_prepack", datetime.date(2024, 12, 31)),
+    ("_quantized::wrapped_linear_prepack", datetime.date(2024, 12, 31)),
+    ("_quantized::wrapped_linear_prepacked", datetime.date(2024, 12, 31)),
+    ("_quantized::wrapped_quantized_linear_prepacked", datetime.date(2024, 12, 31)),
     # BC-breaking change in can_cast signature: 'from' -> 'from_'
     ("aten::can_cast", datetime.date(2024, 5, 31)),
 ]
@@ -4223,8 +4223,8 @@ class TestQuantizedLinear(TestCase):
         ret_ref = qlinear.dequantize()
         self.assertEqual(ret, ret_ref)

-    """Tests the correctness of the _quantized::wrapped_linear_prepack and
-    _quantized::wrapped_quantized_linear_prepacked ops."""
+    """Tests the correctness of the _quantized::_wrapped_linear_prepack and
+    _quantized::_wrapped_quantized_linear_prepacked ops."""
     @skipIfNoFBGEMM
     @given(
         m=st.integers(2, 6),
@@ -4243,13 +4243,13 @@ class TestQuantizedLinear(TestCase):
         output_zero_point = torch.tensor(0)
         out_channel = n

-        ret_1 = torch.ops._quantized.wrapped_linear_prepack(
+        ret_1 = torch.ops._quantized._wrapped_linear_prepack(
             weight,
             weight_scale,
             weight_zero_point,
             bias
         )
-        ret_2 = torch.ops._quantized.wrapped_quantized_linear_prepacked(
+        ret_2 = torch.ops._quantized._wrapped_quantized_linear_prepacked(
             input,
             input_scale,
             input_zero_point,
@@ -651,10 +651,10 @@ def wrapped_quantized_linear(
     out_zero_point: torch.Tensor,
     out_channel: int,
 ) -> torch.Tensor:
-    packed_weight = torch.ops._quantized.wrapped_linear_prepack(
+    packed_weight = torch.ops._quantized._wrapped_linear_prepack(
         weight, weight_scale, weight_zero_point, bias
     )
-    return torch.ops._quantized.wrapped_quantized_linear_prepacked(
+    return torch.ops._quantized._wrapped_quantized_linear_prepacked(
         input,
         input_scale,
         input_zero_point,
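Note that the public entry point keeps its name; only the underlying prepack ops were renamed. A hedged usage sketch of that stable op, reusing the illustrative tensors from the example near the top of this page (argument order follows the _quantized::wrapped_quantized_linear schema registered above):

y = torch.ops._quantized.wrapped_quantized_linear(
    input, input_scale, input_zero_point,
    weight, weight_scale, weight_zero_point,
    bias, output_scale, output_zero_point, out_channel,
)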
@@ -493,7 +493,7 @@ aoti_torch_cpu_wrapped_fbgemm_pack_gemm_matrix_fp16(

 // This will soon be deprecated after ao_quantization is complete.
 // Please refrain from using this or increasing callsites.
-AOTI_TORCH_EXPORT AOTITorchError aoti_torch_cpu_wrapped_linear_prepack(
+AOTI_TORCH_EXPORT AOTITorchError aoti_torch_cpu__wrapped_linear_prepack(
     AtenTensorHandle weight,
     AtenTensorHandle weight_scale,
     AtenTensorHandle weight_zero_point,
@@ -513,7 +513,7 @@ aoti_torch_cpu_wrapped_fbgemm_linear_fp16_weight(
 // This will soon be deprecated after ao_quantization is complete.
 // Please refrain from using this or increasing callsites.
 AOTI_TORCH_EXPORT AOTITorchError
-aoti_torch_cpu_wrapped_quantized_linear_prepacked(
+aoti_torch_cpu__wrapped_quantized_linear_prepacked(
     AtenTensorHandle input,
     AtenTensorHandle input_scale,
     AtenTensorHandle input_zero_point,
@@ -26,6 +26,8 @@
 #include <ATen/ops/_scaled_dot_product_efficient_attention.h>
 #include <ATen/ops/_scaled_dot_product_flash_attention.h>
 #include <ATen/ops/_scaled_mm.h>
+#include <ATen/ops/_wrapped_linear_prepack.h>
+#include <ATen/ops/_wrapped_quantized_linear_prepacked.h>
 #include <ATen/ops/addmm.h>
 #include <ATen/ops/as_strided.h>
 #include <ATen/ops/bmm.h>
@@ -42,8 +44,6 @@
 #include <ATen/ops/scatter_reduce.h>
 #include <ATen/ops/view_as_real_ops.h>
 #include <ATen/ops/view_ops.h>
-#include <ATen/ops/wrapped_linear_prepack.h>
-#include <ATen/ops/wrapped_quantized_linear_prepacked.h>

 #endif

@@ -814,7 +814,7 @@ AOTITorchError aoti_torch_cpu_wrapped_fbgemm_pack_gemm_matrix_fp16(
   });
 }

-AOTITorchError aoti_torch_cpu_wrapped_linear_prepack(
+AOTITorchError aoti_torch_cpu__wrapped_linear_prepack(
     AtenTensorHandle weight,
     AtenTensorHandle weight_scale,
     AtenTensorHandle weight_zero_point,
@@ -828,7 +828,7 @@ AOTITorchError aoti_torch_cpu_wrapped_linear_prepack(
       tensor_handle_to_tensor_pointer(weight_zero_point);
   at::Tensor* bias_tensor = tensor_handle_to_tensor_pointer(bias);

-  *out = new_tensor_handle(at::wrapped_linear_prepack(
+  *out = new_tensor_handle(at::_wrapped_linear_prepack(
       *weight_tensor,
       *weight_scale_tensor,
       *weight_zero_point_tensor,
@@ -852,7 +852,7 @@ AOTITorchError aoti_torch_cpu_wrapped_fbgemm_linear_fp16_weight(
   });
 }

-AOTITorchError aoti_torch_cpu_wrapped_quantized_linear_prepacked(
+AOTITorchError aoti_torch_cpu__wrapped_quantized_linear_prepacked(
     AtenTensorHandle input,
     AtenTensorHandle input_scale,
     AtenTensorHandle input_zero_point,
@@ -871,7 +871,7 @@ AOTITorchError aoti_torch_cpu_wrapped_quantized_linear_prepacked(
   at::Tensor* out_scale_tensor = tensor_handle_to_tensor_pointer(out_scale);
   at::Tensor* out_zeropoint_tensor =
       tensor_handle_to_tensor_pointer(out_zeropoint);
-  *out = new_tensor_handle(at::wrapped_quantized_linear_prepacked(
+  *out = new_tensor_handle(at::_wrapped_quantized_linear_prepacked(
       *input_tensor,
       *input_scale_tensor,
       *input_zero_point_tensor,
@@ -1252,8 +1252,8 @@ def get_testing_overrides() -> Dict[Callable, Callable]:
     torch.vsplit: lambda input, indices_or_sections: -1,
     torch.vstack: lambda tensors, out=None: -1,
     torch.where: lambda condition, x=None, y=None: -1,
-    torch.wrapped_linear_prepack: lambda weight, weight_scale, weight_zero_point, bias : -1,
-    torch.wrapped_quantized_linear_prepacked: (
+    torch._wrapped_linear_prepack: lambda weight, weight_scale, weight_zero_point, bias : -1,
+    torch._wrapped_quantized_linear_prepacked: (
         lambda input, input_scale, input_zero_point, prepacked, out_scale, out_zero_point, out_channel : -1  # noqa: B950
     ),
     torch.zeros_like: lambda input, dtype=None, layout=None, device=None, requires_grad=False: -1,
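Because torch.overrides keys its dummy-override table on the function objects themselves, the rename has to be reflected there as well. A small hedged check, assuming a build that includes this change:

import torch
from torch.overrides import get_testing_overrides

overrides = get_testing_overrides()
# The table should now be keyed on the underscored (private) variants.
assert torch._wrapped_linear_prepack in overrides
assert torch._wrapped_quantized_linear_prepacked in overrides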