Change wrapped_linear_prepack and wrapped_quantized_linear_prepacked to private by adding _ as prefix (#135401)

Summary: In https://github.com/pytorch/pytorch/pull/134232, we added two new ops, `wrapped_linear_prepack` and `wrapped_quantized_linear_prepacked`. Based on the review comments and offline discussion, we are making them private by adding a `_` prefix.

Differential Revision: D62325142

Pull Request resolved: https://github.com/pytorch/pytorch/pull/135401
Approved by: https://github.com/houseroad
Huamin Li, 2024-09-08 04:16:24 +00:00 (committed by PyTorch MergeBot)
parent 8334cb2fb9
commit fd494dd426
9 changed files with 41 additions and 36 deletions
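
For orientation, the end state of the rename on the Python side mirrors the helper updated later in this diff: callers pack the weight with the private prepack op, then call the private prepacked linear op. The sketch below is illustrative only; the wrapper name is hypothetical, and actually executing it requires an FBGEMM-enabled CPU build with inputs quantized the way the updated test does.

import torch

# Hypothetical wrapper, for illustration only; it mirrors the helper updated below.
def run_wrapped_quantized_linear(input, input_scale, input_zero_point,
                                 weight, weight_scale, weight_zero_point, bias,
                                 output_scale, output_zero_point, out_channel):
    # Pack the weight once with the newly privatized prepack op.
    packed_weight = torch.ops._quantized._wrapped_linear_prepack(
        weight, weight_scale, weight_zero_point, bias
    )
    # Run the prepacked quantized linear under its new private name.
    return torch.ops._quantized._wrapped_quantized_linear_prepacked(
        input, input_scale, input_zero_point, packed_weight,
        output_scale, output_zero_point, out_channel
    )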

View File

@@ -3400,9 +3400,9 @@
- func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor
- func: wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
- func: _wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
- func: wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
- func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
- func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor

View File

@@ -436,12 +436,12 @@ at::Tensor wrapped_quantized_linear_meta(
#endif // USE_FBGEMM
}
at::Tensor wrapped_linear_prepack(const at::Tensor& weight,
at::Tensor _wrapped_linear_prepack(const at::Tensor& weight,
const at::Tensor& weight_scale,
const at::Tensor& weight_zero_point,
const at::Tensor& bias);
at::Tensor wrapped_linear_prepack(const at::Tensor& weight,
at::Tensor _wrapped_linear_prepack(const at::Tensor& weight,
const at::Tensor& weight_scale,
const at::Tensor& weight_zero_point,
const at::Tensor& bias) {
@@ -474,14 +474,14 @@ at::Tensor wrapped_linear_prepack(const at::Tensor& weight,
#endif // USE_FBGEMM
}
at::Tensor wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
const at::Tensor& input_zero_point,
const at::Tensor& packed_weight,
const at::Tensor& output_scale,
const at::Tensor& output_zero_point,
[[maybe_unused]] const int64_t out_channel);
at::Tensor wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
const at::Tensor& input_zero_point,
const at::Tensor& packed_weight,
const at::Tensor& output_scale,
@@ -507,12 +507,12 @@ at::Tensor wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
#endif // USE_FBGEMM
}
at::Tensor wrapped_linear_prepack_meta(const at::Tensor& weight,
at::Tensor _wrapped_linear_prepack_meta(const at::Tensor& weight,
[[maybe_unused]] const at::Tensor& weight_scale,
[[maybe_unused]] const at::Tensor& weight_zero_point,
[[maybe_unused]] const at::Tensor& bias);
at::Tensor wrapped_linear_prepack_meta(const at::Tensor& weight,
at::Tensor _wrapped_linear_prepack_meta(const at::Tensor& weight,
[[maybe_unused]] const at::Tensor& weight_scale,
[[maybe_unused]] const at::Tensor& weight_zero_point,
[[maybe_unused]] const at::Tensor& bias) {
@@ -530,7 +530,7 @@ at::Tensor wrapped_linear_prepack_meta(const at::Tensor& weight,
#endif // USE_FBGEMM
}
at::Tensor wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
at::Tensor _wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
[[maybe_unused]] const at::Tensor& input_scale,
[[maybe_unused]] const at::Tensor& input_zero_point,
[[maybe_unused]] const at::Tensor& packed_weight,
@@ -538,7 +538,7 @@ at::Tensor wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
[[maybe_unused]] const at::Tensor& output_zero_point,
const int64_t out_channel);
at::Tensor wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
at::Tensor _wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
[[maybe_unused]] const at::Tensor& input_scale,
[[maybe_unused]] const at::Tensor& input_zero_point,
[[maybe_unused]] const at::Tensor& packed_weight,
@@ -695,21 +695,21 @@ TORCH_LIBRARY_IMPL(_quantized, CPU, m) {
m.impl(TORCH_SELECTIVE_NAME("_quantized::linear_prepack_fp16_legacy"), TORCH_FN(QLinearPackWeightFp16Legacy::run));
m.impl(TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear"), TORCH_FN(wrapped_quantized_linear));
m.impl(
TORCH_SELECTIVE_NAME("_quantized::wrapped_linear_prepack"),
wrapped_linear_prepack);
TORCH_SELECTIVE_NAME("_quantized::_wrapped_linear_prepack"),
_wrapped_linear_prepack);
m.impl(
TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear_prepacked"),
wrapped_quantized_linear_prepacked);
TORCH_SELECTIVE_NAME("_quantized::_wrapped_quantized_linear_prepacked"),
_wrapped_quantized_linear_prepacked);
}
TORCH_LIBRARY_IMPL(_quantized, Meta, m) {
m.impl(TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear"), TORCH_FN(wrapped_quantized_linear_meta));
m.impl(
TORCH_SELECTIVE_NAME("_quantized::wrapped_linear_prepack"),
wrapped_linear_prepack_meta);
TORCH_SELECTIVE_NAME("_quantized::_wrapped_linear_prepack"),
_wrapped_linear_prepack_meta);
m.impl(
TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear_prepacked"),
wrapped_quantized_linear_prepacked_meta);
TORCH_SELECTIVE_NAME("_quantized::_wrapped_quantized_linear_prepacked"),
_wrapped_quantized_linear_prepacked_meta);
}
TORCH_LIBRARY_IMPL(onednn, CPU, m) {

View File

@@ -251,8 +251,8 @@ TORCH_LIBRARY(_quantized, m) {
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_fbgemm_pack_gemm_matrix_fp16(Tensor W) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_fbgemm_linear_fp16_weight(Tensor X, Tensor W, Tensor B, int out_channel) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_quantized_linear(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor W, Tensor W_scale, Tensor W_zero_point, Tensor B, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor Y"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_linear_prepack(Tensor W, Tensor W_scale, Tensor W_zero_point, Tensor B) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_quantized_linear_prepacked(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor W_prepack, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor Y"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::_wrapped_linear_prepack(Tensor W, Tensor W_scale, Tensor W_zero_point, Tensor B) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::_wrapped_quantized_linear_prepacked(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor W_prepack, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor Y"));
}
TORCH_LIBRARY(onednn, m) {
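
With the schema registrations above switched to the private names, only those names resolve through torch.ops. A quick, hedged check, assuming a build that already contains this change:

import torch

# The private schema resolves under the _quantized namespace.
op = torch.ops._quantized._wrapped_linear_prepack
print(op)

# The old public name no longer has a registered schema; the exact exception
# type raised by the failed lookup is not guaranteed here, so both are caught.
try:
    _ = torch.ops._quantized.wrapped_linear_prepack
except (AttributeError, RuntimeError):
    print("public name no longer registered")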

View File

@@ -145,6 +145,11 @@ ALLOW_LIST = [
("onednn::qlinear_pointwise.binary_tensor", datetime.date(2024, 12, 31)),
("aten::_scaled_mm.out", datetime.date(2024, 12, 31)),
("aten::_scaled_mm", datetime.date(2024, 12, 31)),
("aten::wrapped_quantized_linear_prepacked", datetime.date(2024, 12, 31)),
("aten::wrapped_linear_prepack", datetime.date(2024, 12, 31)),
("_quantized::wrapped_linear_prepack", datetime.date(2024, 12, 31)),
("_quantized::wrapped_linear_prepacked", datetime.date(2024, 12, 31)),
("_quantized::wrapped_quantized_linear_prepacked", datetime.date(2024, 12, 31)),
# BC-breaking change in can_cast signature: 'from' -> 'from_'
("aten::can_cast", datetime.date(2024, 5, 31)),
]
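
The entries added above register the old public names in the forward/backward-compatibility allow-list, so removing their schemas does not fail the BC check before the listed expiry date. As a rough, hedged sketch of how such an entry is consumed (the function and data layout here are illustrative, not the actual checker's API):

import datetime
import re

# Illustrative only: each entry pairs an operator-name pattern with an expiry
# date; a schema removal or change for a matching operator is tolerated until
# that date has passed.
ALLOW_LIST = [
    ("_quantized::wrapped_linear_prepack", datetime.date(2024, 12, 31)),
    ("_quantized::wrapped_quantized_linear_prepacked", datetime.date(2024, 12, 31)),
]

def is_change_allowed(schema_name, today):
    return any(
        re.match(pattern, schema_name) and today < expiry
        for pattern, expiry in ALLOW_LIST
    )

print(is_change_allowed("_quantized::wrapped_linear_prepack", datetime.date(2024, 9, 8)))  # True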

View File

@@ -4223,8 +4223,8 @@ class TestQuantizedLinear(TestCase):
ret_ref = qlinear.dequantize()
self.assertEqual(ret, ret_ref)
"""Tests the correctness of the _quantized::wrapped_linear_prepack and
_quantized::wrapped_quantized_linear_prepacked ops."""
"""Tests the correctness of the _quantized::_wrapped_linear_prepack and
_quantized::_wrapped_quantized_linear_prepacked ops."""
@skipIfNoFBGEMM
@given(
m=st.integers(2, 6),
@@ -4243,13 +4243,13 @@ class TestQuantizedLinear(TestCase):
output_zero_point = torch.tensor(0)
out_channel = n
ret_1 = torch.ops._quantized.wrapped_linear_prepack(
ret_1 = torch.ops._quantized._wrapped_linear_prepack(
weight,
weight_scale,
weight_zero_point,
bias
)
ret_2 = torch.ops._quantized.wrapped_quantized_linear_prepacked(
ret_2 = torch.ops._quantized._wrapped_quantized_linear_prepacked(
input,
input_scale,
input_zero_point,

View File

@@ -651,10 +651,10 @@ def wrapped_quantized_linear(
out_zero_point: torch.Tensor,
out_channel: int,
) -> torch.Tensor:
packed_weight = torch.ops._quantized.wrapped_linear_prepack(
packed_weight = torch.ops._quantized._wrapped_linear_prepack(
weight, weight_scale, weight_zero_point, bias
)
return torch.ops._quantized.wrapped_quantized_linear_prepacked(
return torch.ops._quantized._wrapped_quantized_linear_prepacked(
input,
input_scale,
input_zero_point,

View File

@@ -493,7 +493,7 @@ aoti_torch_cpu_wrapped_fbgemm_pack_gemm_matrix_fp16(
// This will soon be deprecated after ao_quantization is complete.
// Please refrain from using this or increasing callsites.
AOTI_TORCH_EXPORT AOTITorchError aoti_torch_cpu_wrapped_linear_prepack(
AOTI_TORCH_EXPORT AOTITorchError aoti_torch_cpu__wrapped_linear_prepack(
AtenTensorHandle weight,
AtenTensorHandle weight_scale,
AtenTensorHandle weight_zero_point,
@@ -513,7 +513,7 @@ aoti_torch_cpu_wrapped_fbgemm_linear_fp16_weight(
// This will soon be deprecated after ao_quantization is complete.
// Please refrain from using this or increasing callsites.
AOTI_TORCH_EXPORT AOTITorchError
aoti_torch_cpu_wrapped_quantized_linear_prepacked(
aoti_torch_cpu__wrapped_quantized_linear_prepacked(
AtenTensorHandle input,
AtenTensorHandle input_scale,
AtenTensorHandle input_zero_point,

View File

@@ -26,6 +26,8 @@
#include <ATen/ops/_scaled_dot_product_efficient_attention.h>
#include <ATen/ops/_scaled_dot_product_flash_attention.h>
#include <ATen/ops/_scaled_mm.h>
#include <ATen/ops/_wrapped_linear_prepack.h>
#include <ATen/ops/_wrapped_quantized_linear_prepacked.h>
#include <ATen/ops/addmm.h>
#include <ATen/ops/as_strided.h>
#include <ATen/ops/bmm.h>
@@ -42,8 +44,6 @@
#include <ATen/ops/scatter_reduce.h>
#include <ATen/ops/view_as_real_ops.h>
#include <ATen/ops/view_ops.h>
#include <ATen/ops/wrapped_linear_prepack.h>
#include <ATen/ops/wrapped_quantized_linear_prepacked.h>
#endif
@@ -814,7 +814,7 @@ AOTITorchError aoti_torch_cpu_wrapped_fbgemm_pack_gemm_matrix_fp16(
});
}
AOTITorchError aoti_torch_cpu_wrapped_linear_prepack(
AOTITorchError aoti_torch_cpu__wrapped_linear_prepack(
AtenTensorHandle weight,
AtenTensorHandle weight_scale,
AtenTensorHandle weight_zero_point,
@@ -828,7 +828,7 @@ AOTITorchError aoti_torch_cpu_wrapped_linear_prepack(
tensor_handle_to_tensor_pointer(weight_zero_point);
at::Tensor* bias_tensor = tensor_handle_to_tensor_pointer(bias);
*out = new_tensor_handle(at::wrapped_linear_prepack(
*out = new_tensor_handle(at::_wrapped_linear_prepack(
*weight_tensor,
*weight_scale_tensor,
*weight_zero_point_tensor,
@@ -852,7 +852,7 @@ AOTITorchError aoti_torch_cpu_wrapped_fbgemm_linear_fp16_weight(
});
}
AOTITorchError aoti_torch_cpu_wrapped_quantized_linear_prepacked(
AOTITorchError aoti_torch_cpu__wrapped_quantized_linear_prepacked(
AtenTensorHandle input,
AtenTensorHandle input_scale,
AtenTensorHandle input_zero_point,
@@ -871,7 +871,7 @@ AOTITorchError aoti_torch_cpu_wrapped_quantized_linear_prepacked(
at::Tensor* out_scale_tensor = tensor_handle_to_tensor_pointer(out_scale);
at::Tensor* out_zeropoint_tensor =
tensor_handle_to_tensor_pointer(out_zeropoint);
*out = new_tensor_handle(at::wrapped_quantized_linear_prepacked(
*out = new_tensor_handle(at::_wrapped_quantized_linear_prepacked(
*input_tensor,
*input_scale_tensor,
*input_zero_point_tensor,

View File

@@ -1252,8 +1252,8 @@ def get_testing_overrides() -> Dict[Callable, Callable]:
torch.vsplit: lambda input, indices_or_sections: -1,
torch.vstack: lambda tensors, out=None: -1,
torch.where: lambda condition, x=None, y=None: -1,
torch.wrapped_linear_prepack: lambda weight, weight_scale, weight_zero_point, bias : -1,
torch.wrapped_quantized_linear_prepacked: (
torch._wrapped_linear_prepack: lambda weight, weight_scale, weight_zero_point, bias : -1,
torch._wrapped_quantized_linear_prepacked: (
lambda input, input_scale, input_zero_point, prepacked, out_scale, out_zero_point, out_channel : -1 # noqa: B950
),
torch.zeros_like: lambda input, dtype=None, layout=None, device=None, requires_grad=False: -1,
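
The override table above now keys on the private callables. A small, hedged check, assuming a PyTorch build that includes this change:

import torch
from torch.overrides import get_testing_overrides

# get_testing_overrides() maps each overridable callable to a dummy lambda with
# a matching signature; after this change the private names are the keys.
overrides = get_testing_overrides()
assert torch._wrapped_linear_prepack in overrides
assert torch._wrapped_quantized_linear_prepacked in overrides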