Change wrapped_linear_prepack and wrapped_quantized_linear_prepacked to private by adding _ as prefix (#135401)

Summary: In https://github.com/pytorch/pytorch/pull/134232, we added two new ops, `wrapped_linear_prepack` and `wrapped_quantized_linear_prepacked`. Based on the review comments and offline discussion, we are making them private by adding a `_` prefix.

Differential Revision: D62325142

Pull Request resolved: https://github.com/pytorch/pytorch/pull/135401
Approved by: https://github.com/houseroad
Huamin Li, 2024-09-08 04:16:24 +00:00 (committed by PyTorch MergeBot)
parent 8334cb2fb9
commit fd494dd426
9 changed files with 41 additions and 36 deletions
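
For orientation, the end state of the rename on the Python side mirrors the helper updated later in this diff: callers pack the weight with the private prepack op, then call the private prepacked linear op. The sketch below is illustrative only; the wrapper name is hypothetical, and actually executing it requires an FBGEMM-enabled CPU build with inputs quantized the way the updated test does.

import torch

# Hypothetical wrapper, for illustration only; it mirrors the helper updated below.
def run_wrapped_quantized_linear(input, input_scale, input_zero_point,
                                 weight, weight_scale, weight_zero_point, bias,
                                 output_scale, output_zero_point, out_channel):
    # Pack the weight once with the newly privatized prepack op.
    packed_weight = torch.ops._quantized._wrapped_linear_prepack(
        weight, weight_scale, weight_zero_point, bias
    )
    # Run the prepacked quantized linear under its new private name.
    return torch.ops._quantized._wrapped_quantized_linear_prepacked(
        input, input_scale, input_zero_point, packed_weight,
        output_scale, output_zero_point, out_channel
    )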

View File

@@ -3400,9 +3400,9 @@
- func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor
- func: wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
- func: _wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
- func: wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
- func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
- func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor

View File

@@ -436,12 +436,12 @@ at::Tensor wrapped_quantized_linear_meta(
#endif // USE_FBGEMM
}
at::Tensor wrapped_linear_prepack(const at::Tensor& weight,
at::Tensor _wrapped_linear_prepack(const at::Tensor& weight,
const at::Tensor& weight_scale,
const at::Tensor& weight_zero_point,
const at::Tensor& bias);
at::Tensor wrapped_linear_prepack(const at::Tensor& weight,
at::Tensor _wrapped_linear_prepack(const at::Tensor& weight,
const at::Tensor& weight_scale,
const at::Tensor& weight_zero_point,
const at::Tensor& bias) {
@@ -474,14 +474,14 @@ at::Tensor wrapped_linear_prepack(const at::Tensor& weight,
#endif // USE_FBGEMM
}
at::Tensor wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
const at::Tensor& input_zero_point,
const at::Tensor& packed_weight,
const at::Tensor& output_scale,
const at::Tensor& output_zero_point,
[[maybe_unused]] const int64_t out_channel);
at::Tensor wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
const at::Tensor& input_zero_point,
const at::Tensor& packed_weight,
const at::Tensor& output_scale,
@@ -507,12 +507,12 @@ at::Tensor wrapped_quantized_linear_prepacked(const at::Tensor& input, const at::Tensor& input_scale,
#endif // USE_FBGEMM
}
at::Tensor wrapped_linear_prepack_meta(const at::Tensor& weight,
at::Tensor _wrapped_linear_prepack_meta(const at::Tensor& weight,
[[maybe_unused]] const at::Tensor& weight_scale,
[[maybe_unused]] const at::Tensor& weight_zero_point,
[[maybe_unused]] const at::Tensor& bias);
at::Tensor wrapped_linear_prepack_meta(const at::Tensor& weight,
at::Tensor _wrapped_linear_prepack_meta(const at::Tensor& weight,
[[maybe_unused]] const at::Tensor& weight_scale,
[[maybe_unused]] const at::Tensor& weight_zero_point,
[[maybe_unused]] const at::Tensor& bias) {
@@ -530,7 +530,7 @@ at::Tensor wrapped_linear_prepack_meta(const at::Tensor& weight,
#endif // USE_FBGEMM
}
at::Tensor wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
at::Tensor _wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
[[maybe_unused]] const at::Tensor& input_scale,
[[maybe_unused]] const at::Tensor& input_zero_point,
[[maybe_unused]] const at::Tensor& packed_weight,
@@ -538,7 +538,7 @@ at::Tensor wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
[[maybe_unused]] const at::Tensor& output_zero_point,
const int64_t out_channel);
at::Tensor wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
at::Tensor _wrapped_quantized_linear_prepacked_meta(const at::Tensor& input,
[[maybe_unused]] const at::Tensor& input_scale,
[[maybe_unused]] const at::Tensor& input_zero_point,
[[maybe_unused]] const at::Tensor& packed_weight,
@@ -695,21 +695,21 @@ TORCH_LIBRARY_IMPL(_quantized, CPU, m) {
m.impl(TORCH_SELECTIVE_NAME("_quantized::linear_prepack_fp16_legacy"), TORCH_FN(QLinearPackWeightFp16Legacy::run));
m.impl(TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear"), TORCH_FN(wrapped_quantized_linear));
m.impl(
TORCH_SELECTIVE_NAME("_quantized::wrapped_linear_prepack"),
wrapped_linear_prepack);
TORCH_SELECTIVE_NAME("_quantized::_wrapped_linear_prepack"),
_wrapped_linear_prepack);
m.impl(
TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear_prepacked"),
wrapped_quantized_linear_prepacked);
TORCH_SELECTIVE_NAME("_quantized::_wrapped_quantized_linear_prepacked"),
_wrapped_quantized_linear_prepacked);
}
TORCH_LIBRARY_IMPL(_quantized, Meta, m) {
m.impl(TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear"), TORCH_FN(wrapped_quantized_linear_meta));
m.impl(
TORCH_SELECTIVE_NAME("_quantized::wrapped_linear_prepack"),
wrapped_linear_prepack_meta);
TORCH_SELECTIVE_NAME("_quantized::_wrapped_linear_prepack"),
_wrapped_linear_prepack_meta);
m.impl(
TORCH_SELECTIVE_NAME("_quantized::wrapped_quantized_linear_prepacked"),
wrapped_quantized_linear_prepacked_meta);
TORCH_SELECTIVE_NAME("_quantized::_wrapped_quantized_linear_prepacked"),
_wrapped_quantized_linear_prepacked_meta);
}
TORCH_LIBRARY_IMPL(onednn, CPU, m) {

View File

@@ -251,8 +251,8 @@ TORCH_LIBRARY(_quantized, m) {
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_fbgemm_pack_gemm_matrix_fp16(Tensor W) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_fbgemm_linear_fp16_weight(Tensor X, Tensor W, Tensor B, int out_channel) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_quantized_linear(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor W, Tensor W_scale, Tensor W_zero_point, Tensor B, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor Y"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_linear_prepack(Tensor W, Tensor W_scale, Tensor W_zero_point, Tensor B) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::wrapped_quantized_linear_prepacked(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor W_prepack, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor Y"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::_wrapped_linear_prepack(Tensor W, Tensor W_scale, Tensor W_zero_point, Tensor B) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("_quantized::_wrapped_quantized_linear_prepacked(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor W_prepack, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor Y"));
}
TORCH_LIBRARY(onednn, m) {
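
With the schema registrations above switched to the private names, only those names resolve through torch.ops. A quick, hedged check, assuming a build that already contains this change:

import torch

# The private schema resolves under the _quantized namespace.
op = torch.ops._quantized._wrapped_linear_prepack
print(op)

# The old public name no longer has a registered schema; the exact exception
# type raised by the failed lookup is not guaranteed here, so both are caught.
try:
    _ = torch.ops._quantized.wrapped_linear_prepack
except (AttributeError, RuntimeError):
    print("public name no longer registered")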

View File

@@ -145,6 +145,11 @@ ALLOW_LIST = [
("onednn::qlinear_pointwise.binary_tensor", datetime.date(2024, 12, 31)),
("aten::_scaled_mm.out", datetime.date(2024, 12, 31)),
("aten::_scaled_mm", datetime.date(2024, 12, 31)),
("aten::wrapped_quantized_linear_prepacked", datetime.date(2024, 12, 31)),
("aten::wrapped_linear_prepack", datetime.date(2024, 12, 31)),
("_quantized::wrapped_linear_prepack", datetime.date(2024, 12, 31)),
("_quantized::wrapped_linear_prepacked", datetime.date(2024, 12, 31)),
("_quantized::wrapped_quantized_linear_prepacked", datetime.date(2024, 12, 31)),
# BC-breaking change in can_cast signature: 'from' -> 'from_'
("aten::can_cast", datetime.date(2024, 5, 31)),
]
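
The entries added above register the old public names in the forward/backward-compatibility allow-list, so removing their schemas does not fail the BC check before the listed expiry date. As a rough, hedged sketch of how such an entry is consumed (the function and data layout here are illustrative, not the actual checker's API):

import datetime
import re

# Illustrative only: each entry pairs an operator-name pattern with an expiry
# date; a schema removal or change for a matching operator is tolerated until
# that date has passed.
ALLOW_LIST = [
    ("_quantized::wrapped_linear_prepack", datetime.date(2024, 12, 31)),
    ("_quantized::wrapped_quantized_linear_prepacked", datetime.date(2024, 12, 31)),
]

def is_change_allowed(schema_name, today):
    return any(
        re.match(pattern, schema_name) and today < expiry
        for pattern, expiry in ALLOW_LIST
    )

print(is_change_allowed("_quantized::wrapped_linear_prepack", datetime.date(2024, 9, 8)))  # True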

View File

@@ -4223,8 +4223,8 @@ class TestQuantizedLinear(TestCase):
ret_ref = qlinear.dequantize()
self.assertEqual(ret, ret_ref)
"""Tests the correctness of the _quantized::wrapped_linear_prepack and
_quantized::wrapped_quantized_linear_prepacked ops."""
"""Tests the correctness of the _quantized::_wrapped_linear_prepack and
_quantized::_wrapped_quantized_linear_prepacked ops."""
@skipIfNoFBGEMM
@given(
m=st.integers(2, 6),
@@ -4243,13 +4243,13 @@ class TestQuantizedLinear(TestCase):
output_zero_point = torch.tensor(0)
out_channel = n
ret_1 = torch.ops._quantized.wrapped_linear_prepack(
ret_1 = torch.ops._quantized._wrapped_linear_prepack(
weight,
weight_scale,
weight_zero_point,
bias
)
ret_2 = torch.ops._quantized.wrapped_quantized_linear_prepacked(
ret_2 = torch.ops._quantized._wrapped_quantized_linear_prepacked(
input,
input_scale,
input_zero_point,

View File

@@ -651,10 +651,10 @@ def wrapped_quantized_linear(
out_zero_point: torch.Tensor,
out_channel: int,
) -> torch.Tensor:
packed_weight = torch.ops._quantized.wrapped_linear_prepack(
packed_weight = torch.ops._quantized._wrapped_linear_prepack(
weight, weight_scale, weight_zero_point, bias
)
return torch.ops._quantized.wrapped_quantized_linear_prepacked(
return torch.ops._quantized._wrapped_quantized_linear_prepacked(
input,
input_scale,
input_zero_point,

View File

@@ -493,7 +493,7 @@ aoti_torch_cpu_wrapped_fbgemm_pack_gemm_matrix_fp16(
// This will soon be deprecated after ao_quantization is complete.
// Please refrain from using this or increasing callsites.
AOTI_TORCH_EXPORT AOTITorchError aoti_torch_cpu_wrapped_linear_prepack(
AOTI_TORCH_EXPORT AOTITorchError aoti_torch_cpu__wrapped_linear_prepack(
AtenTensorHandle weight,
AtenTensorHandle weight_scale,
AtenTensorHandle weight_zero_point,
@@ -513,7 +513,7 @@ aoti_torch_cpu_wrapped_fbgemm_linear_fp16_weight(
// This will soon be deprecated after ao_quantization is complete.
// Please refrain from using this or increasing callsites.
AOTI_TORCH_EXPORT AOTITorchError
aoti_torch_cpu_wrapped_quantized_linear_prepacked(
aoti_torch_cpu__wrapped_quantized_linear_prepacked(
AtenTensorHandle input,
AtenTensorHandle input_scale,
AtenTensorHandle input_zero_point,

View File

@@ -26,6 +26,8 @@
#include <ATen/ops/_scaled_dot_product_efficient_attention.h>
#include <ATen/ops/_scaled_dot_product_flash_attention.h>
#include <ATen/ops/_scaled_mm.h>
#include <ATen/ops/_wrapped_linear_prepack.h>
#include <ATen/ops/_wrapped_quantized_linear_prepacked.h>
#include <ATen/ops/addmm.h>
#include <ATen/ops/as_strided.h>
#include <ATen/ops/bmm.h>
@@ -42,8 +44,6 @@
#include <ATen/ops/scatter_reduce.h>
#include <ATen/ops/view_as_real_ops.h>
#include <ATen/ops/view_ops.h>
#include <ATen/ops/wrapped_linear_prepack.h>
#include <ATen/ops/wrapped_quantized_linear_prepacked.h>
#endif
@@ -814,7 +814,7 @@ AOTITorchError aoti_torch_cpu_wrapped_fbgemm_pack_gemm_matrix_fp16(
});
}
AOTITorchError aoti_torch_cpu_wrapped_linear_prepack(
AOTITorchError aoti_torch_cpu__wrapped_linear_prepack(
AtenTensorHandle weight,
AtenTensorHandle weight_scale,
AtenTensorHandle weight_zero_point,
@@ -828,7 +828,7 @@ AOTITorchError aoti_torch_cpu_wrapped_linear_prepack(
tensor_handle_to_tensor_pointer(weight_zero_point);
at::Tensor* bias_tensor = tensor_handle_to_tensor_pointer(bias);
*out = new_tensor_handle(at::wrapped_linear_prepack(
*out = new_tensor_handle(at::_wrapped_linear_prepack(
*weight_tensor,
*weight_scale_tensor,
*weight_zero_point_tensor,
@@ -852,7 +852,7 @@ AOTITorchError aoti_torch_cpu_wrapped_fbgemm_linear_fp16_weight(
});
}
AOTITorchError aoti_torch_cpu_wrapped_quantized_linear_prepacked(
AOTITorchError aoti_torch_cpu__wrapped_quantized_linear_prepacked(
AtenTensorHandle input,
AtenTensorHandle input_scale,
AtenTensorHandle input_zero_point,
@@ -871,7 +871,7 @@ AOTITorchError aoti_torch_cpu_wrapped_quantized_linear_prepacked(
at::Tensor* out_scale_tensor = tensor_handle_to_tensor_pointer(out_scale);
at::Tensor* out_zeropoint_tensor =
tensor_handle_to_tensor_pointer(out_zeropoint);
*out = new_tensor_handle(at::wrapped_quantized_linear_prepacked(
*out = new_tensor_handle(at::_wrapped_quantized_linear_prepacked(
*input_tensor,
*input_scale_tensor,
*input_zero_point_tensor,

View File

@@ -1252,8 +1252,8 @@ def get_testing_overrides() -> Dict[Callable, Callable]:
torch.vsplit: lambda input, indices_or_sections: -1,
torch.vstack: lambda tensors, out=None: -1,
torch.where: lambda condition, x=None, y=None: -1,
torch.wrapped_linear_prepack: lambda weight, weight_scale, weight_zero_point, bias : -1,
torch.wrapped_quantized_linear_prepacked: (
torch._wrapped_linear_prepack: lambda weight, weight_scale, weight_zero_point, bias : -1,
torch._wrapped_quantized_linear_prepacked: (
lambda input, input_scale, input_zero_point, prepacked, out_scale, out_zero_point, out_channel : -1 # noqa: B950
),
torch.zeros_like: lambda input, dtype=None, layout=None, device=None, requires_grad=False: -1,
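
The override table above now keys on the private callables. A small, hedged check, assuming a PyTorch build that includes this change:

import torch
from torch.overrides import get_testing_overrides

# get_testing_overrides() maps each overridable callable to a dummy lambda with
# a matching signature; after this change the private names are the keys.
overrides = get_testing_overrides()
assert torch._wrapped_linear_prepack in overrides
assert torch._wrapped_quantized_linear_prepacked in overrides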