[OpenReg][3/N] Migrate cpp_extensions_open_device_registration to OpenReg (#154181)
As the title states.

**Involved testcases**:
- test_open_device_quantized
- test_open_device_random
- test_open_device_tensor
- test_open_device_packed_sequence
- test_open_device_storage

Pull Request resolved: https://github.com/pytorch/pytorch/pull/154181
Approved by: https://github.com/albanD
ghstack dependencies: #153947, #154018, #154019, #154106
This commit is contained in:
parent
7e5f29b2de
commit
1e7989cad5
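
For context, the behavior the migrated tests rely on can be summarized by a minimal sketch like the one below (hypothetical usage, not part of the diff; it assumes the OpenReg test extension has already been built, imported, and registered as the "openreg" PrivateUse1 backend — the RNG and tensor-type checks mirror test_open_device_random and test_open_device_tensor):

import torch

# Assumes the OpenReg extension is already imported and registered
# as the "openreg" PrivateUse1 backend (setup not shown).

# fork_rng only works when the backend provides RNG state hooks
# (what test_open_device_random exercises).
with torch.random.fork_rng(device_type="openreg"):
    pass

# Per-backend tensor type names are generated for the custom device
# (what test_open_device_tensor checks).
x = torch.empty(4, 4, dtype=torch.float32, device="openreg")
assert x.type() == "torch.openreg.FloatTensor"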
@@ -16,7 +16,6 @@
#include <ATen/native/Resize.h>
#include <ATen/native/UnaryOps.h>
#include <ATen/native/cpu/Loops.h>
#include <ATen/native/quantized/AffineQuantizer.h>
#include <ATen/native/transformers/attention.h>
#include <ATen/native/transformers/sdp_utils_cpp.h>
#include <ATen/ops/view.h>

@@ -30,23 +29,6 @@ static c10::DeviceIndex custom_device_index = 0;
static uint64_t storageImpl_counter = 0;
static uint64_t last_storageImpl_saved_value = 0;

namespace {

void quantize_tensor_per_tensor_affine_privateuse1(
    const at::Tensor& rtensor,
    at::Tensor& qtensor,
    double scale,
    int64_t zero_point) {
  // do nothing
}

} // namespace

namespace at::native {

REGISTER_PRIVATEUSE1_DISPATCH(quantize_tensor_per_tensor_affine_stub, &quantize_tensor_per_tensor_affine_privateuse1);

} // namespace at::native

struct CustomBackendMetadata : public c10::BackendMeta {
  // for testing this field will mutate when clone() is called by shallow_copy_from.
  int backend_version_format_{-1};

@@ -184,7 +166,6 @@ TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
  m.impl("add.Tensor", &custom_add_Tensor);
  m.impl("_copy_from_and_resize", &custom__copy_from_and_resize);
  m.impl("set_.source_Storage", &custom_set_source_Storage);
  m.impl("quantize_per_tensor", at::native::quantize_per_tensor);
}

void custom_cpu_fallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {

@@ -4,10 +4,12 @@
#include <ATen/TensorIterator.h>
#include <ATen/native/UnaryOps.h>
#include <ATen/ops/as_strided_cpu_dispatch.h>
#include <ATen/ops/quantize_per_tensor_native.h>
#include <ATen/ops/set_cpu_dispatch.h>
#include <ATen/native/DispatchStub.h>
#include <ATen/native/transformers/attention.h>
#include <ATen/native/transformers/sdp_utils_cpp.h>
#include <ATen/native/quantized/AffineQuantizer.h>

#include <c10/core/Allocator.h>

@@ -254,11 +256,20 @@ int64_t _fused_sdp_choice_privateuse1(
  return static_cast<int64_t>(backend);
}

void quantize_tensor_per_tensor_affine_privateuse1(
    const at::Tensor& rtensor,
    at::Tensor& qtensor,
    double scale,
    int64_t zero_point) {
  // Just testing the dispatch flow, so do nothing
}

TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
  m.impl("empty.memory_format", empty_openreg);
  m.impl("empty_strided", empty_strided_openreg);
  m.impl("as_strided", as_strided_openreg);
  m.impl("set_.source_Storage_storage_offset", set_openreg);
  m.impl("quantize_per_tensor", at::native::quantize_per_tensor);
  m.impl("_fused_sdp_choice", &_fused_sdp_choice_privateuse1);
  m.impl("_scaled_dot_product_fused_attention_overrideable", &custom_scaled_dot_product_fused_attention_overrideable);
  m.impl("_scaled_dot_product_fused_attention_overrideable_backward", &custom_scaled_dot_product_fused_attention_overrideable_backward);

@@ -267,6 +278,9 @@ TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {

namespace at::native {
REGISTER_PRIVATEUSE1_DISPATCH(abs_stub, &openreg::abs_kernel);
REGISTER_PRIVATEUSE1_DISPATCH(
    quantize_tensor_per_tensor_affine_stub,
    &openreg::quantize_tensor_per_tensor_affine_privateuse1);
REGISTER_PRIVATEUSE1_DISPATCH(
    _fused_sdp_choice_stub,
    &openreg::_fused_sdp_choice_privateuse1);
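
With the stub registrations above in place, quantization calls on openreg tensors dispatch to the PrivateUse1 kernels; the new test_quantize further down verifies exactly this. A minimal sketch of the behavior (hypothetical usage, not part of the diff; assumes the OpenReg extension is loaded):

import torch

# Assumes the OpenReg extension is loaded and "openreg" is registered
# as the PrivateUse1 backend (setup not shown).
x = torch.randn(3, 4, 5, dtype=torch.float32, device="openreg")

# Routed to quantize_tensor_per_tensor_affine_privateuse1 via the
# REGISTER_PRIVATEUSE1_DISPATCH call above; the kernel is a no-op,
# but the returned tensor keeps the custom device and quantized dtype.
q = torch.quantize_per_tensor(x, 0.1, 10, torch.qint8)
assert q.device == torch.device("openreg:0")
assert q.dtype == torch.qint8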
@@ -55,117 +55,6 @@ class TestCppExtensionOpenRegistration(common.TestCase):
            verbose=True,
        )

    def test_open_device_quantized(self):
        input_data = torch.randn(3, 4, 5, dtype=torch.float32, device="cpu").to(
            "openreg"
        )
        quantized_tensor = torch.quantize_per_tensor(input_data, 0.1, 10, torch.qint8)
        self.assertEqual(quantized_tensor.device, torch.device("openreg:0"))
        self.assertEqual(quantized_tensor.dtype, torch.qint8)

    def test_open_device_random(self):
        # check if torch.openreg has implemented get_rng_state
        with torch.random.fork_rng(device_type="openreg"):
            pass

    def test_open_device_tensor(self):
        device = self.module.custom_device()

        # check whether tensor.type() meets the expectation
        dtypes = {
            torch.bool: "torch.openreg.BoolTensor",
            torch.double: "torch.openreg.DoubleTensor",
            torch.float32: "torch.openreg.FloatTensor",
            torch.half: "torch.openreg.HalfTensor",
            torch.int32: "torch.openreg.IntTensor",
            torch.int64: "torch.openreg.LongTensor",
            torch.int8: "torch.openreg.CharTensor",
            torch.short: "torch.openreg.ShortTensor",
            torch.uint8: "torch.openreg.ByteTensor",
        }
        for tt, dt in dtypes.items():
            test_tensor = torch.empty(4, 4, dtype=tt, device=device)
            self.assertTrue(test_tensor.type() == dt)

        # check whether the attributes and methods of the corresponding custom backend are generated correctly
        x = torch.empty(4, 4)
        self.assertFalse(x.is_openreg)

        x = x.openreg(torch.device("openreg"))
        self.assertFalse(self.module.custom_add_called())
        self.assertTrue(x.is_openreg)

        # test different device type input
        y = torch.empty(4, 4)
        self.assertFalse(y.is_openreg)

        y = y.openreg(torch.device("openreg:0"))
        self.assertFalse(self.module.custom_add_called())
        self.assertTrue(y.is_openreg)

        # test different device type input
        z = torch.empty(4, 4)
        self.assertFalse(z.is_openreg)

        z = z.openreg(0)
        self.assertFalse(self.module.custom_add_called())
        self.assertTrue(z.is_openreg)

    def test_open_device_packed_sequence(self):
        device = self.module.custom_device()  # noqa: F841
        a = torch.rand(5, 3)
        b = torch.tensor([1, 1, 1, 1, 1])
        input = torch.nn.utils.rnn.PackedSequence(a, b)
        self.assertFalse(input.is_openreg)
        input_openreg = input.openreg()
        self.assertTrue(input_openreg.is_openreg)

    def test_open_device_storage(self):
        # check whether the attributes and methods for storage of the corresponding custom backend are generated correctly
        x = torch.empty(4, 4)
        z1 = x.storage()
        self.assertFalse(z1.is_openreg)

        z1 = z1.openreg()
        self.assertFalse(self.module.custom_add_called())
        self.assertTrue(z1.is_openreg)

        with self.assertRaisesRegex(RuntimeError, "Invalid device"):
            z1.openreg(torch.device("cpu"))

        z1 = z1.cpu()
        self.assertFalse(self.module.custom_add_called())
        self.assertFalse(z1.is_openreg)

        z1 = z1.openreg(device="openreg:0", non_blocking=False)
        self.assertFalse(self.module.custom_add_called())
        self.assertTrue(z1.is_openreg)

        with self.assertRaisesRegex(RuntimeError, "Invalid device"):
            z1.openreg(device="cuda:0", non_blocking=False)

        # check UntypedStorage
        y = torch.empty(4, 4)
        z2 = y.untyped_storage()
        self.assertFalse(z2.is_openreg)

        z2 = z2.openreg()
        self.assertFalse(self.module.custom_add_called())
        self.assertTrue(z2.is_openreg)

        # check custom StorageImpl create
        self.module.custom_storage_registry()

        z3 = y.untyped_storage()
        self.assertFalse(self.module.custom_storageImpl_called())

        z3 = z3.openreg()
        self.assertTrue(self.module.custom_storageImpl_called())
        self.assertFalse(self.module.custom_storageImpl_called())

        z3 = z3[0:3]
        self.assertTrue(self.module.custom_storageImpl_called())

    @unittest.skipIf(
        sys.version_info >= (3, 13),
        "Error: Please register PrivateUse1HooksInterface by `RegisterPrivateUse1HooksInterface` first.",
@@ -88,6 +88,75 @@ class TestPrivateUse1(TestCase):
        torch.abs(x_openreg, out=o_openreg[:, :, 0:6:3])
        self.assertEqual(o_cpu, o_openreg.cpu())

    def test_backend_tensor_type(self):
        dtypes_map = {
            torch.bool: "torch.openreg.BoolTensor",
            torch.double: "torch.openreg.DoubleTensor",
            torch.float32: "torch.openreg.FloatTensor",
            torch.half: "torch.openreg.HalfTensor",
            torch.int32: "torch.openreg.IntTensor",
            torch.int64: "torch.openreg.LongTensor",
            torch.int8: "torch.openreg.CharTensor",
            torch.short: "torch.openreg.ShortTensor",
            torch.uint8: "torch.openreg.ByteTensor",
        }

        for dtype, str in dtypes_map.items():
            x = torch.empty(4, 4, dtype=dtype, device="openreg")
            self.assertTrue(x.type() == str)

    def test_backend_tensor_methods(self):
        x = torch.empty(4, 4)
        self.assertFalse(x.is_openreg)  # type: ignore[misc]

        y = x.openreg(torch.device("openreg"))  # type: ignore[misc]
        self.assertTrue(y.is_openreg)  # type: ignore[misc]
        z = x.openreg(torch.device("openreg:0"))  # type: ignore[misc]
        self.assertTrue(z.is_openreg)  # type: ignore[misc]
        n = x.openreg(0)  # type: ignore[misc]
        self.assertTrue(n.is_openreg)  # type: ignore[misc]

    @unittest.skip("Need to support Parameter in openreg")
    def test_backend_module_methods(self):
        class FakeModule(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.x = torch.nn.Parameter(torch.randn(3, 3))

            def forward(self):
                pass

        module = FakeModule()
        self.assertEqual(module.x.device.type, "cpu")
        module.openreg()  # type: ignore[misc]
        self.assertEqual(module.x.device.type, "openreg")

    @unittest.skip("Need to support untyped_storage in openreg")
    def test_backend_storage_methods(self):
        x = torch.empty(4, 4)

        x_cpu = x.storage()
        self.assertFalse(x_cpu.is_openreg)  # type: ignore[misc]
        x_openreg = x_cpu.openreg()  # type: ignore[misc]
        self.assertTrue(x_openreg.is_openreg)  # type: ignore[misc]

        y = torch.empty(4, 4)

        y_cpu = y.untyped_storage()
        self.assertFalse(y_cpu.is_openreg)  # type: ignore[misc]
        y_openreg = y_cpu.openreg()  # type: ignore[misc]
        self.assertTrue(y_openreg.is_openreg)  # type: ignore[misc]

    def test_backend_packed_sequence_methods(self):
        x = torch.rand(5, 3)
        y = torch.tensor([1, 1, 1, 1, 1])

        z_cpu = torch.nn.utils.rnn.PackedSequence(x, y)
        self.assertFalse(z_cpu.is_openreg)  # type: ignore[misc]

        z_openreg = z_cpu.openreg()  # type: ignore[misc]
        self.assertTrue(z_openreg.is_openreg)  # type: ignore[misc]

    def test_backend_fallback(self):
        pass

@@ -247,6 +316,12 @@ class TestOpenReg(TestCase):
        self.assertEqual(y.to(device="cpu"), torch.tensor([[1, 1], [2, 2], [3, 3]]))
        self.assertEqual(x.data_ptr(), y.data_ptr())

    def test_quantize(self):
        x = torch.randn(3, 4, 5, dtype=torch.float32, device="openreg")
        quantized_tensor = torch.quantize_per_tensor(x, 0.1, 10, torch.qint8)
        self.assertEqual(quantized_tensor.device, torch.device("openreg:0"))
        self.assertEqual(quantized_tensor.dtype, torch.qint8)


if __name__ == "__main__":
    run_tests()