[AOTInductor] Add interface for user managed buffer in package api. (#151325)
Summary: https://github.com/pytorch/pytorch/pull/151141

We add an interface for user-managed buffers to the package API.

Test Plan: Included in commit.

Reviewed By: henrylhtsang

Differential Revision: D72985440

Pull Request resolved: https://github.com/pytorch/pytorch/pull/151325
Approved by: https://github.com/angelayi
This commit is contained in:
parent 82200e33b5
commit 107121dfad
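For readers skimming the diff, the sketch below shows how the new user_managed flag is meant to be exercised through the package API. It is not part of the commit: the aoti_compile_and_package / aoti_load_package entry points and the "cuda" device are assumptions, while the inductor_configs keys and the load_constants arguments mirror the test added in this PR.

# Minimal usage sketch (assumed entry points, not from this commit).
import torch
from torch._inductor import aoti_compile_and_package, aoti_load_package


class Model(torch.nn.Module):
    def __init__(self, n, k, device):
        super().__init__()
        self.linear = torch.nn.Linear(k, n, device=device)

    def forward(self, a):
        return self.linear(a)


M, N, K = 128, 4096, 4096
device = "cuda"  # assumption: the new test skips cpu / cpp-only packaging
model = Model(N, K, device)
example_inputs = (torch.randn(M, K, device=device),)

# Package without baking the constants into the shared library, so the
# weights have to be supplied at load time through load_constants().
pkg_path = aoti_compile_and_package(
    torch.export.export(model, example_inputs),
    inductor_configs={
        "always_keep_tensor_constants": True,
        "aot_inductor.package_constants_in_so": False,
    },
)
compiled = aoti_load_package(pkg_path)

# user_managed=True keeps the runtime pointing at the caller-owned tensors
# instead of copying them, so in-place weight updates on `model` remain
# visible to the compiled artifact without reloading constants.
compiled.load_constants(
    model.state_dict(), check_full_update=True, user_managed=True
)

x = torch.randn(M, K, device=device)
print(torch.allclose(compiled(x), model(x)))

With user_managed=False the loader copies the provided tensors, which is why the in-place weight update in the first half of the new test does not change the compiled model's output, while the user_managed=True half does track it.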
@@ -467,6 +467,63 @@ class TestAOTInductorPackage(TestCase):
         output = compiled(test_inputs)
         self.assertEqual(expected, output)
 
+    @skipif(
+        lambda device, package_cpp_only: device == "cpu" or package_cpp_only,
+        "No support for cpp only and cpu",
+    )
+    def test_package_user_managed_weight(self):
+        class Model(torch.nn.Module):
+            def __init__(self, n, k, device):
+                super().__init__()
+                self.linear = torch.nn.Linear(k, n, device=device)
+
+            def forward(self, a):
+                return self.linear(a)
+
+        M, N, K = 128, 4096, 4096
+        model = Model(N, K, self.device)
+        example_inputs = (torch.randn(M, K, device=self.device),)
+
+        inductor_configs = {
+            "always_keep_tensor_constants": True,
+            "aot_inductor.package_constants_in_so": False,
+        }
+        compiled = compile(model, example_inputs, inductor_configs=inductor_configs)
+
+        self.assertEqual(
+            set(compiled.get_constant_fqns()), set(model.state_dict().keys())
+        )
+
+        compiled.load_constants(
+            model.state_dict(), check_full_update=True, user_managed=False
+        )
+
+        test_inputs = torch.randn(M, K, device=self.device)
+        expected = model(test_inputs)
+        output = compiled(test_inputs)
+        self.assertEqual(expected, output)
+
+        # Let's try to modify the weight in-place, result shouldn't change.
+        model.linear.weight.data *= 3.7
+        new_output = compiled(test_inputs)
+        self.assertEqual(new_output, output)
+
+        # Recreate a new model that we will test against user_managed=True
+        new_compiled = compile(model, example_inputs, inductor_configs=inductor_configs)
+        new_compiled.load_constants(
+            model.state_dict(), check_full_update=True, user_managed=True
+        )
+
+        expected = model(test_inputs)
+        new_output = new_compiled(test_inputs)
+        self.assertEqual(expected, new_output)
+
+        # Try to modify the weight in-place, result should change.
+        model.linear.weight.data *= 3.7
+        expected = model(test_inputs)
+        new_output = new_compiled(test_inputs)
+        self.assertEqual(new_output, expected)
+
     def test_deepcopy_compiled_model(self):
         class Model(torch.nn.Module):
             def forward(self, x, y):
@@ -259,6 +259,7 @@ class AOTICompiledModel:
         constants_map: dict[str, torch.Tensor],
         *,
         check_full_update: bool,
+        user_managed: bool = False,
     ) -> None:
         """
         Given a mapping of constant fqns to tensors, load the constants into the model.
@@ -270,7 +271,9 @@ class AOTICompiledModel:
             check_full_update: Whether to add check to see if all the constants
                 are updated and have values.
         """
-        self.loader.load_constants(constants_map, False, check_full_update)  # type: ignore[attr-defined]
+        self.loader.load_constants(  # type: ignore[attr-defined]
+            constants_map, False, check_full_update, user_managed
+        )
 
     def get_constant_fqns(self) -> list[str]:
         return self.loader.get_constant_fqns()  # type: ignore[attr-defined]
@@ -523,7 +523,8 @@ std::vector<std::string> AOTIModelPackageLoader::get_call_spec() {
 void AOTIModelPackageLoader::load_constants(
     std::unordered_map<std::string, at::Tensor>& constants_map,
     bool use_inactive,
-    bool check_full_update) {
+    bool check_full_update,
+    bool user_managed) {
   std::unordered_map<std::string, std::string> constant_name_to_fqn =
       runner_->getConstantNamesToOriginalFQNs();
   std::unordered_map<std::string, std::string> fqn_to_constant_name;
@@ -541,7 +542,7 @@ void AOTIModelPackageLoader::load_constants(
   }
 
   return runner_->update_constant_buffer(
-      updated_constants_map, use_inactive, check_full_update);
+      updated_constants_map, use_inactive, check_full_update, user_managed);
 }
 
 std::vector<std::string> AOTIModelPackageLoader::get_constant_fqns() {
@@ -558,9 +559,10 @@ std::vector<std::string> AOTIModelPackageLoader::get_constant_fqns() {
 void AOTIModelPackageLoader::update_constant_buffer(
     std::unordered_map<std::string, at::Tensor>& tensor_map,
     bool use_inactive,
-    bool validate_full_updates) {
+    bool validate_full_updates,
+    bool user_managed) {
   runner_->update_constant_buffer(
-      tensor_map, use_inactive, validate_full_updates);
+      tensor_map, use_inactive, validate_full_updates, user_managed);
 }
 } // namespace torch::inductor
 #endif
@@ -30,13 +30,15 @@ class TORCH_API AOTIModelPackageLoader {
   void load_constants(
       std::unordered_map<std::string, at::Tensor>& constants_map,
       bool use_inactive,
-      bool check_full_update);
+      bool check_full_update,
+      bool user_managed = false);
   std::vector<std::string> get_constant_fqns();
 
   void update_constant_buffer(
       std::unordered_map<std::string, at::Tensor>& tensor_map,
       bool use_inactive,
-      bool validate_full_updates);
+      bool validate_full_updates,
+      bool user_managed = false);
 
  private:
   std::string temp_dir_;
@@ -69,9 +69,19 @@ void initAOTIPackageBindings(PyObject* module) {
       .def("get_call_spec", &AOTIModelPackageLoaderPybind::get_call_spec)
       .def(
           "get_constant_fqns", &AOTIModelPackageLoaderPybind::get_constant_fqns)
-      .def("load_constants", &AOTIModelPackageLoaderPybind::load_constants)
+      .def(
+          "load_constants",
+          &AOTIModelPackageLoaderPybind::load_constants,
+          py::arg("constants_map"),
+          py::arg("use_inactive"),
+          py::arg("check_full_update"),
+          py::arg("user_managed") = false)
       .def(
           "update_constant_buffer",
-          &AOTIModelPackageLoaderPybind::update_constant_buffer);
+          &AOTIModelPackageLoaderPybind::update_constant_buffer,
+          py::arg("tensor_map"),
+          py::arg("use_inactive"),
+          py::arg("validate_full_updates"),
+          py::arg("user_managed") = false);
 }
 } // namespace torch::inductor