[BE] Modify PyObjectSlot to assume only a single interpreter is in use (#158407)

This PR makes some less risky changes to PyObjectSlot, as there is a lot of stuff we do not need since there is only one interpreter. Specifically, `check_interpreter` and `has_pyobj_nonhermetic` are removed. Pull Request resolved: https://github.com/pytorch/pytorch/pull/158407 Approved by: https://github.com/albanD ghstack dependencies: #158290, #158291
2025-12-07 00:21:07 +01:00 · 2025-07-29 12:57:36 -07:00 · 2025-07-29 12:57:36 -07:00 · a753a72b14
commit a753a72b14
parent b57d1ef110
3 changed files with 7 additions and 92 deletions
--- a/c10/core/impl/PyObjectSlot.cpp
+++ b/c10/core/impl/PyObjectSlot.cpp
@ -44,19 +44,7 @@ PyInterpreter& PyObjectSlot::load_pyobj_interpreter() const {
  if (interpreter) {
    return *interpreter;
  }
-  TORCH_CHECK(
+  TORCH_CHECK(false, "cannot access PyObject for Tensor - no interpreter set");
      false,
      "cannot access PyObject for Tensor on interpreter ",
      (*pyobj_interpreter_.load())->name());
 }
 bool PyObjectSlot::check_interpreter(PyInterpreter* interpreter) {
  return interpreter == pyobj_interpreter();
 }
 bool PyObjectSlot::has_pyobj_nonhermetic() {
  return check_pyobj(pyobj_interpreter(), /*ignore_hermetic_tls=*/true)
      .has_value();
 }
 bool PyObjectSlot::owns_pyobj() {
--- a/c10/core/impl/PyObjectSlot.h
+++ b/c10/core/impl/PyObjectSlot.h
@ -28,48 +28,7 @@ struct C10_API PyObjectSlot {
      PyInterpreter* self_interpreter,
      PyObject* pyobj,
      PyInterpreterStatus status) {
-    impl::PyInterpreter* expected = nullptr;
+    pyobj_interpreter_.store(self_interpreter, std::memory_order_relaxed);
    switch (status) {
      case impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED:
        // caller guarantees there is no multithreaded access; if there is
        // no data race OK to do a relaxed store
        pyobj_interpreter_.store(self_interpreter, std::memory_order_relaxed);
        break;
      case impl::PyInterpreterStatus::TAGGED_BY_US:
        // no tagging is necessary, the tag is already correct
        break;
      case impl::PyInterpreterStatus::MAYBE_UNINITIALIZED:
        // attempt to claim this TensorImpl with the specified interpreter
        // tag
        if (pyobj_interpreter_.compare_exchange_strong(
                expected, self_interpreter, std::memory_order_acq_rel)) {
          break;
        }
        // test if, actually, it was already tagged by us!  this situation can't
        // be caused by a race, but it could be caused by a situation
        // where someone conservatively tagged the tensor as MAYBE_UNINITIALIZED
        // (because they didn't pre-check the tag) when actually it was
        // owned by the interpreter
        if (expected == self_interpreter) {
          break;
        }
        // fallthrough, we lost the race.  We are guaranteed not to lose the
        // race with ourself, as calls to init_pyobj with the same interpreter
        // ID must be sequentialized by the GIL
        [[fallthrough]];
      case impl::PyInterpreterStatus::TAGGED_BY_OTHER:
        TORCH_CHECK(
            false,
            "cannot allocate PyObject for Tensor on interpreter ",
            self_interpreter,
            " that has already been used by another torch deploy interpreter ",
            pyobj_interpreter_.load());
    }
    // we are the ONLY thread that can have gotten to this point.  It is not
    // possible to conflict with another zero interpreter as access is protected
    // by GIL
    // NB: owns_pyobj tag is initially false
    pyobj_ = pyobj;
  }
@ -97,30 +56,16 @@ struct C10_API PyObjectSlot {
  std::optional<PyObject*> check_pyobj(
      PyInterpreter* self_interpreter,
      bool ignore_hermetic_tls = false) const {
    // Note [Memory ordering on Python interpreter tag]
    impl::PyInterpreter* interpreter =
        pyobj_interpreter_.load(std::memory_order_acquire);
    if (interpreter == nullptr) {
      // NB: This never returns DEFINITELY_UNINITIALIZED because there is
      // always the possibility that another thread races to initialize
      // after we query here.  The only time when we can conclude a tensor
      // is definitely uninitialized is when we have just allocated it and
      // it cannot have escaped to other threads yet
      return std::nullopt;
-    } else if (interpreter == self_interpreter) {
+    }
-      // NB: pyobj_ could still be null!
+
-      if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) {
+    if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) {
-        return std::nullopt;
+      return std::nullopt;
      } else {
        return _unchecked_untagged_pyobj();
      }
    } else {
-      TORCH_CHECK(
+      return _unchecked_untagged_pyobj();
          false,
          "cannot access PyObject for Tensor on interpreter ",
          (*self_interpreter)->name(),
          " that has already been used by another torch deploy interpreter ",
          (*pyobj_interpreter_.load())->name());
    }
  }
@ -130,13 +75,6 @@ struct C10_API PyObjectSlot {
  PyInterpreter& load_pyobj_interpreter() const;
  // Check if the PyObjectSlot's interpreter is the same as the specified
  // interpreter
  bool check_interpreter(PyInterpreter* interpreter);
  // Check if the PyObjectSlot is holding a PyObject, owned or non-owned
  bool has_pyobj_nonhermetic();
  bool owns_pyobj();
  void set_owns_pyobj(bool b);
--- a/torch/csrc/Storage.cpp
+++ b/torch/csrc/Storage.cpp
@ -98,17 +98,6 @@ PyObject* THPStorage_Wrap(c10::Storage storage) {
  }
  c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot();
  // If the StorageImpl has a PyObject that is managed by a different
  // interpreter than the current one, create a new StorageImpl that points to
  // the same data and then create the Python storage from that.
  // NOTE: This is only supposed to happen in MultiPy  // codespell:ignore
  if (pyobj_slot->has_pyobj_nonhermetic() &&
      !pyobj_slot->check_interpreter(getPyInterpreter())) {
    return THPStorage_NewWithStorage(
        THPStorageClass,
        c10::newStorageImplFromRefcountedDataPtr(storage),
        c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
  }
  std::optional<PyObject*> maybe_pyobj = pyobj_slot->check_pyobj(
      getPyInterpreter(), /*ignore_hermetic_tls=*/false);
  c10::impl::PyInterpreterStatus status =