#pragma once #include #include #include #include #include namespace c10 { // A PyHandleCache represents a cached pointer from a C++ object to // a Python object that represents that object analogously in Python. // Upon a cache hit, the relevant object can be retrieved after a test // and then a memory load. Two conditions must hold to be able to use this // class: // // - This must truly be a cache; e.g., the caller must be able to produce // the object some other way if the cache hit misses. // // - This must truly be a handle; e.g., the Python object referenced by // this class must have static lifetime. This means we don't have to // maintain strong ownership or deallocate the object when the C++ object // dies. Static lifetime is a good idea in conjunction with the cache, // since if you are producing a fresh object on miss you won't be // maintaining object identity. If you need bidirectional ownership, // you will want to factor out the pattern in TensorImpl with // resurrection. // // This cache is expected to not improve perf under torchdeploy, as one // interpreter will fill up the cache, and all the interpreters will be // unable to use the slot. A potential improvement is to have multiple // slots (one per interpreter), which will work in deployment scenarios // where there a stable, fixed number of interpreters. You can also store // the relevant state in the Python library, rather than in the non-Python // library (although in many cases, this is not convenient, as there may // not be a way to conveniently index based on the object.) class PyHandleCache { public: PyHandleCache() : pyinterpreter_(nullptr) {} // Attempt to fetch the pointer from the cache, if the PyInterpreter // matches. If it doesn't exist, or the cache entry is not valid, // use slow_accessor to get the real pointer value and return that // (possibly writing it to the cache, if the cache entry is // available.) template PyObject* ptr_or(impl::PyInterpreter* self_interpreter, F slow_accessor) const { // Note [Memory ordering on Python interpreter tag] impl::PyInterpreter* interpreter = pyinterpreter_.load(std::memory_order_acquire); if (C10_LIKELY(interpreter == self_interpreter)) { return data_; } else if (interpreter == nullptr) { auto* r = slow_accessor(); impl::PyInterpreter* expected = nullptr; // attempt to claim this cache entry with the specified interpreter tag if (pyinterpreter_.compare_exchange_strong( expected, self_interpreter, std::memory_order_acq_rel)) { data_ = r; } // This shouldn't be possible, as you should be GIL protected TORCH_INTERNAL_ASSERT(expected != self_interpreter); return r; } else { return slow_accessor(); } } private: mutable std::atomic pyinterpreter_; mutable PyObject* data_{nullptr}; }; } // namespace c10