#include #include #include #include // these symbols are generated by cmake, using ld -r -b binary // libtorch_deployinterpreter.so which takes the contents of the so and embeds // it into a symbol that is then linked into libtorch_deploy.so. This enables us // to simply copy the contents of this symbol to disk and dlopen it to create an // instance of python. extern "C" char _binary_libtorch_deployinterpreter_so_start[]; extern "C" char _binary_libtorch_deployinterpreter_so_end[]; namespace torch { namespace deploy { Package InterpreterManager::load_package(const std::string& uri) { TORCH_DEPLOY_TRY return Package(uri, this); TORCH_DEPLOY_SAFE_CATCH_RETHROW } Package InterpreterManager::load_package( std::shared_ptr reader) { TORCH_DEPLOY_TRY return Package(reader, this); TORCH_DEPLOY_SAFE_CATCH_RETHROW } Obj InterpreterSession::from_movable(const ReplicatedObj& obj) { TORCH_DEPLOY_TRY return impl_->unpickle_or_get(obj.pImpl_->object_id_, obj.pImpl_->data_); TORCH_DEPLOY_SAFE_CATCH_RETHROW } InterpreterSession ReplicatedObj::acquire_session( const Interpreter* on_this_interpreter) const { TORCH_DEPLOY_TRY InterpreterSession I = on_this_interpreter ? on_this_interpreter->acquire_session() : pImpl_->manager_->acquire_one(); I.self = I.from_movable(*this); return I; TORCH_DEPLOY_SAFE_CATCH_RETHROW } InterpreterSession::~InterpreterSession() { if (manager_ && notify_idx_ >= 0) { manager_->resources_.free(notify_idx_); } } void ReplicatedObjImpl::unload(const Interpreter* on_this_interpreter) { TORCH_DEPLOY_TRY if (!on_this_interpreter) { // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) for (auto& interp : manager_->all_instances()) { unload(&interp); } return; } InterpreterSession I = on_this_interpreter->acquire_session(); I.impl_->unload(object_id_); TORCH_DEPLOY_SAFE_CATCH_RETHROW } ReplicatedObjImpl::~ReplicatedObjImpl() { unload(nullptr); } void ReplicatedObj::unload(const Interpreter* on_this_interpreter) { TORCH_DEPLOY_TRY pImpl_->unload(on_this_interpreter); TORCH_DEPLOY_SAFE_CATCH_RETHROW } ReplicatedObj InterpreterSession::create_movable(Obj obj) { TORCH_DEPLOY_TRY TORCH_CHECK( manager_, "Can only create a movable object when the session was created from an interpreter that is part of a InterpreterManager"); auto pickled = impl_->pickle(self, obj); return ReplicatedObj(std::make_shared( manager_->next_object_id_++, std::move(pickled), manager_)); TORCH_DEPLOY_SAFE_CATCH_RETHROW } Interpreter::Interpreter(InterpreterManager* manager) : handle_(nullptr), manager_(manager) { // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays) char library_name[] = "/tmp/torch_deployXXXXXX"; int fd = mkstemp(library_name); TORCH_INTERNAL_ASSERT(fd != -1, "failed to create temporary file"); library_name_ = library_name; FILE* dst = fdopen(fd, "wb"); TORCH_INTERNAL_ASSERT(dst); size_t size = _binary_libtorch_deployinterpreter_so_end - _binary_libtorch_deployinterpreter_so_start; TORCH_INTERNAL_ASSERT( size == fwrite(_binary_libtorch_deployinterpreter_so_start, 1, size, dst)); fclose(dst); handle_ = dlopen(library_name, RTLD_LOCAL | RTLD_LAZY); if (!handle_) { throw std::runtime_error(dlerror()); } // note: if you want better debugging symbols for things inside // new_intepreter_impl, comment out this line so that the so lasts long enough // for the debugger to see it. unlink(library_name_.c_str()); void* new_interpreter_impl = dlsym(handle_, "new_interpreter_impl"); assert(new_interpreter_impl); pImpl_ = std::unique_ptr( // NOLINTNEXTLINE(modernize-redundant-void-arg) ((InterpreterImpl * (*)(void)) new_interpreter_impl)()); } Interpreter::~Interpreter() { if (handle_) { // ensure python uninitialization runs before we dlclose the library pImpl_.reset(); dlclose(handle_); } } int LoadBalancer::acquire() { TORCH_DEPLOY_TRY thread_local int last = 0; size_t minusers = SIZE_MAX; int min_idx = 0; for (size_t i = 0; i < n_; ++i, ++last) { // NOLINTNEXTLINE(clang-diagnostic-sign-compare) if (last >= n_) { last = 0; } uint64_t prev = 0; bool acquired = __atomic_compare_exchange_n( &uses_[8 * last], &prev, 1ULL, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); if (acquired) { // fast path, we found an interpreter with no users return last; } // slow path, we don't want to use this interpreter because it is being // used by someone else. if (prev < minusers) { minusers = prev; min_idx = last; } } // we failed to find a completely free interpreter. heuristically use the // one with the least number of user (note that this may have changed since // then, so this is only a heuristic). __atomic_fetch_add(&uses_[8 * min_idx], 1ULL, __ATOMIC_SEQ_CST); return min_idx; TORCH_DEPLOY_SAFE_CATCH_RETHROW } void LoadBalancer::free(int where) { TORCH_DEPLOY_TRY // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers) __atomic_fetch_sub(&uses_[8 * where], 1ULL, __ATOMIC_SEQ_CST); TORCH_DEPLOY_SAFE_CATCH_RETHROW } } // namespace deploy } // namespace torch