From cdfa33a328f2d3fdaeb645fde186194ddc2e66c0 Mon Sep 17 00:00:00 2001 From: dolpm <34420038+dolpm@users.noreply.github.com> Date: Sat, 14 Jun 2025 03:28:55 +0000 Subject: [PATCH] [nativert] move execution frame to torch (#155830) Summary: att Test Plan: ci Rollback Plan: Differential Revision: D76369008 Pull Request resolved: https://github.com/pytorch/pytorch/pull/155830 Approved by: https://github.com/zhxchen17 --- build_variables.bzl | 1 + test/cpp/nativert/CMakeLists.txt | 1 + test/cpp/nativert/test_execution_frame.cpp | 96 ++++++++++++++ torch/nativert/executor/ExecutionFrame.cpp | 145 +++++++++++++++++++++ torch/nativert/executor/ExecutionFrame.h | 139 ++++++++++++++++++++ 5 files changed, 382 insertions(+) create mode 100644 test/cpp/nativert/test_execution_frame.cpp create mode 100644 torch/nativert/executor/ExecutionFrame.cpp create mode 100644 torch/nativert/executor/ExecutionFrame.h diff --git a/build_variables.bzl b/build_variables.bzl index 288d0dd8546..ee29c5bdf81 100644 --- a/build_variables.bzl +++ b/build_variables.bzl @@ -599,6 +599,7 @@ libtorch_nativert_sources = [ "torch/nativert/executor/DelegateExecutor.cpp", "torch/nativert/executor/Placement.cpp", "torch/nativert/executor/ExecutionPlanner.cpp", + "torch/nativert/executor/ExecutionFrame.cpp", "torch/nativert/executor/PlacementUtils.cpp", "torch/nativert/executor/Weights.cpp", "torch/nativert/executor/memory/FunctionSchema.cpp", diff --git a/test/cpp/nativert/CMakeLists.txt b/test/cpp/nativert/CMakeLists.txt index d68e0f0eed8..521e56f8980 100644 --- a/test/cpp/nativert/CMakeLists.txt +++ b/test/cpp/nativert/CMakeLists.txt @@ -15,6 +15,7 @@ set(NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/nativert/executor/memory/FunctionSchema.cpp ${TORCH_ROOT}/torch/nativert/executor/ExecutionPlanner.cpp ${TORCH_ROOT}/torch/nativert/detail/ITree.cpp + ${TORCH_ROOT}/torch/nativert/executor/ExecutionFrame.cpp ) add_executable(test_nativert diff --git a/test/cpp/nativert/test_execution_frame.cpp 
b/test/cpp/nativert/test_execution_frame.cpp new file mode 100644 index 00000000000..ab5fa2e146d --- /dev/null +++ b/test/cpp/nativert/test_execution_frame.cpp @@ -0,0 +1,96 @@ +#include +#include + +namespace torch::nativert { + +TEST(ExecutionFrameTest, CreateFrame) { + auto graph = stringToGraph(R"( + graph(%x, %y): + %a = foo(a=%x, b=%y) + %b = foo1(a=%x, b=%y) + %c = foo2(c=%a, d=%b) + return(%c) + )"); + + auto frame = ExecutionFrame(*graph); + + for (auto* v : graph->values()) { + frame.setIValue(v->id(), c10::IValue(at::tensor({v->id()}, at::kInt))); + auto& frame_v = frame.getIValue(v->id()); + EXPECT_EQ(frame_v.tagKind(), "Tensor"); + } + + auto outputs = frame.tryMoveUserOutputs(); + + EXPECT_EQ(outputs.size(), 1); + EXPECT_EQ(outputs[0].tagKind(), "Tensor"); + EXPECT_EQ(outputs[0].toTensor().item().toInt(), graph->getValue("c")->id()); +} + +TEST(ExecutionFrameTest, TestSetBorrowedValue) { + auto graph = stringToGraph(R"( + graph(%x, %y): + %a = foo(a=%x, b=%y) + %b = foo1(a=%x, b=%y) + %c = foo2(c=%a, d=%b) + return(%c) + )"); + + auto x = c10::IValue(at::tensor({1}, at::kInt)); + auto y = c10::IValue(at::tensor({2}, at::kInt)); + + { + auto frame = ExecutionFrame(*graph); + + frame.setBorrowedIValue( + graph->getValue("x")->id(), + c10::MaybeOwnedTraits::createBorrow(x)); + frame.setBorrowedIValue( + graph->getValue("y")->id(), + c10::MaybeOwnedTraits::createBorrow(y)); + + [[maybe_unused]] auto& w = frame.getIValue(graph->getValue("x")->id()); + [[maybe_unused]] auto& z = frame.getIValue(graph->getValue("y")->id()); + + EXPECT_EQ(x.use_count(), 1); + EXPECT_EQ(y.use_count(), 1); + + EXPECT_TRUE(c10::MaybeOwnedTraits{}.debugBorrowIsValid( + frame.getIValue(graph->getValue("x")->id()))); + EXPECT_TRUE(c10::MaybeOwnedTraits{}.debugBorrowIsValid( + frame.getIValue(graph->getValue("y")->id()))); + } + + EXPECT_EQ(x.use_count(), 1); + EXPECT_EQ(y.use_count(), 1); +} + +TEST(ExecutionFrameTest, TestPersistentValue) { + auto graph = stringToGraph(R"( + 
graph(%x, %y, %my_weight): + %a = foo(a=%x, b=%y) + %b = foo1(a=%x, b=%y) + %c = foo2(c=%a, d=%b) + return(%c) + )"); + + Weights weights(graph.get()); + weights.setValue("my_weight", at::tensor({1}, at::kInt)); + + auto new_sig = graph->signature(); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + const_cast>&>( + new_sig.inputsToWeights()) + .emplace_back("my_weight", "my_weight"); + graph->setSignature(new_sig); + + auto frame = ExecutionFrame(*graph, weights); + + EXPECT_EQ(frame.weightVersion(), 0); + auto wid = graph->getValue("my_weight")->id(); + + EXPECT_NO_THROW(frame.getTensor(wid)); + EXPECT_DEATH(frame.releaseValue(wid), "Cannot release persistent value"); +} + +} // namespace torch::nativert diff --git a/torch/nativert/executor/ExecutionFrame.cpp b/torch/nativert/executor/ExecutionFrame.cpp new file mode 100644 index 00000000000..2aa11e6eaba --- /dev/null +++ b/torch/nativert/executor/ExecutionFrame.cpp @@ -0,0 +1,145 @@ +#include +#include + +#include +#include + +namespace torch::nativert { + +ExecutionFrame::ExecutionFrame(const Graph& graph) + : graph_(graph), + allValues_(graph.numValues()), + persistent_(graph.numValues()), + moveable_output_mask_(graph.userOutputs().size()) { + // load constant SymInts into execution frame + for (const auto& [valueId, constSymintValue] : + graph_.getConstantSymIntValues()) { + setPersistentIValue(valueId, constSymintValue); + } + + for (const Node& node : graph_.nodes()) { + if (node.target() == "torch.ops.higher_order.run_const_graph") { + const auto& const_graph = + std::get>(node.attributes().at(0).value); + for (size_t i = 0; i < node.outputs().size(); ++i) { + foldedConstIds_[std::string{const_graph->outputs().at(i)->name()}] = + node.outputs()[i]->id(); + } + } + } +} + +ExecutionFrame::ExecutionFrame(const Graph& graph, const Weights& weights) + : ExecutionFrame(graph) { + setWeights(weights); +} + +void ExecutionFrame::setWeights(const Weights& weights) { + weightVersion_ = 
weights.version(); + + const auto& inputsToWeights = graph_.signature().inputsToWeights(); + for (const auto& [inputName, weightName] : inputsToWeights) { + const Value* value = graph_.getValue(inputName); + setPersistentIValue(value->id(), weights.at(weightName)); + } + + const auto& inputsToCustomObjs = graph_.signature().inputsToCustomObjs(); + for (const auto& [inputName, customObjName] : inputsToCustomObjs) { + const Value* value = graph_.getValue(inputName); + setPersistentIValue(value->id(), weights.getCustomObj(customObjName)); + } + + for (const auto& [value, tensor] : weights.getFoldedConsts()) { + setPersistentIValue(foldedConstIds_.at(value), tensor); + } + + for (const auto& [n, iv] : weights.getConstFoldedValues()) { + const Value* v = graph_.getValue(n); + setPersistentIValue(v->id(), iv); + } + + updateMovableOutputs(); +} + +void ExecutionFrame::updateMovableOutputs() { + moveable_output_mask_.assign(moveable_output_mask_.size(), true); + + c10::FastSet inputs; + for (const auto* input : graph_.userInputs()) { + if (input) { + inputs.insert(input->id()); + } + } + + const auto& outputs = graph_.userOutputs(); + const size_t num_outputs = outputs.size(); + + c10::FastSet seen; + for (size_t i = 0; i < num_outputs; i++) { + auto idx = num_outputs - 1 - i; + if (const Value* const* valuePtr = std::get_if(&outputs[idx]); + valuePtr && *valuePtr) { + auto id = (*valuePtr)->id(); + + /* + values are not moveable if: + 1. they are persistent + 2. they are inputs (since inputs are borrowed) + 3. 
the value will be moved in a later (further-right) output + */ + + if (!seen.insert(id).second || persistent_[id] || + inputs.find(id) != inputs.end()) { + moveable_output_mask_[idx] = false; + } + } + } +} + +ExecutionFrame::ExecutionFrame( + const Graph& graph, + size_t numValues, + const std::vector&, + const std::vector&) + : graph_(graph) { + allValues_.resize(numValues); +} + +void ExecutionFrame::setIValue(ValueId id, c10::IValue ivalue) { + DCHECK(static_cast(id) < allValues_.size()); + allValues_[id] = std::move(ivalue); +} + +void ExecutionFrame::setBorrowedIValue(ValueId id, c10::IValue ivalue) { + DCHECK(static_cast(id) < allValues_.size()); + borrowedValueIds_.push_back(id); + allValues_[id] = std::move(ivalue); +} + +at::Tensor ExecutionFrame::getTensor(ValueId id) const { + const auto& ivalue = getIValue(id); + if (C10_LIKELY(ivalue.isTensor())) { + return ivalue.toTensor(); + } else { + throw std::runtime_error("getTensor called on non-tensor value"); + } +} + +std::vector ExecutionFrame::tryMoveUserOutputs() { + std::vector ret; + const auto& outputs = graph_.userOutputs(); + ret.reserve(outputs.size()); + for (const auto& [i, outputValue] : c10::enumerate(outputs)) { + if (const Value* const* valuePtr = std::get_if(&outputValue); + valuePtr && *valuePtr) { + ret.push_back( + isOutputMovable(i) ? 
moveIValue((*valuePtr)->id()) + : getIValue((*valuePtr)->id())); + } else if (Constant const* constant = std::get_if(&outputValue)) { + ret.push_back(constantToIValue(*constant)); + } + } + return ret; +} + +} // namespace torch::nativert diff --git a/torch/nativert/executor/ExecutionFrame.h b/torch/nativert/executor/ExecutionFrame.h new file mode 100644 index 00000000000..bebf1cfa5b9 --- /dev/null +++ b/torch/nativert/executor/ExecutionFrame.h @@ -0,0 +1,139 @@ +#pragma once + +#include + +#include +#include +#include + +#include + +namespace torch::nativert { + +/** + * This class encapsulates the stateful values of an execution, + * most notably, the tensor values passed between nodes, aka intermediate + * activations. + */ +class ExecutionFrame { + public: + // Constructor for weight-less graph, used for higher order ops, e.g. + // torch.cond + explicit ExecutionFrame(const Graph& graph); + + explicit ExecutionFrame(const Graph& graph, const Weights& weights); + + // Constructor for testing purposes + explicit ExecutionFrame( + const Graph& graph, + size_t numValues, + const std::vector& graphInputIds, + const std::vector& graphOutputIds); + + ~ExecutionFrame() { + destroyBorrowedIValues(); + } + + std::vector tryMoveUserOutputs(); + + c10::IValue moveIValue(ValueId id) { + return std::move(allValues_[id]); + } + + const c10::IValue& getIValue(ValueId id, bool allowNone = true) const { + const auto& iValue = allValues_[id]; + if (allowNone && iValue.isNone()) { + return iValue; + } + DCHECK(!iValue.isNone()); + return iValue; + } + + c10::IValue& getIValue(ValueId id, bool allowNone = true) { + auto& iValue = allValues_[id]; + if (allowNone && iValue.isNone()) { + return iValue; + } + DCHECK(!iValue.isNone()); + return iValue; + } + + void setIValue(ValueId id, c10::IValue ivalue); + void setBorrowedIValue(ValueId id, c10::IValue ivalue); + + at::Tensor getTensor(ValueId id) const; + + std::vector getTensorVector(ValueId id) const { + return 
getIValue(id).toTensorVector(); + } + + int64_t getSymInt(ValueId id) const { + return getIValue(id).toInt(); + } + + double getSymFloat(ValueId id) const { + return getIValue(id).toDouble(); + } + + void setPersistentIValue(ValueId id, c10::IValue ivalue) { + setIValue(id, std::move(ivalue)); + persistent_[id] = true; + } + + void releaseValue(ValueId id) { + CHECK(!persistent_[id]) << "Cannot release persistent value"; + allValues_[id] = c10::IValue(); + } + + void destroyBorrowedIValues() { + for (const auto& id : borrowedValueIds_) { + c10::MaybeOwnedTraits::destroyBorrow(getIValue(id)); + } + borrowedValueIds_.clear(); + } + + void setWork(int64_t workId, const c10::intrusive_ptr& work) { + work_[workId] = work; + } + + c10::intrusive_ptr getWork(int64_t workId) const { + CHECK(work_.find(workId) != work_.end()) + << "Couldn't find work with Id: " << workId; + return work_.at(workId); + } + + WeightVersion weightVersion() const { + return weightVersion_; + } + + void setWeights(const Weights& weights); + + private: + bool isOutputMovable(size_t idx) const { + TORCH_CHECK_LT(idx, moveable_output_mask_.size()); + return moveable_output_mask_[idx]; + } + void updateMovableOutputs(); + + const Graph& graph_; + WeightVersion weightVersion_ = -1; + + // All the intermediate values for the entire graph, including graph inputs + // and outputs. This table is fixed once constructed. + std::vector allValues_; + std::vector persistent_; + + std::unordered_map> work_; + + std::vector borrowedValueIds_; + + std::unordered_map foldedConstIds_; + + // moveable_output_mask_[i] corresponds to graph_.userOutputs()[i] + // + // if moveable_output_mask_[i] is true, then graph_.userOutputs()[i] + // can be moved + std::vector moveable_output_mask_; +}; + +} // namespace torch::nativert