From d98fa4a1033079ae19591aff5c3c354617bff482 Mon Sep 17 00:00:00 2001 From: Dylan Maloy Date: Wed, 25 Jun 2025 22:43:40 +0000 Subject: [PATCH] implement SR's storage group planning algorithm (#156715) Summary: att Test Plan: tested on a localnet. it uses ~15% more memory than greedy-by-size, but is more performant. local: gbs: 110656b dsg: 131584b local_ro: gbs: 38208 dsg: 44544 Differential Revision: D75653840 Pull Request resolved: https://github.com/pytorch/pytorch/pull/156715 Approved by: https://github.com/zhxchen17 --- build_variables.bzl | 1 + test/cpp/nativert/CMakeLists.txt | 1 + .../test_layout_planner_algorithm.cpp | 23 +++ .../executor/memory/DisjointStorageGroups.cpp | 175 ++++++++++++++++++ .../executor/memory/DisjointStorageGroups.h | 10 + .../executor/memory/LayoutPlannerSettings.h | 1 + 6 files changed, 211 insertions(+) create mode 100644 torch/nativert/executor/memory/DisjointStorageGroups.cpp create mode 100644 torch/nativert/executor/memory/DisjointStorageGroups.h diff --git a/build_variables.bzl b/build_variables.bzl index 788c7820cc6..da49ed05dad 100644 --- a/build_variables.bzl +++ b/build_variables.bzl @@ -617,6 +617,7 @@ libtorch_nativert_sources = [ "torch/nativert/executor/ParallelGraphExecutor.cpp", "torch/nativert/kernels/CallTorchBindKernel.cpp", "torch/nativert/kernels/PrimKernelRegistry.cpp", + "torch/nativert/executor/memory/DisjointStorageGroups.cpp", ] torch_mobile_tracer_sources = [ diff --git a/test/cpp/nativert/CMakeLists.txt b/test/cpp/nativert/CMakeLists.txt index 9f2ad858dfd..81ca5869774 100644 --- a/test/cpp/nativert/CMakeLists.txt +++ b/test/cpp/nativert/CMakeLists.txt @@ -20,6 +20,7 @@ set(NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/nativert/kernels/C10Kernel.cpp ${TORCH_ROOT}/torch/nativert/executor/memory/GreedyBySize.cpp ${TORCH_ROOT}/torch/nativert/executor/memory/Bump.cpp + ${TORCH_ROOT}/torch/nativert/executor/memory/DisjointStorageGroups.cpp ) add_executable(test_nativert diff --git
a/test/cpp/nativert/test_layout_planner_algorithm.cpp b/test/cpp/nativert/test_layout_planner_algorithm.cpp index 72b29db861e..0d4f8fb0d27 100644 --- a/test/cpp/nativert/test_layout_planner_algorithm.cpp +++ b/test/cpp/nativert/test_layout_planner_algorithm.cpp @@ -2,6 +2,7 @@ #include #include +#include #include using namespace ::testing; @@ -61,3 +62,25 @@ TEST(LayoutPlannerAlgorithmTests, TestBump) { EXPECT_EQ(result.total_size, offset); } + +TEST(LayoutPlannerAlgorithmTests, TestStorageGroup) { + auto specs = create_test_allocation_specs(); + auto result = DisjointStorageGroupsPlanner(create_test_allocation_specs()); + + auto& allocations = result.allocations; + + EXPECT_EQ(allocations[0].offset, 0); + EXPECT_EQ(allocations[1].offset, 36); + EXPECT_EQ(allocations[2].offset, 0); + EXPECT_EQ(allocations[3].offset, 100); + EXPECT_EQ(allocations[4].offset, 140); + EXPECT_EQ(allocations[5].offset, 36); + EXPECT_EQ(allocations[6].offset, 140); + EXPECT_EQ(allocations[7].offset, 100); + + for (auto&& [i, spec] : c10::enumerate(specs)) { + EXPECT_EQ(allocations[i].size, spec.size); + } + + EXPECT_EQ(result.total_size, 150); +} diff --git a/torch/nativert/executor/memory/DisjointStorageGroups.cpp b/torch/nativert/executor/memory/DisjointStorageGroups.cpp new file mode 100644 index 00000000000..3d9fac55a7c --- /dev/null +++ b/torch/nativert/executor/memory/DisjointStorageGroups.cpp @@ -0,0 +1,175 @@ +#include + +#include + +#include +#include +#include + +namespace { + +using namespace torch::nativert; + +// A StorageGroup represents a collection of allocations that share backing +// storage +class StorageGroup { + public: + // every storage group must contain at least one allocation spec. 
+ explicit StorageGroup(const AllocationSpec* spec) + : max_spec_size_(spec->size), + lifetime_(spec->lifetime), + spec_group_({spec}) {} + + void add_spec(const AllocationSpec* spec) { + spec_group_.push_back(spec); + max_spec_size_ = std::max(max_spec_size_, spec->size); + TORCH_DCHECK_LT(lifetime_.end, spec->lifetime.end); + lifetime_.end = spec->lifetime.end; + is_free_ = false; + } + + const std::vector& spec_group() const { + return spec_group_; + } + + size_t max_spec_size() const { + return max_spec_size_; + } + + size_t num_specs() const { + return spec_group_.size(); + } + + const AllocationLifetime& lifetime() const { + return lifetime_; + } + + bool is_free() const { + return is_free_; + } + + void set_free(bool is_free) { + is_free_ = is_free; + } + + private: + // whether or not this storage group is free + // to add new specs + bool is_free_{false}; + // represents the amount of memory that will be + // allocated for all specs in this group... + size_t max_spec_size_; + // the lifetime of this storage group + AllocationLifetime lifetime_; + // all the specs in this group + std::vector spec_group_; +}; + +} // namespace + +namespace torch::nativert { + +LayoutPlan DisjointStorageGroupsPlanner( + const std::vector& allocation_specs) { + struct CompareAllocationSpecsBySize { + bool operator()(const AllocationSpec* a, const AllocationSpec* b) + const /* noexcept */ + { + return a->size > b->size; + } + }; + + std::vector< + std::multiset> + allocation_indices; + std::vector> deallocation_indices; + + for (const auto& spec : allocation_specs) { + size_t alloc_index = spec.lifetime.start; + size_t dealloc_index = spec.lifetime.end; + + TORCH_DCHECK_LT(alloc_index, dealloc_index); + + if (alloc_index >= allocation_indices.size()) { + allocation_indices.resize(alloc_index + 1); + } + + if (dealloc_index >= deallocation_indices.size()) { + deallocation_indices.resize(dealloc_index + 1); + } + + allocation_indices[alloc_index].insert(&spec); + 
deallocation_indices[dealloc_index].emplace_back(&spec); + } + + // don't want to invalidate pointers + // so let's make this a list + std::list storage_groups; + // maps each AllocationSpec to its assigned storage group. + c10::FastMap spec_to_storage_group; + // stores the set of storage groups that + // are available for re-use. + std::vector free_storage_groups; + + auto createStorageGroup = [&](const AllocationSpec* spec) { + auto& group = storage_groups.emplace_back(spec); + spec_to_storage_group.emplace(spec, &group); + }; + + auto assignToAvailableStorageGroup = [&](const AllocationSpec* spec) { + DCHECK(!free_storage_groups.empty()); + auto* storage_group = free_storage_groups.back(); + TORCH_DCHECK_NOTNULL(storage_group); + TORCH_DCHECK_EQ(storage_group->is_free(), true); + storage_group->add_spec(spec); + spec_to_storage_group.emplace(spec, storage_group); + free_storage_groups.pop_back(); + }; + + for (const auto i : c10::irange(allocation_indices.size())) { + for (auto* spec : allocation_indices[i]) { + TORCH_DCHECK_NOTNULL(spec); + if (free_storage_groups.empty()) { + createStorageGroup(spec); + } else { + assignToAvailableStorageGroup(spec); + } + } + + if (i < deallocation_indices.size()) { + for (auto* spec : deallocation_indices[i]) { + TORCH_DCHECK_NOTNULL(spec); + auto* storage_group = spec_to_storage_group.at(spec); + if (!storage_group->is_free() && + storage_group->lifetime().end == spec->lifetime.end) { + storage_group->set_free(true); + free_storage_groups.push_back(storage_group); + } + } + } + } + + LayoutPlan plan; + + c10::FastMap storage_group_to_offset; + size_t offset = 0; + for (const auto& storage_group : storage_groups) { + storage_group_to_offset.emplace(&storage_group, offset); + offset += storage_group.max_spec_size(); + } + + plan.total_size = offset; + plan.allocations.reserve(allocation_specs.size()); + + for (const auto& spec : allocation_specs) { + // specs in storage groups lifetime's shouldn't be overlapping + // so we 
can just set their offset to the offset of the group + plan.allocations.emplace_back(Allocation{ + spec.size, + storage_group_to_offset.at(spec_to_storage_group.at(&spec))}); + } + + return plan; +} + +} // namespace torch::nativert diff --git a/torch/nativert/executor/memory/DisjointStorageGroups.h b/torch/nativert/executor/memory/DisjointStorageGroups.h new file mode 100644 index 00000000000..8131a7000da --- /dev/null +++ b/torch/nativert/executor/memory/DisjointStorageGroups.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace torch::nativert { + +LayoutPlan DisjointStorageGroupsPlanner( + const std::vector& allocation_specs); + +} // namespace torch::nativert diff --git a/torch/nativert/executor/memory/LayoutPlannerSettings.h b/torch/nativert/executor/memory/LayoutPlannerSettings.h index 2c6a75cfd86..8ade27997bd 100644 --- a/torch/nativert/executor/memory/LayoutPlannerSettings.h +++ b/torch/nativert/executor/memory/LayoutPlannerSettings.h @@ -7,6 +7,7 @@ namespace torch::nativert { enum class LayoutPlannerAlgorithmType { Bump, GreedyBySize, + DisjointStorageGroups, }; class LayoutManagerSettings {