From 2e440e39a6cc99057b01051aa8ef565e08ac67d1 Mon Sep 17 00:00:00 2001 From: Shangdi Yu Date: Wed, 14 May 2025 15:26:50 +0000 Subject: [PATCH] [nativert] Move Placement to pytorch core (#152953) Summary: Move Placement to pytorch core. Using `torch::nativert::isSameDevice` explicitly in code to avoid confusion with the `isSameDevice` in torch namespace. Test Plan: ``` buck run fbcode//mode/dev-nosan //caffe2/test/cpp/nativert:placement_test ./bin/test_nativert ``` OSS and internal CI Differential Revision: D74190745 Pull Request resolved: https://github.com/pytorch/pytorch/pull/152953 Approved by: https://github.com/Skylion007, https://github.com/swolchok, https://github.com/zhxchen17, https://github.com/cyyever --- build_variables.bzl | 2 + test/cpp/nativert/CMakeLists.txt | 2 + test/cpp/nativert/test_placement.cpp | 104 +++++++++++++++++++++ torch/nativert/executor/Placement.cpp | 61 ++++++++++++ torch/nativert/executor/Placement.h | 57 +++++++++++ torch/nativert/executor/PlacementUtils.cpp | 37 ++++++++ 6 files changed, 263 insertions(+) create mode 100644 test/cpp/nativert/test_placement.cpp create mode 100644 torch/nativert/executor/Placement.cpp create mode 100644 torch/nativert/executor/Placement.h create mode 100644 torch/nativert/executor/PlacementUtils.cpp diff --git a/build_variables.bzl b/build_variables.bzl index 02d12c17000..7cac3da1210 100644 --- a/build_variables.bzl +++ b/build_variables.bzl @@ -590,6 +590,8 @@ libtorch_core_jit_sources = sorted(jit_sources_full) libtorch_nativert_sources = [ "torch/nativert/graph/TensorMeta.cpp", + "torch/nativert/executor/Placement.cpp", + "torch/nativert/executor/PlacementUtils.cpp", ] torch_mobile_tracer_sources = [ diff --git a/test/cpp/nativert/CMakeLists.txt b/test/cpp/nativert/CMakeLists.txt index 48c4ed7adf5..f702e996836 100644 --- a/test/cpp/nativert/CMakeLists.txt +++ b/test/cpp/nativert/CMakeLists.txt @@ -6,6 +6,7 @@ file(GLOB_RECURSE NATIVERT_ALL_TEST_FILES "${NATIVERT_TEST_ROOT}/test_*.cpp") 
set(NATIVERT_TEST_SRCS ${NATIVERT_ALL_TEST_FILES} ${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp + ${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp ) add_executable(test_nativert @@ -19,6 +20,7 @@ target_compile_definitions(test_nativert PRIVATE USE_GTEST) set(NATIVERT_TEST_DEPENDENCIES torch gtest) target_link_libraries(test_nativert PRIVATE ${NATIVERT_TEST_DEPENDENCIES}) +target_link_libraries(test_nativert PRIVATE fmt::fmt-header-only) target_include_directories(test_nativert PRIVATE ${ATen_CPU_INCLUDE}) if(USE_CUDA) diff --git a/test/cpp/nativert/test_placement.cpp b/test/cpp/nativert/test_placement.cpp new file mode 100644 index 00000000000..e88ae20e1de --- /dev/null +++ b/test/cpp/nativert/test_placement.cpp @@ -0,0 +1,104 @@ + +#include +#include +#include + +#include + +using namespace ::testing; + +namespace torch::nativert { +TEST(PlacementTest, NormalizeDevice) { + c10::Device cpuDevice = c10::Device(c10::DeviceType::CPU); + c10::Device cpuDevice1 = c10::Device(c10::DeviceType::CPU); + cpuDevice1.set_index(1); + + EXPECT_EQ(normalizeDevice(cpuDevice), cpuDevice); + EXPECT_NE(normalizeDevice(cpuDevice1), cpuDevice1); + + c10::Device cudaDevice = c10::Device(c10::DeviceType::CUDA); + c10::Device cudaDevice1 = c10::Device(c10::DeviceType::CUDA, 1); + EXPECT_EQ(normalizeDevice(cudaDevice), c10::Device(c10::DeviceType::CUDA, 0)); + EXPECT_EQ( + normalizeDevice(cudaDevice1), c10::Device(c10::DeviceType::CUDA, 1)); + + EXPECT_NE( + normalizeDevice(cudaDevice1), c10::Device(c10::DeviceType::CUDA, 0)); +} + +TEST(PlacementTest, IsSameDevice) { + c10::Device cpuDevice = c10::Device(c10::DeviceType::CPU); + c10::Device cpuDevice1 = c10::Device(c10::DeviceType::CPU); + cpuDevice1.set_index(1); + + EXPECT_TRUE(isSameDevice(cpuDevice, cpuDevice)); + EXPECT_TRUE(isSameDevice(cpuDevice, cpuDevice1)); + + c10::Device cudaDevice = c10::Device(c10::DeviceType::CUDA); + c10::Device cudaDevice0 = c10::Device(c10::DeviceType::CUDA, 0); + c10::Device cudaDevice1 = 
c10::Device(c10::DeviceType::CUDA, 1); + EXPECT_TRUE(isSameDevice(cudaDevice, cudaDevice0)); + EXPECT_FALSE(isSameDevice(cudaDevice0, cudaDevice1)); + + EXPECT_FALSE(isSameDevice(cudaDevice0, cpuDevice)); +} + +TEST(PlacementTest, PlacementDefaultOnly) { + Placement placement(c10::Device(c10::DeviceType::CUDA, 0)); + + std::ostringstream os; + os << placement; + EXPECT_EQ(os.str(), "|cuda:0"); + + c10::Device cuda0 = c10::Device(c10::DeviceType::CUDA, 0); + c10::Device cuda1 = c10::Device(c10::DeviceType::CUDA, 1); + c10::Device cuda2 = c10::Device(c10::DeviceType::CUDA, 2); + + EXPECT_EQ(placement.getMappedDevice(cuda0), cuda0); + EXPECT_EQ(placement.getMappedDevice(cuda1), cuda0); + EXPECT_EQ(placement.getMappedDevice(cuda2), cuda0); +} + +TEST(PlacementTest, PlacementBasic) { + Placement placement( + {{c10::Device(c10::DeviceType::CPU), c10::Device(c10::DeviceType::CPU)}, + {c10::Device(c10::DeviceType::CUDA, 0), + c10::Device(c10::DeviceType::CUDA, 1)}, + {c10::Device(c10::DeviceType::CUDA, 1), + c10::Device(c10::DeviceType::CUDA, 2)}}, + c10::Device(c10::DeviceType::CUDA, 0)); + + std::ostringstream os; + os << placement; + EXPECT_EQ(os.str(), "cpu|cpu,cuda:0|cuda:1,cuda:1|cuda:2,|cuda:0"); + + c10::Device cpu = c10::Device(c10::DeviceType::CPU); + c10::Device cuda0 = c10::Device(c10::DeviceType::CUDA, 0); + c10::Device cuda1 = c10::Device(c10::DeviceType::CUDA, 1); + c10::Device cuda2 = c10::Device(c10::DeviceType::CUDA, 2); + c10::Device cuda3 = c10::Device(c10::DeviceType::CUDA, 3); + + EXPECT_EQ(placement.getMappedDevice(cpu), cpu); + EXPECT_EQ(placement.getMappedDevice(cuda0), cuda1); + EXPECT_EQ(placement.getMappedDevice(cuda1), cuda2); + EXPECT_EQ(placement.getMappedDevice(cuda2), cuda0); + EXPECT_EQ(placement.getMappedDevice(cuda3), cuda0); +} + +TEST(PlacementTest, Placement) { + std::unordered_map deviceMap1 = { + {c10::Device("cuda:0"), c10::Device("cuda:1")}}; + Placement p1(deviceMap1); + EXPECT_EQ(p1.getMappedDevice(c10::Device("cpu")), 
c10::Device("cpu")); + EXPECT_EQ(p1.getMappedDevice(c10::Device("cuda")), c10::Device("cuda:1")); + EXPECT_EQ(p1.getMappedDevice(c10::Device("cuda:0")), c10::Device("cuda:1")); + + std::unordered_map deviceMap2 = { + {c10::Device("cpu"), c10::Device("cuda")}}; + Placement p2(deviceMap2); + EXPECT_EQ(p2.getMappedDevice(c10::Device("cpu")), c10::Device("cuda:0")); + EXPECT_EQ(p2.getMappedDevice(c10::Device("cuda:0")), c10::Device("cuda:0")); + EXPECT_EQ(p2.getMappedDevice(c10::Device("cuda:1")), c10::Device("cuda:1")); +} + +} // namespace torch::nativert diff --git a/torch/nativert/executor/Placement.cpp b/torch/nativert/executor/Placement.cpp new file mode 100644 index 00000000000..be8b6e6df96 --- /dev/null +++ b/torch/nativert/executor/Placement.cpp @@ -0,0 +1,61 @@ +#include + +#include +#include + +namespace torch::nativert { + +std::ostream& operator<<(std::ostream& os, const Placement& placement) { + std::vector> sorted_keys; + sorted_keys.reserve(placement.deviceMap_.size()); + for (const auto& pair : placement.deviceMap_) { + sorted_keys.emplace_back(pair.first.str(), pair.first); + } + std::sort( + sorted_keys.begin(), sorted_keys.end(), [](const auto& a, const auto& b) { + return a.first < b.first; + }); + + bool first = true; + for (const auto& pair : sorted_keys) { + if (!first) { + fmt::print(os, ","); + } + first = false; + const auto& key = pair.second; + const auto& value = placement.deviceMap_.at(key); + fmt::print(os, "{}|{}", pair.first, value.str()); + } + if (placement.defaultDevice_.has_value()) { + fmt::print(os, "{}|{}", first ? 
"" : ",", placement.defaultDevice_->str()); + } + return os; +} + +Placement::Placement(std::optional defaultDevice) + : Placement({}, defaultDevice) {} + +Placement::Placement( + const std::unordered_map& deviceMap, + std::optional defaultDevice) { + for (const auto& [srcDevice, dstDevice] : deviceMap) { + deviceMap_.try_emplace( + normalizeDevice(srcDevice), normalizeDevice(dstDevice)); + } + if (defaultDevice.has_value()) { + defaultDevice_ = normalizeDevice(defaultDevice.value()); + } +} + +c10::Device Placement::getMappedDevice(const c10::Device& srcDevice) const { + auto it = deviceMap_.find(normalizeDevice(srcDevice)); + if (it != deviceMap_.end()) { + return it->second; + } + if (defaultDevice_.has_value()) { + return defaultDevice_.value(); + } + return srcDevice; +} + +} // namespace torch::nativert diff --git a/torch/nativert/executor/Placement.h b/torch/nativert/executor/Placement.h new file mode 100644 index 00000000000..9f9a2c627d2 --- /dev/null +++ b/torch/nativert/executor/Placement.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +#include +#include + +namespace torch::nativert { + +/** + * This function returns a normalized version of the input device: + * - For CPU devices, the returned device will have no index (i.e., the default + * CPU device). + * - For CUDA devices, if no index is specified, index 0 is assumed. + * - For other device types, the function will raise an error. + * + * @param device The input c10::Device to normalize. + * @return A normalized c10::Device with standardized indexing. + * + * @throws c10::Error If the device type is not CPU or CUDA. + */ + +c10::Device normalizeDevice(const c10::Device& device); + +/** + * Returns true if the two devices are the same and has the same device index + * (if cuda). + */ +bool isSameDevice(const c10::Device& device1, const c10::Device& device2); + +/** + * @brief A utility class for managing device placement mappings. 
+ * + * The Placement class provides a way to map source devices to target devices. + * It supports both explicit per-device mappings and a default device fallback. + * This is the argument taken in NativeRT to map from model artifact device to + * the device it should run on. + */ +struct TORCH_API Placement { + Placement() = default; + explicit Placement(std::optional defaultDevice); + explicit Placement( + const std::unordered_map& deviceMap, + std::optional defaultDevice = std::nullopt); + c10::Device getMappedDevice(const c10::Device& srcDevice) const; + + TORCH_API friend std::ostream& operator<<( + std::ostream& os, + const Placement& obj); + + protected: + std::unordered_map deviceMap_; + std::optional defaultDevice_; +}; + +} // namespace torch::nativert diff --git a/torch/nativert/executor/PlacementUtils.cpp b/torch/nativert/executor/PlacementUtils.cpp new file mode 100644 index 00000000000..988c9997ed0 --- /dev/null +++ b/torch/nativert/executor/PlacementUtils.cpp @@ -0,0 +1,37 @@ +#include + +#include + +namespace torch::nativert { + +c10::Device normalizeDevice(const c10::Device& device) { + // a cpu device doesn't have an index + // a cuda device must have an index + if (device.is_cpu()) { + return c10::Device(c10::DeviceType::CPU); + } else if (device.is_cuda()) { + return c10::Device( + c10::DeviceType::CUDA, + device.has_index() ? device.index() : static_cast(0)); + } else { + TORCH_CHECK(false, "Unsupported device type", device); + } +} + +bool isSameDevice(const c10::Device& a, const c10::Device& b) { + if (a.is_cpu()) { + return b.is_cpu(); + } + if (a.is_cuda()) { + if (b.is_cuda()) { + auto aIndex = a.has_index() ? a.index() : 0; + auto bIndex = b.has_index() ? b.index() : 0; + return aIndex == bIndex; + } else { + return false; + } + } + TORCH_CHECK(false, "Unsupported device type", a, " and ", b); + return false; +} +} // namespace torch::nativert