mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[nativert] Move Placement to pytorch core (#152953)
Summary: Move Placement to pytorch core. Using `torch::nativert::isSameDevice` explicitly in code to avoid confusion with the `isSameDevice` in torch namespace. Test Plan: ``` buck run fbcode//mode/dev-nosan //caffe2/test/cpp/nativert:placement_test ./bin/test_nativert ``` OSS and internal CI Differential Revision: D74190745 Pull Request resolved: https://github.com/pytorch/pytorch/pull/152953 Approved by: https://github.com/Skylion007, https://github.com/swolchok, https://github.com/zhxchen17, https://github.com/cyyever
This commit is contained in:
parent
ced90d23d3
commit
2e440e39a6
|
|
@ -590,6 +590,8 @@ libtorch_core_jit_sources = sorted(jit_sources_full)
|
||||||
|
|
||||||
libtorch_nativert_sources = [
|
libtorch_nativert_sources = [
|
||||||
"torch/nativert/graph/TensorMeta.cpp",
|
"torch/nativert/graph/TensorMeta.cpp",
|
||||||
|
"torch/nativert/executor/Placement.cpp",
|
||||||
|
"torch/nativert/executor/PlacementUtils.cpp",
|
||||||
]
|
]
|
||||||
|
|
||||||
torch_mobile_tracer_sources = [
|
torch_mobile_tracer_sources = [
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ file(GLOB_RECURSE NATIVERT_ALL_TEST_FILES "${NATIVERT_TEST_ROOT}/test_*.cpp")
|
||||||
set(NATIVERT_TEST_SRCS
|
set(NATIVERT_TEST_SRCS
|
||||||
${NATIVERT_ALL_TEST_FILES}
|
${NATIVERT_ALL_TEST_FILES}
|
||||||
${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp
|
${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp
|
||||||
|
${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
add_executable(test_nativert
|
add_executable(test_nativert
|
||||||
|
|
@ -19,6 +20,7 @@ target_compile_definitions(test_nativert PRIVATE USE_GTEST)
|
||||||
set(NATIVERT_TEST_DEPENDENCIES torch gtest)
|
set(NATIVERT_TEST_DEPENDENCIES torch gtest)
|
||||||
|
|
||||||
target_link_libraries(test_nativert PRIVATE ${NATIVERT_TEST_DEPENDENCIES})
|
target_link_libraries(test_nativert PRIVATE ${NATIVERT_TEST_DEPENDENCIES})
|
||||||
|
target_link_libraries(test_nativert PRIVATE fmt::fmt-header-only)
|
||||||
target_include_directories(test_nativert PRIVATE ${ATen_CPU_INCLUDE})
|
target_include_directories(test_nativert PRIVATE ${ATen_CPU_INCLUDE})
|
||||||
|
|
||||||
if(USE_CUDA)
|
if(USE_CUDA)
|
||||||
|
|
|
||||||
104
test/cpp/nativert/test_placement.cpp
Normal file
104
test/cpp/nativert/test_placement.cpp
Normal file
|
|
@ -0,0 +1,104 @@
|
||||||
|
|
||||||
|
#include <c10/core/Device.h>
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include <torch/nativert/executor/Placement.h>
|
||||||
|
|
||||||
|
using namespace ::testing;
|
||||||
|
|
||||||
|
namespace torch::nativert {
|
||||||
|
TEST(PlacementTest, NormalizeDevice) {
  // CPU: normalization strips any explicit index, so an indexed CPU device
  // does not round-trip while the plain CPU device does.
  c10::Device indexedCpu = c10::Device(c10::DeviceType::CPU);
  indexedCpu.set_index(1);
  EXPECT_EQ(
      normalizeDevice(c10::Device(c10::DeviceType::CPU)),
      c10::Device(c10::DeviceType::CPU));
  EXPECT_NE(normalizeDevice(indexedCpu), indexedCpu);

  // CUDA: a missing index normalizes to 0; explicit indices are preserved.
  EXPECT_EQ(
      normalizeDevice(c10::Device(c10::DeviceType::CUDA)),
      c10::Device(c10::DeviceType::CUDA, 0));
  c10::Device indexedCuda = c10::Device(c10::DeviceType::CUDA, 1);
  EXPECT_EQ(
      normalizeDevice(indexedCuda), c10::Device(c10::DeviceType::CUDA, 1));
  EXPECT_NE(
      normalizeDevice(indexedCuda), c10::Device(c10::DeviceType::CUDA, 0));
}
|
||||||
|
|
||||||
|
TEST(PlacementTest, IsSameDevice) {
  // CPU devices compare equal regardless of any explicit index.
  c10::Device plainCpu = c10::Device(c10::DeviceType::CPU);
  c10::Device indexedCpu = c10::Device(c10::DeviceType::CPU);
  indexedCpu.set_index(1);
  EXPECT_TRUE(isSameDevice(plainCpu, plainCpu));
  EXPECT_TRUE(isSameDevice(plainCpu, indexedCpu));

  // CUDA devices compare by index; a missing index is treated as 0.
  c10::Device cudaNoIndex = c10::Device(c10::DeviceType::CUDA);
  c10::Device cudaZero = c10::Device(c10::DeviceType::CUDA, 0);
  c10::Device cudaOne = c10::Device(c10::DeviceType::CUDA, 1);
  EXPECT_TRUE(isSameDevice(cudaNoIndex, cudaZero));
  EXPECT_FALSE(isSameDevice(cudaZero, cudaOne));

  // Different device types never compare equal.
  EXPECT_FALSE(isSameDevice(cudaZero, plainCpu));
}
|
||||||
|
|
||||||
|
TEST(PlacementTest, PlacementDefaultOnly) {
  // A placement with only a default device routes every device to it.
  Placement placement(c10::Device(c10::DeviceType::CUDA, 0));

  // The default device is rendered with an empty source: "|cuda:0".
  std::ostringstream rendered;
  rendered << placement;
  EXPECT_EQ(rendered.str(), "|cuda:0");

  const c10::Device fallback = c10::Device(c10::DeviceType::CUDA, 0);
  for (c10::DeviceIndex idx = 0; idx < 3; ++idx) {
    EXPECT_EQ(
        placement.getMappedDevice(c10::Device(c10::DeviceType::CUDA, idx)),
        fallback);
  }
}
|
||||||
|
|
||||||
|
TEST(PlacementTest, PlacementBasic) {
  const c10::Device cpu = c10::Device(c10::DeviceType::CPU);
  const c10::Device cuda0 = c10::Device(c10::DeviceType::CUDA, 0);
  const c10::Device cuda1 = c10::Device(c10::DeviceType::CUDA, 1);
  const c10::Device cuda2 = c10::Device(c10::DeviceType::CUDA, 2);
  const c10::Device cuda3 = c10::Device(c10::DeviceType::CUDA, 3);

  // cpu->cpu, cuda:0->cuda:1, cuda:1->cuda:2; everything else -> cuda:0.
  Placement placement({{cpu, cpu}, {cuda0, cuda1}, {cuda1, cuda2}}, cuda0);

  // Entries are rendered sorted by source device, default device last.
  std::ostringstream rendered;
  rendered << placement;
  EXPECT_EQ(rendered.str(), "cpu|cpu,cuda:0|cuda:1,cuda:1|cuda:2,|cuda:0");

  // Explicitly mapped devices follow the map.
  EXPECT_EQ(placement.getMappedDevice(cpu), cpu);
  EXPECT_EQ(placement.getMappedDevice(cuda0), cuda1);
  EXPECT_EQ(placement.getMappedDevice(cuda1), cuda2);
  // Unmapped devices fall through to the default device.
  EXPECT_EQ(placement.getMappedDevice(cuda2), cuda0);
  EXPECT_EQ(placement.getMappedDevice(cuda3), cuda0);
}
|
||||||
|
|
||||||
|
TEST(PlacementTest, Placement) {
  // Without a default device, unmapped devices map to themselves.
  std::unordered_map<c10::Device, c10::Device> cudaRemap = {
      {c10::Device("cuda:0"), c10::Device("cuda:1")}};
  Placement remapCuda0(cudaRemap);
  EXPECT_EQ(remapCuda0.getMappedDevice(c10::Device("cpu")), c10::Device("cpu"));
  // "cuda" normalizes to "cuda:0", so it hits the same mapping.
  EXPECT_EQ(
      remapCuda0.getMappedDevice(c10::Device("cuda")), c10::Device("cuda:1"));
  EXPECT_EQ(
      remapCuda0.getMappedDevice(c10::Device("cuda:0")), c10::Device("cuda:1"));

  // Both sides of a mapping are normalized ("cuda" -> "cuda:0").
  std::unordered_map<c10::Device, c10::Device> cpuRemap = {
      {c10::Device("cpu"), c10::Device("cuda")}};
  Placement cpuToCuda(cpuRemap);
  EXPECT_EQ(
      cpuToCuda.getMappedDevice(c10::Device("cpu")), c10::Device("cuda:0"));
  EXPECT_EQ(
      cpuToCuda.getMappedDevice(c10::Device("cuda:0")), c10::Device("cuda:0"));
  EXPECT_EQ(
      cpuToCuda.getMappedDevice(c10::Device("cuda:1")), c10::Device("cuda:1"));
}
|
||||||
|
|
||||||
|
} // namespace torch::nativert
|
||||||
61
torch/nativert/executor/Placement.cpp
Normal file
61
torch/nativert/executor/Placement.cpp
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
#include <torch/nativert/executor/Placement.h>
|
||||||
|
|
||||||
|
#include <fmt/ostream.h>
|
||||||
|
#include <ostream>
|
||||||
|
|
||||||
|
namespace torch::nativert {
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream& os, const Placement& placement) {
|
||||||
|
std::vector<std::pair<std::string, c10::Device>> sorted_keys;
|
||||||
|
sorted_keys.reserve(placement.deviceMap_.size());
|
||||||
|
for (const auto& pair : placement.deviceMap_) {
|
||||||
|
sorted_keys.emplace_back(pair.first.str(), pair.first);
|
||||||
|
}
|
||||||
|
std::sort(
|
||||||
|
sorted_keys.begin(), sorted_keys.end(), [](const auto& a, const auto& b) {
|
||||||
|
return a.first < b.first;
|
||||||
|
});
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
for (const auto& pair : sorted_keys) {
|
||||||
|
if (!first) {
|
||||||
|
fmt::print(os, ",");
|
||||||
|
}
|
||||||
|
first = false;
|
||||||
|
const auto& key = pair.second;
|
||||||
|
const auto& value = placement.deviceMap_.at(key);
|
||||||
|
fmt::print(os, "{}|{}", pair.first, value.str());
|
||||||
|
}
|
||||||
|
if (placement.defaultDevice_.has_value()) {
|
||||||
|
fmt::print(os, "{}|{}", first ? "" : ",", placement.defaultDevice_->str());
|
||||||
|
}
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default-device-only placement: delegates to the map-based constructor
// with an empty device map.
Placement::Placement(std::optional<c10::Device> defaultDevice)
    : Placement(/*deviceMap=*/{}, defaultDevice) {}
|
||||||
|
|
||||||
|
// Builds the placement from an explicit device map plus an optional default
// device. Both endpoints of every mapping (and the default device) are
// normalized up front so getMappedDevice() can normalize its input once and
// compare directly.
Placement::Placement(
    const std::unordered_map<c10::Device, c10::Device>& deviceMap,
    std::optional<c10::Device> defaultDevice) {
  for (const auto& [srcDevice, dstDevice] : deviceMap) {
    // try_emplace: if two sources normalize to the same device, the first
    // mapping wins instead of being silently overwritten.
    deviceMap_.try_emplace(
        normalizeDevice(srcDevice), normalizeDevice(dstDevice));
  }
  if (defaultDevice.has_value()) {
    defaultDevice_ = normalizeDevice(defaultDevice.value());
  }
}
|
||||||
|
|
||||||
|
// Resolves srcDevice in precedence order: explicit mapping, then the
// default device, then identity (the source device itself).
c10::Device Placement::getMappedDevice(const c10::Device& srcDevice) const {
  const auto entry = deviceMap_.find(normalizeDevice(srcDevice));
  if (entry != deviceMap_.end()) {
    return entry->second;
  }
  return defaultDevice_.value_or(srcDevice);
}
|
||||||
|
|
||||||
|
} // namespace torch::nativert
|
||||||
57
torch/nativert/executor/Placement.h
Normal file
57
torch/nativert/executor/Placement.h
Normal file
|
|
@ -0,0 +1,57 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <c10/core/Device.h>
|
||||||
|
#include <c10/util/Logging.h>
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
namespace torch::nativert {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function returns a normalized version of the input device:
|
||||||
|
* - For CPU devices, the returned device will have no index (i.e., the default
|
||||||
|
* CPU device).
|
||||||
|
* - For CUDA devices, if no index is specified, index 0 is assumed.
|
||||||
|
* - For other device types, the function will raise an error.
|
||||||
|
*
|
||||||
|
* @param device The input c10::Device to normalize.
|
||||||
|
* @return A normalized c10::Device with standardized indexing.
|
||||||
|
*
|
||||||
|
* @throws c10::Error If the device type is not CPU or CUDA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
c10::Device normalizeDevice(const c10::Device& device);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the two devices are the same and has the same device index
|
||||||
|
* (if cuda).
|
||||||
|
*/
|
||||||
|
bool isSameDevice(const c10::Device& device1, const c10::Device& device2);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A utility class for managing device placement mappings.
|
||||||
|
*
|
||||||
|
* The Placement class provides a way to map source devices to target devices.
|
||||||
|
* It supports both explicit per-device mappings and a default device fallback.
|
||||||
|
* This is the argument taken in NativeRT to map from model artifact device to
|
||||||
|
* the device it should run on.
|
||||||
|
*/
|
||||||
|
struct TORCH_API Placement {
|
||||||
|
Placement() = default;
|
||||||
|
explicit Placement(std::optional<c10::Device> defaultDevice);
|
||||||
|
explicit Placement(
|
||||||
|
const std::unordered_map<c10::Device, c10::Device>& deviceMap,
|
||||||
|
std::optional<c10::Device> defaultDevice = std::nullopt);
|
||||||
|
c10::Device getMappedDevice(const c10::Device& srcDevice) const;
|
||||||
|
|
||||||
|
TORCH_API friend std::ostream& operator<<(
|
||||||
|
std::ostream& os,
|
||||||
|
const Placement& obj);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
std::unordered_map<c10::Device, c10::Device> deviceMap_;
|
||||||
|
std::optional<c10::Device> defaultDevice_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace torch::nativert
|
||||||
37
torch/nativert/executor/PlacementUtils.cpp
Normal file
37
torch/nativert/executor/PlacementUtils.cpp
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
#include <torch/nativert/executor/Placement.h>
|
||||||
|
|
||||||
|
#include <fmt/ostream.h>
|
||||||
|
|
||||||
|
namespace torch::nativert {
|
||||||
|
|
||||||
|
// Canonicalizes a device for use as a map key / comparison operand:
// - CPU devices carry no index.
// - CUDA devices always carry an explicit index (0 when unspecified).
// Any other device type is rejected with a c10::Error.
c10::Device normalizeDevice(const c10::Device& device) {
  if (device.is_cpu()) {
    return c10::Device(c10::DeviceType::CPU);
  } else if (device.is_cuda()) {
    return c10::Device(
        c10::DeviceType::CUDA,
        device.has_index() ? device.index() : static_cast<c10::DeviceIndex>(0));
  } else {
    // Fix: add a separator so the message doesn't fuse with the device
    // (previously printed as "Unsupported device typecuda:0"-style output).
    TORCH_CHECK(false, "Unsupported device type: ", device);
  }
}
|
||||||
|
|
||||||
|
// Compares two devices for placement purposes: CPU devices match any CPU
// device (indices ignored); CUDA devices match when their indices agree,
// with a missing index defaulting to 0. Other device types raise an error.
bool isSameDevice(const c10::Device& a, const c10::Device& b) {
  if (a.is_cpu()) {
    return b.is_cpu();
  }
  if (a.is_cuda()) {
    if (!b.is_cuda()) {
      return false;
    }
    const auto effectiveIndex = [](const c10::Device& d) {
      return d.has_index() ? d.index() : 0;
    };
    return effectiveIndex(a) == effectiveIndex(b);
  }
  TORCH_CHECK(false, "Unsupported device type", a, " and ", b);
  // Unreachable; keeps compilers that don't see TORCH_CHECK as noreturn happy.
  return false;
}
|
||||||
|
} // namespace torch::nativert
|
||||||
Loading…
Reference in New Issue
Block a user