[TSL] Consolidate NUMA code across different platforms.
No functional change is intended.
PiperOrigin-RevId: 821216963
parent a139a50e56
commit 8cf42017ec
@@ -16,8 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_TSL_PLATFORM_NUMA_H_
#define TENSORFLOW_TSL_PLATFORM_NUMA_H_

#include "xla/tsl/platform/types.h"
#include "tsl/platform/platform.h"
#include <cstddef>

namespace tsl {
namespace port {
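For orientation, the functions declared in this header are the whole cross-platform surface the rest of the change implements. A minimal caller sketch, assuming the "tsl/platform/numa.h" include path used elsewhere in this diff (the main program, buffer size, and node index are illustrative):

#include <cstddef>
#include <iostream>

#include "tsl/platform/numa.h"  // include path as used in numa_hwloc.cc below

int main() {
  using namespace tsl::port;
  std::cout << "NUMA enabled: " << NUMAEnabled()
            << ", nodes: " << NUMANumNodes() << "\n";

  // Place 1 MiB on node 0; both backends fall back to a plain aligned
  // allocation when node placement is unavailable.
  constexpr size_t kSize = 1 << 20;
  void* buf = NUMAMalloc(/*node=*/0, kSize, /*minimum_alignment=*/64);
  int node = NUMAGetMemAffinity(buf);
  if (node != kNUMANoAffinity) {
    NUMASetThreadNodeAffinity(node);  // pin this thread near the buffer
  }
  NUMAFree(buf, kSize);
  return 0;
}

Whichever backend ends up linked in, the same calls compile; the no-op backend simply reports a single node and no affinity.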
third_party/xla/xla/tsl/platform/BUILD (vendored, 2 lines changed)
@@ -58,6 +58,8 @@ exports_files(
        "threadpool.cc",
        "threadpool.h",
        "env.h",
        "numa_hwloc.cc",
        "numa_noop.cc",
    ],
    visibility = internal_visibility([
        "//tensorflow/core/platform:__subpackages__",
third_party/xla/xla/tsl/platform/default/BUILD (vendored, 22 lines changed)
@@ -324,7 +324,14 @@ cc_library(
    srcs = [
        "port.cc",
        "@local_tsl//tsl/platform:cpu_info.cc",
    ],
    ] + select({
        "//xla/tsl:with_numa_support": [
            "//xla/tsl/platform:numa_hwloc.cc",
        ],
        "//conditions:default": [
            "//xla/tsl/platform:numa_noop.cc",
        ],
    }),
    hdrs = [
        "//xla/tsl/platform/profile_utils:cpu_utils.h",
        "@local_tsl//tsl/platform:cpu_info.h",
@@ -336,11 +343,7 @@ cc_library(
        "@local_tsl//tsl/platform:snappy.h",
    ],
    copts = tsl_copts(),
    defines = ["TF_USE_SNAPPY"] + select({
        # TF Additional NUMA defines
        "//xla/tsl:with_numa_support": ["TENSORFLOW_USE_NUMA"],
        "//conditions:default": [],
    }),
    defines = ["TF_USE_SNAPPY"],
    tags = [
        "manual",
        "no_oss",
@@ -357,12 +360,12 @@ cc_library(
        "@local_tsl//tsl/platform",
        "@snappy",
    ] + select({
        # TF Additional NUMA dependencies
        "//xla/tsl:with_numa_support": [
            # Don't merge in a single line
            "@com_google_absl//absl/log",
            "@hwloc",
        ],
        "//conditions:default": [],
        "//conditions:default": [
        ],
    }),
)

@@ -608,6 +611,7 @@ filegroup(
        "status.h",
        "statusor.h",
        "tracing_impl.h",
        "//xla/tsl/platform:numa_noop.cc",
        "//xla/tsl/platform/profile_utils:cpu_utils.h",
        "//xla/tsl/platform/profile_utils:i_cpu_utils_helper.h",
    ],
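Before this change, the hwloc path inside port.cc was toggled by the TENSORFLOW_USE_NUMA define that this rule added under //xla/tsl:with_numa_support; the select() on srcs now picks numa_hwloc.cc or numa_noop.cc instead, so the define can be dropped here. A standalone sketch of the retired preprocessor-based selection (NumNodesImpl and its return values are illustrative stand-ins, not code from the tree):

#include <cstdio>

int NumNodesImpl() {
#ifdef TENSORFLOW_USE_NUMA
  return 2;  // stands in for the hwloc topology query
#else
  return 1;  // stands in for the no-NUMA fallback
#endif
}

int main() {
  std::printf("nodes: %d\n", NumNodesImpl());
  return 0;
}

Selecting whole translation units keeps both backends compiling against the same numa.h declarations and avoids per-file preprocessor branches.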
third_party/xla/xla/tsl/platform/default/port.cc (vendored, 143 lines changed)
@@ -48,10 +48,6 @@ limitations under the License.
#include <thread>
#endif

#if TENSORFLOW_USE_NUMA
#include "hwloc.h"
#endif

#if defined(__ANDROID__) && (defined(__i386__) || defined(__x86_64__))
#define TENSORFLOW_HAS_CXA_DEMANGLE 0
#elif (__GNUC__ >= 4 || (__GNUC__ >= 3 && __GNUC_MINOR__ >= 4)) && \
@@ -170,145 +166,6 @@ int NumHyperthreadsPerCore() {
  return (ht_per_core > 0) ? ht_per_core : 1;
}

#ifdef TENSORFLOW_USE_NUMA
namespace {
static hwloc_topology_t hwloc_topology_handle;

bool HaveHWLocTopology() {
  // One time initialization
  static bool init = []() {
    if (hwloc_topology_init(&hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_init() failed";
      return false;
    }
    if (hwloc_topology_load(hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_load() failed";
      return false;
    }
    return true;
  }();
  return init;
}

// Return the first hwloc object of the given type whose os_index
// matches 'index'.
hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
  hwloc_obj_t obj = nullptr;
  if (index >= 0) {
    while ((obj = hwloc_get_next_obj_by_type(hwloc_topology_handle, tp, obj)) !=
           nullptr) {
      if (obj->os_index == index) break;
    }
  }
  return obj;
}
}  // namespace
#endif  // TENSORFLOW_USE_NUMA

bool NUMAEnabled() { return (NUMANumNodes() > 1); }

int NUMANumNodes() {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    int num_numanodes =
        hwloc_get_nbobjs_by_type(hwloc_topology_handle, HWLOC_OBJ_NUMANODE);
    return std::max(1, num_numanodes);
  } else {
    return 1;
  }
#else
  return 1;
#endif  // TENSORFLOW_USE_NUMA
}

void NUMASetThreadNodeAffinity(int node) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    // Find the corresponding NUMA node topology object.
    hwloc_obj_t obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (obj) {
      hwloc_set_cpubind(hwloc_topology_handle, obj->cpuset,
                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
    } else {
      LOG(ERROR) << "Could not find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
}

int NUMAGetThreadNodeAffinity() {
  int node_index = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_cpuset_t thread_cpuset = hwloc_bitmap_alloc();
    hwloc_get_cpubind(hwloc_topology_handle, thread_cpuset,
                      HWLOC_CPUBIND_THREAD);
    hwloc_obj_t obj = nullptr;
    // Return the first NUMA node whose cpuset is a (non-proper) superset of
    // that of the current thread.
    while ((obj = hwloc_get_next_obj_by_type(
                hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
      if (hwloc_bitmap_isincluded(thread_cpuset, obj->cpuset)) {
        node_index = obj->os_index;
        break;
      }
    }
    hwloc_bitmap_free(thread_cpuset);
  }
#endif  // TENSORFLOW_USE_NUMA
  return node_index;
}

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (numa_node) {
      return hwloc_alloc_membind(hwloc_topology_handle, size,
                                 numa_node->nodeset, HWLOC_MEMBIND_BIND,
                                 HWLOC_MEMBIND_BYNODESET);
    } else {
      LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
  return tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_free(hwloc_topology_handle, ptr, size);
    return;
  }
#endif  // TENSORFLOW_USE_NUMA
  tsl::port::Free(ptr);
}

int NUMAGetMemAffinity(const void* addr) {
  int node = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology() && addr) {
    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
    if (!hwloc_get_area_memlocation(hwloc_topology_handle, addr, 4, nodeset,
                                    HWLOC_MEMBIND_BYNODESET)) {
      hwloc_obj_t obj = nullptr;
      while ((obj = hwloc_get_next_obj_by_type(
                  hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
        if (hwloc_bitmap_isincluded(nodeset, obj->nodeset)) {
          node = obj->os_index;
          break;
        }
      }
      hwloc_bitmap_free(nodeset);
    } else {
      LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
    }
  }
#endif  // TENSORFLOW_USE_NUMA
  return node;
}

bool Snappy_Compress(const char* input, size_t length, string* output) {
#ifdef TF_USE_SNAPPY
  output->resize(snappy::MaxCompressedLength(length));
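The block removed above lazily initialized the hwloc topology through a function-local static set by an immediately invoked lambda; numa_hwloc.cc below expresses the same one-time setup with absl::call_once. A standalone sketch of the idiom being retired (Resource and its init body are illustrative stand-ins):

#include <iostream>

struct Resource {
  bool ok = false;
};

Resource& GetResource() {
  // The lambda runs exactly once, even with concurrent callers; C++11
  // guarantees thread-safe initialization of function-local statics.
  static Resource resource = [] {
    Resource r;
    r.ok = true;  // stands in for hwloc_topology_init()/hwloc_topology_load()
    return r;
  }();
  return resource;
}

int main() {
  std::cout << "initialized: " << GetResource().ok << "\n";
  return 0;
}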
third_party/xla/xla/tsl/platform/numa_hwloc.cc (new file, vendored, 205 lines)
@@ -0,0 +1,205 @@
/* Copyright 2025 The OpenXLA Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstddef>
#include <memory>
#include <type_traits>

#include "absl/base/call_once.h"
#include "absl/log/log.h"
#include "hwloc.h"
#include "tsl/platform/mem.h"
#include "tsl/platform/numa.h"

namespace tsl {
namespace port {

namespace {
hwloc_topology_t GetHWLocTopology() {
  static absl::once_flag init_once;
  static hwloc_topology_t hwloc_topology_handle = nullptr;
  absl::call_once(init_once, [] {
    if (hwloc_topology_init(&hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_init() failed";
      return;
    }
    if (hwloc_topology_load(hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_load() failed";
      return;
    }
  });
  return hwloc_topology_handle;
}

// Return the first hwloc object of the given type whose os_index
// matches 'index'.
hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return nullptr;
  }

  if (index < 0) {
    return nullptr;
  }

  hwloc_obj_t obj = nullptr;
  while ((obj = hwloc_get_next_obj_by_type(topology, tp, obj)) != nullptr) {
    if (obj->os_index == index) {
      break;
    }
  }
  return obj;
}

struct HWLocBitmapDeleter {
  void operator()(hwloc_bitmap_t bitmap) const { hwloc_bitmap_free(bitmap); }
};

auto AllocateBitmap() {
  return std::unique_ptr<std::remove_pointer_t<hwloc_bitmap_t>,
                         HWLocBitmapDeleter>(hwloc_bitmap_alloc());
}
}  // namespace

bool NUMAEnabled() { return NUMANumNodes() > 1; }

int NUMANumNodes() {
  static int num_numanodes = 1;
  static absl::once_flag init_once;
  absl::call_once(init_once, [] {
    auto* topology = GetHWLocTopology();
    if (!topology) {
      return;
    }
    num_numanodes = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
    if (num_numanodes < 1) {
      LOG(ERROR) << "Unknown number of NUMA nodes (got " << num_numanodes
                 << "), assuming 1.";
      num_numanodes = 1;
    }
  });
  return num_numanodes;
}

void NUMASetThreadNodeAffinity(int node) {
  if (node == kNUMANoAffinity) {
    return;
  }

  auto* topology = GetHWLocTopology();
  if (!topology) {
    return;
  }

  // Find the corresponding NUMA node topology object.
  hwloc_obj_t obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
  if (!obj) {
    LOG(ERROR) << "Could not find hwloc NUMA node " << node;
    return;
  }

  if (hwloc_set_cpubind(topology, obj->cpuset,
                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT)) {
    LOG(ERROR).WithPerror() << "Call to hwloc_set_cpubind() failed";
  }
}

int NUMAGetThreadNodeAffinity() {
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return kNUMANoAffinity;
  }

  auto thread_cpuset = AllocateBitmap();
  if (!thread_cpuset) {
    LOG(ERROR) << "Call to hwloc_bitmap_alloc() failed";
    return kNUMANoAffinity;
  }

  if (hwloc_get_cpubind(topology, thread_cpuset.get(), HWLOC_CPUBIND_THREAD)) {
    LOG(ERROR).WithPerror() << "Call to hwloc_get_cpubind() failed";
    return kNUMANoAffinity;
  }

  hwloc_obj_t obj = nullptr;
  // Return the first NUMA node whose cpuset is a (non-proper) superset of
  // that of the current thread.
  while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE,
                                           obj)) != nullptr) {
    if (hwloc_bitmap_isincluded(thread_cpuset.get(), obj->cpuset)) {
      break;
    }
  }
  return obj ? obj->os_index : kNUMANoAffinity;
}

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
  if (node != kNUMANoAffinity) {
    if (auto* topology = GetHWLocTopology()) {
      hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
      if (numa_node) {
        return hwloc_alloc_membind(topology, size, numa_node->nodeset,
                                   HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
      }
      LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
    }
  }
  return ::tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) {
  auto* topology = GetHWLocTopology();
  if (!topology) {
    ::tsl::port::Free(ptr);
    return;
  }
  hwloc_free(topology, ptr, size);
}

int NUMAGetMemAffinity(const void* ptr) {
  if (!ptr) {
    return kNUMANoAffinity;
  }

  auto* topology = GetHWLocTopology();
  if (!topology) {
    return kNUMANoAffinity;
  }

  auto nodeset = AllocateBitmap();
  if (!nodeset) {
    LOG(ERROR) << "Call to hwloc_bitmap_alloc() failed";
    return kNUMANoAffinity;
  }

  if (hwloc_get_area_memlocation(topology, ptr, 4, nodeset.get(),
                                 HWLOC_MEMBIND_BYNODESET)) {
    LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
    return kNUMANoAffinity;
  }

  hwloc_obj_t obj = nullptr;
  while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE,
                                           obj)) != nullptr) {
    if (hwloc_bitmap_isincluded(nodeset.get(), obj->nodeset)) {
      break;
    }
  }
  return obj ? obj->os_index : kNUMANoAffinity;
}

}  // namespace port
}  // namespace tsl
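One detail of the new file worth calling out: hwloc_bitmap_t is a pointer typedef, so AllocateBitmap() wraps it in std::unique_ptr via std::remove_pointer_t plus a custom deleter, and the bitmap is then freed on every return path, including the early error returns. The same idiom, sketched against the C standard library's FILE handle rather than hwloc (purely illustrative):

#include <cstdio>
#include <memory>

// Deleter that releases the C handle; mirrors HWLocBitmapDeleter above.
struct FileCloser {
  void operator()(std::FILE* f) const { std::fclose(f); }
};
using FileHandle = std::unique_ptr<std::FILE, FileCloser>;

int main() {
  FileHandle f(std::fopen("example.txt", "w"));
  if (!f) {
    return 1;  // early return: nothing leaks, no manual fclose needed
  }
  std::fputs("hello\n", f.get());
  return 0;  // FileCloser runs here, closing the file
}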
third_party/xla/xla/tsl/platform/numa_noop.cc (new file, vendored, 41 lines)
@@ -0,0 +1,41 @@
/* Copyright 2025 The OpenXLA Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstddef>

#include "tsl/platform/mem.h"
#include "tsl/platform/numa.h"

namespace tsl {
namespace port {

bool NUMAEnabled() { return false; }

int NUMANumNodes() { return 1; }

void NUMASetThreadNodeAffinity(int node) {}

int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
  return ::tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) { ::tsl::port::Free(ptr); }

int NUMAGetMemAffinity(const void* ptr) { return kNUMANoAffinity; }

}  // namespace port
}  // namespace tsl
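Across both backends, memory obtained from NUMAMalloc has to be released with NUMAFree: the hwloc build frees with hwloc_free, while the no-op build above pairs AlignedMalloc with Free. A hedged caller-side sketch of keeping that pairing automatic (NumaBuffer is illustrative and not part of this change; the include path follows the one used above):

#include <cstddef>

#include "tsl/platform/numa.h"  // assumed include path

class NumaBuffer {
 public:
  NumaBuffer(int node, size_t size, int alignment)
      : ptr_(tsl::port::NUMAMalloc(node, size, alignment)), size_(size) {}
  ~NumaBuffer() { tsl::port::NUMAFree(ptr_, size_); }  // always the matching free
  NumaBuffer(const NumaBuffer&) = delete;
  NumaBuffer& operator=(const NumaBuffer&) = delete;
  void* get() const { return ptr_; }

 private:
  void* ptr_;
  size_t size_;
};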
@@ -174,6 +174,7 @@ cc_library(
    name = "platform_port",
    srcs = [
        "port.cc",
        "//xla/tsl/platform:numa_noop.cc",
        "@local_tsl//tsl/platform:cpu_info.cc",
    ],
    hdrs = [
third_party/xla/xla/tsl/platform/windows/port.cc (vendored, 19 lines changed)
@@ -105,25 +105,6 @@ int GetCurrentCPU() {
  return GetCurrentProcessorNumber();
}

bool NUMAEnabled() {
  // Not yet implemented: coming soon.
  return false;
}

int NUMANumNodes() { return 1; }

void NUMASetThreadNodeAffinity(int node) {}

int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
  return tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) { tsl::port::Free(ptr); }

int NUMAGetMemAffinity(const void* addr) { return kNUMANoAffinity; }

bool Snappy_Compress(const char* input, size_t length, string* output) {
#ifdef TF_USE_SNAPPY
  output->resize(snappy::MaxCompressedLength(length));
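Finally, the one-time initialization that GetHWLocTopology() in numa_hwloc.cc relies on is absl::call_once from absl/base/call_once.h. A standalone sketch of that pattern (the int handle and its value stand in for the hwloc topology setup):

#include <iostream>

#include "absl/base/call_once.h"

int* GetHandle() {
  static absl::once_flag once;
  static int* handle = nullptr;
  absl::call_once(once, [] {
    handle = new int(42);  // stands in for hwloc_topology_init()/load()
  });
  return handle;  // stays nullptr if the one-time setup did not complete
}

int main() {
  std::cout << *GetHandle() << "\n";
  return 0;
}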