[TSL] Consolidate NUMA code across different platforms.

No functional change is intended.

PiperOrigin-RevId: 821216963
Authored by David Majnemer on 2025-10-18 22:04:57 -07:00; committed by TensorFlower Gardener
parent a139a50e56
commit 8cf42017ec
8 changed files with 263 additions and 173 deletions
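
For orientation, this is roughly how call sites use the tsl::port NUMA interface that both new files implement. The snippet is an illustrative sketch written for this note (the PinWorkerToNode helper is invented), not code from the commit:

```cpp
#include <cstddef>

#include "tsl/platform/numa.h"

// Hypothetical helper: pin the calling thread and a scratch buffer to a node.
void PinWorkerToNode(int node) {
  // With numa_noop.cc linked in, NUMAEnabled() is false and this returns.
  if (!tsl::port::NUMAEnabled() || node >= tsl::port::NUMANumNodes()) {
    return;
  }
  tsl::port::NUMASetThreadNodeAffinity(node);
  constexpr size_t kScratchBytes = 1 << 20;
  // NUMAMalloc falls back to AlignedMalloc when the node cannot be bound.
  void* scratch = tsl::port::NUMAMalloc(node, kScratchBytes,
                                        /*minimum_alignment=*/64);
  // ... work on scratch ...
  tsl::port::NUMAFree(scratch, kScratchBytes);
}
```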


@@ -16,8 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_TSL_PLATFORM_NUMA_H_
#define TENSORFLOW_TSL_PLATFORM_NUMA_H_
#include "xla/tsl/platform/types.h"
#include "tsl/platform/platform.h"
#include <cstddef>
namespace tsl {
namespace port {


@@ -58,6 +58,8 @@ exports_files(
        "threadpool.cc",
        "threadpool.h",
        "env.h",
        "numa_hwloc.cc",
        "numa_noop.cc",
    ],
    visibility = internal_visibility([
        "//tensorflow/core/platform:__subpackages__",


@@ -324,7 +324,14 @@ cc_library(
    srcs = [
        "port.cc",
        "@local_tsl//tsl/platform:cpu_info.cc",
    ],
    ] + select({
        "//xla/tsl:with_numa_support": [
            "//xla/tsl/platform:numa_hwloc.cc",
        ],
        "//conditions:default": [
            "//xla/tsl/platform:numa_noop.cc",
        ],
    }),
    hdrs = [
        "//xla/tsl/platform/profile_utils:cpu_utils.h",
        "@local_tsl//tsl/platform:cpu_info.h",
@@ -336,11 +343,7 @@ cc_library(
        "@local_tsl//tsl/platform:snappy.h",
    ],
    copts = tsl_copts(),
    defines = ["TF_USE_SNAPPY"] + select({
        # TF Additional NUMA defines
        "//xla/tsl:with_numa_support": ["TENSORFLOW_USE_NUMA"],
        "//conditions:default": [],
    }),
    defines = ["TF_USE_SNAPPY"],
    tags = [
        "manual",
        "no_oss",
@@ -357,12 +360,12 @@ cc_library(
        "@local_tsl//tsl/platform",
        "@snappy",
    ] + select({
        # TF Additional NUMA dependencies
        "//xla/tsl:with_numa_support": [
            # Don't merge in a single line
            "@com_google_absl//absl/log",
            "@hwloc",
        ],
        "//conditions:default": [],
        "//conditions:default": [
        ],
    }),
)
@@ -608,6 +611,7 @@ filegroup(
        "status.h",
        "statusor.h",
        "tracing_impl.h",
        "//xla/tsl/platform:numa_noop.cc",
        "//xla/tsl/platform/profile_utils:cpu_utils.h",
        "//xla/tsl/platform/profile_utils:i_cpu_utils_helper.h",
    ],


@@ -48,10 +48,6 @@ limitations under the License.
#include <thread>
#endif
#if TENSORFLOW_USE_NUMA
#include "hwloc.h"
#endif
#if defined(__ANDROID__) && (defined(__i386__) || defined(__x86_64__))
#define TENSORFLOW_HAS_CXA_DEMANGLE 0
#elif (__GNUC__ >= 4 || (__GNUC__ >= 3 && __GNUC_MINOR__ >= 4)) && \
@@ -170,145 +166,6 @@ int NumHyperthreadsPerCore() {
  return (ht_per_core > 0) ? ht_per_core : 1;
}

#ifdef TENSORFLOW_USE_NUMA
namespace {
static hwloc_topology_t hwloc_topology_handle;

bool HaveHWLocTopology() {
  // One time initialization
  static bool init = []() {
    if (hwloc_topology_init(&hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_init() failed";
      return false;
    }
    if (hwloc_topology_load(hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_load() failed";
      return false;
    }
    return true;
  }();
  return init;
}

// Return the first hwloc object of the given type whose os_index
// matches 'index'.
hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
  hwloc_obj_t obj = nullptr;
  if (index >= 0) {
    while ((obj = hwloc_get_next_obj_by_type(hwloc_topology_handle, tp, obj)) !=
           nullptr) {
      if (obj->os_index == index) break;
    }
  }
  return obj;
}

}  // namespace
#endif  // TENSORFLOW_USE_NUMA

bool NUMAEnabled() { return (NUMANumNodes() > 1); }

int NUMANumNodes() {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    int num_numanodes =
        hwloc_get_nbobjs_by_type(hwloc_topology_handle, HWLOC_OBJ_NUMANODE);
    return std::max(1, num_numanodes);
  } else {
    return 1;
  }
#else
  return 1;
#endif  // TENSORFLOW_USE_NUMA
}

void NUMASetThreadNodeAffinity(int node) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    // Find the corresponding NUMA node topology object.
    hwloc_obj_t obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (obj) {
      hwloc_set_cpubind(hwloc_topology_handle, obj->cpuset,
                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
    } else {
      LOG(ERROR) << "Could not find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
}

int NUMAGetThreadNodeAffinity() {
  int node_index = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_cpuset_t thread_cpuset = hwloc_bitmap_alloc();
    hwloc_get_cpubind(hwloc_topology_handle, thread_cpuset,
                      HWLOC_CPUBIND_THREAD);
    hwloc_obj_t obj = nullptr;
    // Return the first NUMA node whose cpuset is a (non-proper) superset of
    // that of the current thread.
    while ((obj = hwloc_get_next_obj_by_type(
                hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
      if (hwloc_bitmap_isincluded(thread_cpuset, obj->cpuset)) {
        node_index = obj->os_index;
        break;
      }
    }
    hwloc_bitmap_free(thread_cpuset);
  }
#endif  // TENSORFLOW_USE_NUMA
  return node_index;
}

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (numa_node) {
      return hwloc_alloc_membind(hwloc_topology_handle, size,
                                 numa_node->nodeset, HWLOC_MEMBIND_BIND,
                                 HWLOC_MEMBIND_BYNODESET);
    } else {
      LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
  return tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_free(hwloc_topology_handle, ptr, size);
    return;
  }
#endif  // TENSORFLOW_USE_NUMA
  tsl::port::Free(ptr);
}

int NUMAGetMemAffinity(const void* addr) {
  int node = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology() && addr) {
    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
    if (!hwloc_get_area_memlocation(hwloc_topology_handle, addr, 4, nodeset,
                                    HWLOC_MEMBIND_BYNODESET)) {
      hwloc_obj_t obj = nullptr;
      while ((obj = hwloc_get_next_obj_by_type(
                  hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
        if (hwloc_bitmap_isincluded(nodeset, obj->nodeset)) {
          node = obj->os_index;
          break;
        }
      }
      hwloc_bitmap_free(nodeset);
    } else {
      LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
    }
  }
#endif  // TENSORFLOW_USE_NUMA
  return node;
}

bool Snappy_Compress(const char* input, size_t length, string* output) {
#ifdef TF_USE_SNAPPY
  output->resize(snappy::MaxCompressedLength(length));
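
One idiom change worth calling out: the deleted port.cc code above did one-time initialization with a lambda-initialized static bool (HaveHWLocTopology), whereas the new numa_hwloc.cc below uses absl::call_once. A standalone sketch contrasting the two idioms, with std::call_once standing in for absl::call_once (the semantics relevant here are analogous; the resource names are invented):

```cpp
#include <cstdio>
#include <mutex>  // std::call_once / std::once_flag

// Old style (port.cc): one-time init via a lambda-initialized static.
bool HaveResourceLegacy() {
  static bool init = [] {
    // ... try to initialize, return whether it succeeded ...
    return true;
  }();
  return init;
}

// New style (numa_hwloc.cc): an explicit once_flag guards the handle, which
// lets the initializer bail out early and leave the handle null on failure.
int* GetResource() {
  static std::once_flag once;
  static int* handle = nullptr;
  std::call_once(once, [] {
    static int resource = 42;
    handle = &resource;
  });
  return handle;
}

int main() {
  std::printf("%d %d\n", HaveResourceLegacy(), *GetResource());
}
```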


@@ -0,0 +1,205 @@
/* Copyright 2025 The OpenXLA Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstddef>
#include <memory>
#include <type_traits>

#include "absl/base/call_once.h"
#include "absl/log/log.h"
#include "hwloc.h"
#include "tsl/platform/mem.h"
#include "tsl/platform/numa.h"

namespace tsl {
namespace port {
namespace {

hwloc_topology_t GetHWLocTopology() {
  static absl::once_flag init_once;
  static hwloc_topology_t hwloc_topology_handle = nullptr;
  absl::call_once(init_once, [] {
    if (hwloc_topology_init(&hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_init() failed";
      return;
    }
    if (hwloc_topology_load(hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_load() failed";
      return;
    }
  });
  return hwloc_topology_handle;
}

// Return the first hwloc object of the given type whose os_index
// matches 'index'.
hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return nullptr;
  }
  if (index < 0) {
    return nullptr;
  }
  hwloc_obj_t obj = nullptr;
  while ((obj = hwloc_get_next_obj_by_type(topology, tp, obj)) != nullptr) {
    if (obj->os_index == index) {
      break;
    }
  }
  return obj;
}

struct HWLocBitmapDeleter {
  void operator()(hwloc_bitmap_t bitmap) const { hwloc_bitmap_free(bitmap); }
};

auto AllocateBitmap() {
  return std::unique_ptr<std::remove_pointer_t<hwloc_bitmap_t>,
                         HWLocBitmapDeleter>(hwloc_bitmap_alloc());
}

}  // namespace

bool NUMAEnabled() { return NUMANumNodes() > 1; }

int NUMANumNodes() {
  static int num_numanodes = 1;
  static absl::once_flag init_once;
  absl::call_once(init_once, [] {
    auto* topology = GetHWLocTopology();
    if (!topology) {
      return;
    }
    num_numanodes = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
    if (num_numanodes < 1) {
      LOG(ERROR) << "Unknown number of NUMA nodes (got " << num_numanodes
                 << "), assuming 1.";
      num_numanodes = 1;
    }
  });
  return num_numanodes;
}

void NUMASetThreadNodeAffinity(int node) {
  if (node == kNUMANoAffinity) {
    return;
  }
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return;
  }
  // Find the corresponding NUMA node topology object.
  hwloc_obj_t obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
  if (!obj) {
    LOG(ERROR) << "Could not find hwloc NUMA node " << node;
    return;
  }
  if (hwloc_set_cpubind(topology, obj->cpuset,
                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT)) {
    LOG(ERROR).WithPerror() << "Call to hwloc_set_cpubind() failed";
  }
}

int NUMAGetThreadNodeAffinity() {
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return kNUMANoAffinity;
  }
  auto thread_cpuset = AllocateBitmap();
  if (!thread_cpuset) {
    LOG(ERROR) << "Call to hwloc_bitmap_alloc() failed";
    return kNUMANoAffinity;
  }
  if (hwloc_get_cpubind(topology, thread_cpuset.get(), HWLOC_CPUBIND_THREAD)) {
    LOG(ERROR).WithPerror() << "Call to hwloc_get_cpubind() failed";
    return kNUMANoAffinity;
  }
  hwloc_obj_t obj = nullptr;
  // Return the first NUMA node whose cpuset is a (non-proper) superset of
  // that of the current thread.
  while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE,
                                           obj)) != nullptr) {
    if (hwloc_bitmap_isincluded(thread_cpuset.get(), obj->cpuset)) {
      break;
    }
  }
  return obj ? obj->os_index : kNUMANoAffinity;
}

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
  if (node != kNUMANoAffinity) {
    if (auto* topology = GetHWLocTopology()) {
      hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
      if (numa_node) {
        return hwloc_alloc_membind(topology, size, numa_node->nodeset,
                                   HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
      }
      LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
    }
  }
  return ::tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) {
  auto* topology = GetHWLocTopology();
  if (!topology) {
    ::tsl::port::Free(ptr);
    return;
  }
  hwloc_free(topology, ptr, size);
}

int NUMAGetMemAffinity(const void* ptr) {
  if (!ptr) {
    return kNUMANoAffinity;
  }
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return kNUMANoAffinity;
  }
  auto nodeset = AllocateBitmap();
  if (!nodeset) {
    LOG(ERROR) << "Call to hwloc_bitmap_alloc() failed";
    return kNUMANoAffinity;
  }
  if (hwloc_get_area_memlocation(topology, ptr, 4, nodeset.get(),
                                 HWLOC_MEMBIND_BYNODESET)) {
    LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
    return kNUMANoAffinity;
  }
  hwloc_obj_t obj = nullptr;
  while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE,
                                           obj)) != nullptr) {
    if (hwloc_bitmap_isincluded(nodeset.get(), obj->nodeset)) {
      break;
    }
  }
  return obj ? obj->os_index : kNUMANoAffinity;
}

}  // namespace port
}  // namespace tsl
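
A side note on AllocateBitmap() above: it puts a C-style handle behind std::unique_ptr with a custom deleter so that every early return frees the bitmap. A self-contained sketch of the same pattern with an invented foo_t handle (nothing below is hwloc API):

```cpp
#include <cstdio>
#include <memory>
#include <type_traits>

// A stand-in C-style API; foo_t plays the role of hwloc_bitmap_t.
struct Foo { int value; };
using foo_t = Foo*;
foo_t foo_create() { return new Foo{42}; }
void foo_destroy(foo_t f) { delete f; }

// Custom deleter, mirroring HWLocBitmapDeleter.
struct FooDeleter {
  void operator()(foo_t f) const { foo_destroy(f); }
};

// unique_ptr manages a T*, but foo_t is already a pointer type, hence
// std::remove_pointer_t -- exactly as in AllocateBitmap().
using FooPtr = std::unique_ptr<std::remove_pointer_t<foo_t>, FooDeleter>;

int main() {
  FooPtr foo(foo_create());
  std::printf("%d\n", foo->value);  // foo_destroy runs on every exit path
}
```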


@@ -0,0 +1,41 @@
/* Copyright 2025 The OpenXLA Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstddef>

#include "tsl/platform/mem.h"
#include "tsl/platform/numa.h"

namespace tsl {
namespace port {

bool NUMAEnabled() { return false; }

int NUMANumNodes() { return 1; }

void NUMASetThreadNodeAffinity(int node) {}

int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
  return ::tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) { ::tsl::port::Free(ptr); }

int NUMAGetMemAffinity(const void* ptr) { return kNUMANoAffinity; }

}  // namespace port
}  // namespace tsl
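
Because this file defines exactly the same symbols as numa_hwloc.cc, the choice between them is made at build time by the select() on //xla/tsl:with_numa_support shown earlier, not by a TENSORFLOW_USE_NUMA #ifdef. A minimal standalone sketch of that link-time substitution pattern (BackendValue and the file layout are invented for illustration):

```cpp
#include <cstdio>

// api.h (sketch): a single declaration that both implementation files satisfy.
int BackendValue();

// backend_noop.cc (sketch): the stub the build selects by default. A
// backend_real.cc would define the same symbol differently; the build system
// (here, Bazel's select()) compiles in exactly one of the two files, so the
// linker only ever sees a single definition.
int BackendValue() { return 0; }

int main() {
  // Callers are identical under either implementation; no #ifdef needed.
  std::printf("BackendValue() = %d\n", BackendValue());
}
```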


@@ -174,6 +174,7 @@ cc_library(
    name = "platform_port",
    srcs = [
        "port.cc",
        "//xla/tsl/platform:numa_noop.cc",
        "@local_tsl//tsl/platform:cpu_info.cc",
    ],
    hdrs = [


@@ -105,25 +105,6 @@ int GetCurrentCPU() {
  return GetCurrentProcessorNumber();
}

bool NUMAEnabled() {
  // Not yet implemented: coming soon.
  return false;
}

int NUMANumNodes() { return 1; }

void NUMASetThreadNodeAffinity(int node) {}

int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
  return tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) { tsl::port::Free(ptr); }

int NUMAGetMemAffinity(const void* addr) { return kNUMANoAffinity; }

bool Snappy_Compress(const char* input, size_t length, string* output) {
#ifdef TF_USE_SNAPPY
  output->resize(snappy::MaxCompressedLength(length));