[TSL] Consolidate NUMA code across different platforms.

No functional change is intended.

PiperOrigin-RevId: 821216963
Authored by David Majnemer on 2025-10-18 22:04:57 -07:00; committed by TensorFlower Gardener
parent a139a50e56
commit 8cf42017ec
8 changed files with 263 additions and 173 deletions
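
For orientation, this is roughly how call sites use the tsl::port NUMA interface that both new files implement. The snippet is an illustrative sketch written for this note (the PinWorkerToNode helper is invented), not code from the commit:

```cpp
#include <cstddef>

#include "tsl/platform/numa.h"

// Hypothetical helper: pin the calling thread and a scratch buffer to a node.
void PinWorkerToNode(int node) {
  // With numa_noop.cc linked in, NUMAEnabled() is false and this returns.
  if (!tsl::port::NUMAEnabled() || node >= tsl::port::NUMANumNodes()) {
    return;
  }
  tsl::port::NUMASetThreadNodeAffinity(node);
  constexpr size_t kScratchBytes = 1 << 20;
  // NUMAMalloc falls back to AlignedMalloc when the node cannot be bound.
  void* scratch = tsl::port::NUMAMalloc(node, kScratchBytes,
                                        /*minimum_alignment=*/64);
  // ... work on scratch ...
  tsl::port::NUMAFree(scratch, kScratchBytes);
}
```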


@@ -16,8 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_TSL_PLATFORM_NUMA_H_
#define TENSORFLOW_TSL_PLATFORM_NUMA_H_
#include "xla/tsl/platform/types.h"
#include "tsl/platform/platform.h"
#include <cstddef>
namespace tsl {
namespace port {


@@ -58,6 +58,8 @@ exports_files(
        "threadpool.cc",
        "threadpool.h",
        "env.h",
        "numa_hwloc.cc",
        "numa_noop.cc",
    ],
    visibility = internal_visibility([
        "//tensorflow/core/platform:__subpackages__",


@@ -324,7 +324,14 @@ cc_library(
    srcs = [
        "port.cc",
        "@local_tsl//tsl/platform:cpu_info.cc",
    ],
    ] + select({
        "//xla/tsl:with_numa_support": [
            "//xla/tsl/platform:numa_hwloc.cc",
        ],
        "//conditions:default": [
            "//xla/tsl/platform:numa_noop.cc",
        ],
    }),
    hdrs = [
        "//xla/tsl/platform/profile_utils:cpu_utils.h",
        "@local_tsl//tsl/platform:cpu_info.h",
@@ -336,11 +343,7 @@ cc_library(
        "@local_tsl//tsl/platform:snappy.h",
    ],
    copts = tsl_copts(),
    defines = ["TF_USE_SNAPPY"] + select({
        # TF Additional NUMA defines
        "//xla/tsl:with_numa_support": ["TENSORFLOW_USE_NUMA"],
        "//conditions:default": [],
    }),
    defines = ["TF_USE_SNAPPY"],
    tags = [
        "manual",
        "no_oss",
@@ -357,12 +360,12 @@ cc_library(
        "@local_tsl//tsl/platform",
        "@snappy",
    ] + select({
        # TF Additional NUMA dependencies
        "//xla/tsl:with_numa_support": [
            # Don't merge in a single line
            "@com_google_absl//absl/log",
            "@hwloc",
        ],
        "//conditions:default": [],
        "//conditions:default": [
        ],
    }),
)
@@ -608,6 +611,7 @@ filegroup(
        "status.h",
        "statusor.h",
        "tracing_impl.h",
        "//xla/tsl/platform:numa_noop.cc",
        "//xla/tsl/platform/profile_utils:cpu_utils.h",
        "//xla/tsl/platform/profile_utils:i_cpu_utils_helper.h",
    ],


@@ -48,10 +48,6 @@ limitations under the License.
#include <thread>
#endif
#if TENSORFLOW_USE_NUMA
#include "hwloc.h"
#endif
#if defined(__ANDROID__) && (defined(__i386__) || defined(__x86_64__))
#define TENSORFLOW_HAS_CXA_DEMANGLE 0
#elif (__GNUC__ >= 4 || (__GNUC__ >= 3 && __GNUC_MINOR__ >= 4)) && \
@@ -170,145 +166,6 @@ int NumHyperthreadsPerCore() {
  return (ht_per_core > 0) ? ht_per_core : 1;
}

#ifdef TENSORFLOW_USE_NUMA
namespace {
static hwloc_topology_t hwloc_topology_handle;

bool HaveHWLocTopology() {
  // One time initialization
  static bool init = []() {
    if (hwloc_topology_init(&hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_init() failed";
      return false;
    }
    if (hwloc_topology_load(hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_load() failed";
      return false;
    }
    return true;
  }();
  return init;
}

// Return the first hwloc object of the given type whose os_index
// matches 'index'.
hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
  hwloc_obj_t obj = nullptr;
  if (index >= 0) {
    while ((obj = hwloc_get_next_obj_by_type(hwloc_topology_handle, tp, obj)) !=
           nullptr) {
      if (obj->os_index == index) break;
    }
  }
  return obj;
}

}  // namespace
#endif  // TENSORFLOW_USE_NUMA

bool NUMAEnabled() { return (NUMANumNodes() > 1); }

int NUMANumNodes() {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    int num_numanodes =
        hwloc_get_nbobjs_by_type(hwloc_topology_handle, HWLOC_OBJ_NUMANODE);
    return std::max(1, num_numanodes);
  } else {
    return 1;
  }
#else
  return 1;
#endif  // TENSORFLOW_USE_NUMA
}

void NUMASetThreadNodeAffinity(int node) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    // Find the corresponding NUMA node topology object.
    hwloc_obj_t obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (obj) {
      hwloc_set_cpubind(hwloc_topology_handle, obj->cpuset,
                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
    } else {
      LOG(ERROR) << "Could not find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
}

int NUMAGetThreadNodeAffinity() {
  int node_index = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_cpuset_t thread_cpuset = hwloc_bitmap_alloc();
    hwloc_get_cpubind(hwloc_topology_handle, thread_cpuset,
                      HWLOC_CPUBIND_THREAD);
    hwloc_obj_t obj = nullptr;
    // Return the first NUMA node whose cpuset is a (non-proper) superset of
    // that of the current thread.
    while ((obj = hwloc_get_next_obj_by_type(
                hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
      if (hwloc_bitmap_isincluded(thread_cpuset, obj->cpuset)) {
        node_index = obj->os_index;
        break;
      }
    }
    hwloc_bitmap_free(thread_cpuset);
  }
#endif  // TENSORFLOW_USE_NUMA
  return node_index;
}

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (numa_node) {
      return hwloc_alloc_membind(hwloc_topology_handle, size,
                                 numa_node->nodeset, HWLOC_MEMBIND_BIND,
                                 HWLOC_MEMBIND_BYNODESET);
    } else {
      LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
  return tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_free(hwloc_topology_handle, ptr, size);
    return;
  }
#endif  // TENSORFLOW_USE_NUMA
  tsl::port::Free(ptr);
}

int NUMAGetMemAffinity(const void* addr) {
  int node = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology() && addr) {
    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
    if (!hwloc_get_area_memlocation(hwloc_topology_handle, addr, 4, nodeset,
                                    HWLOC_MEMBIND_BYNODESET)) {
      hwloc_obj_t obj = nullptr;
      while ((obj = hwloc_get_next_obj_by_type(
                  hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
        if (hwloc_bitmap_isincluded(nodeset, obj->nodeset)) {
          node = obj->os_index;
          break;
        }
      }
      hwloc_bitmap_free(nodeset);
    } else {
      LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
    }
  }
#endif  // TENSORFLOW_USE_NUMA
  return node;
}

bool Snappy_Compress(const char* input, size_t length, string* output) {
#ifdef TF_USE_SNAPPY
  output->resize(snappy::MaxCompressedLength(length));
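
One idiom change worth calling out: the deleted port.cc code above did one-time initialization with a lambda-initialized static bool (HaveHWLocTopology), whereas the new numa_hwloc.cc below uses absl::call_once. A standalone sketch contrasting the two idioms, with std::call_once standing in for absl::call_once (the semantics relevant here are analogous; the resource names are invented):

```cpp
#include <cstdio>
#include <mutex>  // std::call_once / std::once_flag

// Old style (port.cc): one-time init via a lambda-initialized static.
bool HaveResourceLegacy() {
  static bool init = [] {
    // ... try to initialize, return whether it succeeded ...
    return true;
  }();
  return init;
}

// New style (numa_hwloc.cc): an explicit once_flag guards the handle, which
// lets the initializer bail out early and leave the handle null on failure.
int* GetResource() {
  static std::once_flag once;
  static int* handle = nullptr;
  std::call_once(once, [] {
    static int resource = 42;
    handle = &resource;
  });
  return handle;
}

int main() {
  std::printf("%d %d\n", HaveResourceLegacy(), *GetResource());
}
```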


@@ -0,0 +1,205 @@
/* Copyright 2025 The OpenXLA Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstddef>
#include <memory>
#include <type_traits>

#include "absl/base/call_once.h"
#include "absl/log/log.h"
#include "hwloc.h"
#include "tsl/platform/mem.h"
#include "tsl/platform/numa.h"

namespace tsl {
namespace port {
namespace {

hwloc_topology_t GetHWLocTopology() {
  static absl::once_flag init_once;
  static hwloc_topology_t hwloc_topology_handle = nullptr;
  absl::call_once(init_once, [] {
    if (hwloc_topology_init(&hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_init() failed";
      return;
    }
    if (hwloc_topology_load(hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_load() failed";
      return;
    }
  });
  return hwloc_topology_handle;
}

// Return the first hwloc object of the given type whose os_index
// matches 'index'.
hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return nullptr;
  }
  if (index < 0) {
    return nullptr;
  }
  hwloc_obj_t obj = nullptr;
  while ((obj = hwloc_get_next_obj_by_type(topology, tp, obj)) != nullptr) {
    if (obj->os_index == index) {
      break;
    }
  }
  return obj;
}

struct HWLocBitmapDeleter {
  void operator()(hwloc_bitmap_t bitmap) const { hwloc_bitmap_free(bitmap); }
};

auto AllocateBitmap() {
  return std::unique_ptr<std::remove_pointer_t<hwloc_bitmap_t>,
                         HWLocBitmapDeleter>(hwloc_bitmap_alloc());
}

}  // namespace

bool NUMAEnabled() { return NUMANumNodes() > 1; }

int NUMANumNodes() {
  static int num_numanodes = 1;
  static absl::once_flag init_once;
  absl::call_once(init_once, [] {
    auto* topology = GetHWLocTopology();
    if (!topology) {
      return;
    }
    num_numanodes = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
    if (num_numanodes < 1) {
      LOG(ERROR) << "Unknown number of NUMA nodes (got " << num_numanodes
                 << "), assuming 1.";
      num_numanodes = 1;
    }
  });
  return num_numanodes;
}

void NUMASetThreadNodeAffinity(int node) {
  if (node == kNUMANoAffinity) {
    return;
  }
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return;
  }
  // Find the corresponding NUMA node topology object.
  hwloc_obj_t obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
  if (!obj) {
    LOG(ERROR) << "Could not find hwloc NUMA node " << node;
    return;
  }
  if (hwloc_set_cpubind(topology, obj->cpuset,
                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT)) {
    LOG(ERROR).WithPerror() << "Call to hwloc_set_cpubind() failed";
  }
}

int NUMAGetThreadNodeAffinity() {
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return kNUMANoAffinity;
  }
  auto thread_cpuset = AllocateBitmap();
  if (!thread_cpuset) {
    LOG(ERROR) << "Call to hwloc_bitmap_alloc() failed";
    return kNUMANoAffinity;
  }
  if (hwloc_get_cpubind(topology, thread_cpuset.get(), HWLOC_CPUBIND_THREAD)) {
    LOG(ERROR).WithPerror() << "Call to hwloc_get_cpubind() failed";
    return kNUMANoAffinity;
  }
  hwloc_obj_t obj = nullptr;
  // Return the first NUMA node whose cpuset is a (non-proper) superset of
  // that of the current thread.
  while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE,
                                           obj)) != nullptr) {
    if (hwloc_bitmap_isincluded(thread_cpuset.get(), obj->cpuset)) {
      break;
    }
  }
  return obj ? obj->os_index : kNUMANoAffinity;
}

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
  if (node != kNUMANoAffinity) {
    if (auto* topology = GetHWLocTopology()) {
      hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
      if (numa_node) {
        return hwloc_alloc_membind(topology, size, numa_node->nodeset,
                                   HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
      }
      LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
    }
  }
  return ::tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) {
  auto* topology = GetHWLocTopology();
  if (!topology) {
    ::tsl::port::Free(ptr);
    return;
  }
  hwloc_free(topology, ptr, size);
}

int NUMAGetMemAffinity(const void* ptr) {
  if (!ptr) {
    return kNUMANoAffinity;
  }
  auto* topology = GetHWLocTopology();
  if (!topology) {
    return kNUMANoAffinity;
  }
  auto nodeset = AllocateBitmap();
  if (!nodeset) {
    LOG(ERROR) << "Call to hwloc_bitmap_alloc() failed";
    return kNUMANoAffinity;
  }
  if (hwloc_get_area_memlocation(topology, ptr, 4, nodeset.get(),
                                 HWLOC_MEMBIND_BYNODESET)) {
    LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
    return kNUMANoAffinity;
  }
  hwloc_obj_t obj = nullptr;
  while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE,
                                           obj)) != nullptr) {
    if (hwloc_bitmap_isincluded(nodeset.get(), obj->nodeset)) {
      break;
    }
  }
  return obj ? obj->os_index : kNUMANoAffinity;
}

}  // namespace port
}  // namespace tsl
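
A side note on AllocateBitmap() above: it puts a C-style handle behind std::unique_ptr with a custom deleter so that every early return frees the bitmap. A self-contained sketch of the same pattern with an invented foo_t handle (nothing below is hwloc API):

```cpp
#include <cstdio>
#include <memory>
#include <type_traits>

// A stand-in C-style API; foo_t plays the role of hwloc_bitmap_t.
struct Foo { int value; };
using foo_t = Foo*;
foo_t foo_create() { return new Foo{42}; }
void foo_destroy(foo_t f) { delete f; }

// Custom deleter, mirroring HWLocBitmapDeleter.
struct FooDeleter {
  void operator()(foo_t f) const { foo_destroy(f); }
};

// unique_ptr manages a T*, but foo_t is already a pointer type, hence
// std::remove_pointer_t -- exactly as in AllocateBitmap().
using FooPtr = std::unique_ptr<std::remove_pointer_t<foo_t>, FooDeleter>;

int main() {
  FooPtr foo(foo_create());
  std::printf("%d\n", foo->value);  // foo_destroy runs on every exit path
}
```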


@@ -0,0 +1,41 @@
/* Copyright 2025 The OpenXLA Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstddef>

#include "tsl/platform/mem.h"
#include "tsl/platform/numa.h"

namespace tsl {
namespace port {

bool NUMAEnabled() { return false; }

int NUMANumNodes() { return 1; }

void NUMASetThreadNodeAffinity(int node) {}

int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
  return ::tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) { ::tsl::port::Free(ptr); }

int NUMAGetMemAffinity(const void* ptr) { return kNUMANoAffinity; }

}  // namespace port
}  // namespace tsl
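
Because this file defines exactly the same symbols as numa_hwloc.cc, the choice between them is made at build time by the select() on //xla/tsl:with_numa_support shown earlier, not by a TENSORFLOW_USE_NUMA #ifdef. A minimal standalone sketch of that link-time substitution pattern (BackendValue and the file layout are invented for illustration):

```cpp
#include <cstdio>

// api.h (sketch): a single declaration that both implementation files satisfy.
int BackendValue();

// backend_noop.cc (sketch): the stub the build selects by default. A
// backend_real.cc would define the same symbol differently; the build system
// (here, Bazel's select()) compiles in exactly one of the two files, so the
// linker only ever sees a single definition.
int BackendValue() { return 0; }

int main() {
  // Callers are identical under either implementation; no #ifdef needed.
  std::printf("BackendValue() = %d\n", BackendValue());
}
```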


@@ -174,6 +174,7 @@ cc_library(
    name = "platform_port",
    srcs = [
        "port.cc",
        "//xla/tsl/platform:numa_noop.cc",
        "@local_tsl//tsl/platform:cpu_info.cc",
    ],
    hdrs = [


@@ -105,25 +105,6 @@ int GetCurrentCPU() {
  return GetCurrentProcessorNumber();
}

bool NUMAEnabled() {
  // Not yet implemented: coming soon.
  return false;
}

int NUMANumNodes() { return 1; }

void NUMASetThreadNodeAffinity(int node) {}

int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }

void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
  return tsl::port::AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) { tsl::port::Free(ptr); }

int NUMAGetMemAffinity(const void* addr) { return kNUMANoAffinity; }

bool Snappy_Compress(const char* input, size_t length, string* output) {
#ifdef TF_USE_SNAPPY
  output->resize(snappy::MaxCompressedLength(length));