From 44f19c7179f3eae2b96113e80284b9c40065a8e3 Mon Sep 17 00:00:00 2001
From: Panagiotis Kourdis <panagiotis.kourdis@intel.com>
Date: Tue, 20 May 2025 09:21:39 +0000
Subject: [PATCH] Record the XPU and XCCL build settings in the compiled binary
 (#147161)

Fixes #ISSUE_NUMBER

Currently the XPU and XCCL build settings are not recorded in the compiled binary and are not shown using the `torch.__config__.show()` which is a quick way to check if the binary has been built with such support.

Below is the output adding them (see end of last line):

```
Python 3.12.8 | packaged by conda-forge | (main, Dec  5 2024, 14:24:40) [GCC 13.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> print(torch.__config__.show())
PyTorch built with:
  - GCC 13.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2025.1-Product Build 20250203 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.5.3 (Git Hash 66f0cb9eb66affd2da3bf5f8d897376f04aae6af)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - CPU capability usage: AVX512
XPU backend  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=RelWithDebInfo, COMMIT_SHA=43eb39d7c832b5560f7bfa8d29cc7919ac21c0ca, CXX_COMPILER=/home/pkourdis/compilers/gcc-13.3.0/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=OFF -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-dangling-reference -Wno-error=dangling-reference -Wno-error=redundant-move -DUSE_XPU -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.0, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=1, USE_MPI=0, USE_NCCL=OFF, USE_NNPACK=0, USE_OPENMP=ON, USE_ROCM=0, USE_ROCM_KERNEL_ASSERT=OFF, USE_XCCL=1, USE_XPU=1,
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/147161
Approved by: https://github.com/guangyey, https://github.com/EikanWang, https://github.com/albanD

Co-authored-by: Yu, Guangye <106960996+guangyey@users.noreply.github.com>
---
 caffe2/core/macros.h.in |  4 +++-
 test/test_xpu.py        | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/caffe2/core/macros.h.in b/caffe2/core/macros.h.in
index 9b05d629ede..5b971b1bbb2 100644
--- a/caffe2/core/macros.h.in
+++ b/caffe2/core/macros.h.in
@@ -29,7 +29,7 @@
 #endif
 
 // Useful build settings that are recorded in the compiled binary
-// torch.__build__.show()
+// torch.__config__.show()
 #define CAFFE2_BUILD_STRINGS { \
   {"TORCH_VERSION", "${TORCH_VERSION}"}, \
   {"CXX_COMPILER", "${CMAKE_CXX_COMPILER}"}, \
@@ -65,4 +65,6 @@
   {"USE_ITT", "${CAFFE2_USE_ITT}"}, \
   {"USE_ROCM_KERNEL_ASSERT", "${USE_ROCM_KERNEL_ASSERT}"}, \
   {"USE_CUSPARSELT", "${USE_CUSPARSELT}"}, \
+  {"USE_XPU", "${USE_XPU}"}, \
+  {"USE_XCCL", "${USE_XCCL}"}, \
 }
diff --git a/test/test_xpu.py b/test/test_xpu.py
index aa91ce75530..b3bac2666d9 100644
--- a/test/test_xpu.py
+++ b/test/test_xpu.py
@@ -1,5 +1,6 @@
 # Owner(s): ["module: intel"]
 
+import re
 import subprocess
 import sys
 import tempfile
@@ -732,6 +733,24 @@ class TestXPUAPISanity(TestCase):
         if not torch.xpu._is_compiled():
             self.assertEqual(len(torch.xpu.get_arch_list()), 0)
 
+    def test_torch_config_for_xpu(self):
+        config = torch.__config__.show()
+        value = re.search(r"USE_XPU=([^,]+)", config)
+        self.assertIsNotNone(value)
+        if torch.xpu._is_compiled():
+            self.assertTrue(value.group(1) in ["ON", "1"])
+            value = re.search(r"USE_XCCL=([^,]+)", config)
+            if torch.distributed.is_xccl_available():
+                self.assertTrue(value.group(1) in ["ON", "1"])
+            else:
+                self.assertTrue(value.group(1) in ["OFF", "0"])
+        else:
+            self.assertTrue(value.group(1) in ["OFF", "0"])
+            self.assertFalse(torch.distributed.is_xccl_available())
+            value = re.search(r"USE_XCCL=([^,]+)", config)
+            self.assertIsNotNone(value)
+            self.assertTrue(value.group(1) in ["OFF", "0"])
+
 
 if __name__ == "__main__":
     run_tests()