From 5e18bc333144473f1f10bc8a5ba05dba7950fb8a Mon Sep 17 00:00:00 2001 From: Avanish Tiwari Date: Mon, 30 Jun 2025 17:54:37 +0000 Subject: [PATCH] [PowerPC] Fixed build issue for vsx vec256 complexfloat and scaled_mm_out_cpu (#155255) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pytorch build is failing on power system from this commit ec24f8f58a74502c5a2488f5d9e85a817616dda0 ***Build Failure Logs*** **Error related to mkldnn** ``` pytorch/aten/src/ATen/native/Blas.cpp:302:26: error: ‘cpuinfo_has_x86_amx_int8’ was not declared in this scope 302 | if ((!mixed_dtype && cpuinfo_has_x86_amx_int8()) || | ^~~~~~~~~~~~~~~~~~~~~~~~ pytorch/aten/src/ATen/native/Blas.cpp:303:25: error: ‘cpuinfo_has_x86_amx_fp16’ was not declared in this scope 303 | (mixed_dtype && cpuinfo_has_x86_amx_fp16())) { | ^~~~~~~~~~~~~~~~~~~~~~~~ ``` **Error related to vec256 complex float redefinition** ``` aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h:19:7: error: specialization of ‘at::vec::DEFAULT::Vectorized >’ after instantiation 19 | class Vectorized { | ^~~~~~~~~~~~~~~~~~~~~~ aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h:19:7: error: redefinition of ‘class at::vec::DEFAULT::Vectorized >’
 aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h:633:18: error: ‘const class at::vec::DEFAULT::Vectorized >’ has no member named ‘abs_2_’ 633 | auto abs_a = a.abs_2_(); | ^~~~~~ aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h:634:18: error: ‘const class at::vec::DEFAULT::Vectorized >’ has no member named ‘abs_2_’ 634 | auto abs_b = b.abs_2_(); | ^~~~~~ /aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h:666:17: error: ‘const class at::vec::DEFAULT::Vectorized >’ has no member named ‘vec0’ 666 | vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())}; aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h:673:17: error: ‘const class at::vec::DEFAULT::Vectorized >’ has no member named ‘vec0’ 673 | vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())}; | ^~~~ aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h:680:27: error: ‘const class at::vec::DEFAULT::Vectorized >’ has no member named ‘vec0’ 680 | vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())}; ``` ***With this changes build logs*** ``` Building wheel torch-2.8.0a0+gita3098a7 -- Building version 2.8.0a0+gita3098a7 -- Checkout nccl release tag: v2.26.5-1 cmake -GNinja -DBLAS=OpenBLAS -DBUILD_PYTHON=True -DBUILD_TEST=True -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/home/avanish/OfficeWork2025/JuneWork/pytorch_5Jun/pack/torch_night_5Jun/pytorch/torch -DCMAKE_PREFIX_PATH=/home/avanish/OfficeWork2025/JuneWork/pyenv/pytorch_5Jun/lib/python3.12/site-packages -DPython_EXECUTABLE=/home/avanish/OfficeWork2025/JuneWork/pyenv/pytorch_5Jun/bin/python -DTORCH_BUILD_VERSION=2.8.0a0+gita3098a7 -DUSE_MKLDNN=ON -DUSE_MKLDNN_CBLAS=ON -DUSE_NUMPY=True -DUSE_OPENMP=ON /home/avanish/OfficeWork2025/JuneWork/pytorch_5Jun/pack/torch_night_5Jun/pytorch cmake --build . 
--target install --config Release running build_ext -- Building with NumPy bindings -- Not using cuDNN -- Not using CUDA -- Not using XPU -- Using MKLDNN -- Not using Compute Library for the Arm architecture with MKLDNN -- Using CBLAS in MKLDNN -- Not using NCCL -- Building with distributed package: -- USE_TENSORPIPE=True -- USE_GLOO=True -- USE_MPI=False -- Building Executorch -- Not using ITT Copying functorch._C from functorch/functorch.so to /home/avanish/OfficeWork2025/JuneWork/pytorch_5Jun/pack/torch_night_5Jun/pytorch/build/lib.linux-ppc64le-cpython-312/functorch/_C.cpython-312-powerpc64le-linux-gnu.so copying functorch/functorch.so -> /home/avanish/OfficeWork2025/JuneWork/pytorch_5Jun/pack/torch_night_5Jun/pytorch/build/lib.linux-ppc64le-cpython-312/functorch/_C.cpython-312-powerpc64le-linux-gnu.so building 'torch._C' extension creating build/temp.linux-ppc64le-cpython-312/torch/csrc ``` This patch fixes the PyTorch build issue on Power, and I am able to build successfully. Hi @malfet @albanD, please review this PR for the PyTorch build issue that we are observing on Power. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/155255 Approved by: https://github.com/albanD, https://github.com/malfet --- .../ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h | 2 +- aten/src/ATen/native/Blas.cpp | 2 +- cmake/Modules/FindMKLDNN.cmake | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h index 705d8436edf..a6a883e53b3 100644 --- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h +++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h @@ -478,7 +478,7 @@ class Vectorized { this->store(tmp1); b.store(tmp2); - for (const auto i : c10::irange(Vectorized>::size())) { + for (const auto i : c10::irange(Vectorized>::size())) { out[i] = tmp1[i] / tmp2[i]; } return loadu(out); diff --git a/aten/src/ATen/native/Blas.cpp b/aten/src/ATen/native/Blas.cpp index 2ca484f808b..674ccf11cfb 100644 --- a/aten/src/ATen/native/Blas.cpp +++ b/aten/src/ATen/native/Blas.cpp @@ -296,7 +296,7 @@ _scaled_mm_out_cpu(const Tensor& mat1, const Tensor& mat2, std::optional out_dtype, bool use_fast_accum, Tensor& out) { -#if AT_MKLDNN_ENABLED() +#if AT_MKLDNN_ENABLED() && !defined(__powerpc__) if (at::globalContext().userEnabledMkldnn()) { bool mixed_dtype = mat1.scalar_type() != mat2.scalar_type(); if ((!mixed_dtype && cpuinfo_has_x86_amx_int8()) || diff --git a/cmake/Modules/FindMKLDNN.cmake b/cmake/Modules/FindMKLDNN.cmake index d622261455d..00fd0130d83 100644 --- a/cmake/Modules/FindMKLDNN.cmake +++ b/cmake/Modules/FindMKLDNN.cmake @@ -85,8 +85,12 @@ IF(NOT MKLDNN_FOUND) ENDIF(NOT APPLE AND NOT WIN32 AND NOT BUILD_LITE_INTERPRETER) IF(EXISTS "${MKLDNN_ROOT}/include/oneapi/dnnl/dnnl_ukernel.hpp") - MESSAGE("-- Will build oneDNN UKERNEL") - SET(DNNL_EXPERIMENTAL_UKERNEL ON CACHE BOOL "" FORCE) + IF(CPU_POWER) + SET(DNNL_EXPERIMENTAL_UKERNEL OFF CACHE BOOL "" FORCE) + ELSE() + MESSAGE("-- Will 
build oneDNN UKERNEL") + SET(DNNL_EXPERIMENTAL_UKERNEL ON CACHE BOOL "" FORCE) + ENDIF() ENDIF(EXISTS "${MKLDNN_ROOT}/include/oneapi/dnnl/dnnl_ukernel.hpp") FIND_PACKAGE(BLAS)