Enable onednn in pytorch for ppc64le architecture (#143743)

This PR enables onednn for the powerpc architecture, which makes it possible to quantize models via the onednn backend on powerpc.
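For reference, the quantization flow that benefits from this change is driven from Python by selecting the onednn quantized engine. Below is a minimal eager-mode static quantization sketch; the toy module and calibration input are illustrative only (not part of this PR), and it assumes a build with oneDNN enabled:

```python
import torch
import torch.nn as nn
import torch.ao.quantization as tq

# Toy module used only to illustrate the flow.
class M(nn.Module):
    def __init__(self):
        super().__init__()
        self.quant = tq.QuantStub()      # fp32 -> quantized boundary
        self.conv = nn.Conv2d(3, 8, 3)
        self.dequant = tq.DeQuantStub()  # quantized -> fp32 boundary

    def forward(self, x):
        return self.dequant(self.conv(self.quant(x)))

torch.backends.quantized.engine = "onednn"   # select the onednn backend
m = M().eval()
m.qconfig = tq.get_default_qconfig("onednn")
tq.prepare(m, inplace=True)
m(torch.randn(1, 3, 32, 32))                 # calibration pass with dummy data
tq.convert(m, inplace=True)                  # conv weights are prepacked by the backend
print(m)
```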

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143743
Approved by: https://github.com/malfet, https://github.com/albanD
Tiwari-Avanish 2025-03-06 18:00:51 +00:00 committed by PyTorch MergeBot
parent 097b0d372a
commit d4cf0e5af4
3 changed files with 16 additions and 3 deletions

View File

@@ -180,11 +180,14 @@ endif()
 set(CPU_AARCH64 OFF)
 set(CPU_INTEL OFF)
+set(CPU_POWER OFF)
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "(AMD64|x86_64)")
   set(CPU_INTEL ON)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)")
   set(CPU_AARCH64 ON)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le)")
+  set(CPU_POWER ON)
 endif()
 # For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not
@@ -315,8 +318,8 @@ cmake_dependent_option(USE_ITT "Use Intel(R) VTune Profiler ITT functionality"
 # Ensure that an MKLDNN build is the default for x86 CPUs but optional for
 # AArch64 (dependent on -DUSE_MKLDNN).
 cmake_dependent_option(
-  USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64."
-  "${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64" OFF)
+  USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, AArch64, and ppc64le."
+  "${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64 OR CPU_POWER" OFF)
 cmake_dependent_option(
   USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF
   "USE_MKLDNN AND CPU_AARCH64" OFF)

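Note that the default value of the option remains `${CPU_INTEL}`, so on ppc64le (as on AArch64) oneDNN still has to be requested explicitly, e.g. by building with `USE_MKLDNN=1`. A quick sanity check of the resulting interpreter, assuming such a build (not part of the diff):

```python
import torch

# True when the build was configured with oneDNN (MKLDNN) support.
print(torch.backends.mkldnn.is_available())

# 'onednn' is expected to appear here when the quantized onednn engine is built in.
print(torch.backends.quantized.supported_engines)
```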
View File

@@ -348,6 +348,8 @@ void gemm(
   // MKLDNN also supports ARM for bf16, and the bypass is only
   // currently intended for x86/x86_64.
   const bool use_bf16_gemv_trans = false;
+#elif defined(__powerpc__)
+  const bool use_bf16_gemv_trans = false;
 #else
   const bool bf16_gemv_trans_would_be_faster = cpuinfo_initialize() &&
       !cpuinfo_has_x86_avx512bf16();
@@ -378,8 +380,12 @@ void gemm(
   // we should not bother checking for !cpuinfo_has_x86_avx512fp16() here,
   // because "onednn (mkldnn) won't use avx512fp16 to compute gemms by default
   // because the avx512fp16 fma would incur accuracy loss".
+#if defined(__powerpc__)
+  const bool fp16_gemv_trans_would_be_faster = false;
+#else
   const bool fp16_gemv_trans_would_be_faster = cpuinfo_initialize() &&
       cpuinfo_has_x86_f16c();
+#endif
   const bool use_fp16_gemv_trans = fp16_gemv_trans_would_be_faster &&
       transa == TransposeType::Transpose &&
       transb == TransposeType::NoTranspose && n == 1 && alpha == 1.0;

View File

@@ -5,7 +5,9 @@
 #include <ATen/Tensor.h>
 #include <ATen/native/quantized/PackedParams.h>
 #include <ideep.hpp>
+#if !defined(__powerpc__)
 #include <cpuinfo.h>
+#endif
 #include <c10/util/CallOnce.h>
@@ -431,14 +433,16 @@ inline bool should_use_onednn_quant(
   // TODO Support more OSs.
 #if !defined(__linux__)
   return false;
+#elif defined(__powerpc__)
+  bool vnni_available = true;
 #else
   bool vnni_available = cpuinfo_has_x86_avx512vnni();
+#endif
   bool w_sym_quant =
       is_weight_symmetric_quant(weight, is_transposed_conv);
   bool opad_all_zero =
       std::all_of(output_padding.begin(), output_padding.end(), [](int i) { return i==0; });
   return vnni_available && (groups <= 100) && w_sym_quant && opad_all_zero;
-#endif
 }
 } // onednn_utils
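With the VNNI probe bypassed on powerpc (vnni_available is simply assumed true), whether the onednn path is taken reduces to the weight-symmetry, groups, and output-padding checks above. From Python, the only knob is the engine selection; a small illustrative helper follows, with names of my own choosing rather than anything from this PR:

```python
import torch

def pick_quantized_engine(preferred: str = "onednn") -> str:
    """Set the preferred quantized engine if this build exposes it,
    otherwise leave the current engine untouched."""
    if preferred in torch.backends.quantized.supported_engines:
        torch.backends.quantized.engine = preferred
    return torch.backends.quantized.engine
```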