mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Revert "Enable onednn in pytorch for ppc64le architecture (#143743)"
This reverts commit d4cf0e5af4. Reverted https://github.com/pytorch/pytorch/pull/143743 on behalf of https://github.com/davidberard98 due to Windows build failures that look related [GH job link](https://github.com/pytorch/pytorch/actions/runs/13705127978/job/38329845095) [HUD commit link](d4cf0e5af4) ([comment](https://github.com/pytorch/pytorch/pull/143743#issuecomment-2704903253))
This commit is contained in:
parent
1add61c242
commit
cf9efbdf16
|
|
@ -180,14 +180,11 @@ endif()
|
||||||
|
|
||||||
set(CPU_AARCH64 OFF)
|
set(CPU_AARCH64 OFF)
|
||||||
set(CPU_INTEL OFF)
|
set(CPU_INTEL OFF)
|
||||||
set(CPU_POWER OFF)
|
|
||||||
|
|
||||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(AMD64|x86_64)")
|
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(AMD64|x86_64)")
|
||||||
set(CPU_INTEL ON)
|
set(CPU_INTEL ON)
|
||||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)")
|
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)")
|
||||||
set(CPU_AARCH64 ON)
|
set(CPU_AARCH64 ON)
|
||||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le)")
|
|
||||||
set(CPU_POWER ON)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not
|
# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not
|
||||||
|
|
@ -318,8 +315,8 @@ cmake_dependent_option(USE_ITT "Use Intel(R) VTune Profiler ITT functionality"
|
||||||
# Ensure that an MKLDNN build is the default for x86 CPUs but optional for
|
# Ensure that an MKLDNN build is the default for x86 CPUs but optional for
|
||||||
# AArch64 (dependent on -DUSE_MKLDNN).
|
# AArch64 (dependent on -DUSE_MKLDNN).
|
||||||
cmake_dependent_option(
|
cmake_dependent_option(
|
||||||
USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, AArch64, and ppc64le."
|
USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64."
|
||||||
"${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64 OR CPU_POWER" OFF)
|
"${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64" OFF)
|
||||||
cmake_dependent_option(
|
cmake_dependent_option(
|
||||||
USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF
|
USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF
|
||||||
"USE_MKLDNN AND CPU_AARCH64" OFF)
|
"USE_MKLDNN AND CPU_AARCH64" OFF)
|
||||||
|
|
|
||||||
|
|
@ -348,8 +348,6 @@ void gemm(
|
||||||
// MKLDNN also supports ARM for bf16, and the bypass is only
|
// MKLDNN also supports ARM for bf16, and the bypass is only
|
||||||
// currently intended for x86/x86_64.
|
// currently intended for x86/x86_64.
|
||||||
const bool use_bf16_gemv_trans = false;
|
const bool use_bf16_gemv_trans = false;
|
||||||
#elif defined(__powerpc__)
|
|
||||||
const bool use_bf16_gemv_trans = false;
|
|
||||||
#else
|
#else
|
||||||
const bool bf16_gemv_trans_would_be_faster = cpuinfo_initialize() &&
|
const bool bf16_gemv_trans_would_be_faster = cpuinfo_initialize() &&
|
||||||
!cpuinfo_has_x86_avx512bf16();
|
!cpuinfo_has_x86_avx512bf16();
|
||||||
|
|
@ -380,12 +378,8 @@ void gemm(
|
||||||
// we should not bother checking for !cpuinfo_has_x86_avx512fp16() here,
|
// we should not bother checking for !cpuinfo_has_x86_avx512fp16() here,
|
||||||
// because "onednn (mkldnn) won't use avx512fp16 to compute gemms by default
|
// because "onednn (mkldnn) won't use avx512fp16 to compute gemms by default
|
||||||
// because the avx512fp16 fma would incur accuracy loss".
|
// because the avx512fp16 fma would incur accuracy loss".
|
||||||
#if defined(__powerpc__)
|
|
||||||
const bool fp16_gemv_trans_would_be_faster = false;
|
|
||||||
#else
|
|
||||||
const bool fp16_gemv_trans_would_be_faster = cpuinfo_initialize() &&
|
const bool fp16_gemv_trans_would_be_faster = cpuinfo_initialize() &&
|
||||||
cpuinfo_has_x86_f16c();
|
cpuinfo_has_x86_f16c();
|
||||||
#endif
|
|
||||||
const bool use_fp16_gemv_trans = fp16_gemv_trans_would_be_faster &&
|
const bool use_fp16_gemv_trans = fp16_gemv_trans_would_be_faster &&
|
||||||
transa == TransposeType::Transpose &&
|
transa == TransposeType::Transpose &&
|
||||||
transb == TransposeType::NoTranspose && n == 1 && alpha == 1.0;
|
transb == TransposeType::NoTranspose && n == 1 && alpha == 1.0;
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,7 @@
|
||||||
#include <ATen/Tensor.h>
|
#include <ATen/Tensor.h>
|
||||||
#include <ATen/native/quantized/PackedParams.h>
|
#include <ATen/native/quantized/PackedParams.h>
|
||||||
#include <ideep.hpp>
|
#include <ideep.hpp>
|
||||||
#if !defined(__powerpc__)
|
|
||||||
#include <cpuinfo.h>
|
#include <cpuinfo.h>
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <c10/util/CallOnce.h>
|
#include <c10/util/CallOnce.h>
|
||||||
|
|
||||||
|
|
@ -433,16 +431,14 @@ inline bool should_use_onednn_quant(
|
||||||
// TODO Support more OSs.
|
// TODO Support more OSs.
|
||||||
#if !defined(__linux__)
|
#if !defined(__linux__)
|
||||||
return false;
|
return false;
|
||||||
#elif defined(__powerpc__)
|
|
||||||
bool vnni_available = true;
|
|
||||||
#else
|
#else
|
||||||
bool vnni_available = cpuinfo_has_x86_avx512vnni();
|
bool vnni_available = cpuinfo_has_x86_avx512vnni();
|
||||||
#endif
|
|
||||||
bool w_sym_quant =
|
bool w_sym_quant =
|
||||||
is_weight_symmetric_quant(weight, is_transposed_conv);
|
is_weight_symmetric_quant(weight, is_transposed_conv);
|
||||||
bool opad_all_zero =
|
bool opad_all_zero =
|
||||||
std::all_of(output_padding.begin(), output_padding.end(), [](int i) { return i==0; });
|
std::all_of(output_padding.begin(), output_padding.end(), [](int i) { return i==0; });
|
||||||
return vnni_available && (groups <= 100) && w_sym_quant && opad_all_zero;
|
return vnni_available && (groups <= 100) && w_sym_quant && opad_all_zero;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
} // onednn_utils
|
} // onednn_utils
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user