Use simd version for fp16 conversions (#31897)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/31897

The previous version only used the avx2 path. The _simd version uses avx512 if the CPU is capable of it.

Test Plan: Unit test

Reviewed By: tracelogfb

Differential Revision: D19291499

fbshipit-source-id: 3b1ee0ba756e5c9defbd5caf7f68982d9b2ca06c
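The point of the change: fbgemm's _simd entry points select the widest instruction set the running CPU supports, so the caller no longer needs an explicit GetCpuId().avx2() branch. Below is a minimal sketch of that kind of runtime dispatch; the convert_* helpers are hypothetical stand-ins for illustration and are not fbgemm's actual internals.

// Illustrative sketch of runtime ISA dispatch inside a "_simd" style entry point.
// The convert_* helpers are hypothetical; only the dispatch pattern matters.
#include <cstddef>
#include <cstdint>

void convert_scalar(const float* src, std::uint16_t* dst, std::size_t n);  // portable fallback
void convert_avx2(const float* src, std::uint16_t* dst, std::size_t n);    // 256-bit path
void convert_avx512(const float* src, std::uint16_t* dst, std::size_t n);  // 512-bit path

// Callers invoke one function; the widest ISA available at runtime is picked
// internally (GCC/Clang provide __builtin_cpu_supports for this query).
void FloatToFloat16_simd_sketch(const float* src, std::uint16_t* dst, std::size_t n) {
  if (__builtin_cpu_supports("avx512f")) {
    convert_avx512(src, dst, n);
  } else if (__builtin_cpu_supports("avx2")) {
    convert_avx2(src, dst, n);
  } else {
    convert_scalar(src, dst, n);
  }
}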
This commit is contained in:
parent
1314f7f4f4
commit
8b4feff01d
@@ -40,12 +40,8 @@ bool FloatToHalfOp<CPUContext>::RunOnDevice() {
   auto N = input.numel();
 
 #ifdef USE_FBGEMM
-  if (GetCpuId().avx2()) {
-    fbgemm::FloatToFloat16_avx2(
-        data, reinterpret_cast<fbgemm::float16*>(out), N, clip_);
-  } else {
-    FloatToFloat16_ref(data, out, N, clip_);
-  }
+  fbgemm::FloatToFloat16_simd(
+      data, reinterpret_cast<fbgemm::float16*>(out), N, clip_);
 #else
   FloatToFloat16_ref(data, out, N, clip_);
 #endif
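The test plan above mentions a unit test; a minimal hedged check of this hunk is to compare the simd float-to-half conversion against the reference one on random data. The sketch assumes the routines are declared in fbgemm/FbgemmConvert.h with the (src, dst, size, do_clip) signature used in the diff; treat the header path and default arguments as assumptions.

// Hedged sketch: FloatToFloat16_simd should agree with FloatToFloat16_ref.
// Assumes fbgemm/FbgemmConvert.h declares both with the signature seen above.
#include <cassert>
#include <cstddef>
#include <cstring>
#include <random>
#include <vector>
#include "fbgemm/FbgemmConvert.h"

int main() {
  const std::size_t N = 1024;
  std::vector<float> src(N);
  std::mt19937 gen(0);
  std::uniform_real_distribution<float> dist(-100.f, 100.f);
  for (auto& v : src) {
    v = dist(gen);
  }

  std::vector<fbgemm::float16> out_simd(N), out_ref(N);
  fbgemm::FloatToFloat16_simd(src.data(), out_simd.data(), N, /*do_clip=*/false);
  fbgemm::FloatToFloat16_ref(src.data(), out_ref.data(), N, /*do_clip=*/false);

  // With round-to-nearest conversion both paths are expected to match
  // bit-for-bit; relax to a tolerance check if the reference rounds differently.
  assert(std::memcmp(out_simd.data(), out_ref.data(),
                     N * sizeof(fbgemm::float16)) == 0);
  return 0;
}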
@@ -63,12 +59,8 @@ bool HalfToFloatOp<CPUContext>::RunOnDevice() {
   auto N = input.numel();
 
 #ifdef USE_FBGEMM
-  if (GetCpuId().avx2()) {
-    fbgemm::Float16ToFloat_avx2(
-        reinterpret_cast<const fbgemm::float16*>(data), out, N);
-  } else {
-    Float16ToFloat_ref(data, out, N);
-  }
+  fbgemm::Float16ToFloat_simd(
+      reinterpret_cast<const fbgemm::float16*>(data), out, N);
 #else
   Float16ToFloat_ref(data, out, N);
 #endif
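For the half-to-float direction a simple hedged check is a round trip: convert float to half and back, and confirm values survive to roughly fp16 precision. Again, the fbgemm/FbgemmConvert.h header and exact signatures are assumptions based on the calls in the diff above.

// Hedged round-trip sketch for Float16ToFloat_simd.
// Assumes the same fbgemm/FbgemmConvert.h declarations as the diff suggests.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>
#include "fbgemm/FbgemmConvert.h"

int main() {
  const std::size_t N = 256;
  std::vector<float> src(N), roundtrip(N);
  std::vector<fbgemm::float16> half(N);
  for (std::size_t i = 0; i < N; ++i) {
    src[i] = 0.5f * static_cast<float>(i) - 64.f;  // exactly representable in fp16
  }

  fbgemm::FloatToFloat16_simd(src.data(), half.data(), N, /*do_clip=*/false);
  fbgemm::Float16ToFloat_simd(half.data(), roundtrip.data(), N);

  // fp16 keeps about 10 mantissa bits, so allow roughly 2^-10 relative error.
  for (std::size_t i = 0; i < N; ++i) {
    float tol = std::max(1e-3f, std::fabs(src[i]) * 1e-3f);
    assert(std::fabs(src[i] - roundtrip[i]) <= tol);
  }
  return 0;
}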