mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 00:20:18 +01:00
add missing NEON {vld1,vst1}_*_x2 intrinsics (#43683)
Summary: Workaround for issue https://github.com/pytorch/pytorch/issues/43265. Add the missing intrinsics until gcc-7 gets the missing patches backported. Fixes https://github.com/pytorch/pytorch/issues/43265. Pull Request resolved: https://github.com/pytorch/pytorch/pull/43683 Reviewed By: albanD Differential Revision: D23467867 Pulled By: malfet fbshipit-source-id: 7c138dd3de3c45852a60f2cfe8b4d7f7cf76bc7e
This commit is contained in:
parent
137a4fcc3b
commit
c259146477
|
|
@ -609,6 +609,21 @@ if(USE_ASAN)
|
|||
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fsanitize=address")
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
|
||||
include(CheckCSourceCompiles)
|
||||
check_c_source_compiles("#include <arm_neon.h>
|
||||
int main() {
|
||||
float a[] = {1.0, 1.0};
|
||||
vld1q_f32_x2(a);
|
||||
return 0;
|
||||
}" HAS_VLD1)
|
||||
|
||||
if(NOT HAS_VLD1)
|
||||
string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VLD1")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
# Add code coverage flags to supported compilers
|
||||
if(CODE_COVERAGE)
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@
|
|||
#elif defined(__GNUC__) && (defined(__ARM_NEON__) || defined(__aarch64__))
|
||||
/* GCC-compatible compiler, targeting ARM with NEON */
|
||||
#include <arm_neon.h>
|
||||
#if defined (MISSING_ARM_VLD1)
|
||||
#include <ATen/cpu/vec256/missing_vld1_neon.h>
|
||||
#endif
|
||||
#elif defined(__GNUC__) && defined(__IWMMXT__)
|
||||
/* GCC-compatible compiler, targeting ARM with WMMX */
|
||||
#include <mmintrin.h>
|
||||
|
|
|
|||
452
aten/src/ATen/cpu/vec256/missing_vld1_neon.h
Normal file
452
aten/src/ATen/cpu/vec256/missing_vld1_neon.h
Normal file
|
|
@ -0,0 +1,452 @@
|
|||
/* Workaround for missing vld1_*_x2 and vst1_*_x2 intrinsics in gcc-7. */
|
||||
|
||||
__extension__ extern __inline uint8x8x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_u8_x2 (const uint8_t *__a)
|
||||
{
|
||||
uint8x8x2_t ret;
|
||||
asm ("ld1 {%S0.8b - %T0.8b}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int8x8x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_s8_x2 (const int8_t *__a)
|
||||
{
|
||||
int8x8x2_t ret;
|
||||
asm ("ld1 {%S0.8b - %T0.8b}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint16x4x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_u16_x2 (const uint16_t *__a)
|
||||
{
|
||||
uint16x4x2_t ret;
|
||||
asm ("ld1 {%S0.4h - %T0.4h}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int16x4x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_s16_x2 (const int16_t *__a)
|
||||
{
|
||||
int16x4x2_t ret;
|
||||
asm ("ld1 {%S0.4h - %T0.4h}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint32x2x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_u32_x2 (const uint32_t *__a)
|
||||
{
|
||||
uint32x2x2_t ret;
|
||||
asm ("ld1 {%S0.2s - %T0.2s}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int32x2x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_s32_x2 (const int32_t *__a)
|
||||
{
|
||||
int32x2x2_t ret;
|
||||
asm ("ld1 {%S0.2s - %T0.2s}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint64x1x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_u64_x2 (const uint64_t *__a)
|
||||
{
|
||||
uint64x1x2_t ret;
|
||||
asm ("ld1 {%S0.1d - %T0.1d}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int64x1x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_s64_x2 (const int64_t *__a)
|
||||
{
|
||||
int64x1x2_t ret;
|
||||
__builtin_aarch64_simd_oi __o;
|
||||
asm ("ld1 {%S0.1d - %T0.1d}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline float16x4x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_f16_x2 (const float16_t *__a)
|
||||
{
|
||||
float16x4x2_t ret;
|
||||
asm ("ld1 {%S0.4h - %T0.4h}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x2x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_f32_x2 (const float32_t *__a)
|
||||
{
|
||||
float32x2x2_t ret;
|
||||
asm ("ld1 {%S0.2s - %T0.2s}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline float64x1x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_f64_x2 (const float64_t *__a)
|
||||
{
|
||||
float64x1x2_t ret;
|
||||
asm ("ld1 {%S0.1d - %T0.1d}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly8x8x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_p8_x2 (const poly8_t *__a)
|
||||
{
|
||||
poly8x8x2_t ret;
|
||||
asm ("ld1 {%S0.8b - %T0.8b}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly16x4x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_p16_x2 (const poly16_t *__a)
|
||||
{
|
||||
poly16x4x2_t ret;
|
||||
asm ("ld1 {%S0.4h - %T0.4h}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly64x1x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1_p64_x2 (const poly64_t *__a)
|
||||
{
|
||||
poly64x1x2_t ret;
|
||||
asm ("ld1 {%S0.1d - %T0.1d}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint8x16x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_u8_x2 (const uint8_t *__a)
|
||||
{
|
||||
uint8x16x2_t ret;
|
||||
asm ("ld1 {%S0.16b - %T0.16b}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int8x16x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_s8_x2 (const int8_t *__a)
|
||||
{
|
||||
int8x16x2_t ret;
|
||||
asm ("ld1 {%S0.16b - %T0.16b}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint16x8x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_u16_x2 (const uint16_t *__a)
|
||||
{
|
||||
uint16x8x2_t ret;
|
||||
asm ("ld1 {%S0.8h - %T0.8h}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int16x8x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_s16_x2 (const int16_t *__a)
|
||||
{
|
||||
int16x8x2_t ret;
|
||||
asm ("ld1 {%S0.8h - %T0.8h}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint32x4x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_u32_x2 (const uint32_t *__a)
|
||||
{
|
||||
uint32x4x2_t ret;
|
||||
asm ("ld1 {%S0.4s - %T0.4s}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int32x4x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_s32_x2 (const int32_t *__a)
|
||||
{
|
||||
int32x4x2_t ret;
|
||||
asm ("ld1 {%S0.4s - %T0.4s}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline uint64x2x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_u64_x2 (const uint64_t *__a)
|
||||
{
|
||||
uint64x2x2_t ret;
|
||||
asm ("ld1 {%S0.2d - %T0.2d}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline int64x2x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_s64_x2 (const int64_t *__a)
|
||||
{
|
||||
int64x2x2_t ret;
|
||||
asm ("ld1 {%S0.2d - %T0.2d}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline float16x8x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_f16_x2 (const float16_t *__a)
|
||||
{
|
||||
float16x8x2_t ret;
|
||||
asm ("ld1 {%S0.8h - %T0.8h}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline float32x4x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_f32_x2 (const float32_t *__a)
|
||||
{
|
||||
float32x4x2_t ret;
|
||||
asm ("ld1 {%S0.4s - %T0.4s}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline float64x2x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_f64_x2 (const float64_t *__a)
|
||||
{
|
||||
float64x2x2_t ret;
|
||||
asm ("ld1 {%S0.2d - %T0.2d}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly8x16x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_p8_x2 (const poly8_t *__a)
|
||||
{
|
||||
poly8x16x2_t ret;
|
||||
asm ("ld1 {%S0.16b - %T0.16b}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly16x8x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_p16_x2 (const poly16_t *__a)
|
||||
{
|
||||
poly16x8x2_t ret;
|
||||
asm ("ld1 {%S0.8h - %T0.8h}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__extension__ extern __inline poly64x2x2_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vld1q_p64_x2 (const poly64_t *__a)
|
||||
{
|
||||
poly64x2x2_t ret;
|
||||
asm ("ld1 {%S0.2d - %T0.2d}, [%1]" : "=w" (ret) : "r"(__a) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* vst1x2 */
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_s64_x2 (int64_t * __a, int64x1x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.1d - %T0.1d}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_u64_x2 (uint64_t * __a, uint64x1x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.1d - %T0.1d}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_f64_x2 (float64_t * __a, float64x1x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.1d - %T0.1d}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_s8_x2 (int8_t * __a, int8x8x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.8b - %T0.8b}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_p8_x2 (poly8_t * __a, poly8x8x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.8b - %T0.8b}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_s16_x2 (int16_t * __a, int16x4x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.4h - %T0.4h}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_p16_x2 (poly16_t * __a, poly16x4x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.4h - %T0.4h}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_s32_x2 (int32_t * __a, int32x2x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.2s - %T0.2s}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_u8_x2 (uint8_t * __a, uint8x8x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.8b - %T0.8b}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_u16_x2 (uint16_t * __a, uint16x4x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.4h - %T0.4h}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_u32_x2 (uint32_t * __a, uint32x2x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.2s - %T0.2s}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_f16_x2 (float16_t * __a, float16x4x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.4h - %T0.4h}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_f32_x2 (float32_t * __a, float32x2x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.2s - %T0.2s}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1_p64_x2 (poly64_t * __a, poly64x1x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.1d - %T0.1d}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_s8_x2 (int8_t * __a, int8x16x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.16b - %T0.16b}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.16b - %T0.16b}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_s16_x2 (int16_t * __a, int16x8x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.8h - %T0.8h}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.8h - %T0.8h}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_s32_x2 (int32_t * __a, int32x4x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.4s - %T0.4s}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_s64_x2 (int64_t * __a, int64x2x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.2d - %T0.2d}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.16b - %T0.16b}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.8h - %T0.8h}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.4s - %T0.4s}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.2d - %T0.2d}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_f16_x2 (float16_t * __a, float16x8x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.8h - %T0.8h}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_f32_x2 (float32_t * __a, float32x4x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.4s - %T0.4s}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_f64_x2 (float64_t * __a, float64x2x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.2d - %T0.2d}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
|
||||
__extension__ extern __inline void
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t val)
|
||||
{
|
||||
asm ("st1 {%S0.2d - %T0.2d}, [%1]" :: "w" (val), "r"(__a) :);
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user