mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[QNNPACK] Add more unaligned attributes (#91208)
Summary: Bypass "Runtime error: store to misaligned address [...] for type 'uint16_t' (aka 'unsigned short'), which requires 2 byte alignment" for q8conv.
Reviewed By: scramsby
Differential Revision: D42179009
Pull Request resolved: https://github.com/pytorch/pytorch/pull/91208
Approved by: https://github.com/kimishpatel
This commit is contained in:
parent
a274b5b99e
commit
37ea99cd25
|
|
@@ -337,14 +337,15 @@ void pytorch_q8conv_ukernel_4x4c2__sse2(
|
|||
(uint32_t)_mm_cvtsi128_si32(_mm_unpackhi_epi32(vout, vout));
|
||||
*((uint32_t*)c3) = (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(vout, 12));
|
||||
} else {
|
||||
typedef PYTORCH_QNNP_UNALIGNED uint16_t unaligned_uint16_t;
|
||||
if (nr >= 2) {
|
||||
*((uint16_t*)c0) = (uint16_t)_mm_extract_epi16(vout, 0);
|
||||
*((unaligned_uint16_t*)c0) = (uint16_t)_mm_extract_epi16(vout, 0);
|
||||
c0 += 2;
|
||||
*((uint16_t*)c1) = (uint16_t)_mm_extract_epi16(vout, 2);
|
||||
*((unaligned_uint16_t*)c1) = (uint16_t)_mm_extract_epi16(vout, 2);
|
||||
c1 += 2;
|
||||
*((uint16_t*)c2) = (uint16_t)_mm_extract_epi16(vout, 4);
|
||||
*((unaligned_uint16_t*)c2) = (uint16_t)_mm_extract_epi16(vout, 4);
|
||||
c2 += 2;
|
||||
*((uint16_t*)c3) = (uint16_t)_mm_extract_epi16(vout, 6);
|
||||
*((unaligned_uint16_t*)c3) = (uint16_t)_mm_extract_epi16(vout, 6);
|
||||
c3 += 2;
|
||||
vout = _mm_srli_epi32(vout, 16);
|
||||
nr -= 2;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user