From 09f7f62cfebb0067b93d227c13fe9a94b51af762 Mon Sep 17 00:00:00 2001 From: maajidkhann Date: Sat, 15 Mar 2025 00:02:35 +0000 Subject: [PATCH] Fix atomic operation compatibility for ARMv8-A (Raspberry Pi 4) by adjusting compilation flags (#148070) **Issue:** * The ldaddal instruction is an AArch64 atomic operation available from ARMv8.1-A onwards. * Raspberry Pi 4 (Cortex-A72) is ARMv8-A, which does not support ldaddal, leading to failures when running PyTorch built with -march=armv8.2-a+sve. * This led to an issue when running PyTorch on ARMv8-A (Raspberry Pi 4), as unsupported atomic operations were generated. **Fix:** * Updated the build flags to explicitly use **-march=armv8-a+sve**, ensuring that GCC and Clang promote it correctly; this resolves the compatibility issues on ARMv8-A while SVE still works correctly, as before. * This ensures that PyTorch builds correctly for ARMv8-A platforms (e.g., Raspberry Pi 4) while still enabling SVE for supported hardware. Test plan: - Allocate `a1.4xlarge` on AWS - Run the following script using the wheel produced by this PR ```python import torch def f(x): return x.sin() + x.cos() print(torch.__version__) f_c = torch.jit.script(f) ``` - Observe no crash ``` $ python3 foo.py 2.7.0.dev20250313+cpu ``` - Observe crash with 2.6.0 ``` $ python3 foo.py 2.6.0+cpu Illegal instruction (core dumped) ``` Fixes #146792 Pull Request resolved: https://github.com/pytorch/pytorch/pull/148070 Approved by: https://github.com/malfet --- cmake/Codegen.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/Codegen.cmake b/cmake/Codegen.cmake index 06c199ef056..724d9930990 100644 --- a/cmake/Codegen.cmake +++ b/cmake/Codegen.cmake @@ -388,9 +388,9 @@ if(INTERN_BUILD_ATEN_OPS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_SVE_CPU_DEFINITION -DHAVE_SVE256_CPU_DEFINITION") list(APPEND CPU_CAPABILITY_NAMES "SVE256") if("${CMAKE_C_COMPILER_ID}" MATCHES "Clang") - list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -O2 -march=armv8.2-a+sve 
-DCPU_CAPABILITY_SVE -msve-vector-bits=256") + list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -O2 -march=armv8-a+sve -DCPU_CAPABILITY_SVE -msve-vector-bits=256") else() - list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=armv8.2-a+sve -DCPU_CAPABILITY_SVE -msve-vector-bits=256") + list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=armv8-a+sve -DCPU_CAPABILITY_SVE -msve-vector-bits=256") endif() endif(CXX_SVE256_FOUND) endif(CXX_SVE_FOUND)