Disable NVSHMEM for aarch64 CUDA wheel builds and bundle libnvshmem_host.so.3
in the CUDA 12 manywheel build.

Review notes:
  * The bundled library path is spelled libnvshmem_host.so.3 (previously
    libnvshem_host.so.3) so that it agrees with the soname entry.
  * The soname entry is inserted after libcufile_rdma.so.1 so that the path
    list and the soname list keep the same relative order (presumably they are
    matched by position; verify against the code that consumes them).
  * Context-line indentation was reconstructed; confirm it against the target
    files (or apply with whitespace checks relaxed) before merging.

diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
index d7bbdebc677..f66deb221c3 100755
--- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
@@ -208,7 +208,9 @@ if __name__ == "__main__":
     build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     # MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
     if enable_cuda:
-        build_vars = "MAX_JOBS=5 " + build_vars
+        build_vars += "MAX_JOBS=5 "
+        # nvshmem is broken for aarch64 see https://github.com/pytorch/pytorch/issues/160425
+        build_vars += "USE_NVSHMEM=OFF "
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
     desired_cuda = os.getenv("DESIRED_CUDA")
diff --git a/.ci/manywheel/build_cuda.sh b/.ci/manywheel/build_cuda.sh
index 39586faa85f..8820b4fe221 100644
--- a/.ci/manywheel/build_cuda.sh
+++ b/.ci/manywheel/build_cuda.sh
@@ -134,6 +134,7 @@ if [[ $CUDA_VERSION == 12* ]]; then
             "/usr/local/cuda/lib64/libnvrtc-builtins.so"
             "/usr/local/cuda/lib64/libcufile.so.0"
             "/usr/local/cuda/lib64/libcufile_rdma.so.1"
+            "/usr/local/cuda/lib64/libnvshmem_host.so.3"
             "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12"
             "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so"
         )
@@ -152,6 +153,7 @@ if [[ $CUDA_VERSION == 12* ]]; then
             "libcudart.so.12"
             "libnvrtc.so.12"
             "libnvrtc-builtins.so"
             "libcufile.so.0"
             "libcufile_rdma.so.1"
+            "libnvshmem_host.so.3"
             "libcupti.so.12"