diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py index 51578fbfb77..a99e5f8f656 100755 --- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py +++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py @@ -13,49 +13,6 @@ def list_dir(path: str) -> list[str]: return check_output(["ls", "-1", path]).decode().split("\n") -def build_ArmComputeLibrary() -> None: - """ - Using ArmComputeLibrary for aarch64 PyTorch - """ - print("Building Arm Compute Library") - acl_build_flags = [ - "debug=0", - "neon=1", - "opencl=0", - "os=linux", - "openmp=1", - "cppthreads=0", - "arch=armv8a", - "multi_isa=1", - "fixed_format_kernels=1", - "build=native", - ] - acl_install_dir = "/acl" - acl_checkout_dir = os.getenv("ACL_SOURCE_DIR", "ComputeLibrary") - if os.path.isdir(acl_install_dir): - shutil.rmtree(acl_install_dir) - if not os.path.isdir(acl_checkout_dir) or not len(os.listdir(acl_checkout_dir)): - check_call( - [ - "git", - "clone", - "https://github.com/ARM-software/ComputeLibrary.git", - "-b", - "v25.02", - "--depth", - "1", - "--shallow-submodules", - ] - ) - - check_call( - ["scons", "Werror=1", f"-j{os.cpu_count()}"] + acl_build_flags, - cwd=acl_checkout_dir, - ) - for d in ["arm_compute", "include", "utils", "support", "src", "build"]: - shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") - - def replace_tag(filename) -> None: with open(filename) as f: lines = f.readlines() @@ -356,19 +313,13 @@ if __name__ == "__main__": build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " if enable_mkldnn: - build_ArmComputeLibrary() print("build pytorch with mkldnn+acl backend") - build_vars += ( - "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " - "ACL_ROOT_DIR=/acl " - "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " - "ACL_INCLUDE_DIR=/acl/build " - "ACL_LIBRARY=/acl/build " - ) + build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " + build_vars += "ACL_ROOT_DIR=/acl " if enable_cuda: build_vars += "BLAS=NVPL " else: - build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS " + build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/opt/OpenBLAS " else: print("build pytorch without mkldnn backend") diff --git a/.ci/aarch64_linux/build_aarch64_wheel.py b/.ci/aarch64_linux/build_aarch64_wheel.py index 37fc488c5b3..a157ec57b57 100755 --- a/.ci/aarch64_linux/build_aarch64_wheel.py +++ b/.ci/aarch64_linux/build_aarch64_wheel.py @@ -299,40 +299,6 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: ) -def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: - print("Building OpenBLAS") - host.run_cmd( - f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.28 {git_clone_flags}" - ) - make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" - host.run_cmd( - f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS" - ) - - -def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None: - print("Building Arm Compute Library") - acl_build_flags = " ".join( - [ - "debug=0", - "neon=1", - "opencl=0", - "os=linux", - "openmp=1", - "cppthreads=0", - "arch=armv8a", - "multi_isa=1", - "fixed_format_kernels=1", - "build=native", - ] - ) - host.run_cmd( - f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}" - ) - - host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") - - def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None: host.run_cmd("pip3 install auditwheel") host.run_cmd( @@ -700,7 +666,6 @@ def start_build( configure_system( host, compiler=compiler, use_conda=use_conda, python_version=python_version ) - build_OpenBLAS(host, git_clone_flags) if host.using_docker(): print("Move libgfortant.a into a standard location") @@ -723,6 +688,8 @@ def start_build( f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}" ) + host.run_cmd("pytorch/.ci/docker/common/install_openblas.sh") + print("Building PyTorch wheel") build_opts = "" if pytorch_build_number is not None: @@ -743,16 +710,18 @@ def start_build( if host.using_docker(): build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" if enable_mkldnn: - build_ArmComputeLibrary(host, git_clone_flags) + host.run_cmd("pytorch/.ci/docker/common/install_acl.sh") print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" + build_vars += " BLAS=OpenBLAS" + build_vars += " OpenBLAS_HOME=/opt/OpenBLAS" + build_vars += " ACL_ROOT_DIR=/acl" host.run_cmd( - f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && " - f"{build_vars} python3 -m build --wheel --no-isolation{build_opts}" + f"cd $HOME/pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}" ) print("Repair the wheel") pytorch_wheel_name = host.list_dir("pytorch/dist")[0] - ld_library_path = "$HOME/acl/build:$HOME/pytorch/build/lib" + ld_library_path = "/acl/build:$HOME/pytorch/build/lib" host.run_cmd( f"export LD_LIBRARY_PATH={ld_library_path} && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}" ) @@ -908,7 +877,7 @@ def terminate_instances(instance_type: str) -> None: def parse_arguments(): from argparse import ArgumentParser - parser = ArgumentParser("Builid and test AARCH64 wheels using EC2") + parser = ArgumentParser("Build and test AARCH64 wheels using EC2") parser.add_argument("--key-name", type=str) parser.add_argument("--debug", action="store_true") parser.add_argument("--build-only", action="store_true") diff --git a/.ci/docker/common/install_acl.sh b/.ci/docker/common/install_acl.sh old mode 100644 new mode 100755 index bf41a03b280..0b865e5bc6f --- a/.ci/docker/common/install_acl.sh +++ b/.ci/docker/common/install_acl.sh @@ -1,16 +1,27 @@ -set -euo pipefail +#!/bin/bash +# Script used only in CD pipeline -readonly version=v25.02 -readonly src_host=https://github.com/ARM-software -readonly src_repo=ComputeLibrary +set -eux + +ACL_VERSION=${ACL_VERSION:-"v25.02"} +ACL_INSTALL_DIR="/acl" # Clone ACL -[[ ! -d ${src_repo} ]] && git clone ${src_host}/${src_repo}.git -cd ${src_repo} - -git checkout $version +git clone https://github.com/ARM-software/ComputeLibrary.git -b "${ACL_VERSION}" --depth 1 --shallow-submodules +ACL_CHECKOUT_DIR="ComputeLibrary" # Build with scons +pushd $ACL_CHECKOUT_DIR scons -j8 Werror=0 debug=0 neon=1 opencl=0 embed_kernels=0 \ os=linux arch=armv8a build=native multi_isa=1 \ fixed_format_kernels=1 openmp=1 cppthreads=0 +popd + +# Install ACL +sudo mkdir -p ${ACL_INSTALL_DIR} +for d in arm_compute include utils support src build +do + sudo cp -r ${ACL_CHECKOUT_DIR}/${d} ${ACL_INSTALL_DIR}/${d} +done + +rm -rf $ACL_CHECKOUT_DIR \ No newline at end of file diff --git a/.ci/docker/common/install_openblas.sh b/.ci/docker/common/install_openblas.sh old mode 100644 new mode 100755 index 3c795acf222..2f386c6bd52 --- a/.ci/docker/common/install_openblas.sh +++ b/.ci/docker/common/install_openblas.sh @@ -3,8 +3,10 @@ set -ex -cd / -git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION:-v0.3.30}" --depth 1 --shallow-submodules +OPENBLAS_VERSION=${OPENBLAS_VERSION:-"v0.3.30"} + +# Clone OpenBLAS +git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION}" --depth 1 --shallow-submodules OPENBLAS_CHECKOUT_DIR="OpenBLAS" OPENBLAS_BUILD_FLAGS=" @@ -17,5 +19,7 @@ CFLAGS=-O3 BUILD_BFLOAT16=1 " -make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR} -make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR} +make -j8 ${OPENBLAS_BUILD_FLAGS} -C $OPENBLAS_CHECKOUT_DIR +sudo make install -C $OPENBLAS_CHECKOUT_DIR + +rm -rf $OPENBLAS_CHECKOUT_DIR \ No newline at end of file diff --git a/.ci/docker/manywheel/Dockerfile_2_28_aarch64 b/.ci/docker/manywheel/Dockerfile_2_28_aarch64 index da7ab4d3fd1..5ff4d98e51a 100644 --- a/.ci/docker/manywheel/Dockerfile_2_28_aarch64 +++ b/.ci/docker/manywheel/Dockerfile_2_28_aarch64 @@ -62,6 +62,13 @@ ARG OPENBLAS_VERSION ADD ./common/install_openblas.sh install_openblas.sh RUN bash ./install_openblas.sh && rm install_openblas.sh +# Install Arm Compute Library +FROM base as arm_compute +# use python3.9 to install scons +RUN python3.9 -m pip install scons==4.7.0 +RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin +COPY ./common/install_acl.sh install_acl.sh +RUN bash ./install_acl.sh && rm install_acl.sh FROM base as final # remove unnecessary python versions @@ -70,4 +77,5 @@ RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/ -ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH +COPY --from=arm_compute /acl /acl +ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:/acl/build/:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/.ci/docker/manywheel/Dockerfile_cuda_aarch64 b/.ci/docker/manywheel/Dockerfile_cuda_aarch64 index 36970605573..e692d378c02 100644 --- a/.ci/docker/manywheel/Dockerfile_cuda_aarch64 +++ b/.ci/docker/manywheel/Dockerfile_cuda_aarch64 @@ -86,6 +86,15 @@ FROM base as nvpl ADD ./common/install_nvpl.sh install_nvpl.sh RUN bash ./install_nvpl.sh && rm install_nvpl.sh +# Install Arm Compute Library +FROM base as arm_compute +# use python3.9 to install scons +RUN python3.9 -m pip install scons==4.7.0 +RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin +COPY ./common/install_acl.sh install_acl.sh +RUN bash ./install_acl.sh && rm install_acl.sh +FROM base as final + FROM final as cuda_final ARG BASE_CUDA_VERSION RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION} @@ -93,5 +102,7 @@ COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BAS COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION} COPY --from=nvpl /opt/nvpl/lib/ /usr/local/lib/ COPY --from=nvpl /opt/nvpl/include/ /usr/local/include/ +COPY --from=arm_compute /acl /acl RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda ENV PATH=/usr/local/cuda/bin:$PATH +ENV LD_LIBRARY_PATH=/acl/build/:$LD_LIBRARY_PATH diff --git a/.ci/docker/manywheel/build.sh b/.ci/docker/manywheel/build.sh index 0f15ef7b3ad..ead6755e929 100755 --- a/.ci/docker/manywheel/build.sh +++ b/.ci/docker/manywheel/build.sh @@ -28,6 +28,7 @@ fi MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-} DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-} OPENBLAS_VERSION=${OPENBLAS_VERSION:-} +ACL_VERSION=${ACL_VERSION:-} case ${image} in manylinux2_28-builder:cpu) @@ -41,7 +42,6 @@ case ${image} in GPU_IMAGE=arm64v8/almalinux:8 DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13 --build-arg NINJA_VERSION=1.12.1" MANY_LINUX_VERSION="2_28_aarch64" - OPENBLAS_VERSION="v0.3.30" ;; manylinuxs390x-builder:cpu-s390x) TARGET=final @@ -119,7 +119,8 @@ tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]') DOCKER_BUILDKIT=1 docker build \ ${DOCKER_GPU_BUILD_ARG} \ --build-arg "GPU_IMAGE=${GPU_IMAGE}" \ - --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION}" \ + --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION:-}" \ + --build-arg "ACL_VERSION=${ACL_VERSION:-}" \ --target "${TARGET}" \ -t "${tmp_tag}" \ $@ \ diff --git a/.ci/pytorch/build.sh b/.ci/pytorch/build.sh index c08d9fce903..04882acd0f8 100755 --- a/.ci/pytorch/build.sh +++ b/.ci/pytorch/build.sh @@ -89,7 +89,7 @@ fi if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then export USE_MKLDNN=1 export USE_MKLDNN_ACL=1 - export ACL_ROOT_DIR=/ComputeLibrary + export ACL_ROOT_DIR=/acl fi if [[ "$BUILD_ENVIRONMENT" == *riscv64* ]]; then