Update OpenBLAS commit (#151547)

Motivation: Update OpenBLAS and change the build script to enable SBGEMM kernels. Update PyTorch `jammy` builds for aarch64 to use `install_openblas.sh` instead of `conda_install`. Link to the full [TorchInductor Performance Dashboard AArch64](https://hud.pytorch.org/benchmark/compilers?dashboard=torchinductor&startTime=Wed%2C%2016%20Apr%202025%2009%3A35%3A26%20GMT&stopTime=Thu%2C%2017%20Apr%202025%2009%3A35%3A26%20GMT&granularity=hour&mode=inference&dtype=bfloat16&deviceName=cpu%20(aarch64)&lBranch=adi/update_openblas&lCommit=90701ab81bf61fd864d31e0aa7e88d97a1a8676c&rBranch=main&rCommit=40ce4fb24a536d175348df876f61956d4945778e) 1. This shows a promising speedup across most of the HF models in the benchmark, specifically giving a significant boost to SDPA layers. 2. Overall torch-bench pass-rate increased `[87%, 65/75 → 96%, 72/75]`. <img width="676" alt="Screenshot 2025-04-17 at 10 32 10" src="https://github.com/user-attachments/assets/a92dce0c-ecee-4466-8175-065df664dd71" /> Pull Request resolved: https://github.com/pytorch/pytorch/pull/151547 Approved by: https://github.com/malfet
2025-12-06 12:20:52 +01:00 · 2025-04-27 15:55:42 +00:00 · 2025-04-27 15:55:42 +00:00 · c4b0854750
commit c4b0854750
parent bb680b5a87
4 changed files with 16 additions and 7 deletions
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@ -371,6 +371,7 @@ case "$image" in
    ACL=yes
    VISION=yes
    CONDA_CMAKE=yes
+    OPENBLAS=yes
    # snadampal: skipping llvm src build install because the current version
    # from pytorch/llvm:9.0.1 is x86 specific
    SKIP_LLVM_SRC_BUILD_INSTALL=yes
@ -381,6 +382,7 @@ case "$image" in
    ACL=yes
    VISION=yes
    CONDA_CMAKE=yes
+    OPENBLAS=yes
    # snadampal: skipping llvm src build install because the current version
    # from pytorch/llvm:9.0.1 is x86 specific
    SKIP_LLVM_SRC_BUILD_INSTALL=yes
@ -482,6 +484,7 @@ docker build \
       --build-arg "HALIDE=${HALIDE}" \
       --build-arg "XPU_VERSION=${XPU_VERSION}" \
       --build-arg "ACL=${ACL:-}" \
+       --build-arg "OPENBLAS=${OPENBLAS:-}" \
       --build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
       --build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
       -f $(dirname ${DOCKERFILE})/Dockerfile \
--- a/.ci/docker/common/install_conda.sh
+++ b/.ci/docker/common/install_conda.sh
@ -65,9 +65,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  conda_install libstdcxx-ng=12.3.0 --update-deps -c conda-forge

  # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
-  if [[ $(uname -m) == "aarch64" ]]; then
-    conda_install "openblas==0.3.29=*openmp*"
-  else
+  if [[ $(uname -m) != "aarch64" ]]; then
    conda_install "mkl=2021.4.0 mkl-include=2021.4.0"
  fi

--- a/.ci/docker/common/install_openblas.sh
+++ b/.ci/docker/common/install_openblas.sh
@ -4,8 +4,11 @@
 set -ex

 cd /
-git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.29 --depth 1 --shallow-submodules
-
+OPENBLAS_HASH="b30dc9701f8e971720a02e24068acea274fd9cee" #Use SVE kernel for S/DGEMVT for SVE machines
+OPENBLAS_CHECKOUT_DIR="OpenBLAS"
+git clone https://github.com/OpenMathLib/OpenBLAS.git -b develop --shallow-submodules
+git -C $OPENBLAS_CHECKOUT_DIR fetch --depth 1 origin $OPENBLAS_HASH
+git -C $OPENBLAS_CHECKOUT_DIR checkout $OPENBLAS_HASH

 OPENBLAS_BUILD_FLAGS="
 NUM_THREADS=128
@ -14,9 +17,8 @@ NO_SHARED=0
 DYNAMIC_ARCH=1
 TARGET=ARMV8
 CFLAGS=-O3
+BUILD_BFLOAT16=1
 "

-OPENBLAS_CHECKOUT_DIR="OpenBLAS"
-
 make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR}
 make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR}
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@ -154,6 +154,12 @@ RUN if [ -n "${ACL}" ]; then bash ./install_acl.sh; fi
 RUN rm install_acl.sh
 ENV INSTALLED_ACL ${ACL}

+ARG OPENBLAS
+COPY ./common/install_openblas.sh install_openblas.sh
+RUN if [ -n "${OPENBLAS}" ]; then bash ./install_openblas.sh; fi
+RUN rm install_openblas.sh
+ENV INSTALLED_OPENBLAS ${OPENBLAS}
+
 # Install ccache/sccache (do this last, so we get priority in PATH)
 ARG SKIP_SCCACHE_INSTALL
 COPY ./common/install_cache.sh install_cache.sh