From 2bd5bfa3cee336b4ae703bda6a2c3dc32f2f11f6 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Fri, 28 Mar 2025 19:28:50 +0000 Subject: [PATCH] [ROCm] use magma-rocm tarball for CI/CD (#149986) Follow-up to #149902. Pull Request resolved: https://github.com/pytorch/pytorch/pull/149986 Approved by: https://github.com/malfet --- .ci/docker/centos-rocm/Dockerfile | 2 +- .ci/docker/common/install_rocm_magma.sh | 68 +++++++++---------------- .ci/docker/libtorch/Dockerfile | 3 +- .ci/docker/libtorch/build.sh | 2 +- .ci/docker/manywheel/Dockerfile | 2 +- .ci/docker/manywheel/Dockerfile_2_28 | 2 +- .ci/docker/ubuntu-rocm/Dockerfile | 2 +- 7 files changed, 30 insertions(+), 51 deletions(-) diff --git a/.ci/docker/centos-rocm/Dockerfile b/.ci/docker/centos-rocm/Dockerfile index af9b0c89e38..044a443086f 100644 --- a/.ci/docker/centos-rocm/Dockerfile +++ b/.ci/docker/centos-rocm/Dockerfile @@ -68,7 +68,7 @@ COPY ./common/install_rocm.sh install_rocm.sh RUN bash ./install_rocm.sh RUN rm install_rocm.sh COPY ./common/install_rocm_magma.sh install_rocm_magma.sh -RUN bash ./install_rocm_magma.sh +RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} RUN rm install_rocm_magma.sh COPY ./common/install_amdsmi.sh install_amdsmi.sh RUN bash ./install_amdsmi.sh diff --git a/.ci/docker/common/install_rocm_magma.sh b/.ci/docker/common/install_rocm_magma.sh index 5ab15a56352..207a3b1d598 100644 --- a/.ci/docker/common/install_rocm_magma.sh +++ b/.ci/docker/common/install_rocm_magma.sh @@ -1,50 +1,28 @@ -#!/bin/bash -# Script used in CI and CD pipeline +#!/usr/bin/env bash +# Script used only in CD pipeline -set -ex +set -eou pipefail -# Magma build scripts need `python` -ln -sf /usr/bin/python3 /usr/bin/python +function do_install() { + rocm_version=$1 + rocm_version_nodot=${1//./} -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -case "$ID" in - almalinux) - yum install -y gcc-gfortran - ;; - *) - echo "No preinstalls to build magma..." - ;; -esac + # Version 2.7.2 + ROCm related updates + MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6 + magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2" -MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION} + rocm_dir="/opt/rocm" + ( + set -x + tmp_dir=$(mktemp -d) + pushd ${tmp_dir} + curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive} + tar -xvf "${magma_archive}" + mkdir -p "${rocm_dir}/magma" + mv include "${rocm_dir}/magma/include" + mv lib "${rocm_dir}/magma/lib" + popd + ) +} -# "install" hipMAGMA into /opt/rocm/magma by copying after build -git clone https://bitbucket.org/icl/magma.git -pushd magma - -# Version 2.7.2 + ROCm related updates -git checkout a1625ff4d9bc362906bd01f805dbbe12612953f6 - -cp make.inc-examples/make.inc.hip-gcc-mkl make.inc -echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc -if [[ -f "${MKLROOT}/lib/libmkl_core.a" ]]; then - echo 'LIB = -Wl,--start-group -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -Wl,--end-group -lpthread -lstdc++ -lm -lgomp -lhipblas -lhipsparse' >> make.inc -fi -echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib -ldl' >> make.inc -echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc -export PATH="${PATH}:/opt/rocm/bin" -if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then - amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'` -else - amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs` -fi -for arch in $amdgpu_targets; do - echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc -done -# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition -sed -i 's/^FOPENMP/#FOPENMP/g' make.inc -make -f make.gen.hipMAGMA -j $(nproc) -LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}" -make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}" -popd -mv magma /opt/rocm +do_install $1 diff --git a/.ci/docker/libtorch/Dockerfile b/.ci/docker/libtorch/Dockerfile index b83071b25aa..f9ae32ad7f8 100644 --- a/.ci/docker/libtorch/Dockerfile +++ b/.ci/docker/libtorch/Dockerfile @@ -72,6 +72,7 @@ RUN bash ./install_magma.sh 12.8 RUN ln -sf /usr/local/cuda-12.8 /usr/local/cuda FROM cpu as rocm +ARG ROCM_VERSION ARG PYTORCH_ROCM_ARCH ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} ENV MKLROOT /opt/intel @@ -90,7 +91,7 @@ RUN apt-get update -y && \ apt-get clean RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh -RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh +RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh FROM ${BASE_TARGET} as final COPY --from=openssl /opt/openssl /opt/openssl diff --git a/.ci/docker/libtorch/build.sh b/.ci/docker/libtorch/build.sh index fd9932f8def..68f278e10dd 100755 --- a/.ci/docker/libtorch/build.sh +++ b/.ci/docker/libtorch/build.sh @@ -40,7 +40,7 @@ case ${GPU_ARCH_TYPE} in DOCKER_TAG=rocm${GPU_ARCH_VERSION} GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" - DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" + DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}" ;; *) echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}" diff --git a/.ci/docker/manywheel/Dockerfile b/.ci/docker/manywheel/Dockerfile index 04298fd0ed0..d7daf989b49 100644 --- a/.ci/docker/manywheel/Dockerfile +++ b/.ci/docker/manywheel/Dockerfile @@ -195,6 +195,6 @@ RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh # cmake3 is needed for the MIOpen build RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3 ADD ./common/install_rocm_magma.sh install_rocm_magma.sh -RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh +RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh ADD ./common/install_miopen.sh install_miopen.sh RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh diff --git a/.ci/docker/manywheel/Dockerfile_2_28 b/.ci/docker/manywheel/Dockerfile_2_28 index e63657f391b..e3ac65f5ca2 100644 --- a/.ci/docker/manywheel/Dockerfile_2_28 +++ b/.ci/docker/manywheel/Dockerfile_2_28 @@ -158,7 +158,7 @@ ADD ./common/install_rocm_drm.sh install_rocm_drm.sh RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh ENV MKLROOT /opt/intel ADD ./common/install_rocm_magma.sh install_rocm_magma.sh -RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh +RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh ADD ./common/install_miopen.sh install_miopen.sh RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh diff --git a/.ci/docker/ubuntu-rocm/Dockerfile b/.ci/docker/ubuntu-rocm/Dockerfile index 7ad78861af2..9cf0bed2420 100644 --- a/.ci/docker/ubuntu-rocm/Dockerfile +++ b/.ci/docker/ubuntu-rocm/Dockerfile @@ -63,7 +63,7 @@ COPY ./common/install_rocm.sh install_rocm.sh RUN bash ./install_rocm.sh RUN rm install_rocm.sh COPY ./common/install_rocm_magma.sh install_rocm_magma.sh -RUN bash ./install_rocm_magma.sh +RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} RUN rm install_rocm_magma.sh ADD ./common/install_miopen.sh install_miopen.sh RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh