ROCm 3.5.1 image (#40385)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/40385

Differential Revision: D22421426

Pulled By: ezyang

fbshipit-source-id: 1a131cdb1a0d5ad7ccd55dc1db17cae982cc286b
This commit is contained in:
Jeff Daily 2020-07-07 15:32:11 -07:00 committed by Facebook GitHub Bot
parent 5e03a1e926
commit 38b465db27
11 changed files with 128 additions and 35 deletions

View File

@ -11,6 +11,7 @@ IMAGE_NAMES = [
"pytorch-linux-bionic-py3.6-clang9",
"pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9",
"pytorch-linux-bionic-py3.8-gcc9",
"pytorch-linux-bionic-rocm3.5.1-py3.6",
"pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7",
"pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7",
"pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
@ -27,6 +28,7 @@ IMAGE_NAMES = [
"pytorch-linux-xenial-py3.6-gcc7",
"pytorch-linux-xenial-pynightly",
"pytorch-linux-xenial-rocm3.3-py3.6",
"pytorch-linux-xenial-rocm3.5.1-py3.6",
]

View File

@ -8085,6 +8085,9 @@ workflows:
- docker_build_job:
name: "pytorch-linux-bionic-py3.8-gcc9"
image_name: "pytorch-linux-bionic-py3.8-gcc9"
- docker_build_job:
name: "pytorch-linux-bionic-rocm3.5.1-py3.6"
image_name: "pytorch-linux-bionic-rocm3.5.1-py3.6"
- docker_build_job:
name: "pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7"
image_name: "pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7"
@ -8133,6 +8136,9 @@ workflows:
- docker_build_job:
name: "pytorch-linux-xenial-rocm3.3-py3.6"
image_name: "pytorch-linux-xenial-rocm3.3-py3.6"
- docker_build_job:
name: "pytorch-linux-xenial-rocm3.5.1-py3.6"
image_name: "pytorch-linux-xenial-rocm3.5.1-py3.6"
ecr_gc:
triggers:
- schedule:
@ -8146,7 +8152,7 @@ workflows:
- ecr_gc_job:
name: ecr_gc_job_for_pytorch
project: pytorch
tags_to_keep: "271,262,256,278,282,291,300,323,327,347,389,401,402,403,405,a8006f9a-272d-4478-b137-d121c6f05c83,6e7b11da-a919-49e5-b2ba-da66e3d4bb0a,f990c76a-a798-42bb-852f-5be5006f8026,e43973a9-9d5a-4138-9181-a08a0fc55e2f,8fcf46ef-4a34-480b-a8ee-b0a30a4d3e59,9a3986fa-7ce7-4a36-a001-3c9bef9892e2,1bc00f11-e0f3-4e5c-859f-15937dd938cd,209062ef-ab58-422a-b295-36c4eed6e906,be76e8fd-44e2-484d-b090-07e0cc3a56f0,fff7795428560442086f7b2bb6004b65245dc11a"
tags_to_keep: "271,262,256,278,282,291,300,323,327,347,389,401,402,403,405,a8006f9a-272d-4478-b137-d121c6f05c83,6e7b11da-a919-49e5-b2ba-da66e3d4bb0a,f990c76a-a798-42bb-852f-5be5006f8026,e43973a9-9d5a-4138-9181-a08a0fc55e2f,8fcf46ef-4a34-480b-a8ee-b0a30a4d3e59,9a3986fa-7ce7-4a36-a001-3c9bef9892e2,1bc00f11-e0f3-4e5c-859f-15937dd938cd,209062ef-ab58-422a-b295-36c4eed6e906,be76e8fd-44e2-484d-b090-07e0cc3a56f0,fff7795428560442086f7b2bb6004b65245dc11a,ab1632df-fa59-40e6-8c23-98e004f61148"
requires:
- docker_for_ecr_gc_build_job
- ecr_gc_job:

View File

@ -231,6 +231,22 @@ case "$image" in
VISION=yes
ROCM_VERSION=3.3
;;
pytorch-linux-xenial-rocm3.5.1-py3.6)
ANACONDA_PYTHON_VERSION=3.6
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=3.5.1
# newer cmake version required
CMAKE_VERSION=3.6.3
;;
pytorch-linux-bionic-rocm3.5.1-py3.6)
ANACONDA_PYTHON_VERSION=3.6
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=3.5.1
;;
esac
# Set Jenkins UID and GID if running Jenkins

View File

@ -20,8 +20,12 @@ write_sccache_stub cc
write_sccache_stub c++
write_sccache_stub gcc
write_sccache_stub g++
write_sccache_stub clang
write_sccache_stub clang++
# NOTE: See specific ROCM_VERSION case below.
if [ "x$ROCM_VERSION" = x ]; then
write_sccache_stub clang
write_sccache_stub clang++
fi
if [ -n "$CUDA_VERSION" ]; then
# TODO: This is a workaround for the fact that PyTorch's FindCUDA
@ -33,3 +37,47 @@ if [ -n "$CUDA_VERSION" ]; then
printf "#!/bin/sh\nexec sccache $(which nvcc) \"\$@\"" > /opt/cache/lib/nvcc
chmod a+x /opt/cache/lib/nvcc
fi
if [ -n "$ROCM_VERSION" ]; then
# ROCm compiler is hcc or clang. However, it is commonly invoked via hipcc wrapper.
# hipcc will call either hcc or clang using an absolute path starting with /opt/rocm,
# causing the /opt/cache/bin to be skipped. We must create the sccache wrappers
# directly under /opt/rocm while also preserving the original compiler names.
# Note symlinks will chain as follows: [hcc or clang++] -> clang -> clang-??
# Final link in symlink chain must point back to original directory.
# Original compiler is moved one directory deeper. Wrapper replaces it.
# Replace a ROCm compiler binary with an sccache wrapper script.
#
# Arguments:
#   $1 - absolute path of the compiler to wrap (e.g. /opt/rocm/llvm/bin/clang).
#
# The compiler at $1 is moved to "<dir of $1>/original/<name of $1>" (the
# "original" directory must already exist) and an executable /bin/sh wrapper
# that execs sccache with the moved compiler is written in its place.
function write_sccache_stub_rocm() {
  OLDCOMP=$1
  COMPNAME=$(basename "$OLDCOMP")
  TOPDIR=$(dirname "$OLDCOMP")
  WRAPPED="$TOPDIR/original/$COMPNAME"
  mv "$OLDCOMP" "$WRAPPED"
  # Forward arguments with "$@" (not unquoted $*) so that arguments containing
  # whitespace or glob characters reach the real compiler intact. The wrapped
  # path is passed through %s so it is never interpreted as a printf format.
  printf '#!/bin/sh\nexec sccache "%s" "$@"\n' "$WRAPPED" > "$OLDCOMP"
  chmod a+x "$OLDCOMP"
}
if [[ -e "/opt/rocm/hcc/bin/hcc" ]]; then
# ROCm 3.3 or earlier.
mkdir /opt/rocm/hcc/bin/original
write_sccache_stub_rocm /opt/rocm/hcc/bin/hcc
write_sccache_stub_rocm /opt/rocm/hcc/bin/clang
write_sccache_stub_rocm /opt/rocm/hcc/bin/clang++
# Fix last link in symlink chain, clang points to versioned clang in prior dir
pushd /opt/rocm/hcc/bin/original
ln -s ../$(readlink clang)
popd
elif [[ -e "/opt/rocm/llvm/bin/clang" ]]; then
# ROCm 3.5 and beyond.
mkdir /opt/rocm/llvm/bin/original
write_sccache_stub_rocm /opt/rocm/llvm/bin/clang
write_sccache_stub_rocm /opt/rocm/llvm/bin/clang++
# Fix last link in symlink chain, clang points to versioned clang in prior dir
pushd /opt/rocm/llvm/bin/original
ln -s ../$(readlink clang)
popd
else
echo "Cannot find ROCm compiler."
exit 1
fi
fi

View File

@ -8,6 +8,7 @@ install_ubuntu() {
# gpg-agent is not available by default on 18.04
apt-get install -y --no-install-recommends gpg-agent
fi
apt-get install -y kmod
apt-get install -y wget
apt-get install -y libopenblas-dev
@ -35,6 +36,15 @@ install_ubuntu() {
rocprofiler-dev \
roctracer-dev
# precompiled miopen kernels added in ROCm 3.5; search for all unversioned packages
# if the search pipeline fails (e.g. nothing matches) it would abort this script; `|| true` tolerates that case
MIOPENKERNELS=$(apt-cache search --names-only miopenkernels | awk '{print $1}' | grep -F -v . || true)
if [[ "x${MIOPENKERNELS}" = x ]]; then
echo "miopenkernels package not available"
else
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENKERNELS}
fi
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
@ -43,6 +53,7 @@ install_ubuntu() {
install_centos() {
yum update -y
yum install -y kmod
yum install -y wget
yum install -y openblas-devel

View File

@ -57,6 +57,7 @@ ENV PATH /opt/rocm/bin:$PATH
ENV PATH /opt/rocm/hcc/bin:$PATH
ENV PATH /opt/rocm/hip/bin:$PATH
ENV PATH /opt/rocm/opencl/bin:$PATH
ENV PATH /opt/rocm/llvm/bin:$PATH
ENV HIP_PLATFORM hcc
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

View File

@ -11,7 +11,7 @@
- ecr_gc_job:
name: ecr_gc_job_for_pytorch
project: pytorch
tags_to_keep: "271,262,256,278,282,291,300,323,327,347,389,401,402,403,405,a8006f9a-272d-4478-b137-d121c6f05c83,6e7b11da-a919-49e5-b2ba-da66e3d4bb0a,f990c76a-a798-42bb-852f-5be5006f8026,e43973a9-9d5a-4138-9181-a08a0fc55e2f,8fcf46ef-4a34-480b-a8ee-b0a30a4d3e59,9a3986fa-7ce7-4a36-a001-3c9bef9892e2,1bc00f11-e0f3-4e5c-859f-15937dd938cd,209062ef-ab58-422a-b295-36c4eed6e906,be76e8fd-44e2-484d-b090-07e0cc3a56f0,fff7795428560442086f7b2bb6004b65245dc11a"
tags_to_keep: "271,262,256,278,282,291,300,323,327,347,389,401,402,403,405,a8006f9a-272d-4478-b137-d121c6f05c83,6e7b11da-a919-49e5-b2ba-da66e3d4bb0a,f990c76a-a798-42bb-852f-5be5006f8026,e43973a9-9d5a-4138-9181-a08a0fc55e2f,8fcf46ef-4a34-480b-a8ee-b0a30a4d3e59,9a3986fa-7ce7-4a36-a001-3c9bef9892e2,1bc00f11-e0f3-4e5c-859f-15937dd938cd,209062ef-ab58-422a-b295-36c4eed6e906,be76e8fd-44e2-484d-b090-07e0cc3a56f0,fff7795428560442086f7b2bb6004b65245dc11a,ab1632df-fa59-40e6-8c23-98e004f61148"
requires:
- docker_for_ecr_gc_build_job
- ecr_gc_job:

View File

@ -260,9 +260,4 @@ fi
# Install ONNX into a local directory
pip install --user -b /tmp/pip_install_onnx "file://${ROOT_DIR}/third_party/onnx#egg=onnx"
if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
# runtime compilation of MIOpen kernels manages to crash sccache - hence undo the wrapping
bash tools/amd_build/unwrap_clang.sh
fi
report_compile_cache_stats

View File

@ -159,9 +159,6 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
python tools/amd_build/build_amd.py
python setup.py install --user
# runtime compilation of MIOpen kernels manages to crash sccache - hence undo the wrapping
bash tools/amd_build/unwrap_clang.sh
exit 0
fi

View File

@ -14,20 +14,46 @@ if [ -z "${SCCACHE}" ]; then
exit 1
fi
# If rocm build, if hcc file exists then use hcc else clang(hip-clang) for sccache
if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
if [[ -e "/opt/rocm/hcc/bin/hcc" ]]; then
HIPCOM_DEST_PATH="$(readlink -f /opt/rocm/hcc/bin/hcc )"
else
HIPCOM_DEST_PATH="$(readlink -f /opt/rocm/llvm/bin/clang )"
fi
HIPCOM_REAL_BINARY="$(dirname $HIPCOM_DEST_PATH)/hipcompiler_original"
mv "$HIPCOM_DEST_PATH" "$HIPCOM_REAL_BINARY"
# ROCm compiler is hcc or clang. However, it is commonly invoked via hipcc wrapper.
# hipcc will call either hcc or clang using an absolute path starting with /opt/rocm,
# causing the /opt/cache/bin to be skipped. We must create the sccache wrappers
# directly under /opt/rocm while also preserving the original compiler names.
# Note symlinks will chain as follows: [hcc or clang++] -> clang -> clang-??
# Final link in symlink chain must point back to original directory.
# Create sccache wrapper.
(
echo "#!/bin/sh"
echo "exec $SCCACHE $HIPCOM_REAL_BINARY \"\$@\""
) > "$HIPCOM_DEST_PATH"
chmod +x "$HIPCOM_DEST_PATH"
# Original compiler is moved one directory deeper. Wrapper replaces it.
# Replace a ROCm compiler binary with an sccache wrapper script.
#
# Arguments:
#   $1 - absolute path of the compiler to wrap (e.g. /opt/rocm/llvm/bin/clang).
#
# The compiler at $1 is moved to "<dir of $1>/original/<name of $1>" (the
# "original" directory must already exist) and an executable /bin/sh wrapper
# that execs sccache with the moved compiler is written in its place.
function write_sccache_stub_rocm() {
  OLDCOMP=$1
  COMPNAME=$(basename "$OLDCOMP")
  TOPDIR=$(dirname "$OLDCOMP")
  WRAPPED="$TOPDIR/original/$COMPNAME"
  mv "$OLDCOMP" "$WRAPPED"
  # Forward arguments with "$@" (not unquoted $*) so that arguments containing
  # whitespace or glob characters reach the real compiler intact. The wrapped
  # path is passed through %s so it is never interpreted as a printf format.
  printf '#!/bin/sh\nexec sccache "%s" "$@"\n' "$WRAPPED" > "$OLDCOMP"
  chmod a+x "$OLDCOMP"
}
if [[ -e "/opt/rocm/hcc/bin/hcc" ]]; then
# ROCm 3.3 or earlier.
mkdir /opt/rocm/hcc/bin/original
write_sccache_stub_rocm /opt/rocm/hcc/bin/hcc
write_sccache_stub_rocm /opt/rocm/hcc/bin/clang
write_sccache_stub_rocm /opt/rocm/hcc/bin/clang++
# Fix last link in symlink chain, clang points to versioned clang in prior dir
pushd /opt/rocm/hcc/bin/original
ln -s ../$(readlink clang)
popd
elif [[ -e "/opt/rocm/llvm/bin/clang" ]]; then
# ROCm 3.5 and beyond.
mkdir /opt/rocm/llvm/bin/original
write_sccache_stub_rocm /opt/rocm/llvm/bin/clang
write_sccache_stub_rocm /opt/rocm/llvm/bin/clang++
# Fix last link in symlink chain, clang points to versioned clang in prior dir
pushd /opt/rocm/llvm/bin/original
ln -s ../$(readlink clang)
popd
else
echo "Cannot find ROCm compiler."
exit 1
fi
fi

View File

@ -1,9 +0,0 @@
#!/bin/bash
shopt -s extglob
ORIG_COMP=/opt/rocm/hcc/bin/clang-*_original
# note that the wrapping always names the compiler "clang-7.0_original"
if [ -e $ORIG_COMP ]; then
WRAPPED=/opt/rocm/hcc/bin/clang-?([0-9])?([0-9])[0-9]
sudo mv $ORIG_COMP $WRAPPED
fi