Create a new Ubuntu 22.04 (jammy) build for platform010 (#77591)

###  Motivation

In order to match the internal platform010 builds, we are creating a new config to run on PRs that uses compiler and package versions greater than or equal to those used in platform010. The versions used in the new build are:

- Ubuntu 22.04 (Jammy Jellyfish)
- Clang-12
- Python 3.8
- CUDA 11.6

### Summary of Changes

- As `nvidia/docker` images only support CUDA 11.7 with Ubuntu 22.04, we are starting with base Ubuntu 22.04 docker images, and then installing CUDA 11.6

- Fetching `install_cuda.sh` from the `pytorch/builder` repo using `wget` in order to install CUDA

- In Ubuntu 22.04, `libssl-dev` has been upgraded to libssl3. Instead, we are using the `include` and `lib` folders from the OpenSSL 1.1.1 install

- `Clang-12` requires `libomp-12-dev` to work with `OpenMP` which is added to the `install_base.sh` file.

- Minor fixes to handle compilation errors generated when using `clang-12`.
      - In `pow_test.cpp` adding a `static_cast` to input of `sqrt` method
      - In `vec512_qint.h`, explicitly defining `copy-assignment` operator as its implicit definition is deprecated due to
         user-declared `copy-constructor` in C++11

Pull Request resolved: https://github.com/pytorch/pytorch/pull/77591
Approved by: https://github.com/seemethere, https://github.com/atalman
This commit is contained in:
Nirav Mehta 2022-06-17 21:02:53 +00:00 committed by PyTorch MergeBot
parent f5d7e5a192
commit 71d82917f4
10 changed files with 88 additions and 12 deletions

View File

@ -54,6 +54,8 @@ elif [[ "$image" == *-bionic* ]]; then
UBUNTU_VERSION=18.04 UBUNTU_VERSION=18.04
elif [[ "$image" == *-focal* ]]; then elif [[ "$image" == *-focal* ]]; then
UBUNTU_VERSION=20.04 UBUNTU_VERSION=20.04
elif [[ "$image" == *-jammy* ]]; then
UBUNTU_VERSION=22.04
elif [[ "$image" == *ubuntu* ]]; then elif [[ "$image" == *ubuntu* ]]; then
extract_version_from_image_name ubuntu UBUNTU_VERSION extract_version_from_image_name ubuntu UBUNTU_VERSION
elif [[ "$image" == *centos* ]]; then elif [[ "$image" == *centos* ]]; then
@ -70,7 +72,8 @@ else
fi fi
DOCKERFILE="${OS}/Dockerfile" DOCKERFILE="${OS}/Dockerfile"
if [[ "$image" == *cuda* ]]; then # When using ubuntu - 22.04, start from Ubuntu docker image, instead of nvidia/cuda docker image.
if [[ "$image" == *cuda* && "$UBUNTU_VERSION" != "22.04" ]]; then
DOCKERFILE="${OS}-cuda/Dockerfile" DOCKERFILE="${OS}-cuda/Dockerfile"
elif [[ "$image" == *rocm* ]]; then elif [[ "$image" == *rocm* ]]; then
DOCKERFILE="${OS}-rocm/Dockerfile" DOCKERFILE="${OS}-rocm/Dockerfile"
@ -249,6 +252,15 @@ case "$image" in
VISION=yes VISION=yes
KATEX=yes KATEX=yes
;; ;;
pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12)
ANACONDA_PYTHON_VERSION=3.8
CUDA_VERSION=11.6
CUDNN_VERSION=8
CLANG_VERSION=12
PROTOBUF=yes
DB=yes
VISION=yes
;;
*) *)
# Catch-all for builds that are not hardcoded. # Catch-all for builds that are not hardcoded.
PROTOBUF=yes PROTOBUF=yes

View File

@ -15,11 +15,20 @@ install_ubuntu() {
elif [[ "$UBUNTU_VERSION" == "20.04"* ]]; then elif [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
cmake3="cmake=3.16*" cmake3="cmake=3.16*"
maybe_libiomp_dev="" maybe_libiomp_dev=""
elif [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
cmake3="cmake=3.22*"
maybe_libiomp_dev=""
else else
cmake3="cmake=3.5*" cmake3="cmake=3.5*"
maybe_libiomp_dev="libiomp-dev" maybe_libiomp_dev="libiomp-dev"
fi fi
if [[ "$CLANG_VERSION" == 12 ]]; then
libomp_dev="libomp-12-dev"
else
libomp_dev=""
fi
# TODO: Remove this once nvidia package repos are back online # TODO: Remove this once nvidia package repos are back online
# Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968 # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
# shellcheck disable=SC2046 # shellcheck disable=SC2046
@ -51,6 +60,7 @@ install_ubuntu() {
libjpeg-dev \ libjpeg-dev \
libasound2-dev \ libasound2-dev \
libsndfile-dev \ libsndfile-dev \
${libomp_dev} \
software-properties-common \ software-properties-common \
wget \ wget \
sudo \ sudo \
@ -60,6 +70,20 @@ install_ubuntu() {
# see: https://github.com/pytorch/pytorch/issues/65931 # see: https://github.com/pytorch/pytorch/issues/65931
apt-get install -y libgnutls30 apt-get install -y libgnutls30
# cuda-toolkit does not work with gcc-11.2.0, which is the default in Ubuntu 22.04
# see: https://github.com/NVlabs/instant-ngp/issues/119
if [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
apt-get install -y g++-10
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 30
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 30
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-10 30
# https://www.spinics.net/lists/libreoffice/msg07549.html
sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/11
wget https://github.com/gcc-mirror/gcc/commit/2b2d97fc545635a0f6aa9c9ee3b017394bc494bf.patch -O noexecpt.patch
sudo patch /usr/include/c++/10/bits/range_access.h noexecpt.patch
fi
# Cleanup package manager # Cleanup package manager
apt-get autoclean && apt-get clean apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

View File

@ -5,7 +5,9 @@ set -ex
install_ubuntu() { install_ubuntu() {
echo "Preparing to build sccache from source" echo "Preparing to build sccache from source"
apt-get update apt-get update
apt-get install -y cargo pkg-config libssl-dev # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.
# Instead use the lib and headers from OpenSSL 1.1 installed by `install_openssl.sh`
apt-get install -y cargo
echo "Checking out sccache repo" echo "Checking out sccache repo"
git clone https://github.com/pytorch/sccache git clone https://github.com/pytorch/sccache
cd sccache cd sccache

View File

@ -10,5 +10,7 @@ cd "${OPENSSL}"
./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)' ./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
# NOTE: openssl install errors out when built with the -j option # NOTE: openssl install errors out when built with the -j option
make -j6; make install_sw make -j6; make install_sw
# Link the ssl libraries to the /usr/lib folder.
sudo ln -s /opt/openssl/lib/lib* /usr/lib
cd .. cd ..
rm -rf "${OPENSSL}" rm -rf "${OPENSSL}"

View File

@ -65,6 +65,7 @@ ENV INSTALLED_VISION ${VISION}
ADD ./common/install_openssl.sh install_openssl.sh ADD ./common/install_openssl.sh install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl ENV OPENSSL_ROOT_DIR /opt/openssl
RUN bash ./install_openssl.sh RUN bash ./install_openssl.sh
ENV OPENSSL_DIR /opt/openssl
# (optional) Install non-default CMake version # (optional) Install non-default CMake version
ARG CMAKE_VERSION ARG CMAKE_VERSION

View File

@ -6,6 +6,8 @@ ARG UBUNTU_VERSION
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
ARG CLANG_VERSION
# Install common dependencies (so that this step can be cached separately) # Install common dependencies (so that this step can be cached separately)
ARG EC2 ARG EC2
ADD ./common/install_base.sh install_base.sh ADD ./common/install_base.sh install_base.sh
@ -13,7 +15,6 @@ RUN bash ./install_base.sh && rm install_base.sh
# Install clang # Install clang
ARG LLVMDEV ARG LLVMDEV
ARG CLANG_VERSION
ADD ./common/install_clang.sh install_clang.sh ADD ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh RUN bash ./install_clang.sh && rm install_clang.sh
@ -50,6 +51,13 @@ RUN bash ./install_gcc.sh && rm install_gcc.sh
ADD ./common/install_lcov.sh install_lcov.sh ADD ./common/install_lcov.sh install_lcov.sh
RUN bash ./install_lcov.sh && rm install_lcov.sh RUN bash ./install_lcov.sh && rm install_lcov.sh
# Install cuda and cudnn
ARG CUDA_VERSION
RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/common/install_cuda.sh -O install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
# (optional) Install protobuf for ONNX # (optional) Install protobuf for ONNX
ARG PROTOBUF ARG PROTOBUF
ADD ./common/install_protobuf.sh install_protobuf.sh ADD ./common/install_protobuf.sh install_protobuf.sh
@ -111,6 +119,8 @@ RUN rm install_ninja.sh
ADD ./common/install_openssl.sh install_openssl.sh ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh RUN bash ./install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl ENV OPENSSL_ROOT_DIR /opt/openssl
ENV OPENSSL_DIR /opt/openssl
RUN rm install_openssl.sh
# Install ccache/sccache (do this last, so we get priority in PATH) # Install ccache/sccache (do this last, so we get priority in PATH)
ADD ./common/install_cache.sh install_cache.sh ADD ./common/install_cache.sh install_cache.sh
@ -122,12 +132,19 @@ ADD ./common/install_jni.sh install_jni.sh
ADD ./java/jni.h jni.h ADD ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh RUN bash ./install_jni.sh && rm install_jni.sh
# Install Open MPI for CUDA
ADD ./common/install_openmpi.sh install_openmpi.sh
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
RUN rm install_openmpi.sh
# Include BUILD_ENVIRONMENT environment variable in image # Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT} ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
# Install LLVM dev version (Defined in the pytorch/builder github repository) # AWS specific CUDA build guidance
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm ENV TORCH_CUDA_ARCH_LIST Maxwell
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
ENV CUDA_PATH /usr/local/cuda
USER jenkins USER jenkins
CMD ["bash"] CMD ["bash"]

View File

@ -30,6 +30,7 @@ jobs:
- docker-image-name: pytorch-linux-bionic-py3.7-clang9 - docker-image-name: pytorch-linux-bionic-py3.7-clang9
- docker-image-name: pytorch-linux-bionic-rocm5.0-py3.7 - docker-image-name: pytorch-linux-bionic-rocm5.0-py3.7
- docker-image-name: pytorch-linux-bionic-rocm5.1-py3.7 - docker-image-name: pytorch-linux-bionic-rocm5.1-py3.7
- docker-image-name: pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12
- docker-image-name: pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7 - docker-image-name: pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7
- docker-image-name: pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7 - docker-image-name: pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7
- docker-image-name: pytorch-linux-xenial-py3-clang5-android-ndk-r19c - docker-image-name: pytorch-linux-xenial-py3-clang5-android-ndk-r19c

View File

@ -193,6 +193,13 @@ jobs:
docker-image-name: pytorch-linux-xenial-py3-clang5-asan docker-image-name: pytorch-linux-xenial-py3-clang5-asan
build-generates-artifacts: false build-generates-artifacts: false
linux-jammy-cuda-11_6-cudnn8-py3_8-clang12-build:
name: linux-jammy-cuda11.6-cudnn8-py3.8-clang12
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-jammy-cuda11.6-cudnn8-py3.8-clang12
docker-image-name: pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12
linux-xenial-py3-clang5-mobile-custom-build-static: linux-xenial-py3-clang5-mobile-custom-build-static:
name: linux-xenial-py3-clang5-mobile-custom-build-static name: linux-xenial-py3-clang5-mobile-custom-build-static
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml

View File

@ -430,6 +430,11 @@ struct Vectorized<c10::qint8> : public Vectorizedqi {
// constructor for moving the enum // constructor for moving the enum
Vectorized(const Vectorized<c10::qint8>& other) : Vectorizedqi(other.vals) { } Vectorized(const Vectorized<c10::qint8>& other) : Vectorizedqi(other.vals) { }
// This is added to avoid error: definition of implicit copy assignment operator
// for 'Vectorized<c10::qint8>' is deprecated because it has a user-declared
// copy constructor [-Werror,-Wdeprecated-copy]
Vectorized& operator=(const Vectorized<c10::qint8>&) = default;
void store(void* ptr, int count = size()) const { void store(void* ptr, int count = size()) const {
if (count != size()) { if (count != size()) {
memcpy(ptr, &vals, count * sizeof(value_type)); memcpy(ptr, &vals, count * sizeof(value_type));
@ -589,6 +594,11 @@ struct Vectorized<c10::quint8> : public Vectorizedqi {
Vectorized(const Vectorized<c10::quint8>& other) : Vectorizedqi(other.vals) { } Vectorized(const Vectorized<c10::quint8>& other) : Vectorizedqi(other.vals) { }
// This is added to avoid error: definition of implicit copy assignment operator
// for 'Vectorized<c10::quint8>' is deprecated because it has a user-declared
// copy constructor [-Werror,-Wdeprecated-copy]
Vectorized& operator=(const Vectorized<c10::quint8>&) = default;
void store(void* ptr, int count = size()) const { void store(void* ptr, int count = size()) const {
if (count != size()) { if (count != size()) {
memcpy(ptr, &vals, count * sizeof(value_type)); memcpy(ptr, &vals, count * sizeof(value_type));

View File

@ -35,16 +35,16 @@ const std::vector<int> ints {
int_min, int_min,
int_min + 1, int_min + 1,
int_min + 2, int_min + 2,
static_cast<int>(-sqrt(int_max)), static_cast<int>(-sqrt(static_cast<double>(int_max))),
-3, -2, -1, 0, 1, 2, 3, -3, -2, -1, 0, 1, 2, 3,
static_cast<int>(sqrt(int_max)), static_cast<int>(sqrt(static_cast<double>(int_max))),
int_max - 2, int_max - 2,
int_max - 1, int_max - 1,
int_max int_max
}; };
const std::vector<int> non_neg_ints { const std::vector<int> non_neg_ints {
0, 1, 2, 3, 0, 1, 2, 3,
static_cast<int>(sqrt(int_max)), static_cast<int>(sqrt(static_cast<double>(int_max))),
int_max - 2, int_max - 2,
int_max - 1, int_max - 1,
int_max int_max
@ -53,16 +53,16 @@ const std::vector<int64_t> longs {
long_min, long_min,
long_min + 1, long_min + 1,
long_min + 2, long_min + 2,
static_cast<int64_t>(-sqrt(long_max)), static_cast<int64_t>(-sqrt(static_cast<double>(long_max))),
-3, -2, -1, 0, 1, 2, 3, -3, -2, -1, 0, 1, 2, 3,
static_cast<int64_t>(sqrt(long_max)), static_cast<int64_t>(sqrt(static_cast<double>(long_max))),
long_max - 2, long_max - 2,
long_max - 1, long_max - 1,
long_max long_max
}; };
const std::vector<int64_t> non_neg_longs { const std::vector<int64_t> non_neg_longs {
0, 1, 2, 3, 0, 1, 2, 3,
static_cast<int64_t>(sqrt(long_max)), static_cast<int64_t>(sqrt(static_cast<double>(long_max))),
long_max - 2, long_max - 2,
long_max - 1, long_max - 1,
long_max long_max
@ -128,7 +128,7 @@ void tensor_pow_scalar(const Vals vals, const Pows pows, const torch::ScalarType
for (const auto pow : pows) { for (const auto pow : pows) {
// NOLINTNEXTLINE(clang-diagnostic-implicit-const-int-float-conversion) // NOLINTNEXTLINE(clang-diagnostic-implicit-const-int-float-conversion)
if ( dtype == kInt && pow > std::numeric_limits<int>::max()) { if ( dtype == kInt && pow > static_cast<float>(std::numeric_limits<int>::max())) {
// value cannot be converted to type int without overflow // value cannot be converted to type int without overflow
// NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto) // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
EXPECT_THROW(tensor.pow(pow), std::runtime_error); EXPECT_THROW(tensor.pow(pow), std::runtime_error);