mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Create a new Ubuntu 22.04 (jammy) build for platform010 (#77591)
### Motivation
In order to match the internal platform010 builds, we are creating a new config to run on PRs that uses compiler and package versions greater than or equal to those used in platform010. Here are the versions used in the new build:
- Ubuntu 22.04 (Jammy-Jellyfish)
- Clang-12
- Python 3.8
- CUDA 11.6
### Summary of Changes
- As `nvidia/docker` images only support Cuda 11.7 with Ubuntu 22.04, we are starting with base Ubuntu 22.04 docker images, and then installing Cuda 11.6
- Fetching `install_cuda.sh` from `pytorch/builder` repo in order to install Cuda using `wget`
- `libssl-dev` has been upgraded to `libssl3` in Ubuntu 22.04. Instead of relying on it, we use the `include` and `lib` folders produced by building `OpenSSL 1.1.1` ourselves.
- `Clang-12` requires `libomp-12-dev` to work with `OpenMP` which is added to the `install_base.sh` file.
- Minor fixes to handle compilation errors generated when using `clang-12`.
- In `pow_test.cpp` adding a `static_cast` to input of `sqrt` method
- In `vec512_qint.h`, explicitly defining `copy-assignment` operator as its implicit definition is deprecated due to
user-declared `copy-constructor` in C++11
Pull Request resolved: https://github.com/pytorch/pytorch/pull/77591
Approved by: https://github.com/seemethere, https://github.com/atalman
This commit is contained in:
parent
f5d7e5a192
commit
71d82917f4
|
|
@ -54,6 +54,8 @@ elif [[ "$image" == *-bionic* ]]; then
|
|||
UBUNTU_VERSION=18.04
|
||||
elif [[ "$image" == *-focal* ]]; then
|
||||
UBUNTU_VERSION=20.04
|
||||
elif [[ "$image" == *-jammy* ]]; then
|
||||
UBUNTU_VERSION=22.04
|
||||
elif [[ "$image" == *ubuntu* ]]; then
|
||||
extract_version_from_image_name ubuntu UBUNTU_VERSION
|
||||
elif [[ "$image" == *centos* ]]; then
|
||||
|
|
@ -70,7 +72,8 @@ else
|
|||
fi
|
||||
|
||||
DOCKERFILE="${OS}/Dockerfile"
|
||||
if [[ "$image" == *cuda* ]]; then
|
||||
# When using ubuntu - 22.04, start from Ubuntu docker image, instead of nvidia/cuda docker image.
|
||||
if [[ "$image" == *cuda* && "$UBUNTU_VERSION" != "22.04" ]]; then
|
||||
DOCKERFILE="${OS}-cuda/Dockerfile"
|
||||
elif [[ "$image" == *rocm* ]]; then
|
||||
DOCKERFILE="${OS}-rocm/Dockerfile"
|
||||
|
|
@ -249,6 +252,15 @@ case "$image" in
|
|||
VISION=yes
|
||||
KATEX=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12)
|
||||
ANACONDA_PYTHON_VERSION=3.8
|
||||
CUDA_VERSION=11.6
|
||||
CUDNN_VERSION=8
|
||||
CLANG_VERSION=12
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
;;
|
||||
*)
|
||||
# Catch-all for builds that are not hardcoded.
|
||||
PROTOBUF=yes
|
||||
|
|
|
|||
|
|
@ -15,11 +15,20 @@ install_ubuntu() {
|
|||
elif [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
|
||||
cmake3="cmake=3.16*"
|
||||
maybe_libiomp_dev=""
|
||||
elif [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
|
||||
cmake3="cmake=3.22*"
|
||||
maybe_libiomp_dev=""
|
||||
else
|
||||
cmake3="cmake=3.5*"
|
||||
maybe_libiomp_dev="libiomp-dev"
|
||||
fi
|
||||
|
||||
if [[ "$CLANG_VERSION" == 12 ]]; then
|
||||
libomp_dev="libomp-12-dev"
|
||||
else
|
||||
libomp_dev=""
|
||||
fi
|
||||
|
||||
# TODO: Remove this once nvidia package repos are back online
|
||||
# Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
|
||||
# shellcheck disable=SC2046
|
||||
|
|
@ -51,6 +60,7 @@ install_ubuntu() {
|
|||
libjpeg-dev \
|
||||
libasound2-dev \
|
||||
libsndfile-dev \
|
||||
${libomp_dev} \
|
||||
software-properties-common \
|
||||
wget \
|
||||
sudo \
|
||||
|
|
@ -60,6 +70,20 @@ install_ubuntu() {
|
|||
# see: https://github.com/pytorch/pytorch/issues/65931
|
||||
apt-get install -y libgnutls30
|
||||
|
||||
# cuda-toolkit does not work with gcc-11.2.0 which is default in Ubunutu 22.04
|
||||
# see: https://github.com/NVlabs/instant-ngp/issues/119
|
||||
if [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
|
||||
apt-get install -y g++-10
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 30
|
||||
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 30
|
||||
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-10 30
|
||||
|
||||
# https://www.spinics.net/lists/libreoffice/msg07549.html
|
||||
sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/11
|
||||
wget https://github.com/gcc-mirror/gcc/commit/2b2d97fc545635a0f6aa9c9ee3b017394bc494bf.patch -O noexecpt.patch
|
||||
sudo patch /usr/include/c++/10/bits/range_access.h noexecpt.patch
|
||||
fi
|
||||
|
||||
# Cleanup package manager
|
||||
apt-get autoclean && apt-get clean
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
|
|
|||
|
|
@ -5,7 +5,9 @@ set -ex
|
|||
install_ubuntu() {
|
||||
echo "Preparing to build sccache from source"
|
||||
apt-get update
|
||||
apt-get install -y cargo pkg-config libssl-dev
|
||||
# libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.
|
||||
# Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``
|
||||
apt-get install -y cargo
|
||||
echo "Checking out sccache repo"
|
||||
git clone https://github.com/pytorch/sccache
|
||||
cd sccache
|
||||
|
|
|
|||
|
|
@ -10,5 +10,7 @@ cd "${OPENSSL}"
|
|||
./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
|
||||
# NOTE: openssl install errors out when built with the -j option
|
||||
make -j6; make install_sw
|
||||
# Link the ssl libraries to the /usr/lib folder.
|
||||
sudo ln -s /opt/openssl/lib/lib* /usr/lib
|
||||
cd ..
|
||||
rm -rf "${OPENSSL}"
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ ENV INSTALLED_VISION ${VISION}
|
|||
ADD ./common/install_openssl.sh install_openssl.sh
|
||||
ENV OPENSSL_ROOT_DIR /opt/openssl
|
||||
RUN bash ./install_openssl.sh
|
||||
ENV OPENSSL_DIR /opt/openssl
|
||||
|
||||
# (optional) Install non-default CMake version
|
||||
ARG CMAKE_VERSION
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ ARG UBUNTU_VERSION
|
|||
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
ARG CLANG_VERSION
|
||||
|
||||
# Install common dependencies (so that this step can be cached separately)
|
||||
ARG EC2
|
||||
ADD ./common/install_base.sh install_base.sh
|
||||
|
|
@ -13,7 +15,6 @@ RUN bash ./install_base.sh && rm install_base.sh
|
|||
|
||||
# Install clang
|
||||
ARG LLVMDEV
|
||||
ARG CLANG_VERSION
|
||||
ADD ./common/install_clang.sh install_clang.sh
|
||||
RUN bash ./install_clang.sh && rm install_clang.sh
|
||||
|
||||
|
|
@ -50,6 +51,13 @@ RUN bash ./install_gcc.sh && rm install_gcc.sh
|
|||
ADD ./common/install_lcov.sh install_lcov.sh
|
||||
RUN bash ./install_lcov.sh && rm install_lcov.sh
|
||||
|
||||
# Install cuda and cudnn
|
||||
ARG CUDA_VERSION
|
||||
RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/common/install_cuda.sh -O install_cuda.sh
|
||||
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
|
||||
ENV DESIRED_CUDA ${CUDA_VERSION}
|
||||
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
|
||||
|
||||
# (optional) Install protobuf for ONNX
|
||||
ARG PROTOBUF
|
||||
ADD ./common/install_protobuf.sh install_protobuf.sh
|
||||
|
|
@ -111,6 +119,8 @@ RUN rm install_ninja.sh
|
|||
ADD ./common/install_openssl.sh install_openssl.sh
|
||||
RUN bash ./install_openssl.sh
|
||||
ENV OPENSSL_ROOT_DIR /opt/openssl
|
||||
ENV OPENSSL_DIR /opt/openssl
|
||||
RUN rm install_openssl.sh
|
||||
|
||||
# Install ccache/sccache (do this last, so we get priority in PATH)
|
||||
ADD ./common/install_cache.sh install_cache.sh
|
||||
|
|
@ -122,12 +132,19 @@ ADD ./common/install_jni.sh install_jni.sh
|
|||
ADD ./java/jni.h jni.h
|
||||
RUN bash ./install_jni.sh && rm install_jni.sh
|
||||
|
||||
# Install Open MPI for CUDA
|
||||
ADD ./common/install_openmpi.sh install_openmpi.sh
|
||||
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
|
||||
RUN rm install_openmpi.sh
|
||||
|
||||
# Include BUILD_ENVIRONMENT environment variable in image
|
||||
ARG BUILD_ENVIRONMENT
|
||||
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
|
||||
|
||||
# Install LLVM dev version (Defined in the pytorch/builder github repository)
|
||||
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
|
||||
# AWS specific CUDA build guidance
|
||||
ENV TORCH_CUDA_ARCH_LIST Maxwell
|
||||
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
|
||||
ENV CUDA_PATH /usr/local/cuda
|
||||
|
||||
USER jenkins
|
||||
CMD ["bash"]
|
||||
|
|
|
|||
1
.github/workflows/docker-builds.yml
vendored
1
.github/workflows/docker-builds.yml
vendored
|
|
@ -30,6 +30,7 @@ jobs:
|
|||
- docker-image-name: pytorch-linux-bionic-py3.7-clang9
|
||||
- docker-image-name: pytorch-linux-bionic-rocm5.0-py3.7
|
||||
- docker-image-name: pytorch-linux-bionic-rocm5.1-py3.7
|
||||
- docker-image-name: pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12
|
||||
- docker-image-name: pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7
|
||||
- docker-image-name: pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7
|
||||
- docker-image-name: pytorch-linux-xenial-py3-clang5-android-ndk-r19c
|
||||
|
|
|
|||
7
.github/workflows/pull.yml
vendored
7
.github/workflows/pull.yml
vendored
|
|
@ -193,6 +193,13 @@ jobs:
|
|||
docker-image-name: pytorch-linux-xenial-py3-clang5-asan
|
||||
build-generates-artifacts: false
|
||||
|
||||
linux-jammy-cuda-11_6-cudnn8-py3_8-clang12-build:
|
||||
name: linux-jammy-cuda11.6-cudnn8-py3.8-clang12
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-jammy-cuda11.6-cudnn8-py3.8-clang12
|
||||
docker-image-name: pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12
|
||||
|
||||
linux-xenial-py3-clang5-mobile-custom-build-static:
|
||||
name: linux-xenial-py3-clang5-mobile-custom-build-static
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
|
|
|
|||
|
|
@ -430,6 +430,11 @@ struct Vectorized<c10::qint8> : public Vectorizedqi {
|
|||
// constructor for moving the enum
|
||||
Vectorized(const Vectorized<c10::qint8>& other) : Vectorizedqi(other.vals) { }
|
||||
|
||||
// This is added to avoid error: definition of implicit copy assignment operator
|
||||
// for 'Vectorized<c10::qint8>' is deprecated because it has a user-declared
|
||||
// copy constructor [-Werror,-Wdeprecated-copy]
|
||||
Vectorized& operator=(const Vectorized<c10::qint8>&) = default;
|
||||
|
||||
void store(void* ptr, int count = size()) const {
|
||||
if (count != size()) {
|
||||
memcpy(ptr, &vals, count * sizeof(value_type));
|
||||
|
|
@ -589,6 +594,11 @@ struct Vectorized<c10::quint8> : public Vectorizedqi {
|
|||
|
||||
Vectorized(const Vectorized<c10::quint8>& other) : Vectorizedqi(other.vals) { }
|
||||
|
||||
// This is added to avoid error: definition of implicit copy assignment operator
|
||||
// for 'Vectorized<c10::quint8>' is deprecated because it has a user-declared
|
||||
// copy constructor [-Werror,-Wdeprecated-copy]
|
||||
Vectorized& operator=(const Vectorized<c10::quint8>&) = default;
|
||||
|
||||
void store(void* ptr, int count = size()) const {
|
||||
if (count != size()) {
|
||||
memcpy(ptr, &vals, count * sizeof(value_type));
|
||||
|
|
|
|||
|
|
@ -35,16 +35,16 @@ const std::vector<int> ints {
|
|||
int_min,
|
||||
int_min + 1,
|
||||
int_min + 2,
|
||||
static_cast<int>(-sqrt(int_max)),
|
||||
static_cast<int>(-sqrt(static_cast<double>(int_max))),
|
||||
-3, -2, -1, 0, 1, 2, 3,
|
||||
static_cast<int>(sqrt(int_max)),
|
||||
static_cast<int>(sqrt(static_cast<double>(int_max))),
|
||||
int_max - 2,
|
||||
int_max - 1,
|
||||
int_max
|
||||
};
|
||||
const std::vector<int> non_neg_ints {
|
||||
0, 1, 2, 3,
|
||||
static_cast<int>(sqrt(int_max)),
|
||||
static_cast<int>(sqrt(static_cast<double>(int_max))),
|
||||
int_max - 2,
|
||||
int_max - 1,
|
||||
int_max
|
||||
|
|
@ -53,16 +53,16 @@ const std::vector<int64_t> longs {
|
|||
long_min,
|
||||
long_min + 1,
|
||||
long_min + 2,
|
||||
static_cast<int64_t>(-sqrt(long_max)),
|
||||
static_cast<int64_t>(-sqrt(static_cast<double>(long_max))),
|
||||
-3, -2, -1, 0, 1, 2, 3,
|
||||
static_cast<int64_t>(sqrt(long_max)),
|
||||
static_cast<int64_t>(sqrt(static_cast<double>(long_max))),
|
||||
long_max - 2,
|
||||
long_max - 1,
|
||||
long_max
|
||||
};
|
||||
const std::vector<int64_t> non_neg_longs {
|
||||
0, 1, 2, 3,
|
||||
static_cast<int64_t>(sqrt(long_max)),
|
||||
static_cast<int64_t>(sqrt(static_cast<double>(long_max))),
|
||||
long_max - 2,
|
||||
long_max - 1,
|
||||
long_max
|
||||
|
|
@ -128,7 +128,7 @@ void tensor_pow_scalar(const Vals vals, const Pows pows, const torch::ScalarType
|
|||
|
||||
for (const auto pow : pows) {
|
||||
// NOLINTNEXTLINE(clang-diagnostic-implicit-const-int-float-conversion)
|
||||
if ( dtype == kInt && pow > std::numeric_limits<int>::max()) {
|
||||
if ( dtype == kInt && pow > static_cast<float>(std::numeric_limits<int>::max())) {
|
||||
// value cannot be converted to type int without overflow
|
||||
// NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
|
||||
EXPECT_THROW(tensor.pow(pow), std::runtime_error);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user