mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Use reusable binary docker build action for libtorch (#151488)
This is part of splitting up https://github.com/pytorch/pytorch/pull/150558 into smaller chunks; please see that PR for more context. Similar to https://github.com/pytorch/pytorch/pull/151483, but for libtorch. Changed the job name. Testing: Can't really test, since PRs don't have the credentials to push to docker.io, which is the image used for everything, including PRs, right now. Pull Request resolved: https://github.com/pytorch/pytorch/pull/151488 Approved by: https://github.com/atalman
This commit is contained in:
parent
88b0553c58
commit
b0f26e81a5
|
|
@ -1,83 +1,63 @@
|
|||
#!/usr/bin/env bash
|
||||
# Script used only in CD pipeline
|
||||
|
||||
set -eou pipefail
|
||||
set -eoux pipefail
|
||||
|
||||
image="$1"
|
||||
shift
|
||||
|
||||
if [ -z "${image}" ]; then
|
||||
echo "Usage: $0 IMAGE"
|
||||
echo "Usage: $0 IMAGENAME:ARCHTAG"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DOCKER_IMAGE="pytorch/${image}"
|
||||
|
||||
TOPDIR=$(git rev-parse --show-toplevel)
|
||||
|
||||
GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
|
||||
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
|
||||
|
||||
WITH_PUSH=${WITH_PUSH:-}
|
||||
|
||||
DOCKER=${DOCKER:-docker}
|
||||
|
||||
case ${GPU_ARCH_TYPE} in
|
||||
# Go from imagename:tag to tag
|
||||
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
|
||||
|
||||
GPU_ARCH_VERSION=""
|
||||
if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
|
||||
# extract cuda version from image name. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
|
||||
GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
|
||||
elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
|
||||
# extract rocm version from image name. e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
|
||||
GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
|
||||
fi
|
||||
|
||||
case ${DOCKER_TAG_PREFIX} in
|
||||
cpu)
|
||||
BASE_TARGET=cpu
|
||||
DOCKER_TAG=cpu
|
||||
GPU_IMAGE=ubuntu:20.04
|
||||
DOCKER_GPU_BUILD_ARG=""
|
||||
;;
|
||||
cuda)
|
||||
cuda*)
|
||||
BASE_TARGET=cuda${GPU_ARCH_VERSION}
|
||||
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
|
||||
GPU_IMAGE=ubuntu:20.04
|
||||
DOCKER_GPU_BUILD_ARG=""
|
||||
;;
|
||||
rocm)
|
||||
rocm*)
|
||||
BASE_TARGET=rocm
|
||||
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
|
||||
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
|
||||
echo "ERROR: Unrecognized DOCKER_TAG_PREFIX: ${DOCKER_TAG_PREFIX}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
(
|
||||
set -x
|
||||
DOCKER_BUILDKIT=1 ${DOCKER} build \
|
||||
--target final \
|
||||
${DOCKER_GPU_BUILD_ARG} \
|
||||
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
|
||||
--build-arg "BASE_TARGET=${BASE_TARGET}" \
|
||||
-t "${DOCKER_IMAGE}" \
|
||||
$@ \
|
||||
-f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
|
||||
"${TOPDIR}/.ci/docker/"
|
||||
|
||||
)
|
||||
|
||||
GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
|
||||
GIT_BRANCH_NAME=${GITHUB_REF##*/}
|
||||
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
|
||||
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
|
||||
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA}
|
||||
|
||||
if [[ "${WITH_PUSH}" == true ]]; then
|
||||
(
|
||||
set -x
|
||||
${DOCKER} push "${DOCKER_IMAGE}"
|
||||
if [[ -n ${GITHUB_REF} ]]; then
|
||||
${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG}
|
||||
${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG}
|
||||
${DOCKER} push "${DOCKER_IMAGE_BRANCH_TAG}"
|
||||
${DOCKER} push "${DOCKER_IMAGE_SHA_TAG}"
|
||||
fi
|
||||
)
|
||||
fi
|
||||
DOCKER_BUILDKIT=1 ${DOCKER} build \
|
||||
--target final \
|
||||
${DOCKER_GPU_BUILD_ARG} \
|
||||
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
|
||||
--build-arg "BASE_TARGET=${BASE_TARGET}" \
|
||||
-t "${tmp_tag}" \
|
||||
$@ \
|
||||
-f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
|
||||
"${TOPDIR}/.ci/docker/"
|
||||
|
|
|
|||
138
.github/workflows/build-libtorch-images.yml
vendored
138
.github/workflows/build-libtorch-images.yml
vendored
|
|
@ -10,14 +10,14 @@ on:
|
|||
# Release candidate tags look like: v1.11.0-rc1
|
||||
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
|
||||
paths:
|
||||
- '.ci/docker/libtorch/*'
|
||||
- '.ci/docker/common/*'
|
||||
- .ci/docker/**
|
||||
- .github/workflows/build-libtorch-images.yml
|
||||
- .github/actions/binary-docker-build/**
|
||||
pull_request:
|
||||
paths:
|
||||
- '.ci/docker/libtorch/*'
|
||||
- '.ci/docker/common/*'
|
||||
- .ci/docker/**
|
||||
- .github/workflows/build-libtorch-images.yml
|
||||
- .github/actions/binary-docker-build/**
|
||||
|
||||
env:
|
||||
DOCKER_REGISTRY: "docker.io"
|
||||
|
|
@ -39,123 +39,29 @@ jobs:
|
|||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
|
||||
build-docker-cuda:
|
||||
build:
|
||||
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
|
||||
needs: get-label-type
|
||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
|
||||
runs-on: ${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral
|
||||
name: libtorch-cxx11-builder:${{ matrix.tag }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
cuda_version: ["12.8", "12.6", "12.4", "11.8"]
|
||||
env:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
|
||||
include: [
|
||||
{ tag: "cuda12.8" },
|
||||
{ tag: "cuda12.6" },
|
||||
{ tag: "cuda12.4" },
|
||||
{ tag: "cuda11.8" },
|
||||
{ tag: "rocm6.3" },
|
||||
{ tag: "rocm6.4" },
|
||||
{ tag: "cpu" },
|
||||
]
|
||||
steps:
|
||||
- name: Checkout PyTorch
|
||||
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
|
||||
- name: Build docker image
|
||||
uses: pytorch/pytorch/.github/actions/binary-docker-build@main
|
||||
with:
|
||||
submodules: false
|
||||
- name: Calculate docker image
|
||||
if: env.WITH_PUSH == 'false'
|
||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||
with:
|
||||
docker-image-name: libtorch-cxx11-builder-cuda${{matrix.cuda_version}}
|
||||
docker-build-dir: .ci/docker/libtorch
|
||||
always-rebuild: true
|
||||
push: true
|
||||
- name: Authenticate if WITH_PUSH
|
||||
if: env.WITH_PUSH == 'true'
|
||||
env:
|
||||
docker-image-name: libtorch-cxx11-builder
|
||||
custom-tag-prefix: ${{ matrix.tag }}
|
||||
docker-build-dir: libtorch
|
||||
DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
|
||||
DOCKER_ID: ${{ secrets.DOCKER_ID }}
|
||||
run: |
|
||||
if [[ "${WITH_PUSH}" == true ]]; then
|
||||
echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
|
||||
fi
|
||||
- name: Build Docker Image
|
||||
if: env.WITH_PUSH == 'true'
|
||||
uses: nick-fields/retry@v3.0.0
|
||||
with:
|
||||
shell: bash
|
||||
timeout_minutes: 90
|
||||
max_attempts: 3
|
||||
retry_wait_seconds: 90
|
||||
command: |
|
||||
.ci/docker/libtorch/build.sh libtorch-cxx11-builder:cuda${{matrix.cuda_version}}
|
||||
build-docker-rocm:
|
||||
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
|
||||
needs: get-label-type
|
||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
|
||||
strategy:
|
||||
matrix:
|
||||
rocm_version: ["6.3", "6.4"]
|
||||
env:
|
||||
GPU_ARCH_TYPE: rocm
|
||||
GPU_ARCH_VERSION: ${{ matrix.rocm_version }}
|
||||
steps:
|
||||
- name: Checkout PyTorch
|
||||
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
|
||||
with:
|
||||
submodules: false
|
||||
- name: Calculate docker image
|
||||
if: env.WITH_PUSH == 'false'
|
||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||
with:
|
||||
docker-image-name: libtorch-cxx11-builder-rocm${{matrix.rocm_version}}
|
||||
docker-build-dir: .ci/docker/libtorch
|
||||
always-rebuild: true
|
||||
push: true
|
||||
- name: Authenticate if WITH_PUSH
|
||||
if: env.WITH_PUSH == 'true'
|
||||
env:
|
||||
DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
|
||||
DOCKER_ID: ${{ secrets.DOCKER_ID }}
|
||||
run: |
|
||||
if [[ "${WITH_PUSH}" == true ]]; then
|
||||
echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
|
||||
fi
|
||||
- name: Build Docker Image
|
||||
if: env.WITH_PUSH == 'true'
|
||||
uses: nick-fields/retry@v3.0.0
|
||||
with:
|
||||
shell: bash
|
||||
timeout_minutes: 90
|
||||
max_attempts: 3
|
||||
retry_wait_seconds: 90
|
||||
command: |
|
||||
.ci/docker/libtorch/build.sh libtorch-cxx11-builder:rocm${{matrix.rocm_version}}
|
||||
build-docker-cpu:
|
||||
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
|
||||
needs: get-label-type
|
||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
|
||||
steps:
|
||||
- name: Checkout PyTorch
|
||||
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
|
||||
with:
|
||||
submodules: false
|
||||
- name: Calculate docker image
|
||||
if: env.WITH_PUSH == 'false'
|
||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||
with:
|
||||
docker-image-name: libtorch-cxx11-builder-cpu
|
||||
docker-build-dir: .ci/docker/libtorch
|
||||
always-rebuild: true
|
||||
push: true
|
||||
- name: Authenticate if WITH_PUSH
|
||||
if: env.WITH_PUSH == 'true'
|
||||
env:
|
||||
DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
|
||||
DOCKER_ID: ${{ secrets.DOCKER_ID }}
|
||||
run: |
|
||||
if [[ "${WITH_PUSH}" == true ]]; then
|
||||
echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
|
||||
fi
|
||||
- name: Build Docker Image
|
||||
if: env.WITH_PUSH == 'true'
|
||||
uses: nick-fields/retry@v3.0.0
|
||||
with:
|
||||
shell: bash
|
||||
timeout_minutes: 90
|
||||
max_attempts: 3
|
||||
retry_wait_seconds: 90
|
||||
command: |
|
||||
.ci/docker/libtorch/build.sh libtorch-cxx11-builder:cpu
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user