mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 00:20:18 +01:00
Consolidates binary checkout logic to use the standard common logic we have in our common templates. Also fixes issues related to pytorch/builder trying to check out the head commit for pytorch/pytorch instead of checking out the builder commit we actually want. Signed-off-by: Eli Uriegas <eliuriegas@fb.com> Pull Request resolved: https://github.com/pytorch/pytorch/pull/73092 Signed-off-by: Eli Uriegas <eliuriegas@fb.com>
8287 lines
338 KiB
YAML
Generated
8287 lines
338 KiB
YAML
Generated
# @generated DO NOT EDIT MANUALLY

# Template is at:    .github/templates/linux_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-binary-conda

# Triggers: pushes to the nightly branch, release-candidate / ciflow tags,
# and manual dispatch.
on:
  push:
    # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build
    branches:
      - nightly
    tags:
      # NOTE: Binary build pipelines should only get triggered on release candidate builds
      # Release candidate tags look like: v1.11.0-rc1
      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
      - 'ciflow/binaries/*'
      - 'ciflow/binaries_conda/*'
  workflow_dispatch:

# Workflow-wide environment shared by every job below.
env:
  # Needed for conda builds
  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
  ANACONDA_USER: pytorch
  AWS_DEFAULT_REGION: us-east-1
  BINARY_ENV_FILE: /tmp/env
  BUILD_ENVIRONMENT: linux-binary-conda
  BUILDER_ROOT: /builder
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  IN_CI: 1
  IS_GHA: 1
  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
  PYTORCH_RETRY_TEST_CASES: 1
  PYTORCH_ROOT: /pytorch
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: 1

# One in-flight run per PR number (or commit SHA); manual dispatches get
# their own group because the event-name comparison is part of the key.
concurrency:
  group: linux-binary-conda-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

jobs:
|
|
# Job conda-py3_7-cpu-build: runs /builder/conda/build.sh inside the
# pytorch/conda-builder:cpu image for Python 3.7 and uploads the contents of
# ${RUNNER_TEMP}/artifacts as the "conda-py3_7-cpu" artifact.
  conda-py3_7-cpu-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: linux.4xlarge
    timeout-minutes: 240
    env:
      PACKAGE_TYPE: conda
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
      DOCKER_IMAGE: pytorch/conda-builder:cpu
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.7"
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # Snapshot every GITHUB* variable into a per-run file under /tmp.
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: pytorch
      - name: Checkout pytorch/builder
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
      - name: Clean pytorch/builder checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: builder
      - name: Pull Docker image
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      # The container sees the host directory ${RUNNER_TEMP}/artifacts at
      # /artifacts (the value of PYTORCH_FINAL_PACKAGE_DIR). Create that exact
      # directory up front; the previous `mkdir -p artifacts/` created an unused
      # directory in the workspace instead.
      - name: Build PyTorch binary
        run: |
          set -x
          mkdir -p "${RUNNER_TEMP}/artifacts"
          container_name=$(docker run \
            -e BINARY_ENV_FILE \
            -e BUILDER_ROOT \
            -e BUILD_ENVIRONMENT \
            -e BUILD_SPLIT_CUDA \
            -e DESIRED_CUDA \
            -e DESIRED_DEVTOOLSET \
            -e DESIRED_PYTHON \
            -e GPU_ARCH_TYPE \
            -e GPU_ARCH_VERSION \
            -e IS_GHA \
            -e LIBTORCH_VARIANT \
            -e PACKAGE_TYPE \
            -e PYTORCH_FINAL_PACKAGE_DIR \
            -e PYTORCH_ROOT \
            -e SKIP_ALL_TESTS \
            --tty \
            --detach \
            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
            -v "${GITHUB_WORKSPACE}/builder:/builder" \
            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
            -w / \
            "${DOCKER_IMAGE}"
          )
          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
      - name: Chown artifacts
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - uses: seemethere/upload-artifact-s3@v3
        with:
          name: conda-py3_7-cpu
          retention-days: 14
          if-no-files-found: error
          path: "${{ runner.temp }}/artifacts/*"
      - name: Hold runner for 2 hours or until ssh sessions have drained
        working-directory: pytorch/
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
# Job conda-py3_7-cpu-test: downloads the "conda-py3_7-cpu" artifact, mounts
# it at /final_pkgs in the pytorch/conda-builder:cpu image, then generates and
# runs /run.sh via .circleci/scripts/binary_linux_test.sh.
  conda-py3_7-cpu-test:  # Testing
    needs: conda-py3_7-cpu-build
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: linux.4xlarge
    timeout-minutes: 240
    env:
      PACKAGE_TYPE: conda
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
      DOCKER_IMAGE: pytorch/conda-builder:cpu
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.7"
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # Snapshot every GITHUB* variable into a per-run file under /tmp.
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      # Fetch the package produced by the build job.
      - name: Download Build Artifacts
        uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        with:
          name: conda-py3_7-cpu
          path: "${{ runner.temp }}/artifacts/"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
      - name: Clean PyTorch checkout
        working-directory: pytorch
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
      - name: Checkout pytorch/builder
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
      - name: Clean pytorch/builder checkout
        working-directory: builder
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
      - name: Pull Docker image
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      # GPU_FLAG is not set by any step of this job, so ${GPU_FLAG:-} expands
      # to nothing here.
      - name: Test PyTorch binary
        run: |
          set -x
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BINARY_ENV_FILE \
            -e BUILDER_ROOT \
            -e BUILD_ENVIRONMENT \
            -e BUILD_SPLIT_CUDA \
            -e DESIRED_CUDA \
            -e DESIRED_DEVTOOLSET \
            -e DESIRED_PYTHON \
            -e GPU_ARCH_TYPE \
            -e GPU_ARCH_VERSION \
            -e IS_GHA \
            -e LIBTORCH_VARIANT \
            -e PACKAGE_TYPE \
            -e PYTORCH_FINAL_PACKAGE_DIR \
            -e PYTORCH_ROOT \
            -e SKIP_ALL_TESTS \
            --tty \
            --detach \
            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
            -v "${GITHUB_WORKSPACE}/builder:/builder" \
            -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
            -w / \
            "${DOCKER_IMAGE}"
          )
          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
          # Generate test script
          docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        working-directory: pytorch/
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
# Job conda-py3_7-cpu-upload: downloads the "conda-py3_7-cpu" artifact and runs
# .circleci/scripts/binary_upload.sh inside the miniconda3 tool image.
# DRY_RUN / UPLOAD_CHANNEL are only exported for the push events matched below.
  conda-py3_7-cpu-upload:  # Uploading
    needs: conda-py3_7-cpu-test
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
    env:
      PACKAGE_TYPE: conda
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
      DOCKER_IMAGE: pytorch/conda-builder:cpu
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.7"
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # Snapshot every GITHUB* variable into a per-run file under /tmp.
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Clone pytorch/pytorch
        uses: actions/checkout@v2
      - name: Download Build Artifacts
        uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        with:
          name: conda-py3_7-cpu
          path: "${{ runner.temp }}/artifacts/"
      # Despite the step name, the condition also matches pushes to the
      # nightly branch; ciflow/* tags are excluded.
      - name: Set DRY_RUN (only for tagged pushes)
        if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
        run: |
          echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
      - name: Set UPLOAD_CHANNEL (only for tagged pushes)
        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
        run: |
          # reference ends with an RC suffix
          if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
            echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
          fi
      # Note: the container receives PKG_DIR=/artifacts from the explicit
      # `-e PKG_DIR=/artifacts` flag, not from the step-level PKG_DIR below.
      - name: Upload binaries
        env:
          PKG_DIR: "${{ runner.temp }}/artifacts"
          UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
          # When running these on pull_request events these should be blank
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
          ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
        run: |
          docker run --rm -i \
            -e ANACONDA_API_TOKEN \
            -e AWS_ACCESS_KEY_ID \
            -e AWS_SECRET_ACCESS_KEY \
            -e DRY_RUN \
            -e PACKAGE_TYPE \
            -e PKG_DIR=/artifacts \
            -e UPLOAD_CHANNEL \
            -e UPLOAD_SUBFOLDER \
            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
            -v "${GITHUB_WORKSPACE}:/v" \
            -w /v \
            308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
            bash -c '.circleci/scripts/binary_upload.sh'
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
# Job conda-py3_7-cuda10_2-build: runs /builder/conda/build.sh inside the
# pytorch/conda-builder:cuda10.2 image for Python 3.7 and uploads the contents
# of ${RUNNER_TEMP}/artifacts as the "conda-py3_7-cuda10_2" artifact.
  conda-py3_7-cuda10_2-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: linux.4xlarge
    timeout-minutes: 240
    env:
      PACKAGE_TYPE: conda
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu102
      # Quoted so the version stays a string instead of being read as a float
      # (same treatment as DESIRED_PYTHON below).
      GPU_ARCH_VERSION: "10.2"
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.7"
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # Snapshot every GITHUB* variable into a per-run file under /tmp.
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: pytorch
      - name: Checkout pytorch/builder
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
      - name: Clean pytorch/builder checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: builder
      - name: Pull Docker image
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      # The container sees the host directory ${RUNNER_TEMP}/artifacts at
      # /artifacts (the value of PYTORCH_FINAL_PACKAGE_DIR). Create that exact
      # directory up front; the previous `mkdir -p artifacts/` created an unused
      # directory in the workspace instead.
      - name: Build PyTorch binary
        run: |
          set -x
          mkdir -p "${RUNNER_TEMP}/artifacts"
          container_name=$(docker run \
            -e BINARY_ENV_FILE \
            -e BUILDER_ROOT \
            -e BUILD_ENVIRONMENT \
            -e BUILD_SPLIT_CUDA \
            -e DESIRED_CUDA \
            -e DESIRED_DEVTOOLSET \
            -e DESIRED_PYTHON \
            -e GPU_ARCH_TYPE \
            -e GPU_ARCH_VERSION \
            -e IS_GHA \
            -e LIBTORCH_VARIANT \
            -e PACKAGE_TYPE \
            -e PYTORCH_FINAL_PACKAGE_DIR \
            -e PYTORCH_ROOT \
            -e SKIP_ALL_TESTS \
            --tty \
            --detach \
            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
            -v "${GITHUB_WORKSPACE}/builder:/builder" \
            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
            -w / \
            "${DOCKER_IMAGE}"
          )
          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
      - name: Chown artifacts
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - uses: seemethere/upload-artifact-s3@v3
        with:
          name: conda-py3_7-cuda10_2
          retention-days: 14
          if-no-files-found: error
          path: "${{ runner.temp }}/artifacts/*"
      - name: Hold runner for 2 hours or until ssh sessions have drained
        working-directory: pytorch/
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
# Job conda-py3_7-cuda10_2-test: downloads the "conda-py3_7-cuda10_2" artifact,
# installs the NVIDIA utilities on the GPU runner, mounts the artifact at
# /final_pkgs in the pytorch/conda-builder:cuda10.2 image, then generates and
# runs /run.sh via .circleci/scripts/binary_linux_test.sh.
  conda-py3_7-cuda10_2-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs: conda-py3_7-cuda10_2-build
    runs-on: linux.4xlarge.nvidia.gpu
    timeout-minutes: 240
    env:
      PACKAGE_TYPE: conda
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu102
      # Quoted so the version stays a string instead of being read as a float
      # (same treatment as DESIRED_PYTHON below).
      GPU_ARCH_VERSION: "10.2"
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.7"
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # Snapshot every GITHUB* variable into a per-run file under /tmp.
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      # Fetch the package produced by the build job.
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download Build Artifacts
        with:
          name: conda-py3_7-cuda10_2
          path: "${{ runner.temp }}/artifacts/"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: pytorch
      - name: Checkout pytorch/builder
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
      - name: Clean pytorch/builder checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: builder
      # Exports GPU_FLAG through GITHUB_ENV so the later `docker run` picks up
      # "--gpus all".
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        working-directory: pytorch/
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
      - name: Pull Docker image
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Test PyTorch binary
        run: |
          set -x
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BINARY_ENV_FILE \
            -e BUILDER_ROOT \
            -e BUILD_ENVIRONMENT \
            -e BUILD_SPLIT_CUDA \
            -e DESIRED_CUDA \
            -e DESIRED_DEVTOOLSET \
            -e DESIRED_PYTHON \
            -e GPU_ARCH_TYPE \
            -e GPU_ARCH_VERSION \
            -e IS_GHA \
            -e LIBTORCH_VARIANT \
            -e PACKAGE_TYPE \
            -e PYTORCH_FINAL_PACKAGE_DIR \
            -e PYTORCH_ROOT \
            -e SKIP_ALL_TESTS \
            --tty \
            --detach \
            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
            -v "${GITHUB_WORKSPACE}/builder:/builder" \
            -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
            -w / \
            "${DOCKER_IMAGE}"
          )
          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
          # Generate test script
          docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
      - name: Hold runner for 2 hours or until ssh sessions have drained
        working-directory: pytorch/
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
# Job conda-py3_7-cuda10_2-upload: downloads the "conda-py3_7-cuda10_2"
# artifact and runs .circleci/scripts/binary_upload.sh inside the miniconda3
# tool image. DRY_RUN / UPLOAD_CHANNEL are only exported for the push events
# matched below.
  conda-py3_7-cuda10_2-upload:  # Uploading
    runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
    if: ${{ github.repository_owner == 'pytorch' }}
    needs: conda-py3_7-cuda10_2-test
    env:
      PACKAGE_TYPE: conda
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu102
      # Quoted so the version stays a string instead of being read as a float
      # (same treatment as DESIRED_PYTHON below).
      GPU_ARCH_VERSION: "10.2"
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.7"
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # Snapshot every GITHUB* variable into a per-run file under /tmp.
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Clone pytorch/pytorch
        uses: actions/checkout@v2
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download Build Artifacts
        with:
          name: conda-py3_7-cuda10_2
          path: "${{ runner.temp }}/artifacts/"
      # Despite the step name, the condition also matches pushes to the
      # nightly branch; ciflow/* tags are excluded.
      - name: Set DRY_RUN (only for tagged pushes)
        if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
        run: |
          echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
      - name: Set UPLOAD_CHANNEL (only for tagged pushes)
        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
        run: |
          # reference ends with an RC suffix
          if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
            echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
          fi
      # Note: the container receives PKG_DIR=/artifacts from the explicit
      # `-e PKG_DIR=/artifacts` flag, not from the step-level PKG_DIR below.
      - name: Upload binaries
        env:
          PKG_DIR: "${{ runner.temp }}/artifacts"
          UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
          # When running these on pull_request events these should be blank
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
          ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
        run: |
          docker run --rm -i \
            -e ANACONDA_API_TOKEN \
            -e AWS_ACCESS_KEY_ID \
            -e AWS_SECRET_ACCESS_KEY \
            -e DRY_RUN \
            -e PACKAGE_TYPE \
            -e PKG_DIR=/artifacts \
            -e UPLOAD_CHANNEL \
            -e UPLOAD_SUBFOLDER \
            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
            -v "${GITHUB_WORKSPACE}:/v" \
            -w /v \
            308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
            bash -c '.circleci/scripts/binary_upload.sh'
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
# Builds the conda package for Python 3.7 / CUDA 11.1 inside the
# pytorch/conda-builder:cuda11.1 image and publishes everything found in the
# runner's temporary "artifacts" directory as the "conda-py3_7-cuda11_1"
# artifact. Only runs when the repository owner is 'pytorch'.
conda-py3_7-cuda11_1-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    # Quoted so the version is always read as a string, never as a float.
    GPU_ARCH_VERSION: "11.1"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.7"
  steps:
    # Print the AMI id, instance id and instance type of the runner.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the caller account's ECR registry; the
    # login password fetch is attempted up to three times.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Hand the working directory back to the runner user before cleaning it.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Snapshot every GITHUB* variable into a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # pytorch/pytorch goes to ./pytorch and pytorch/builder (branch main) to
    # ./builder; both directories are bind-mounted into the build container.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    # Lines appended to GITHUB_ENV are taken literally as NAME=value, so the
    # value must not be wrapped in shell quotes: they would become part of it.
    - name: Set BUILD_SPLIT_CUDA
      run: |
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached builder container, populate the binary env file inside
    # it, then run the conda build script from the builder checkout.
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the directory that is actually bind-mounted at /artifacts so
        # it exists, owned by the runner user, before docker touches it.
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    # Runs even after a failed build so the artifacts directory is owned by
    # the runner user again.
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Fails the job when the build produced no files.
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_7-cuda11_1
        retention-days: 14
        if-no-files-found: error
        path: ${{ runner.temp }}/artifacts/*
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Testing: downloads the "conda-py3_7-cuda11_1" artifact produced by the
# build job, mounts it at /final_pkgs inside the conda-builder image on a GPU
# runner, generates /run.sh with binary_linux_test.sh and executes it.
conda-py3_7-cuda11_1-test:
  needs: conda-py3_7-cuda11_1-build
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    GPU_ARCH_VERSION: "11.1"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.7"
  steps:
    # Runner diagnostics: AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # docker login against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Reset ownership of the workspace, then wipe and recreate it.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable into a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Fetch the packages built by the build job into the temp artifacts dir.
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_7-cuda11_1
        path: "${{ runner.temp }}/artifacts/"
    # Sources are checked out side by side: ./pytorch and ./builder.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        path: pytorch
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        path: builder
        ref: main
        submodules: recursive
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    # GPU_FLAG is exported through GITHUB_ENV and consumed, unquoted, by the
    # docker run command in the test step below.
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached container, populate the binary env file, generate the
    # test script at /run.sh and run it with the env file sourced.
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    # Teardown: each of the remaining steps runs regardless of earlier
    # failures.
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Uploading: after the test job succeeds, downloads the
# "conda-py3_7-cuda11_1" artifact and runs .circleci/scripts/binary_upload.sh
# inside a miniconda3 container with the artifacts mounted at /artifacts.
conda-py3_7-cuda11_1-upload:
  needs: conda-py3_7-cuda11_1-test
  if: ${{ github.repository_owner == 'pytorch' }}
  # self hosted runner to download ec2 artifacts
  runs-on: linux.2xlarge
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    GPU_ARCH_VERSION: "11.1"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.7"
  steps:
    # Runner diagnostics: AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # docker login against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Reset ownership of the workspace, then wipe and recreate it.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable into a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Checked out at the workspace root, which is mounted at /v for the
    # upload container and is where the later script paths are resolved.
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_7-cuda11_1
        path: "${{ runner.temp }}/artifacts/"
    # DRY_RUN is exported as "disabled" only for pushes to the nightly branch
    # or to tags outside the ciflow/ namespace.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    # UPLOAD_CHANNEL is exported as "test" only when the pushed non-ciflow
    # tag name matches the -rc<digit> glob.
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    # The container receives PKG_DIR=/artifacts explicitly on the docker
    # command line; the other variables are forwarded from this step's
    # environment by name.
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    # Teardown: each of the remaining steps runs regardless of earlier
    # failures.
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Builds the conda package for Python 3.7 / CUDA 11.3 inside the
# pytorch/conda-builder:cuda11.3 image and publishes everything found in the
# runner's temporary "artifacts" directory as the "conda-py3_7-cuda11_3"
# artifact. Only runs when the repository owner is 'pytorch'.
conda-py3_7-cuda11_3-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    # Quoted so the version is always read as a string, never as a float.
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.7"
  steps:
    # Print the AMI id, instance id and instance type of the runner.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the caller account's ECR registry; the
    # login password fetch is attempted up to three times.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Hand the working directory back to the runner user before cleaning it.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Snapshot every GITHUB* variable into a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # pytorch/pytorch goes to ./pytorch and pytorch/builder (branch main) to
    # ./builder; both directories are bind-mounted into the build container.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    # Lines appended to GITHUB_ENV are taken literally as NAME=value, so the
    # value must not be wrapped in shell quotes: they would become part of it.
    - name: Set BUILD_SPLIT_CUDA
      run: |
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached builder container, populate the binary env file inside
    # it, then run the conda build script from the builder checkout.
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the directory that is actually bind-mounted at /artifacts so
        # it exists, owned by the runner user, before docker touches it.
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    # Runs even after a failed build so the artifacts directory is owned by
    # the runner user again.
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Fails the job when the build produced no files.
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_7-cuda11_3
        retention-days: 14
        if-no-files-found: error
        path: ${{ runner.temp }}/artifacts/*
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Testing: downloads the "conda-py3_7-cuda11_3" artifact produced by the
# build job, mounts it at /final_pkgs inside the conda-builder image on a GPU
# runner, generates /run.sh with binary_linux_test.sh and executes it.
conda-py3_7-cuda11_3-test:
  needs: conda-py3_7-cuda11_3-build
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.7"
  steps:
    # Runner diagnostics: AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # docker login against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Reset ownership of the workspace, then wipe and recreate it.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable into a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Fetch the packages built by the build job into the temp artifacts dir.
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_7-cuda11_3
        path: "${{ runner.temp }}/artifacts/"
    # Sources are checked out side by side: ./pytorch and ./builder.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        path: pytorch
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        path: builder
        ref: main
        submodules: recursive
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    # GPU_FLAG is exported through GITHUB_ENV and consumed, unquoted, by the
    # docker run command in the test step below.
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached container, populate the binary env file, generate the
    # test script at /run.sh and run it with the env file sourced.
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    # Teardown: each of the remaining steps runs regardless of earlier
    # failures.
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Uploading: after the test job succeeds, downloads the
# "conda-py3_7-cuda11_3" artifact and runs .circleci/scripts/binary_upload.sh
# inside a miniconda3 container with the artifacts mounted at /artifacts.
conda-py3_7-cuda11_3-upload:
  needs: conda-py3_7-cuda11_3-test
  if: ${{ github.repository_owner == 'pytorch' }}
  # self hosted runner to download ec2 artifacts
  runs-on: linux.2xlarge
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.7"
  steps:
    # Runner diagnostics: AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # docker login against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Reset ownership of the workspace, then wipe and recreate it.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable into a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Checked out at the workspace root, which is mounted at /v for the
    # upload container and is where the later script paths are resolved.
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_7-cuda11_3
        path: "${{ runner.temp }}/artifacts/"
    # DRY_RUN is exported as "disabled" only for pushes to the nightly branch
    # or to tags outside the ciflow/ namespace.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    # UPLOAD_CHANNEL is exported as "test" only when the pushed non-ciflow
    # tag name matches the -rc<digit> glob.
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    # The container receives PKG_DIR=/artifacts explicitly on the docker
    # command line; the other variables are forwarded from this step's
    # environment by name.
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    # Teardown: each of the remaining steps runs regardless of earlier
    # failures.
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Build job for the conda package targeting Python 3.7 / CUDA 11.5.
# Prepares the runner, checks out pytorch and pytorch/builder side by side,
# runs /builder/conda/build.sh inside ${DOCKER_IMAGE}, and publishes the
# contents of ${RUNNER_TEMP}/artifacts as the `conda-py3_7-cuda11_5` artifact.
# ALPINE_IMAGE, BINARY_ENV_FILE, PYTORCH_ROOT, etc. come from the
# workflow-level `env`.
conda-py3_7-cuda11_5-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    # Quoted so the version can never be re-typed as a float by a YAML loader.
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.7"
  steps:
    # Log the AMI id, instance id and instance type of the runner.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so the result does not depend on the configured output format.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pull the alpine helper image (up to three attempts) and use it to hand
    # the working directory back to the runner user.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # pytorch/pytorch goes to ./pytorch: the PR head commit on pull_request
    # events, otherwise the commit that triggered the run.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    # pytorch/builder (branch `main`) goes to ./builder.
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    # NOTE(review): the single quotes sit inside the double-quoted string, so
    # the value appended to GITHUB_ENV is presumably the literal 'ON' including
    # the quotes. Confirm the consumers of BUILD_SPLIT_CUDA expect that.
    - name: Set BUILD_SPLIT_CUDA
      run: |
        echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached builder container, then run the env-population script
    # and the conda build inside it. Each bare `-e NAME` forwards the
    # runner-side value of NAME into the container.
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the directory that is actually bind-mounted at /artifacts
        # below (previously a stray ./artifacts was created in the workspace).
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    # Runs even after a failed build so the artifacts directory is owned by
    # the runner user again.
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Publish everything in the artifacts directory; an empty directory is an
    # error.
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_7-cuda11_5
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
# Test job for the conda package targeting Python 3.7 / CUDA 11.5.
# Runs on a GPU runner after the matching build job, downloads the
# `conda-py3_7-cuda11_5` artifact, and executes the script generated by
# .circleci/scripts/binary_linux_test.sh inside ${DOCKER_IMAGE}.
conda-py3_7-cuda11_5-test:  # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_7-cuda11_5-build
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    # Quoted so the version can never be re-typed as a float by a YAML loader.
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.7"
  steps:
    # Log the AMI id, instance id and instance type of the runner.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so the result does not depend on the configured output format.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pull the alpine helper image (up to three attempts) and use it to hand
    # the working directory back to the runner user.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Fetch the package produced by the build job into the runner temp dir.
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_7-cuda11_5
        path: "${{ runner.temp }}/artifacts/"
    # pytorch/pytorch goes to ./pytorch: the PR head commit on pull_request
    # events, otherwise the commit that triggered the run.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    # pytorch/builder (branch `main`) goes to ./builder.
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    # Exports GPU_FLAG for later steps; the test step splices it into its
    # `docker run` command line.
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached container with the downloaded artifacts mounted at
    # /final_pkgs, generate /run.sh with binary_linux_test.sh, then execute it.
    # Each bare `-e NAME` forwards the runner-side value of NAME.
    - name: Test PyTorch binary
      run: |
        set -x
        # GPU_FLAG is intentionally unquoted so "--gpus all" splits into two words
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
# Upload job for the conda package targeting Python 3.7 / CUDA 11.5.
# Runs after the matching test job, downloads the `conda-py3_7-cuda11_5`
# artifact and hands it to .circleci/scripts/binary_upload.sh inside a
# miniconda container. DRY_RUN and UPLOAD_CHANNEL are only exported for the
# push events selected by the `if:` conditions below.
conda-py3_7-cuda11_5-upload:  # Uploading
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_7-cuda11_5-test
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    # Quoted so the version can never be re-typed as a float by a YAML loader.
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.7"
  steps:
    # Log the AMI id, instance id and instance type of the runner.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so the result does not depend on the configured output format.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pull the alpine helper image (up to three attempts) and use it to hand
    # the working directory back to the runner user.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # No `path` is given, so later steps reference repo scripts relative to
    # the workspace (presumably the checkout lands at the workspace root).
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    # Fetch the package produced by the build job into the runner temp dir.
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_7-cuda11_5
        path: "${{ runner.temp }}/artifacts/"
    # Despite the step name, the condition matches pushes to the `nightly`
    # branch as well as pushes of non-ciflow tags.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    # Only non-ciflow tag pushes reach this step; UPLOAD_CHANNEL is exported
    # only when the tag name carries an RC marker.
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # ref name contains an RC marker ("-rc" followed by a digit), e.g. v1.11.0-rc1
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    # Run the upload script in a miniconda container with the artifacts
    # mounted at /artifacts and the workspace mounted at /v. Credentials are
    # forwarded by name (`-e NAME`) so their values never appear in the
    # command line.
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        # PKG_DIR is overridden below with the in-container mount point
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
# Build job for the CPU-only conda package targeting Python 3.8.
# Prepares the runner, checks out pytorch and pytorch/builder side by side,
# runs /builder/conda/build.sh inside ${DOCKER_IMAGE}, and publishes the
# contents of ${RUNNER_TEMP}/artifacts as the `conda-py3_8-cpu` artifact.
# ALPINE_IMAGE, BINARY_ENV_FILE, PYTORCH_ROOT, etc. come from the
# workflow-level `env`.
conda-py3_8-cpu-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cpu
    GPU_ARCH_TYPE: cpu
    DOCKER_IMAGE: pytorch/conda-builder:cpu
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    # Log the AMI id, instance id and instance type of the runner.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so the result does not depend on the configured output format.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pull the alpine helper image (up to three attempts) and use it to hand
    # the working directory back to the runner user.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # pytorch/pytorch goes to ./pytorch: the PR head commit on pull_request
    # events, otherwise the commit that triggered the run.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    # pytorch/builder (branch `main`) goes to ./builder.
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached builder container, then run the env-population script
    # and the conda build inside it. Each bare `-e NAME` forwards the
    # runner-side value of NAME into the container.
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the directory that is actually bind-mounted at /artifacts
        # below (previously a stray ./artifacts was created in the workspace).
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    # Runs even after a failed build so the artifacts directory is owned by
    # the runner user again.
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Publish everything in the artifacts directory; an empty directory is an
    # error.
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_8-cpu
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
# Test job for the CPU-only conda package targeting Python 3.8.
# Runs after the matching build job, downloads the `conda-py3_8-cpu`
# artifact, and executes the script generated by
# .circleci/scripts/binary_linux_test.sh inside ${DOCKER_IMAGE}.
conda-py3_8-cpu-test:  # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_8-cpu-build
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cpu
    GPU_ARCH_TYPE: cpu
    DOCKER_IMAGE: pytorch/conda-builder:cpu
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    # Log the AMI id, instance id and instance type of the runner.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so the result does not depend on the configured output format.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pull the alpine helper image (up to three attempts) and use it to hand
    # the working directory back to the runner user.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Fetch the package produced by the build job into the runner temp dir.
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_8-cpu
        path: "${{ runner.temp }}/artifacts/"
    # pytorch/pytorch goes to ./pytorch: the PR head commit on pull_request
    # events, otherwise the commit that triggered the run.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    # pytorch/builder (branch `main`) goes to ./builder.
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached container with the downloaded artifacts mounted at
    # /final_pkgs, generate /run.sh with binary_linux_test.sh, then execute it.
    # No step in this job sets GPU_FLAG, so `${GPU_FLAG:-}` expands to nothing
    # unless the variable is provided from elsewhere.
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
# Upload job for the CPU-only conda package targeting Python 3.8.
# Runs after the matching test job, downloads the `conda-py3_8-cpu` artifact
# and hands it to .circleci/scripts/binary_upload.sh inside a miniconda
# container. DRY_RUN and UPLOAD_CHANNEL are only exported for the push events
# selected by the `if:` conditions below.
conda-py3_8-cpu-upload:  # Uploading
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_8-cpu-test
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cpu
    GPU_ARCH_TYPE: cpu
    DOCKER_IMAGE: pytorch/conda-builder:cpu
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    # Log the AMI id, instance id and instance type of the runner.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the caller account's ECR registry.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so the result does not depend on the configured output format.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pull the alpine helper image (up to three attempts) and use it to hand
    # the working directory back to the runner user.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # No `path` is given, so later steps reference repo scripts relative to
    # the workspace (presumably the checkout lands at the workspace root).
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    # Fetch the package produced by the build job into the runner temp dir.
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_8-cpu
        path: "${{ runner.temp }}/artifacts/"
    # Despite the step name, the condition matches pushes to the `nightly`
    # branch as well as pushes of non-ciflow tags.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    # Only non-ciflow tag pushes reach this step; UPLOAD_CHANNEL is exported
    # only when the tag name carries an RC marker.
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # ref name contains an RC marker ("-rc" followed by a digit), e.g. v1.11.0-rc1
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    # Run the upload script in a miniconda container with the artifacts
    # mounted at /artifacts and the workspace mounted at /v. Credentials are
    # forwarded by name (`-e NAME`) so their values never appear in the
    # command line.
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        # PKG_DIR is overridden below with the in-container mount point
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
# Builds the conda package for Python 3.8 / CUDA 10.2 inside the
# pytorch/conda-builder image and publishes the contents of
# ${RUNNER_TEMP}/artifacts as the `conda-py3_8-cuda10_2` artifact.
conda-py3_8-cuda10_2-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu102
    # Quoted so the version is always a string and never parsed as a float.
    GPU_ARCH_VERSION: "10.2"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    # Print the runner's AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pull the alpine helper image and take back ownership of the workspace.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # pytorch/pytorch goes to ./pytorch, pytorch/builder (main) to ./builder.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached builder container with both checkouts and the artifact
    # directory mounted, then run the env-population and conda build scripts.
    # The artifact directory is created at the path that is actually
    # bind-mounted (${RUNNER_TEMP}/artifacts) rather than inside the workspace.
    - name: Build PyTorch binary
      run: |
        set -x
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    # Hand the artifact directory back to the runner user.
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Fails the job when the artifact directory is empty.
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_8-cuda10_2
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Downloads the `conda-py3_8-cuda10_2` artifact produced by the build job and
# runs the generated binary test script against it on a GPU runner.
conda-py3_8-cuda10_2-test: # Testing
  runs-on: linux.4xlarge.nvidia.gpu
  needs: conda-py3_8-cuda10_2-build
  if: ${{ github.repository_owner == 'pytorch' }}
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu102
    GPU_ARCH_TYPE: cuda
    GPU_ARCH_VERSION: "10.2"
    DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
    DESIRED_PYTHON: "3.8"
    SKIP_ALL_TESTS: 1
  steps:
    # Print the runner's AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"

    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"

    # Pull the alpine helper image and take back ownership of the workspace.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"

    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"

    # Fetch the packages produced by the build job into the runner temp dir.
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_8-cuda10_2
        path: "${{ runner.temp }}/artifacts/"

    # pytorch/pytorch goes to ./pytorch, pytorch/builder (main) to ./builder.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        path: pytorch
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive

    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd

    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        path: builder
        ref: main
        submodules: recursive

    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd

    # Exports GPU_FLAG for later steps through the GITHUB_ENV file.
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"

    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"

    # Start a detached container with the downloaded packages mounted at
    # /final_pkgs, generate /run.sh with binary_linux_test.sh, then execute it.
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"

    # Always hold for active ssh sessions
    - name: Hold runner for 2 hours or until ssh sessions have drained
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh

    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Downloads the `conda-py3_8-cuda10_2` artifact after the test job succeeds
# and runs binary_upload.sh inside a miniconda container. DRY_RUN is only set
# to `disabled` for pushes to the nightly branch or to non-ciflow tags.
conda-py3_8-cuda10_2-upload: # Uploading
  needs: conda-py3_8-cuda10_2-test
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu102
    GPU_ARCH_TYPE: cuda
    GPU_ARCH_VERSION: "10.2"
    DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
    DESIRED_PYTHON: "3.8"
    SKIP_ALL_TESTS: 1
  steps:
    # Print the runner's AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"

    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"

    # Pull the alpine helper image and take back ownership of the workspace.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"

    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"

    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2

    # Fetch the packages produced by the build job into the runner temp dir.
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_8-cuda10_2
        path: "${{ runner.temp }}/artifacts/"

    # Only pushes to `nightly` or to a non-ciflow tag export DRY_RUN=disabled.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"

    # Tags matching *-rc[0-9]* switch the upload channel to `test`.
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi

    # The container sees the packages at /artifacts (PKG_DIR is overridden on
    # the docker command line) and the checkout at /v.
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'

    # Always hold for active ssh sessions
    - name: Hold runner for 2 hours or until ssh sessions have drained
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh

    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Builds the conda package for Python 3.8 / CUDA 11.1 inside the
# pytorch/conda-builder image and publishes the contents of
# ${RUNNER_TEMP}/artifacts as the `conda-py3_8-cuda11_1` artifact.
conda-py3_8-cuda11_1-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    # Quoted so the version is always a string and never parsed as a float.
    GPU_ARCH_VERSION: "11.1"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    # Print the runner's AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pull the alpine helper image and take back ownership of the workspace.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # pytorch/pytorch goes to ./pytorch, pytorch/builder (main) to ./builder.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    # The GITHUB_ENV file is read as literal NAME=value lines and is not
    # shell-parsed, so the value is written without quotes; quoting it would
    # make the quotes part of the variable's value.
    # NOTE(review): confirm the build scripts expect the bare value ON.
    - name: Set BUILD_SPLIT_CUDA
      run: |
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Start a detached builder container with both checkouts and the artifact
    # directory mounted, then run the env-population and conda build scripts.
    # The artifact directory is created at the path that is actually
    # bind-mounted (${RUNNER_TEMP}/artifacts) rather than inside the workspace.
    - name: Build PyTorch binary
      run: |
        set -x
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    # Hand the artifact directory back to the runner user.
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Fails the job when the artifact directory is empty.
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_8-cuda11_1
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Downloads the `conda-py3_8-cuda11_1` artifact produced by the build job and
# runs the generated binary test script against it on a GPU runner.
conda-py3_8-cuda11_1-test: # Testing
  runs-on: linux.4xlarge.nvidia.gpu
  needs: conda-py3_8-cuda11_1-build
  if: ${{ github.repository_owner == 'pytorch' }}
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    GPU_ARCH_TYPE: cuda
    GPU_ARCH_VERSION: "11.1"
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    DESIRED_PYTHON: "3.8"
    SKIP_ALL_TESTS: 1
  steps:
    # Print the runner's AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"

    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"

    # Pull the alpine helper image and take back ownership of the workspace.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"

    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"

    # Fetch the packages produced by the build job into the runner temp dir.
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_8-cuda11_1
        path: "${{ runner.temp }}/artifacts/"

    # pytorch/pytorch goes to ./pytorch, pytorch/builder (main) to ./builder.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        path: pytorch
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive

    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd

    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        path: builder
        ref: main
        submodules: recursive

    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd

    # Exports GPU_FLAG for later steps through the GITHUB_ENV file.
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"

    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"

    # Start a detached container with the downloaded packages mounted at
    # /final_pkgs, generate /run.sh with binary_linux_test.sh, then execute it.
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"

    # Always hold for active ssh sessions
    - name: Hold runner for 2 hours or until ssh sessions have drained
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh

    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Downloads the `conda-py3_8-cuda11_1` artifact after the test job succeeds
# and runs binary_upload.sh inside a miniconda container. DRY_RUN is only set
# to `disabled` for pushes to the nightly branch or to non-ciflow tags.
conda-py3_8-cuda11_1-upload: # Uploading
  needs: conda-py3_8-cuda11_1-test
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    GPU_ARCH_TYPE: cuda
    GPU_ARCH_VERSION: "11.1"
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    DESIRED_PYTHON: "3.8"
    SKIP_ALL_TESTS: 1
  steps:
    # Print the runner's AMI id, instance id and instance type.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"

    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"

    # Pull the alpine helper image and take back ownership of the workspace.
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

    # Start from an empty workspace directory.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"

    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    # Dump every GITHUB* variable to a per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"

    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2

    # Fetch the packages produced by the build job into the runner temp dir.
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_8-cuda11_1
        path: "${{ runner.temp }}/artifacts/"

    # Only pushes to `nightly` or to a non-ciflow tag export DRY_RUN=disabled.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"

    # Tags matching *-rc[0-9]* switch the upload channel to `test`.
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi

    # The container sees the packages at /artifacts (PKG_DIR is overridden on
    # the docker command line) and the checkout at /v.
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'

    # Always hold for active ssh sessions
    - name: Hold runner for 2 hours or until ssh sessions have drained
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh

    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
conda-py3_8-cuda11_3-build:
|
|
if: ${{ github.repository_owner == 'pytorch' }}
|
|
runs-on: linux.4xlarge
|
|
timeout-minutes: 240
|
|
env:
|
|
PACKAGE_TYPE: conda
|
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
|
# favor of GPU_ARCH_VERSION
|
|
DESIRED_CUDA: cu113
|
|
GPU_ARCH_VERSION: 11.3
|
|
GPU_ARCH_TYPE: cuda
|
|
DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
|
|
SKIP_ALL_TESTS: 1
|
|
DESIRED_PYTHON: "3.8"
|
|
steps:
|
|
# Print the runner's AMI id, instance id and instance type, each fetched from
# the EC2 instance metadata endpoint with curl.
- name: Display EC2 information
  shell: bash
  run: |
    set -euo pipefail
    function get_ec2_metadata() {
      # Pulled from instance metadata endpoint for EC2
      # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
      category=$1
      curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
    }
    echo "ami-id: $(get_ec2_metadata ami-id)"
    echo "instance-id: $(get_ec2_metadata instance-id)"
    echo "instance-type: $(get_ec2_metadata instance-type)"
|
|
- name: Log in to ECR
|
|
env:
|
|
AWS_RETRY_MODE: standard
|
|
AWS_MAX_ATTEMPTS: 5
|
|
run: |
|
|
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
|
|
retry () {
|
|
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
|
}
|
|
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
|
|
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
|
|
- name: Chown workspace
|
|
run: |
|
|
retry () {
|
|
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
|
}
|
|
retry docker pull "${ALPINE_IMAGE}"
|
|
# Ensure the working directory gets chowned back to the current user
|
|
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
|
- name: Clean workspace
|
|
run: |
|
|
rm -rf "${GITHUB_WORKSPACE}"
|
|
mkdir "${GITHUB_WORKSPACE}"
|
|
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
|
uses: seemethere/add-github-ssh-key@v1
|
|
with:
|
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
- name: Preserve github env variables for use in docker
|
|
run: |
|
|
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
|
- name: Checkout PyTorch
|
|
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
|
with:
|
|
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
|
submodules: recursive
|
|
path: pytorch
|
|
- name: Clean PyTorch checkout
|
|
run: |
|
|
# Remove any artifacts from the previous checkouts
|
|
git clean -fxd
|
|
working-directory: pytorch
|
|
- name: Checkout pytorch/builder
|
|
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
|
with:
|
|
ref: main
|
|
submodules: recursive
|
|
repository: pytorch/builder
|
|
path: builder
|
|
- name: Clean pytorch/builder checkout
|
|
run: |
|
|
# Remove any artifacts from the previous checkouts
|
|
git clean -fxd
|
|
working-directory: builder
|
|
- name: Set BUILD_SPLIT_CUDA
|
|
run: |
|
|
echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV"
|
|
- name: Pull Docker image
|
|
run: |
|
|
retry () {
|
|
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
|
}
|
|
retry docker pull "${DOCKER_IMAGE}"
|
|
- name: Build PyTorch binary
|
|
run: |
|
|
set -x
|
|
mkdir -p artifacts/
|
|
container_name=$(docker run \
|
|
-e BINARY_ENV_FILE \
|
|
-e BUILDER_ROOT \
|
|
-e BUILD_ENVIRONMENT \
|
|
-e BUILD_SPLIT_CUDA \
|
|
-e DESIRED_CUDA \
|
|
-e DESIRED_DEVTOOLSET \
|
|
-e DESIRED_PYTHON \
|
|
-e GPU_ARCH_TYPE \
|
|
-e GPU_ARCH_VERSION \
|
|
-e IS_GHA \
|
|
-e LIBTORCH_VARIANT \
|
|
-e PACKAGE_TYPE \
|
|
-e PYTORCH_FINAL_PACKAGE_DIR \
|
|
-e PYTORCH_ROOT \
|
|
-e SKIP_ALL_TESTS \
|
|
--tty \
|
|
--detach \
|
|
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
|
|
-v "${GITHUB_WORKSPACE}/builder:/builder" \
|
|
-v "${RUNNER_TEMP}/artifacts:/artifacts" \
|
|
-w / \
|
|
"${DOCKER_IMAGE}"
|
|
)
|
|
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
|
|
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
|
|
- name: Chown artifacts
|
|
if: always()
|
|
run: |
|
|
# Ensure the working directory gets chowned back to the current user
|
|
docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
|
- uses: seemethere/upload-artifact-s3@v3
|
|
with:
|
|
name: conda-py3_8-cuda11_3
|
|
retention-days: 14
|
|
if-no-files-found: error
|
|
path:
|
|
${{ runner.temp }}/artifacts/*
|
|
- name: Hold runner for 2 hours or until ssh sessions have drained
|
|
working-directory: pytorch/
|
|
# Always hold for active ssh sessions
|
|
if: always()
|
|
run: .github/scripts/wait_for_ssh_to_drain.sh
|
|
- name: Chown workspace
|
|
if: always()
|
|
run: |
|
|
# Ensure the working directory gets chowned back to the current user
|
|
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
|
- name: Kill containers, clean up images
|
|
if: always()
|
|
run: |
|
|
# ignore expansion of "docker ps -q" since it could be empty
|
|
# shellcheck disable=SC2046
|
|
docker stop $(docker ps -q) || true
|
|
# Prune all of the docker images
|
|
docker system prune -af
|
|
# Test job: downloads the "conda-py3_8-cuda11_3" artifact, mounts it at
# /final_pkgs in a ${DOCKER_IMAGE} container started with "--gpus all", and
# runs the /run.sh script written by .circleci/scripts/binary_linux_test.sh.
conda-py3_8-cuda11_3-test:  # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_8-cuda11_3-build
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    # Quoted so the version stays a string instead of being read as a YAML float
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # with grep/cut: this does not depend on the configured output format,
        # and a failing call is no longer hidden behind the pipeline's exit status.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_8-cuda11_3
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Test PyTorch binary
      run: |
        set -x
        # GPU_FLAG holds two words ("--gpus all") and must be word-split, hence
        # the unquoted expansion below.
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Upload job: downloads the "conda-py3_8-cuda11_3" artifact and runs
# .circleci/scripts/binary_upload.sh inside the miniconda3 tool image.
# DRY_RUN is set to "disabled" only for pushes to the nightly branch or to
# non-ciflow tags; UPLOAD_CHANNEL is set to "test" only for -rc tags.
conda-py3_8-cuda11_3-upload:  # Uploading
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_8-cuda11_3-test
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    # Quoted so the version stays a string instead of being read as a YAML float
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # with grep/cut: this does not depend on the configured output format,
        # and a failing call is no longer hidden behind the pipeline's exit status.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_8-cuda11_3
        path: "${{ runner.temp }}/artifacts/"
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        # PKG_DIR is overridden for the container because the artifacts are
        # mounted at /artifacts there.
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Build job: runs /builder/conda/build.sh inside the ${DOCKER_IMAGE} container
# for Python 3.8 / CUDA 11.5 and publishes the contents of
# ${RUNNER_TEMP}/artifacts as the "conda-py3_8-cuda11_5" artifact.
# NOTE: this workflow is generated (see the file header); mirror any change
# made here in the template it is generated from.
conda-py3_8-cuda11_5-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    # Quoted so the version stays a string instead of being read as a YAML float
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # with grep/cut: this does not depend on the configured output format,
        # and a failing call is no longer hidden behind the pipeline's exit status.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Set BUILD_SPLIT_CUDA
      run: |
        # The env file stores NAME=VALUE verbatim, so shell-style quotes around
        # the value would become part of it; write the bare value.
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the directory that is actually mounted at /artifacts below
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        # Populate the env file first, then source it for the actual build
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the artifacts directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_8-cuda11_5
        retention-days: 14
        if-no-files-found: error
        path: ${{ runner.temp }}/artifacts/*
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Test job: downloads the "conda-py3_8-cuda11_5" artifact, mounts it at
# /final_pkgs in a ${DOCKER_IMAGE} container started with "--gpus all", and
# runs the /run.sh script written by .circleci/scripts/binary_linux_test.sh.
conda-py3_8-cuda11_5-test:  # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_8-cuda11_5-build
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    # Quoted so the version stays a string instead of being read as a YAML float
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # with grep/cut: this does not depend on the configured output format,
        # and a failing call is no longer hidden behind the pipeline's exit status.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_8-cuda11_5
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Test PyTorch binary
      run: |
        set -x
        # GPU_FLAG holds two words ("--gpus all") and must be word-split, hence
        # the unquoted expansion below.
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Upload job: downloads the "conda-py3_8-cuda11_5" artifact and runs
# .circleci/scripts/binary_upload.sh inside the miniconda3 tool image.
# DRY_RUN is set to "disabled" only for pushes to the nightly branch or to
# non-ciflow tags; UPLOAD_CHANNEL is set to "test" only for -rc tags.
conda-py3_8-cuda11_5-upload:  # Uploading
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_8-cuda11_5-test
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    # Quoted so the version stays a string instead of being read as a YAML float
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.8"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # with grep/cut: this does not depend on the configured output format,
        # and a failing call is no longer hidden behind the pipeline's exit status.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_8-cuda11_5
        path: "${{ runner.temp }}/artifacts/"
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        # PKG_DIR is overridden for the container because the artifacts are
        # mounted at /artifacts there.
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Builds the CPU conda package for Python 3.9 inside the
# pytorch/conda-builder:cpu image and uploads the result as the
# `conda-py3_9-cpu` artifact.
conda-py3_9-cpu-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cpu
    GPU_ARCH_TYPE: cpu
    DOCKER_IMAGE: pytorch/conda-builder:cpu
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    # Prints the AMI id, instance id and instance type of the runner for
    # debugging; any failed metadata lookup aborts the step (`set -e`).
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Derives the account id from `aws sts get-caller-identity` and pipes an
    # ECR password into `docker login`. `retry` attempts the command up to
    # three times, sleeping 1s and then 2s between attempts.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pulls the Alpine helper image once, then reuses it (`--pull=never`) to
    # chown the current directory to the runner user.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace: delete it entirely and recreate it.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dumps every environment variable whose name starts with GITHUB into a
    # per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # On pull_request events the PR head commit is checked out; otherwise the
    # commit that triggered the workflow.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Starts a detached builder container with the pytorch and builder
    # checkouts plus the artifacts directory mounted, then runs the env
    # population script followed by the conda build script inside it.
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the host directory that is bind-mounted at /artifacts below,
        # so it exists (owned by the runner user) before the container starts.
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the artifacts directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Fails the job when the build produced no files (`if-no-files-found`).
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_9-cpu
        retention-days: 14
        if-no-files-found: error
        path:
          ${{ runner.temp }}/artifacts/*
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Testing: downloads the `conda-py3_9-cpu` artifact, mounts it at /final_pkgs
# in a pytorch/conda-builder:cpu container, and runs the test script that
# binary_linux_test.sh writes to /run.sh.
conda-py3_9-cpu-test:
  needs: conda-py3_9-cpu-build
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    DESIRED_PYTHON: '3.9'
    DOCKER_IMAGE: pytorch/conda-builder:cpu
    GPU_ARCH_TYPE: cpu
    PACKAGE_TYPE: conda
    SKIP_ALL_TESTS: 1
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cpu
  steps:
    # Runner diagnostics: AMI id, instance id and instance type.
    - shell: bash
      name: Display EC2 information
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # `docker login` against the account's ECR registry; the password fetch
    # goes through the three-attempt `retry` helper.
    - name: Log in to ECR
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
    # Pull the Alpine helper image, then chown the current directory with it.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Recreate the workspace directory from scratch.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: '[FB EMPLOYEES] Enable SSH (Click me for login details)'
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Snapshot all GITHUB* environment variables into a per-run file in /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Fetch the package uploaded under the name `conda-py3_9-cpu`.
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_9-cpu
        path: '${{ runner.temp }}/artifacts/'
    # PR head commit on pull_request events, triggering commit otherwise.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        path: pytorch
        submodules: recursive
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        path: builder
        submodules: recursive
        ref: main
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Launch a detached container (GPU_FLAG expands to nothing when unset),
    # populate the binary env file, generate /run.sh, then execute it.
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    # The remaining steps run unconditionally (`always()`), even after failures.
    - if: always()
      name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - if: always()
      name: Chown workspace
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - if: always()
      name: Kill containers, clean up images
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Uploading: after conda-py3_9-cpu-test succeeds, downloads the
# `conda-py3_9-cpu` artifact and runs binary_upload.sh inside the miniconda3
# tool image.
conda-py3_9-cpu-upload:
  needs: conda-py3_9-cpu-test
  if: ${{ github.repository_owner == 'pytorch' }}
  # self hosted runner to download ec2 artifacts
  runs-on: linux.2xlarge
  env:
    DESIRED_PYTHON: '3.9'
    DOCKER_IMAGE: pytorch/conda-builder:cpu
    GPU_ARCH_TYPE: cpu
    PACKAGE_TYPE: conda
    SKIP_ALL_TESTS: 1
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cpu
  steps:
    # Runner diagnostics: AMI id, instance id and instance type.
    - shell: bash
      name: Display EC2 information
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # `docker login` against the account's ECR registry; the password fetch
    # goes through the three-attempt `retry` helper.
    - name: Log in to ECR
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
    # Pull the Alpine helper image, then chown the current directory with it.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Recreate the workspace directory from scratch.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: '[FB EMPLOYEES] Enable SSH (Click me for login details)'
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Snapshot all GITHUB* environment variables into a per-run file in /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Checked out at the workspace root (no `path`), which is what the
    # relative script paths used by later steps resolve against.
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_9-cpu
        path: '${{ runner.temp }}/artifacts/'
    # Despite the step name, the condition also matches pushes to the
    # `nightly` branch, in addition to non-ciflow tag pushes.
    - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      name: Set DRY_RUN (only for tagged pushes)
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    # Non-ciflow tag pushes only; UPLOAD_CHANNEL is exported as `test` when
    # the ref name matches the `-rc<digit>` glob.
    - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      name: Set UPLOAD_CHANNEL (only for tagged pushes)
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    # Credentials are forwarded into the container by name; PKG_DIR is set
    # explicitly to the in-container mount point /artifacts.
    - name: Upload binaries
      env:
        UPLOAD_SUBFOLDER: '${{ env.DESIRED_CUDA }}'
        PKG_DIR: '${{ runner.temp }}/artifacts'
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    # The remaining steps run unconditionally (`always()`), even after failures.
    - if: always()
      name: Hold runner for 2 hours or until ssh sessions have drained
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - if: always()
      name: Chown workspace
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - if: always()
      name: Kill containers, clean up images
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Builds the CUDA 10.2 conda package for Python 3.9 inside the
# pytorch/conda-builder:cuda10.2 image and uploads the result as the
# `conda-py3_9-cuda10_2` artifact.
conda-py3_9-cuda10_2-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu102
    # Quoted so the version is a string rather than an implicitly typed float.
    GPU_ARCH_VERSION: "10.2"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    # Prints the AMI id, instance id and instance type of the runner for
    # debugging; any failed metadata lookup aborts the step (`set -e`).
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Derives the account id from `aws sts get-caller-identity` and pipes an
    # ECR password into `docker login`. `retry` attempts the command up to
    # three times, sleeping 1s and then 2s between attempts.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pulls the Alpine helper image once, then reuses it (`--pull=never`) to
    # chown the current directory to the runner user.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace: delete it entirely and recreate it.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dumps every environment variable whose name starts with GITHUB into a
    # per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # On pull_request events the PR head commit is checked out; otherwise the
    # commit that triggered the workflow.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Starts a detached builder container with the pytorch and builder
    # checkouts plus the artifacts directory mounted, then runs the env
    # population script followed by the conda build script inside it.
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the host directory that is bind-mounted at /artifacts below,
        # so it exists (owned by the runner user) before the container starts.
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the artifacts directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Fails the job when the build produced no files (`if-no-files-found`).
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_9-cuda10_2
        retention-days: 14
        if-no-files-found: error
        path:
          ${{ runner.temp }}/artifacts/*
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Testing: on a GPU runner, downloads the `conda-py3_9-cuda10_2` artifact,
# mounts it at /final_pkgs in a pytorch/conda-builder:cuda10.2 container
# started with `--gpus all`, and runs the test script that
# binary_linux_test.sh writes to /run.sh.
conda-py3_9-cuda10_2-test:
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_9-cuda10_2-build
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu102
    # Quoted so the version is a string rather than an implicitly typed float.
    GPU_ARCH_VERSION: "10.2"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    # Prints the AMI id, instance id and instance type of the runner for
    # debugging; any failed metadata lookup aborts the step (`set -e`).
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Derives the account id from `aws sts get-caller-identity` and pipes an
    # ECR password into `docker login`. `retry` attempts the command up to
    # three times, sleeping 1s and then 2s between attempts.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pulls the Alpine helper image once, then reuses it (`--pull=never`) to
    # chown the current directory to the runner user.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace: delete it entirely and recreate it.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dumps every environment variable whose name starts with GITHUB into a
    # per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Fetch the package uploaded under the name `conda-py3_9-cuda10_2`.
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_9-cuda10_2
        path: "${{ runner.temp }}/artifacts/"
    # On pull_request events the PR head commit is checked out; otherwise the
    # commit that triggered the workflow.
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    # Exports GPU_FLAG through GITHUB_ENV so the later `docker run` in
    # "Test PyTorch binary" receives `--gpus all`.
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Launches a detached container, populates the binary env file, generates
    # /run.sh and executes it. GPU_FLAG is intentionally unquoted so that it
    # word-splits into separate docker arguments (hence the shellcheck
    # suppression).
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Uploading: after conda-py3_9-cuda10_2-test succeeds, downloads the
# `conda-py3_9-cuda10_2` artifact and runs binary_upload.sh inside the
# miniconda3 tool image.
conda-py3_9-cuda10_2-upload:
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_9-cuda10_2-test
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu102
    # Quoted so the version is a string rather than an implicitly typed float.
    GPU_ARCH_VERSION: "10.2"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    # Prints the AMI id, instance id and instance type of the runner for
    # debugging; any failed metadata lookup aborts the step (`set -e`).
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Derives the account id from `aws sts get-caller-identity` and pipes an
    # ECR password into `docker login`. `retry` attempts the command up to
    # three times, sleeping 1s and then 2s between attempts.
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    # Pulls the Alpine helper image once, then reuses it (`--pull=never`) to
    # chown the current directory to the runner user.
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace: delete it entirely and recreate it.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Dumps every environment variable whose name starts with GITHUB into a
    # per-run file under /tmp.
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Checked out at the workspace root (no `path`), which is what the
    # relative script paths used by later steps resolve against.
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_9-cuda10_2
        path: "${{ runner.temp }}/artifacts/"
    # Despite the step name, the condition also matches pushes to the
    # `nightly` branch, in addition to non-ciflow tag pushes.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    # Non-ciflow tag pushes only; UPLOAD_CHANNEL is exported as `test` when
    # the ref name matches the `-rc<digit>` glob.
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    # Credentials are forwarded into the container by name; PKG_DIR is set
    # explicitly to the in-container mount point /artifacts.
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Build job: builds the conda package for Python 3.9 / CUDA 11.1 inside
# ${DOCKER_IMAGE} and publishes ${RUNNER_TEMP}/artifacts as the
# `conda-py3_9-cuda11_1` artifact.
# NOTE(review): this workflow is generated (see the file header); mirror any
# change made here in .github/templates/linux_binary_build_workflow.yml.j2.
conda-py3_9-cuda11_1-build:
  # Only run for the upstream `pytorch` organisation.
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    # Quoted so YAML keeps the version as a string rather than a float.
    GPU_ARCH_VERSION: "11.1"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so a failing `aws` call aborts the step here.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Set BUILD_SPLIT_CUDA
      run: |
        # Values written to GITHUB_ENV are taken literally (no shell quote
        # removal), so write a bare ON rather than 'ON'.
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the host directory that is bind-mounted as /artifacts below
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_9-cuda11_1
        retention-days: 14
        if-no-files-found: error
        path: ${{ runner.temp }}/artifacts/*
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Test job: downloads the `conda-py3_9-cuda11_1` artifact produced by the
# build job and runs the generated binary test script inside ${DOCKER_IMAGE}
# on a GPU runner.
# NOTE(review): this workflow is generated (see the file header); mirror any
# change made here in .github/templates/linux_binary_build_workflow.yml.j2.
conda-py3_9-cuda11_1-test:  # Testing
  # Only run for the upstream `pytorch` organisation.
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_9-cuda11_1-build
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    # Quoted so YAML keeps the version as a string rather than a float.
    GPU_ARCH_VERSION: "11.1"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so a failing `aws` call aborts the step here.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_9-cuda11_1
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Upload job: downloads the `conda-py3_9-cuda11_1` artifact and runs
# .circleci/scripts/binary_upload.sh inside the miniconda3 tool image.
# DRY_RUN is only set to `disabled` for pushes to `nightly` or to
# non-ciflow tags.
# NOTE(review): this workflow is generated (see the file header); mirror any
# change made here in .github/templates/linux_binary_build_workflow.yml.j2.
conda-py3_9-cuda11_1-upload:  # Uploading
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  # Only run for the upstream `pytorch` organisation.
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_9-cuda11_1-test
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    # Quoted so YAML keeps the version as a string rather than a float.
    GPU_ARCH_VERSION: "11.1"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so a failing `aws` call aborts the step here.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_9-cuda11_1
        path: "${{ runner.temp }}/artifacts/"
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Build job: builds the conda package for Python 3.9 / CUDA 11.3 inside
# ${DOCKER_IMAGE} and publishes ${RUNNER_TEMP}/artifacts as the
# `conda-py3_9-cuda11_3` artifact.
# NOTE(review): this workflow is generated (see the file header); mirror any
# change made here in .github/templates/linux_binary_build_workflow.yml.j2.
conda-py3_9-cuda11_3-build:
  # Only run for the upstream `pytorch` organisation.
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    # Quoted so YAML keeps the version as a string rather than a float.
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so a failing `aws` call aborts the step here.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Set BUILD_SPLIT_CUDA
      run: |
        # Values written to GITHUB_ENV are taken literally (no shell quote
        # removal), so write a bare ON rather than 'ON'.
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the host directory that is bind-mounted as /artifacts below
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_9-cuda11_3
        retention-days: 14
        if-no-files-found: error
        path: ${{ runner.temp }}/artifacts/*
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Test job: downloads the `conda-py3_9-cuda11_3` artifact produced by the
# build job and runs the generated binary test script inside ${DOCKER_IMAGE}
# on a GPU runner.
# NOTE(review): this workflow is generated (see the file header); mirror any
# change made here in .github/templates/linux_binary_build_workflow.yml.j2.
conda-py3_9-cuda11_3-test:  # Testing
  # Only run for the upstream `pytorch` organisation.
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_9-cuda11_3-build
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    # Quoted so YAML keeps the version as a string rather than a float.
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so a failing `aws` call aborts the step here.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_9-cuda11_3
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Upload job: downloads the `conda-py3_9-cuda11_3` artifact and runs
# .circleci/scripts/binary_upload.sh inside the miniconda3 tool image.
# DRY_RUN is only set to `disabled` for pushes to `nightly` or to
# non-ciflow tags.
# NOTE(review): this workflow is generated (see the file header); mirror any
# change made here in .github/templates/linux_binary_build_workflow.yml.j2.
conda-py3_9-cuda11_3-upload:  # Uploading
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  # Only run for the upstream `pytorch` organisation.
  if: ${{ github.repository_owner == 'pytorch' }}
  needs: conda-py3_9-cuda11_3-test
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    # Quoted so YAML keeps the version as a string rather than a float.
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Ask the CLI for the account id directly instead of scraping its JSON
        # output, so a failing `aws` call aborts the step here.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download Build Artifacts
      with:
        name: conda-py3_9-cuda11_3
        path: "${{ runner.temp }}/artifacts/"
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Builds the conda package for Python 3.9 / CUDA 11.5 inside the
# pytorch/conda-builder:cuda11.5 container and publishes the result as the
# "conda-py3_9-cuda11_5" workflow artifact.
# NOTE: this workflow is generated (see the header of this file); mirror any
# change in .github/templates/linux_binary_build_workflow.yml.j2.
conda-py3_9-cuda11_5-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    # Quoted so the version is carried as a string, not parsed as a float.
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    # Print basic facts about the EC2 host this job landed on.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # pytorch/pytorch is checked out into ./pytorch (mounted at /pytorch below).
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    # pytorch/builder is checked out into ./builder (mounted at /builder below).
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    # Lines appended to $GITHUB_ENV are taken as NAME=value verbatim; quotes are
    # not stripped. The value is therefore written bare so later steps see ON
    # instead of the literal 'ON'.
    - name: Set BUILD_SPLIT_CUDA
      run: |
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Starts a detached builder container, writes the binary env file, then
    # runs the conda build. The host directory that is bind-mounted at
    # /artifacts is created first so it exists (owned by the runner user)
    # before docker mounts it.
    - name: Build PyTorch binary
      run: |
        set -x
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Publish everything under the artifacts directory; an empty directory
    # fails the step (if-no-files-found: error).
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_9-cuda11_5
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Downloads the "conda-py3_9-cuda11_5" artifact produced by the build job and
# runs the generated binary test script against it inside the builder image on
# a GPU runner.
conda-py3_9-cuda11_5-test:  # Testing
  needs: conda-py3_9-cuda11_5-build
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    # Print basic facts about the EC2 host this job landed on.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Fetch the package built by conda-py3_9-cuda11_5-build.
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_9-cuda11_5
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    # Exports GPU_FLAG, which the test step splices into its docker run call.
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Starts a detached container with the downloaded artifacts mounted at
    # /final_pkgs, writes the binary env file, generates /run.sh and runs it.
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Runs after the test job: downloads the "conda-py3_9-cuda11_5" artifact and
# hands it to .circleci/scripts/binary_upload.sh inside the miniconda3 image.
# DRY_RUN is set to "disabled" only for pushes to nightly or to non-ciflow
# tags; UPLOAD_CHANNEL is set to "test" only for release-candidate tags.
conda-py3_9-cuda11_5-upload:  # Uploading
  needs: conda-py3_9-cuda11_5-test
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.9"
  steps:
    # Print basic facts about the EC2 host this job landed on.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Checked out at the workspace root, which is mounted at /v for the upload.
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_9-cuda11_5
        path: "${{ runner.temp }}/artifacts/"
    # Condition: push to refs/heads/nightly, or push of a tag outside ciflow/.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    # Credentials come from repository secrets and are forwarded by name into
    # the container; PKG_DIR is overridden to the in-container mount point.
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Builds the CPU-only conda package for Python 3.10 inside the
# pytorch/conda-builder:cpu container and publishes the result as the
# "conda-py3_10-cpu" workflow artifact.
# NOTE: this workflow is generated (see the header of this file); mirror any
# change in .github/templates/linux_binary_build_workflow.yml.j2.
conda-py3_10-cpu-build:
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cpu
    GPU_ARCH_TYPE: cpu
    DOCKER_IMAGE: pytorch/conda-builder:cpu
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    # Print basic facts about the EC2 host this job landed on.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # pytorch/pytorch is checked out into ./pytorch (mounted at /pytorch below).
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    # pytorch/builder is checked out into ./builder (mounted at /builder below).
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Starts a detached builder container, writes the binary env file, then
    # runs the conda build. The host directory that is bind-mounted at
    # /artifacts is created first so it exists (owned by the runner user)
    # before docker mounts it.
    - name: Build PyTorch binary
      run: |
        set -x
        mkdir -p "${RUNNER_TEMP}/artifacts"
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Publish everything under the artifacts directory; an empty directory
    # fails the step (if-no-files-found: error).
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_10-cpu
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Downloads the "conda-py3_10-cpu" artifact produced by the build job and runs
# the generated binary test script against it inside the builder image.
conda-py3_10-cpu-test:  # Testing
  needs: conda-py3_10-cpu-build
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cpu
    GPU_ARCH_TYPE: cpu
    DOCKER_IMAGE: pytorch/conda-builder:cpu
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    # Print basic facts about the EC2 host this job landed on.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Fetch the package built by conda-py3_10-cpu-build.
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cpu
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    # Starts a detached container with the downloaded artifacts mounted at
    # /final_pkgs, writes the binary env file, generates /run.sh and runs it.
    # No step in this job sets GPU_FLAG, so ${GPU_FLAG:-} expands to nothing
    # unless the runner environment provides it.
    - name: Test PyTorch binary
      run: |
        set -x
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Runs after the test job: downloads the "conda-py3_10-cpu" artifact and hands
# it to .circleci/scripts/binary_upload.sh inside the miniconda3 image.
# DRY_RUN is set to "disabled" only for pushes to nightly or to non-ciflow
# tags; UPLOAD_CHANNEL is set to "test" only for release-candidate tags.
conda-py3_10-cpu-upload:  # Uploading
  needs: conda-py3_10-cpu-test
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cpu
    GPU_ARCH_TYPE: cpu
    DOCKER_IMAGE: pytorch/conda-builder:cpu
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    # Print basic facts about the EC2 host this job landed on.
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    # Authenticate docker against the account's ECR registry (up to 3 tries).
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    # Start from an empty workspace.
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    # Checked out at the workspace root, which is mounted at /v for the upload.
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cpu
        path: "${{ runner.temp }}/artifacts/"
    # Condition: push to refs/heads/nightly, or push of a tag outside ciflow/.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # reference ends with an RC suffix
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    # Credentials come from repository secrets and are forwarded by name into
    # the container; PKG_DIR is overridden to the in-container mount point.
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
# Build job: runs /builder/conda/build.sh inside the conda-builder container
# (Python 3.10, CUDA 10.2) and uploads ${RUNNER_TEMP}/artifacts as the
# "conda-py3_10-cuda10_2" artifact.
conda-py3_10-cuda10_2-build:
  runs-on: linux.4xlarge
  if: ${{ github.repository_owner == 'pytorch' }}
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: DESIRED_CUDA is a legacy variable that should eventually be
    # dropped in favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu102
    GPU_ARCH_VERSION: 10.2
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one category from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for field in ami-id instance-id instance-type; do
          echo "${field}: $(get_ec2_metadata "${field}")"
        done
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity | grep Account | cut -f4 -d\")
        # Re-run the given command up to two more times, pausing 1s then 2s.
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        path: pytorch
        submodules: recursive
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        ref: main
        path: builder
        submodules: recursive
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Build PyTorch binary
      run: |
        set -x
        mkdir -p artifacts/
        # Start a detached builder container, then drive it with docker exec.
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Hand ownership of the artifacts directory back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_10-cuda10_2
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs unconditionally so active ssh sessions are always waited for
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Remove every unused docker image
        docker system prune -af
# Test job: downloads the "conda-py3_10-cuda10_2" artifact and runs the
# script generated by binary_linux_test.sh inside a GPU-enabled container.
conda-py3_10-cuda10_2-test:  # Testing
  needs: conda-py3_10-cuda10_2-build
  runs-on: linux.4xlarge.nvidia.gpu
  if: ${{ github.repository_owner == 'pytorch' }}
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: DESIRED_CUDA is a legacy variable that should eventually be
    # dropped in favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu102
    GPU_ARCH_VERSION: 10.2
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one category from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for field in ami-id instance-id instance-type; do
          echo "${field}: $(get_ec2_metadata "${field}")"
        done
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity | grep Account | cut -f4 -d\")
        # Re-run the given command up to two more times, pausing 1s then 2s.
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cuda10_2
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        path: pytorch
        submodules: recursive
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        ref: main
        path: builder
        submodules: recursive
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Test PyTorch binary
      run: |
        set -x
        # GPU_FLAG is left unquoted on purpose so "--gpus all" splits into
        # two arguments.
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Write the test script to /run.sh, then execute it
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs unconditionally so active ssh sessions are always waited for
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Remove every unused docker image
        docker system prune -af
# Upload job: downloads the "conda-py3_10-cuda10_2" artifact and runs
# .circleci/scripts/binary_upload.sh inside a miniconda3 container.
conda-py3_10-cuda10_2-upload:  # Uploading
  needs: conda-py3_10-cuda10_2-test
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  if: ${{ github.repository_owner == 'pytorch' }}
  env:
    PACKAGE_TYPE: conda
    # TODO: DESIRED_CUDA is a legacy variable that should eventually be
    # dropped in favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu102
    GPU_ARCH_VERSION: 10.2
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one category from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for field in ami-id instance-id instance-type; do
          echo "${field}: $(get_ec2_metadata "${field}")"
        done
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity | grep Account | cut -f4 -d\")
        # Re-run the given command up to two more times, pausing 1s then 2s.
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cuda10_2
        path: "${{ runner.temp }}/artifacts/"
    # DRY_RUN is set to "disabled" only for pushes to the nightly branch or to
    # non-ciflow tags.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # Release-candidate refs (containing "-rc<digit>") go to the test channel
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # These secrets should be blank when running on pull_request events
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        # PKG_DIR is overridden to the in-container mount point of the artifacts
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs unconditionally so active ssh sessions are always waited for
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Remove every unused docker image
        docker system prune -af
# Build job: runs /builder/conda/build.sh inside the conda-builder container
# (Python 3.10, CUDA 11.1, BUILD_SPLIT_CUDA enabled) and uploads
# ${RUNNER_TEMP}/artifacts as the "conda-py3_10-cuda11_1" artifact.
conda-py3_10-cuda11_1-build:
  runs-on: linux.4xlarge
  if: ${{ github.repository_owner == 'pytorch' }}
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: DESIRED_CUDA is a legacy variable that should eventually be
    # dropped in favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    GPU_ARCH_VERSION: 11.1
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one category from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for field in ami-id instance-id instance-type; do
          echo "${field}: $(get_ec2_metadata "${field}")"
        done
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity | grep Account | cut -f4 -d\")
        # Re-run the given command up to two more times, pausing 1s then 2s.
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        path: pytorch
        submodules: recursive
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        ref: main
        path: builder
        submodules: recursive
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Set BUILD_SPLIT_CUDA
      run: |
        # GITHUB_ENV entries are taken literally (no shell unquoting), so the
        # value is written without quote characters; wrapping it in single
        # quotes would export the four-character string 'ON'.
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Build PyTorch binary
      run: |
        set -x
        mkdir -p artifacts/
        # Start a detached builder container, then drive it with docker exec.
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Hand ownership of the artifacts directory back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_10-cuda11_1
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs unconditionally so active ssh sessions are always waited for
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Remove every unused docker image
        docker system prune -af
# Test job: downloads the "conda-py3_10-cuda11_1" artifact and runs the
# script generated by binary_linux_test.sh inside a GPU-enabled container.
conda-py3_10-cuda11_1-test:  # Testing
  needs: conda-py3_10-cuda11_1-build
  runs-on: linux.4xlarge.nvidia.gpu
  if: ${{ github.repository_owner == 'pytorch' }}
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: DESIRED_CUDA is a legacy variable that should eventually be
    # dropped in favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    GPU_ARCH_VERSION: 11.1
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one category from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for field in ami-id instance-id instance-type; do
          echo "${field}: $(get_ec2_metadata "${field}")"
        done
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity | grep Account | cut -f4 -d\")
        # Re-run the given command up to two more times, pausing 1s then 2s.
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cuda11_1
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        path: pytorch
        submodules: recursive
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        ref: main
        path: builder
        submodules: recursive
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Test PyTorch binary
      run: |
        set -x
        # GPU_FLAG is left unquoted on purpose so "--gpus all" splits into
        # two arguments.
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Write the test script to /run.sh, then execute it
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs unconditionally so active ssh sessions are always waited for
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Remove every unused docker image
        docker system prune -af
# Upload job: downloads the "conda-py3_10-cuda11_1" artifact and runs
# .circleci/scripts/binary_upload.sh inside a miniconda3 container.
conda-py3_10-cuda11_1-upload:  # Uploading
  needs: conda-py3_10-cuda11_1-test
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  if: ${{ github.repository_owner == 'pytorch' }}
  env:
    PACKAGE_TYPE: conda
    # TODO: DESIRED_CUDA is a legacy variable that should eventually be
    # dropped in favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu111
    GPU_ARCH_VERSION: 11.1
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one category from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for field in ami-id instance-id instance-type; do
          echo "${field}: $(get_ec2_metadata "${field}")"
        done
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity | grep Account | cut -f4 -d\")
        # Re-run the given command up to two more times, pausing 1s then 2s.
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cuda11_1
        path: "${{ runner.temp }}/artifacts/"
    # DRY_RUN is set to "disabled" only for pushes to the nightly branch or to
    # non-ciflow tags.
    - name: Set DRY_RUN (only for tagged pushes)
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # Release-candidate refs (containing "-rc<digit>") go to the test channel
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # These secrets should be blank when running on pull_request events
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        # PKG_DIR is overridden to the in-container mount point of the artifacts
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs unconditionally so active ssh sessions are always waited for
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Remove every unused docker image
        docker system prune -af
conda-py3_10-cuda11_3-build:
  # Build job: runs /builder/conda/build.sh for Python 3.10 / CUDA 11.3 inside
  # the DOCKER_IMAGE container, then hands everything under
  # ${RUNNER_TEMP}/artifacts to the upload-artifact-s3 action under the name
  # conda-py3_10-cuda11_3.
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    # Quoted so the version stays a string rather than being read as a float.
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Account id is cut out of the `aws sts get-caller-identity` output.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        # Up to three attempts, sleeping 1s and then 2s between them.
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        # Start from an empty workspace directory.
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        # PR head commit on pull_request events, otherwise the triggering SHA.
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Set BUILD_SPLIT_CUDA
      run: |
        # GITHUB_ENV lines are NAME=value taken literally (no shell quote
        # removal), so the value is written bare; quoting it here would make
        # the quote characters part of the variable.
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the host side of the /artifacts bind mount before docker run,
        # so it is the same directory the later chown/upload steps read.
        mkdir -p "${RUNNER_TEMP}/artifacts"
        # `-e NAME` without a value forwards NAME from this step's environment.
        # The container is started detached; the build is driven via docker exec.
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        # NOTE(review): binary_populate_env.sh presumably writes BINARY_ENV_FILE,
        # which the build command sources next - confirm against the script.
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the artifacts directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_10-cuda11_3
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
conda-py3_10-cuda11_3-test:  # Testing
  # Test job: downloads the conda-py3_10-cuda11_3 artifact produced by the
  # build job and runs the generated /run.sh test script against it inside
  # the DOCKER_IMAGE container on a GPU runner.
  needs: conda-py3_10-cuda11_3-build
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one key from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for key in ami-id instance-id instance-type; do
          echo "${key}: $(get_ec2_metadata "${key}")"
        done
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        # Three attempts in total, pausing 1s and then 2s between them.
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the runner user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        # Recreate the workspace as an empty directory
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cuda11_3
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        path: pytorch
        submodules: recursive
        # PR head commit on pull_request events, otherwise the triggering SHA
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        ref: main
        path: builder
        submodules: recursive
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        # Later steps splice GPU_FLAG into their docker run command line
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Test PyTorch binary
      run: |
        set -x
        # The container is started detached and driven through docker exec.
        # GPU_FLAG is left unquoted on purpose so it splits into separate
        # arguments. `-e NAME` forwards NAME from this step's environment.
        # NOTE(review): artifacts are mounted at /final_pkgs here rather than
        # /artifacts - presumably what binary_linux_test.sh expects; confirm.
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs regardless of earlier failures so active ssh sessions are held
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the runner user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
conda-py3_10-cuda11_3-upload:  # Uploading
  # Upload job: downloads the conda-py3_10-cuda11_3 artifact and runs
  # .circleci/scripts/binary_upload.sh inside a miniconda3 container.
  # DRY_RUN and UPLOAD_CHANNEL are only set for the push events matched by
  # the step conditions below.
  needs: conda-py3_10-cuda11_3-test
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu113
    GPU_ARCH_VERSION: "11.3"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.3
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one key from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for key in ami-id instance-id instance-type; do
          echo "${key}: $(get_ec2_metadata "${key}")"
        done
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        # Three attempts in total, pausing 1s and then 2s between them.
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the runner user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        # Recreate the workspace as an empty directory
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cuda11_3
        path: "${{ runner.temp }}/artifacts/"
    - name: Set DRY_RUN (only for tagged pushes)
      # Matches pushes to the nightly branch as well as pushes of any tag
      # outside refs/tags/ciflow/.
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # ref names containing "-rc" followed by a digit go to the test channel
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        # `-e NAME` forwards NAME from this step's environment; PKG_DIR is
        # overridden to the in-container mount point of the artifacts.
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs regardless of earlier failures so active ssh sessions are held
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the runner user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
conda-py3_10-cuda11_5-build:
  # Build job: runs /builder/conda/build.sh for Python 3.10 / CUDA 11.5 inside
  # the DOCKER_IMAGE container, then hands everything under
  # ${RUNNER_TEMP}/artifacts to the upload-artifact-s3 action under the name
  # conda-py3_10-cuda11_5.
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    # Quoted so the version stays a string rather than being read as a float.
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        # Account id is cut out of the `aws sts get-caller-identity` output.
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        # Up to three attempts, sleeping 1s and then 2s between them.
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        # Start from an empty workspace directory.
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        # PR head commit on pull_request events, otherwise the triggering SHA.
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        submodules: recursive
        path: pytorch
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: pytorch
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: main
        submodules: recursive
        repository: pytorch/builder
        path: builder
    - name: Clean pytorch/builder checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
      working-directory: builder
    - name: Set BUILD_SPLIT_CUDA
      run: |
        # GITHUB_ENV lines are NAME=value taken literally (no shell quote
        # removal), so the value is written bare; quoting it here would make
        # the quote characters part of the variable.
        echo "BUILD_SPLIT_CUDA=ON" >> "$GITHUB_ENV"
    - name: Pull Docker image
      run: |
        retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Build PyTorch binary
      run: |
        set -x
        # Create the host side of the /artifacts bind mount before docker run,
        # so it is the same directory the later chown/upload steps read.
        mkdir -p "${RUNNER_TEMP}/artifacts"
        # `-e NAME` without a value forwards NAME from this step's environment.
        # The container is started detached; the build is driven via docker exec.
        container_name=$(docker run \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        # NOTE(review): binary_populate_env.sh presumably writes BINARY_ENV_FILE,
        # which the build command sources next - confirm against the script.
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh"
    - name: Chown artifacts
      if: always()
      run: |
        # Ensure the artifacts directory gets chowned back to the current user
        docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - uses: seemethere/upload-artifact-s3@v3
      with:
        name: conda-py3_10-cuda11_5
        retention-days: 14
        if-no-files-found: error
        path: "${{ runner.temp }}/artifacts/*"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      working-directory: pytorch/
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
conda-py3_10-cuda11_5-test:  # Testing
  # Test job: downloads the conda-py3_10-cuda11_5 artifact produced by the
  # build job and runs the generated /run.sh test script against it inside
  # the DOCKER_IMAGE container on a GPU runner.
  needs: conda-py3_10-cuda11_5-build
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.4xlarge.nvidia.gpu
  timeout-minutes: 240
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one key from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for key in ami-id instance-id instance-type; do
          echo "${key}: $(get_ec2_metadata "${key}")"
        done
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        # Three attempts in total, pausing 1s and then 2s between them.
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the runner user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        # Recreate the workspace as an empty directory
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cuda11_5
        path: "${{ runner.temp }}/artifacts/"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        path: pytorch
        submodules: recursive
        # PR head commit on pull_request events, otherwise the triggering SHA
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    - name: Clean PyTorch checkout
      working-directory: pytorch
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Checkout pytorch/builder
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        repository: pytorch/builder
        ref: main
        path: builder
        submodules: recursive
    - name: Clean pytorch/builder checkout
      working-directory: builder
      run: |
        # Drop anything left behind by earlier checkouts
        git clean -fxd
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      working-directory: pytorch/
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        # Later steps splice GPU_FLAG into their docker run command line
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Pull Docker image
      run: |
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Test PyTorch binary
      run: |
        set -x
        # The container is started detached and driven through docker exec.
        # GPU_FLAG is left unquoted on purpose so it splits into separate
        # arguments. `-e NAME` forwards NAME from this step's environment.
        # NOTE(review): artifacts are mounted at /final_pkgs here rather than
        # /artifacts - presumably what binary_linux_test.sh expects; confirm.
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e BUILD_SPLIT_CUDA \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
          -e DESIRED_PYTHON \
          -e GPU_ARCH_TYPE \
          -e GPU_ARCH_VERSION \
          -e IS_GHA \
          -e LIBTORCH_VARIANT \
          -e PACKAGE_TYPE \
          -e PYTORCH_FINAL_PACKAGE_DIR \
          -e PYTORCH_ROOT \
          -e SKIP_ALL_TESTS \
          --tty \
          --detach \
          -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
          -v "${GITHUB_WORKSPACE}/builder:/builder" \
          -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
          -w / \
          "${DOCKER_IMAGE}"
        )
        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs regardless of earlier failures so active ssh sessions are held
      if: always()
      working-directory: pytorch/
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the runner user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|
|
conda-py3_10-cuda11_5-upload:  # Uploading
  # Upload job: downloads the conda-py3_10-cuda11_5 artifact and runs
  # .circleci/scripts/binary_upload.sh inside a miniconda3 container.
  # DRY_RUN and UPLOAD_CHANNEL are only set for the push events matched by
  # the step conditions below.
  needs: conda-py3_10-cuda11_5-test
  if: ${{ github.repository_owner == 'pytorch' }}
  runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
  env:
    PACKAGE_TYPE: conda
    # TODO: This is a legacy variable that we eventually want to get rid of in
    #       favor of GPU_ARCH_VERSION
    DESIRED_CUDA: cu115
    GPU_ARCH_VERSION: "11.5"
    GPU_ARCH_TYPE: cuda
    DOCKER_IMAGE: pytorch/conda-builder:cuda11.5
    SKIP_ALL_TESTS: 1
    DESIRED_PYTHON: "3.10"
  steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        # Reads one key from the EC2 instance metadata endpoint, see
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
        function get_ec2_metadata() {
          curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
        }
        for key in ami-id instance-id instance-type; do
          echo "${key}: $(get_ec2_metadata "${key}")"
        done
    - name: Log in to ECR
      env:
        AWS_MAX_ATTEMPTS: 5
        AWS_RETRY_MODE: standard
      run: |
        # Three attempts in total, pausing 1s and then 2s between them.
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
          | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@"); }
        retry docker pull "${ALPINE_IMAGE}"
        # Hand ownership of the working directory back to the runner user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        # Recreate the workspace as an empty directory
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Clone pytorch/pytorch
      uses: actions/checkout@v2
    - name: Download Build Artifacts
      uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      with:
        name: conda-py3_10-cuda11_5
        path: "${{ runner.temp }}/artifacts/"
    - name: Set DRY_RUN (only for tagged pushes)
      # Matches pushes to the nightly branch as well as pushes of any tag
      # outside refs/tags/ciflow/.
      if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
      run: |
        echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    - name: Set UPLOAD_CHANNEL (only for tagged pushes)
      if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
      run: |
        # ref names containing "-rc" followed by a digit go to the test channel
        if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
          echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
        fi
    - name: Upload binaries
      env:
        PKG_DIR: "${{ runner.temp }}/artifacts"
        UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
        # When running these on pull_request events these should be blank
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
        ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      run: |
        # `-e NAME` forwards NAME from this step's environment; PKG_DIR is
        # overridden to the in-container mount point of the artifacts.
        docker run --rm -i \
          -e ANACONDA_API_TOKEN \
          -e AWS_ACCESS_KEY_ID \
          -e AWS_SECRET_ACCESS_KEY \
          -e DRY_RUN \
          -e PACKAGE_TYPE \
          -e PKG_DIR=/artifacts \
          -e UPLOAD_CHANNEL \
          -e UPLOAD_SUBFOLDER \
          -v "${RUNNER_TEMP}/artifacts:/artifacts" \
          -v "${GITHUB_WORKSPACE}:/v" \
          -w /v \
          308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
          bash -c '.circleci/scripts/binary_upload.sh'
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Runs regardless of earlier failures so active ssh sessions are held
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Hand ownership of the working directory back to the runner user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # "docker ps -q" may expand to nothing, hence the unquoted expansion
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
|