From 7ed2a43d26f28174e347b232c4b3ce874ddc6372 Mon Sep 17 00:00:00 2001
From: Andrey Talman
Date: Wed, 19 Jan 2022 08:34:15 -0800
Subject: [PATCH] Adding wheels with py3.10 (#71419)

Summary:
Adding wheels with py3.10

Pull Request resolved: https://github.com/pytorch/pytorch/pull/71419

Reviewed By: janeyx99

Differential Revision: D33657770

Pulled By: atalman

fbshipit-source-id: 5d24f1771991ff07fbfd92d04d3d5211cf53084c
(cherry picked from commit bf2f2624e12821a417a17bd374e13fda5ab69724)
---
 .../scripts/generate_binary_build_matrix.py |    3 +-
 .../linux_binary_build_workflow.yml.j2      |    2 +-
 .../generated-linux-binary-conda.yml        | 2026 ++++++-
 .../generated-linux-binary-manywheel.yml    | 5100 +++++++++++++----
 4 files changed, 5884 insertions(+), 1247 deletions(-)

diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py
index 35de04ab4c5..99a5efc58a6 100644
--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@@ -16,7 +16,7 @@ from typing import Dict, List, Tuple

 CUDA_ARCHES = ["10.2", "11.1", "11.3", "11.5"]

-ROCM_ARCHES = ["4.2", "4.3.1"]
+ROCM_ARCHES = ["4.3.1", "4.5.2"]


 def arch_type(arch_version: str) -> str:
@@ -65,6 +65,7 @@ FULL_PYTHON_VERSIONS = [
     "3.7",
     "3.8",
     "3.9",
+    "3.10"
 ]
diff --git a/.github/templates/linux_binary_build_workflow.yml.j2 b/.github/templates/linux_binary_build_workflow.yml.j2
index 297f79a0418..1ec525e2527 100644
--- a/.github/templates/linux_binary_build_workflow.yml.j2
+++ b/.github/templates/linux_binary_build_workflow.yml.j2
@@ -22,7 +22,7 @@ name: !{{ build_environment }}
       LIBTORCH_VARIANT: !{{ config["libtorch_variant"] }}
       DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
 {%- else %}
-      DESIRED_PYTHON: !{{ config["python_version"] }}
+      DESIRED_PYTHON: "!{{ config["python_version"] }}"
 {%- endif %}
 {%- endmacro %}
diff --git a/.github/workflows/generated-linux-binary-conda.yml b/.github/workflows/generated-linux-binary-conda.yml
index dbc24c3ff16..7227c0fff40 100644
--- a/.github/workflows/generated-linux-binary-conda.yml
+++ b/.github/workflows/generated-linux-binary-conda.yml
@@ -69,7 +69,7 @@ jobs:
       GPU_ARCH_TYPE: cpu
       DOCKER_IMAGE: pytorch/conda-builder:cpu
       SKIP_ALL_TESTS: 1
-      DESIRED_PYTHON: 3.7
+      DESIRED_PYTHON: "3.7"
     steps:
       - name: Display EC2 information
         shell: bash
@@ -203,7 +203,7 @@ jobs:
       GPU_ARCH_TYPE: cpu
       DOCKER_IMAGE: pytorch/conda-builder:cpu
       SKIP_ALL_TESTS: 1
-      DESIRED_PYTHON: 3.7
+      DESIRED_PYTHON: "3.7"
     steps:
       - name: Display EC2 information
         shell: bash
@@ -332,7 +332,7 @@ jobs:
       GPU_ARCH_TYPE: cpu
       DOCKER_IMAGE: pytorch/conda-builder:cpu
       SKIP_ALL_TESTS: 1
-      DESIRED_PYTHON: 3.7
+      DESIRED_PYTHON: "3.7"
     steps:
       - name: Display EC2 information
         shell: bash
@@ -449,7 +449,7 @@ jobs:
       GPU_ARCH_TYPE: cuda
       DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
       SKIP_ALL_TESTS: 1
-      DESIRED_PYTHON: 3.7
+      DESIRED_PYTHON: "3.7"
     steps:
       - name: Display EC2 information
         shell: bash
@@ -584,7 +584,7 @@ jobs:
       GPU_ARCH_TYPE: cuda
       DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
       SKIP_ALL_TESTS: 1
-      DESIRED_PYTHON: 3.7
+      DESIRED_PYTHON: "3.7"
     steps:
       - name: Display EC2 information
         shell: bash
@@ -719,7 +719,7 @@ jobs:
       GPU_ARCH_TYPE: cuda
       DOCKER_IMAGE: pytorch/conda-builder:cuda10.2
       SKIP_ALL_TESTS: 1
-      DESIRED_PYTHON: 3.7
+      DESIRED_PYTHON: "3.7"
     steps:
       - name: Display EC2 information
         shell: bash
@@ -836,7 +836,7 @@ jobs:
       GPU_ARCH_TYPE: cuda
       DOCKER_IMAGE: pytorch/conda-builder:cuda11.1
       SKIP_ALL_TESTS: 1
-      DESIRED_PYTHON: 3.7
+      DESIRED_PYTHON: "3.7"
     steps:
       - name: Display EC2 information
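Note on the template change above: the quotes around !{{ config["python_version"] }} are load-bearing once 3.10 joins the matrix, because YAML 1.1 resolves a bare 3.10 scalar to the float 3.1. A minimal repro of the pitfall, assuming PyYAML is available:

    import yaml

    # A bare 3.10 is resolved as a YAML float and collapses to 3.1 ...
    print(yaml.safe_load("DESIRED_PYTHON: 3.10"))    # {'DESIRED_PYTHON': 3.1}
    # ... while the quoted form survives as the intended version string.
    print(yaml.safe_load('DESIRED_PYTHON: "3.10"'))  # {'DESIRED_PYTHON': '3.10'}

This is also why every regenerated DESIRED_PYTHON value below gains quotes, not just the new 3.10 entries.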
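For orientation, generate_binary_build_matrix.py crosses FULL_PYTHON_VERSIONS with the arch list, and each combination expands into a build/test/upload job triple in the generated workflows. A rough sketch of the resulting job-name scheme (illustrative only, not the script's actual code):

    from itertools import product

    CUDA_ARCHES = ["10.2", "11.1", "11.3", "11.5"]
    FULL_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]

    for py, arch in product(FULL_PYTHON_VERSIONS, ["cpu"] + CUDA_ARCHES):
        tag = "cpu" if arch == "cpu" else "cuda" + arch.replace(".", "_")
        print(f"conda-py{py.replace('.', '_')}-{tag}")  # e.g. conda-py3_10-cuda11_3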
shell: bash @@ -974,7 +974,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1109,7 +1109,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1226,7 +1226,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1364,7 +1364,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1499,7 +1499,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1616,7 +1616,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1754,7 +1754,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1889,7 +1889,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -2005,7 +2005,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2139,7 +2139,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2268,7 +2268,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2385,7 +2385,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2520,7 +2520,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2655,7 +2655,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2772,7 +2772,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2910,7 +2910,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3045,7 +3045,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3162,7 +3162,7 @@ 
jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3300,7 +3300,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3435,7 +3435,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3552,7 +3552,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3690,7 +3690,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3825,7 +3825,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3941,7 +3941,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -4075,7 +4075,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -4204,7 +4204,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -4321,7 +4321,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -4456,7 +4456,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -4591,7 +4591,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -4708,7 +4708,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -4846,7 +4846,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -4981,7 +4981,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5098,7 +5098,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5236,7 +5236,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5371,7 +5371,7 @@ jobs: GPU_ARCH_TYPE: cuda 
DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5488,7 +5488,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5626,7 +5626,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5761,7 +5761,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5864,3 +5864,1939 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af + conda-py3_10-cpu-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
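The generated jobs inline the same three-attempt retry helper over and over. For readers porting the pattern out of bash, a Python analogue of retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") } (a sketch; the workflows themselves stay in shell):

    import subprocess
    import time

    def retry(*cmd: str) -> None:
        # Mirror the shell helper: try once, sleep 1s and retry, sleep 2s and retry.
        for delay in (0, 1, 2):
            time.sleep(delay)
            if subprocess.run(cmd).returncode == 0:
                return
        raise RuntimeError(f"command failed after 3 attempts: {' '.join(cmd)}")

    retry("docker", "pull", "pytorch/conda-builder:cpu")  # hypothetical usage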
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: conda-py3_10-cpu + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cpu-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cpu-build + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
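The "Display EC2 information" step reads the instance metadata service at 169.254.169.254, which is only reachable from inside the instance. The same lookup expressed in Python, assuming it runs on the EC2 runner itself:

    import urllib.request

    def get_ec2_metadata(category: str) -> str:
        # Same endpoint the workflow curls (IMDSv1-style, no session token).
        url = f"http://169.254.169.254/latest/meta-data/{category}"
        with urllib.request.urlopen(url, timeout=2) as resp:
            return resp.read().decode()

    for category in ("ami-id", "instance-id", "instance-type"):
        print(f"{category}: {get_ec2_metadata(category)}")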
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cpu + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cpu-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cpu-test + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cpu + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
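The "Set UPLOAD_CHANNEL" step above routes release-candidate tags to the test channel by globbing for an -rcN suffix; non-RC runs keep whatever channel the workflow env already carries. The same decision expressed in Python (the "nightly" fallback here is an assumption, since the default channel is set elsewhere in the workflow env):

    import re

    def upload_channel(ref_name: str, default: str = "nightly") -> str:
        # Mirrors the bash glob: [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]
        return "test" if re.search(r"-rc[0-9]", ref_name) else default

    assert upload_channel("v1.11.0-rc3") == "test"
    assert upload_channel("v1.11.0") == "nightly"  # assumed default channel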
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda10_2-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: conda-py3_10-cuda10_2 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda10_2-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cuda10_2-build + runs-on: linux.4xlarge.nvidia.gpu + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cuda10_2 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + working-directory: pytorch/ + run: | + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda10_2-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cuda10_2-test + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cuda10_2 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda11_1-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu111 + GPU_ARCH_VERSION: 11.1 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: conda-py3_10-cuda11_1 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda11_1-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cuda11_1-build + runs-on: linux.4xlarge.nvidia.gpu + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu111 + GPU_ARCH_VERSION: 11.1 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cuda11_1 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + working-directory: pytorch/ + run: | + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda11_1-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cuda11_1-test + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu111 + GPU_ARCH_VERSION: 11.1 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.1 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cuda11_1 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda11_3-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: conda-py3_10-cuda11_3 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda11_3-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cuda11_3-build + runs-on: linux.4xlarge.nvidia.gpu + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cuda11_3 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + working-directory: pytorch/ + run: | + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
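The test jobs above run in two phases inside the container: `binary_populate_env.sh` writes the build metadata to `${BINARY_ENV_FILE}`, `binary_linux_test.sh` only generates a test script at the path given by `OUTPUT_SCRIPT`, and a final `docker exec` sources the env file and runs the script under `bash -x` so every command is traced into the job log. A rough host-side restatement of that sequence, with the container name illustrative and the script paths as used above:

    cn="test-container"
    docker exec -t -w "${PYTORCH_ROOT}" "${cn}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
    # this generates /run.sh rather than executing the tests directly
    docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${cn}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
    # -x traces each command of the generated script into the job log
    docker exec -t "${cn}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"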
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda11_3-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cuda11_3-test + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cuda11_3 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
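In the upload job above, `UPLOAD_CHANNEL=test` is set only when the pushed tag carries a release-candidate suffix; the `[[ ... = *-rc[0-9]* ]]` test is a bash glob match, not a regex. A few illustrative probes (the tag names are hypothetical):

    for ref in v1.11.0-rc3 v1.11.0 nightly-2022-01-19; do
      if [[ ${ref} = *-rc[0-9]* ]]; then
        echo "${ref}: UPLOAD_CHANNEL=test"
      else
        echo "${ref}: default channel"
      fi
    done
    # only v1.11.0-rc3 matches; the other two fall through to the default channel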
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda11_5-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu115 + GPU_ARCH_VERSION: 11.5 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: conda-py3_10-cuda11_5 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
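One observation on the `Set BUILD_SPLIT_CUDA` step above: lines appended to `$GITHUB_ENV` are parsed as literal `KEY=value` pairs, so the single quotes end up inside the value. Assuming the consuming build scripts only check for a non-empty setting, that is harmless, but the distinction is easy to miss:

    echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV"   # later steps see BUILD_SPLIT_CUDA='ON' (quotes included)
    echo "BUILD_SPLIT_CUDA=ON"   >> "$GITHUB_ENV"   # later steps would see a bare ON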
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda11_5-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cuda11_5-build + runs-on: linux.4xlarge.nvidia.gpu + timeout-minutes: 240 + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu115 + GPU_ARCH_VERSION: 11.5 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cuda11_5 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + working-directory: pytorch/ + run: | + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + conda-py3_10-cuda11_5-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: conda-py3_10-cuda11_5-test + env: + PACKAGE_TYPE: conda + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu115 + GPU_ARCH_VERSION: 11.5 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: conda-py3_10-cuda11_5 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af diff --git a/.github/workflows/generated-linux-binary-manywheel.yml b/.github/workflows/generated-linux-binary-manywheel.yml index 751187c2c40..889e5ff0afa 100644 --- a/.github/workflows/generated-linux-binary-manywheel.yml +++ b/.github/workflows/generated-linux-binary-manywheel.yml @@ -69,7 +69,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -203,7 +203,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -332,7 +332,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -449,7 +449,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -584,7 +584,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -719,7 +719,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -836,7 +836,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -974,7 +974,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1109,7 +1109,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1226,7 +1226,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1364,7 +1364,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1499,7 +1499,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1616,7 +1616,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1754,7 +1754,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1889,7 +1889,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: 
pytorch/manylinux-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1992,388 +1992,6 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-rocm4_2-build: - if: ${{ github.repository_owner == 'pytorch' }} - needs: should-run - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.2 - GPU_ARCH_VERSION: 4.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - with: - path: pytorch - submodules: recursive - - name: Clone pytorch/builder - uses: actions/checkout@v2 - with: - repository: pytorch/builder - path: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v3 - with: - name: manywheel-py3_7-rocm4_2 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_7-rocm4_2-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-rocm4_2-build - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.2 - GPU_ARCH_VERSION: 4.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b - name: Download Build Artifacts - with: - name: manywheel-py3_7-rocm4_2 - path: "${{ runner.temp }}/artifacts/" - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - with: - path: pytorch - submodules: recursive - - name: Clone pytorch/builder - uses: actions/checkout@v2 - with: - repository: pytorch/builder - path: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_7-rocm4_2-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-rocm4_2-test - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.2 - GPU_ARCH_VERSION: 4.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b - name: Download Build Artifacts - with: - name: manywheel-py3_7-rocm4_2 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af manywheel-py3_7-rocm4_3_1-build: if: ${{ github.repository_owner == 'pytorch' }} needs: should-run @@ -2388,7 +2006,7 @@ jobs: GPU_ARCH_TYPE: rocm DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -2523,7 +2141,7 @@ jobs: GPU_ARCH_TYPE: rocm DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -2653,7 +2271,7 @@ jobs: GPU_ARCH_TYPE: rocm DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.7 + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -2756,6 +2374,388 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af + manywheel-py3_7-rocm4_5_2-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.7" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: manywheel-py3_7-rocm4_5_2 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_7-rocm4_5_2-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_7-rocm4_5_2-build + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.7" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_7-rocm4_5_2 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
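Unlike the CUDA test jobs, the ROCm test jobs above run on plain `linux.4xlarge` and never write `GPU_FLAG` into `$GITHUB_ENV`, so the `${GPU_FLAG:-}` expansion in the `docker run` line collapses to nothing. That is also why the SC2086/SC2090 shellcheck suppressions are there: the variable is deliberately left unquoted so an empty value contributes zero arguments. In isolation:

    unset GPU_FLAG
    docker run ${GPU_FLAG:-} --rm alpine:3 echo "no gpu flags"   # expands to no extra args
    GPU_FLAG="--gpus all"
    docker run ${GPU_FLAG:-} --rm alpine:3 echo "gpu flags"      # expands to --gpus all (needs the nvidia runtime)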
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_7-rocm4_5_2-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_7-rocm4_5_2-test + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.7" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_7-rocm4_5_2 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af manywheel-py3_8-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: should-run @@ -2769,7 +2769,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2903,7 +2903,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3032,7 +3032,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3149,7 +3149,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3284,7 +3284,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3419,7 +3419,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3536,7 +3536,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3674,7 +3674,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3809,7 +3809,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -3926,7 +3926,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -4064,7 +4064,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -4199,7 +4199,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -4316,7 +4316,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -4454,7 +4454,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -4589,7 +4589,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -4692,388 +4692,6 @@ jobs: docker 
stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-rocm4_2-build: - if: ${{ github.repository_owner == 'pytorch' }} - needs: should-run - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.2 - GPU_ARCH_VERSION: 4.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - with: - path: pytorch - submodules: recursive - - name: Clone pytorch/builder - uses: actions/checkout@v2 - with: - repository: pytorch/builder - path: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v 
"${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v3 - with: - name: manywheel-py3_8-rocm4_2 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_8-rocm4_2-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-rocm4_2-build - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.2 - GPU_ARCH_VERSION: 4.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b - name: Download Build Artifacts - with: - name: manywheel-py3_8-rocm4_2 - path: "${{ runner.temp }}/artifacts/" - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - with: - path: pytorch - submodules: recursive - - name: Clone pytorch/builder - uses: actions/checkout@v2 - with: - repository: pytorch/builder - path: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_8-rocm4_2-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-rocm4_2-test - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.2 - GPU_ARCH_VERSION: 4.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b - name: Download Build Artifacts - with: - name: manywheel-py3_8-rocm4_2 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af manywheel-py3_8-rocm4_3_1-build: if: ${{ github.repository_owner == 'pytorch' }} needs: should-run @@ -5088,7 +4706,7 @@ jobs: GPU_ARCH_TYPE: rocm DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -5223,7 +4841,7 @@ jobs: GPU_ARCH_TYPE: rocm DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -5353,7 +4971,7 @@ jobs: GPU_ARCH_TYPE: rocm DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.8 + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -5456,6 +5074,388 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af + manywheel-py3_8-rocm4_5_2-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.8" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
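The "Chown workspace" step just above shows the stock retry helper these generated jobs wrap around every network-touching command (the ECR login and the docker pulls): two retries with a 1s and then a 2s backoff. As a standalone sketch of the same pattern (the fallback image name below is illustrative, not the workflow's actual ALPINE_IMAGE):

    retry () {
      # try once, then again after 1s, then a final time after 2s
      "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
    }
    retry docker pull "${ALPINE_IMAGE:-alpine:3.15}"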
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: manywheel-py3_8-rocm4_5_2 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
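Note how the "Build PyTorch binary" step above plumbs configuration into the container: every knob is listed as a bare `-e NAME` flag, so `docker run` copies the value from the runner's environment rather than hard-coding it on the command line. A minimal sketch of that pattern (the image and the variable values are illustrative):

    export DESIRED_PYTHON="3.10" PACKAGE_TYPE="manywheel"
    # bare -e flags: docker reads each listed value from the calling environment
    cid=$(docker run -e DESIRED_PYTHON -e PACKAGE_TYPE --tty --detach alpine:3.15 sh)
    docker exec "$cid" env | grep -E 'DESIRED_PYTHON|PACKAGE_TYPE'
    docker rm -f "$cid"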
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_8-rocm4_5_2-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_8-rocm4_5_2-build + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.8" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_8-rocm4_5_2 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
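One detail in the artifact hand-off above: the build job uploads with a tag-pinned action, while the test job downloads with a commit-pinned one. A tag like @v3 can be re-pointed by the action's maintainer; a full SHA freezes the exact code that runs, at the cost of readability. Both styles as they appear in this file, side by side:

    - uses: seemethere/upload-artifact-s3@v3                                          # floating tag
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b  # immutable SHA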
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_8-rocm4_5_2-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_8-rocm4_5_2-test + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.8" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_8-rocm4_5_2 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
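The upload job above only disables DRY_RUN and flips UPLOAD_CHANNEL on tagged pushes, and the release-candidate check is a plain glob match against the tag name. Roughly, the same gate in isolation (the tag value is a made-up example, not taken from this PR):

    GITHUB_REF_NAME="v1.11.0-rc3"                  # illustrative tag
    if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
      echo "UPLOAD_CHANNEL=test"                   # rc tags upload to the test channel
    fi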
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af manywheel-py3_9-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: should-run @@ -5469,7 +5469,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5603,7 +5603,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5732,7 +5732,7 @@ jobs: GPU_ARCH_TYPE: cpu DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5849,7 +5849,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -5984,7 +5984,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -6119,7 +6119,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -6236,7 +6236,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -6374,7 +6374,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -6509,7 +6509,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -6626,7 +6626,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -6764,7 +6764,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -6899,7 +6899,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -7016,7 +7016,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -7154,7 +7154,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -7289,7 +7289,7 @@ jobs: GPU_ARCH_TYPE: cuda DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -7392,388 +7392,6 @@ jobs: docker 
stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-rocm4_2-build: - if: ${{ github.repository_owner == 'pytorch' }} - needs: should-run - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.2 - GPU_ARCH_VERSION: 4.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - with: - path: pytorch - submodules: recursive - - name: Clone pytorch/builder - uses: actions/checkout@v2 - with: - repository: pytorch/builder - path: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v 
"${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v3 - with: - name: manywheel-py3_9-rocm4_2 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_9-rocm4_2-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-rocm4_2-build - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.2 - GPU_ARCH_VERSION: 4.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b - name: Download Build Artifacts - with: - name: manywheel-py3_9-rocm4_2 - path: "${{ runner.temp }}/artifacts/" - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - with: - path: pytorch - submodules: recursive - - name: Clone pytorch/builder - uses: actions/checkout@v2 - with: - repository: pytorch/builder - path: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_9-rocm4_2-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-rocm4_2-test - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.2 - GPU_ARCH_VERSION: 4.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b - name: Download Build Artifacts - with: - name: manywheel-py3_9-rocm4_2 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af manywheel-py3_9-rocm4_3_1-build: if: ${{ github.repository_owner == 'pytorch' }} needs: should-run @@ -7788,7 +7406,7 @@ jobs: GPU_ARCH_TYPE: rocm DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -7923,7 +7541,7 @@ jobs: GPU_ARCH_TYPE: rocm DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -8053,7 +7671,7 @@ jobs: GPU_ARCH_TYPE: rocm DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 3.9 + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -8156,3 +7774,3085 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af + manywheel-py3_9-rocm4_5_2-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.9" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: manywheel-py3_9-rocm4_5_2 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_9-rocm4_5_2-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_9-rocm4_5_2-build + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.9" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_9-rocm4_5_2 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_9-rocm4_5_2-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_9-rocm4_5_2-test + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.9" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_9-rocm4_5_2 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cpu-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
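The quoting running through all of these hunks is what makes the new py3.10 jobs work at all: unquoted, a YAML loader reads 3.10 as the float 3.1, silently dropping the ".10", which is why DESIRED_PYTHON is now emitted as a quoted string everywhere. A quick demonstration, assuming PyYAML is installed:

    python -c 'import yaml; print(yaml.safe_load("v: 3.10"))'      # {'v': 3.1}   -- wrong version
    python -c "import yaml; print(yaml.safe_load('v: \"3.10\"'))"  # {'v': '3.10'}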
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: manywheel-py3_10-cpu + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cpu-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_10-cpu-build + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
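[Annotation] Every job here sets DESIRED_PYTHON: "3.10" with explicit quotes because a bare 3.10 is a YAML float and resolves to 3.1, silently selecting the wrong interpreter. A quick demonstration of the difference (assumes PyYAML is installed):

    # Bare 3.10 is a YAML float and loses its trailing zero; the quoted
    # form survives as a string. Requires PyYAML.
    python3 - <<'EOF'
    import yaml
    print(yaml.safe_load('v: 3.10'))    # {'v': 3.1}    -- wrong version
    print(yaml.safe_load('v: "3.10"'))  # {'v': '3.10'} -- what we want
    EOF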
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_10-cpu + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
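[Annotation] The test step expands ${GPU_FLAG:-} unquoted and disables SC2086/SC2090 for it deliberately: when GPU_FLAG is unset (CPU jobs) the expansion vanishes entirely, and when it is set to "--gpus all" (GPU jobs) it must word-split into two separate docker arguments. A minimal sketch of why the flag cannot be quoted:

    #!/usr/bin/env bash
    # Show how ${GPU_FLAG:-} expands under each condition by counting
    # the arguments a command actually receives.
    demo () { printf 'argc=%d:' "$#"; printf ' [%s]' "$@"; echo; }

    unset GPU_FLAG
    # shellcheck disable=SC2086
    demo run ${GPU_FLAG:-} image     # argc=2: [run] [image]

    GPU_FLAG="--gpus all"
    # shellcheck disable=SC2086
    demo run ${GPU_FLAG:-} image     # argc=4: [run] [--gpus] [all] [image]
    demo run "${GPU_FLAG:-}" image   # argc=3: [run] [--gpus all] [image] -- broken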
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cpu-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_10-cpu-test + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_10-cpu + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
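[Annotation] The upload job only disables DRY_RUN on tagged pushes, and routes release-candidate tags to the test channel by glob-matching the tag name. That glob logic in isolation (GITHUB_REF_NAME is supplied by GitHub Actions; the tag values below are examples):

    #!/usr/bin/env bash
    # Tags ending in -rcN select the "test" channel; the workflow leaves
    # UPLOAD_CHANNEL unset otherwise, so "default" here is a hypothetical
    # stand-in for illustration.
    channel_for () {
      local ref_name=$1
      if [[ ${ref_name} = *-rc[0-9]* ]]; then
        echo "test"
      else
        echo "default"
      fi
    }
    channel_for "v1.11.0-rc3"   # -> test
    channel_for "v1.11.0"       # -> default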
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda10_2-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
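[Annotation] The get_ec2_metadata helper that opens every job is a thin wrapper over the EC2 instance metadata service: an unauthenticated IMDSv1-style GET against the link-local address. Equivalent standalone form (only works from inside an EC2 instance):

    #!/usr/bin/env bash
    # Query the EC2 instance metadata service; the categories echoed
    # below match the ones the workflow logs.
    set -euo pipefail
    get_ec2_metadata() {
      curl -fsSL "http://169.254.169.254/latest/meta-data/$1"
    }
    for category in ami-id instance-id instance-type; do
      echo "${category}: $(get_ec2_metadata "${category}")"
    done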
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: manywheel-py3_10-cuda10_2 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda10_2-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_10-cuda10_2-build + runs-on: linux.4xlarge.nvidia.gpu + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_10-cuda10_2 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + working-directory: pytorch/ + run: | + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
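[Annotation] The GPU test jobs differ from the CPU ones in exactly two steps: they install the NVIDIA driver and nvidia-docker runtime first, then export GPU_FLAG through GITHUB_ENV so every later step of the same job sees it. The GITHUB_ENV mechanism in isolation, simulated with a temp file since this is not running on a runner:

    #!/usr/bin/env bash
    # GITHUB_ENV is a file path provided by the Actions runner; lines
    # appended to it become environment variables for *subsequent* steps
    # of the same job.
    GITHUB_ENV=$(mktemp)
    echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    cat "${GITHUB_ENV}"   # GPU_FLAG=--gpus all
    # In a later step the runner has already exported it, so the test
    # step's docker run picks it up via ${GPU_FLAG:-}.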
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda10_2-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_10-cuda10_2-test + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_10-cuda10_2 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda11_1-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu111 + GPU_ARCH_VERSION: 11.1 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: manywheel-py3_10-cuda11_1 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
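[Annotation] One quirk worth noting in the CUDA 11.1+ builds: echo "BUILD_SPLIT_CUDA='ON'" writes the single quotes into GITHUB_ENV verbatim, because GITHUB_ENV lines are parsed as key=value with no shell unquoting. Downstream steps therefore see the value 'ON' (quotes included) rather than ON. That is harmless for any consumer that only tests for a non-empty value, though an exact string comparison against "ON" would not match; a sketch of the parse:

    #!/usr/bin/env bash
    # Simulate how the Actions runner splits a GITHUB_ENV line: a single
    # NAME=VALUE split, quotes preserved verbatim in the value.
    line="BUILD_SPLIT_CUDA='ON'"       # what the workflow appends
    key=${line%%=*}
    value=${line#*=}
    echo "key=${key} value=${value}"   # key=BUILD_SPLIT_CUDA value='ON'
    # [[ -n ${BUILD_SPLIT_CUDA:-} ]] behaves the same either way;
    # [[ ${BUILD_SPLIT_CUDA:-} == "ON" ]] would not.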
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda11_1-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_10-cuda11_1-build + runs-on: linux.4xlarge.nvidia.gpu + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu111 + GPU_ARCH_VERSION: 11.1 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_10-cuda11_1 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + working-directory: pytorch/ + run: | + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda11_1-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_10-cuda11_1-test + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu111 + GPU_ARCH_VERSION: 11.1 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.1 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_10-cuda11_1 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda11_3-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: manywheel-py3_10-cuda11_3 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda11_3-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_10-cuda11_3-build + runs-on: linux.4xlarge.nvidia.gpu + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_10-cuda11_3 + path: "${{ runner.temp }}/artifacts/" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + working-directory: pytorch/ + run: | + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Test PyTorch binary + run: | + set -x + # shellcheck disable=SC2086,SC2090 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + # Generate test script + docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
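[Annotation] The build, test, and upload stages of each configuration are stitched together purely by the S3 artifact name, which follows manywheel-py3_10-<cpu|cudaX_Y> with dots replaced by underscores. A sketch of that naming convention, reconstructed from the job and artifact names visible in this workflow rather than taken from the generator script:

    #!/usr/bin/env bash
    # Reconstructs the artifact names seen above:
    #   manywheel-py3_10-cpu, manywheel-py3_10-cuda10_2, ... -cuda11_5
    # Inferred convention; illustration only.
    artifact_name () {
      local python=$1 gpu_arch=$2        # e.g. 3.10 and cuda11.3 or cpu
      echo "manywheel-py${python/./_}-${gpu_arch//./_}"
    }
    artifact_name 3.10 cpu        # manywheel-py3_10-cpu
    artifact_name 3.10 cuda11.3   # manywheel-py3_10-cuda11_3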
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda11_3-upload: # Uploading + runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_10-cuda11_3-test + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b + name: Download Build Artifacts + with: + name: manywheel-py3_10-cuda11_3 + path: "${{ runner.temp }}/artifacts/" + - name: Set DRY_RUN (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + echo "DRY_RUN=disabled" >> "$GITHUB_ENV" + - name: Set UPLOAD_CHANNEL (only for tagged pushes) + if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}} + run: | + # reference ends with an RC suffix + if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then + echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" + fi + - name: Upload binaries + env: + PKG_DIR: "${{ runner.temp }}/artifacts" + UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" + # When running these on pull_request events these should be blank + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }} + ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + run: | + docker run --rm -i \ + -e ANACONDA_API_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e DRY_RUN \ + -e PACKAGE_TYPE \ + -e PKG_DIR=/artifacts \ + -e UPLOAD_CHANNEL \ + -e UPLOAD_SUBFOLDER \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -v "${GITHUB_WORKSPACE}:/v" \ + -w /v \ + 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ + bash -c '.circleci/scripts/binary_upload.sh' + - name: Hold runner for 2 hours or until ssh sessions have drained + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Kill containers, clean up images + if: always() + run: | + # ignore expansion of "docker ps -q" since it could be empty + # shellcheck disable=SC2046 + docker stop $(docker ps -q) || true + # Prune all of the docker images + docker system prune -af + manywheel-py3_10-cuda11_5-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: should-run + runs-on: linux.4xlarge + timeout-minutes: 240 + env: + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu115 + GPU_ARCH_VERSION: 11.5 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + - name: Log in to ECR + env: + AWS_RETRY_MODE: standard + AWS_MAX_ATTEMPTS: 5 + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ + --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+ - name: Clean workspace + run: | + rm -rf "${GITHUB_WORKSPACE}" + mkdir "${GITHUB_WORKSPACE}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Clone pytorch/pytorch + uses: actions/checkout@v2 + with: + path: pytorch + submodules: recursive + - name: Clone pytorch/builder + uses: actions/checkout@v2 + with: + repository: pytorch/builder + path: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" + - name: Pull Docker image + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${DOCKER_IMAGE}" + - name: Build PyTorch binary + run: | + set -x + mkdir -p artifacts/ + container_name=$(docker run \ + -e BINARY_ENV_FILE \ + -e BUILDER_ROOT \ + -e BUILD_ENVIRONMENT \ + -e BUILD_SPLIT_CUDA \ + -e DESIRED_CUDA \ + -e DESIRED_DEVTOOLSET \ + -e DESIRED_PYTHON \ + -e GPU_ARCH_TYPE \ + -e GPU_ARCH_VERSION \ + -e IS_GHA \ + -e LIBTORCH_VARIANT \ + -e PACKAGE_TYPE \ + -e PYTORCH_FINAL_PACKAGE_DIR \ + -e PYTORCH_ROOT \ + -e SKIP_ALL_TESTS \ + --tty \ + --detach \ + -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ + -v "${GITHUB_WORKSPACE}/builder:/builder" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w / \ + "${DOCKER_IMAGE}" + ) + docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" + - name: Chown artifacts + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - uses: seemethere/upload-artifact-s3@v3 + with: + name: manywheel-py3_10-cuda11_5 + retention-days: 14 + if-no-files-found: error + path: + ${{ runner.temp }}/artifacts/* + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  manywheel-py3_10-cuda11_5-test:  # Testing
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: manywheel-py3_10-cuda11_5-build
+    runs-on: linux.4xlarge.nvidia.gpu
+    timeout-minutes: 240
+    env:
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: cu115
+      GPU_ARCH_VERSION: 11.5
+      GPU_ARCH_TYPE: cuda
+      DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5
+      SKIP_ALL_TESTS: 1
+      DESIRED_PYTHON: "3.10"
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download Build Artifacts
+        with:
+          name: manywheel-py3_10-cuda11_5
+          path: "${{ runner.temp }}/artifacts/"
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+        with:
+          path: pytorch
+          submodules: recursive
+      - name: Clone pytorch/builder
+        uses: actions/checkout@v2
+        with:
+          repository: pytorch/builder
+          path: builder
+      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
+        working-directory: pytorch/
+        run: |
+          bash .github/scripts/install_nvidia_utils_linux.sh
+          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
+      - name: Pull Docker image
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Test PyTorch binary
+        run: |
+          set -x
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BINARY_ENV_FILE \
+            -e BUILDER_ROOT \
+            -e BUILD_ENVIRONMENT \
+            -e BUILD_SPLIT_CUDA \
+            -e DESIRED_CUDA \
+            -e DESIRED_DEVTOOLSET \
+            -e DESIRED_PYTHON \
+            -e GPU_ARCH_TYPE \
+            -e GPU_ARCH_VERSION \
+            -e IS_GHA \
+            -e LIBTORCH_VARIANT \
+            -e PACKAGE_TYPE \
+            -e PYTORCH_FINAL_PACKAGE_DIR \
+            -e PYTORCH_ROOT \
+            -e SKIP_ALL_TESTS \
+            --tty \
+            --detach \
+            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
+            -v "${GITHUB_WORKSPACE}/builder:/builder" \
+            -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
+            -w / \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
+          # Generate test script
+          docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
+          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        working-directory: pytorch/
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  manywheel-py3_10-cuda11_5-upload:  # Uploading
+    runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: manywheel-py3_10-cuda11_5-test
+    env:
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: cu115
+      GPU_ARCH_VERSION: 11.5
+      GPU_ARCH_TYPE: cuda
+      DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5
+      SKIP_ALL_TESTS: 1
+      DESIRED_PYTHON: "3.10"
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download Build Artifacts
+        with:
+          name: manywheel-py3_10-cuda11_5
+          path: "${{ runner.temp }}/artifacts/"
+      - name: Set DRY_RUN (only for tagged pushes)
+        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
+        run: |
+          echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
+      - name: Set UPLOAD_CHANNEL (only for tagged pushes)
+        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
+        run: |
+          # reference ends with an RC suffix
+          if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
+            echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
+          fi
+      - name: Upload binaries
+        env:
+          PKG_DIR: "${{ runner.temp }}/artifacts"
+          UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
+          # When running these on pull_request events these should be blank
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
+          ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+        run: |
+          docker run --rm -i \
+            -e ANACONDA_API_TOKEN \
+            -e AWS_ACCESS_KEY_ID \
+            -e AWS_SECRET_ACCESS_KEY \
+            -e DRY_RUN \
+            -e PACKAGE_TYPE \
+            -e PKG_DIR=/artifacts \
+            -e UPLOAD_CHANNEL \
+            -e UPLOAD_SUBFOLDER \
+            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
+            -v "${GITHUB_WORKSPACE}:/v" \
+            -w /v \
+            308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
+            bash -c '.circleci/scripts/binary_upload.sh'
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  manywheel-py3_10-rocm4_3_1-build:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: should-run
+    runs-on: linux.4xlarge
+    timeout-minutes: 240
+    env:
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm4.3.1
+      GPU_ARCH_VERSION: 4.3.1
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1
+      SKIP_ALL_TESTS: 1
+      DESIRED_PYTHON: "3.10"
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+        with:
+          path: pytorch
+          submodules: recursive
+      - name: Clone pytorch/builder
+        uses: actions/checkout@v2
+        with:
+          repository: pytorch/builder
+          path: builder
+      - name: Pull Docker image
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Build PyTorch binary
+        run: |
+          set -x
+          mkdir -p artifacts/
+          container_name=$(docker run \
+            -e BINARY_ENV_FILE \
+            -e BUILDER_ROOT \
+            -e BUILD_ENVIRONMENT \
+            -e BUILD_SPLIT_CUDA \
+            -e DESIRED_CUDA \
+            -e DESIRED_DEVTOOLSET \
+            -e DESIRED_PYTHON \
+            -e GPU_ARCH_TYPE \
+            -e GPU_ARCH_VERSION \
+            -e IS_GHA \
+            -e LIBTORCH_VARIANT \
+            -e PACKAGE_TYPE \
+            -e PYTORCH_FINAL_PACKAGE_DIR \
+            -e PYTORCH_ROOT \
+            -e SKIP_ALL_TESTS \
+            --tty \
+            --detach \
+            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
+            -v "${GITHUB_WORKSPACE}/builder:/builder" \
+            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
+            -w / \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
+          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh"
+      - name: Chown artifacts
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - uses: seemethere/upload-artifact-s3@v3
+        with:
+          name: manywheel-py3_10-rocm4_3_1
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            ${{ runner.temp }}/artifacts/*
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        working-directory: pytorch/
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  manywheel-py3_10-rocm4_3_1-test:  # Testing
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: manywheel-py3_10-rocm4_3_1-build
+    runs-on: linux.4xlarge
+    timeout-minutes: 240
+    env:
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm4.3.1
+      GPU_ARCH_VERSION: 4.3.1
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1
+      SKIP_ALL_TESTS: 1
+      DESIRED_PYTHON: "3.10"
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download Build Artifacts
+        with:
+          name: manywheel-py3_10-rocm4_3_1
+          path: "${{ runner.temp }}/artifacts/"
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+        with:
+          path: pytorch
+          submodules: recursive
+      - name: Clone pytorch/builder
+        uses: actions/checkout@v2
+        with:
+          repository: pytorch/builder
+          path: builder
+      - name: Pull Docker image
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Test PyTorch binary
+        run: |
+          set -x
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BINARY_ENV_FILE \
+            -e BUILDER_ROOT \
+            -e BUILD_ENVIRONMENT \
+            -e BUILD_SPLIT_CUDA \
+            -e DESIRED_CUDA \
+            -e DESIRED_DEVTOOLSET \
+            -e DESIRED_PYTHON \
+            -e GPU_ARCH_TYPE \
+            -e GPU_ARCH_VERSION \
+            -e IS_GHA \
+            -e LIBTORCH_VARIANT \
+            -e PACKAGE_TYPE \
+            -e PYTORCH_FINAL_PACKAGE_DIR \
+            -e PYTORCH_ROOT \
+            -e SKIP_ALL_TESTS \
+            --tty \
+            --detach \
+            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
+            -v "${GITHUB_WORKSPACE}/builder:/builder" \
+            -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
+            -w / \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
+          # Generate test script
+          docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
+          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        working-directory: pytorch/
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  manywheel-py3_10-rocm4_3_1-upload:  # Uploading
+    runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: manywheel-py3_10-rocm4_3_1-test
+    env:
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm4.3.1
+      GPU_ARCH_VERSION: 4.3.1
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.3.1
+      SKIP_ALL_TESTS: 1
+      DESIRED_PYTHON: "3.10"
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download Build Artifacts
+        with:
+          name: manywheel-py3_10-rocm4_3_1
+          path: "${{ runner.temp }}/artifacts/"
+      - name: Set DRY_RUN (only for tagged pushes)
+        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
+        run: |
+          echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
+      - name: Set UPLOAD_CHANNEL (only for tagged pushes)
+        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
+        run: |
+          # reference ends with an RC suffix
+          if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
+            echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
+          fi
+      - name: Upload binaries
+        env:
+          PKG_DIR: "${{ runner.temp }}/artifacts"
+          UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
+          # When running these on pull_request events these should be blank
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
+          ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+        run: |
+          docker run --rm -i \
+            -e ANACONDA_API_TOKEN \
+            -e AWS_ACCESS_KEY_ID \
+            -e AWS_SECRET_ACCESS_KEY \
+            -e DRY_RUN \
+            -e PACKAGE_TYPE \
+            -e PKG_DIR=/artifacts \
+            -e UPLOAD_CHANNEL \
+            -e UPLOAD_SUBFOLDER \
+            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
+            -v "${GITHUB_WORKSPACE}:/v" \
+            -w /v \
+            308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
+            bash -c '.circleci/scripts/binary_upload.sh'
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  manywheel-py3_10-rocm4_5_2-build:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: should-run
+    runs-on: linux.4xlarge
+    timeout-minutes: 240
+    env:
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm4.5.2
+      GPU_ARCH_VERSION: 4.5.2
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2
+      SKIP_ALL_TESTS: 1
+      DESIRED_PYTHON: "3.10"
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+        with:
+          path: pytorch
+          submodules: recursive
+      - name: Clone pytorch/builder
+        uses: actions/checkout@v2
+        with:
+          repository: pytorch/builder
+          path: builder
+      - name: Pull Docker image
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Build PyTorch binary
+        run: |
+          set -x
+          mkdir -p artifacts/
+          container_name=$(docker run \
+            -e BINARY_ENV_FILE \
+            -e BUILDER_ROOT \
+            -e BUILD_ENVIRONMENT \
+            -e BUILD_SPLIT_CUDA \
+            -e DESIRED_CUDA \
+            -e DESIRED_DEVTOOLSET \
+            -e DESIRED_PYTHON \
+            -e GPU_ARCH_TYPE \
+            -e GPU_ARCH_VERSION \
+            -e IS_GHA \
+            -e LIBTORCH_VARIANT \
+            -e PACKAGE_TYPE \
+            -e PYTORCH_FINAL_PACKAGE_DIR \
+            -e PYTORCH_ROOT \
+            -e SKIP_ALL_TESTS \
+            --tty \
+            --detach \
+            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
+            -v "${GITHUB_WORKSPACE}/builder:/builder" \
+            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
+            -w / \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
+          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh"
+      - name: Chown artifacts
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - uses: seemethere/upload-artifact-s3@v3
+        with:
+          name: manywheel-py3_10-rocm4_5_2
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            ${{ runner.temp }}/artifacts/*
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        working-directory: pytorch/
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  manywheel-py3_10-rocm4_5_2-test:  # Testing
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: manywheel-py3_10-rocm4_5_2-build
+    runs-on: linux.4xlarge
+    timeout-minutes: 240
+    env:
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm4.5.2
+      GPU_ARCH_VERSION: 4.5.2
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2
+      SKIP_ALL_TESTS: 1
+      DESIRED_PYTHON: "3.10"
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download Build Artifacts
+        with:
+          name: manywheel-py3_10-rocm4_5_2
+          path: "${{ runner.temp }}/artifacts/"
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+        with:
+          path: pytorch
+          submodules: recursive
+      - name: Clone pytorch/builder
+        uses: actions/checkout@v2
+        with:
+          repository: pytorch/builder
+          path: builder
+      - name: Pull Docker image
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Test PyTorch binary
+        run: |
+          set -x
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BINARY_ENV_FILE \
+            -e BUILDER_ROOT \
+            -e BUILD_ENVIRONMENT \
+            -e BUILD_SPLIT_CUDA \
+            -e DESIRED_CUDA \
+            -e DESIRED_DEVTOOLSET \
+            -e DESIRED_PYTHON \
+            -e GPU_ARCH_TYPE \
+            -e GPU_ARCH_VERSION \
+            -e IS_GHA \
+            -e LIBTORCH_VARIANT \
+            -e PACKAGE_TYPE \
+            -e PYTORCH_FINAL_PACKAGE_DIR \
+            -e PYTORCH_ROOT \
+            -e SKIP_ALL_TESTS \
+            --tty \
+            --detach \
+            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
+            -v "${GITHUB_WORKSPACE}/builder:/builder" \
+            -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
+            -w / \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
+          # Generate test script
+          docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
+          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        working-directory: pytorch/
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  manywheel-py3_10-rocm4_5_2-upload:  # Uploading
+    runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: manywheel-py3_10-rocm4_5_2-test
+    env:
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm4.5.2
+      GPU_ARCH_VERSION: 4.5.2
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2
+      SKIP_ALL_TESTS: 1
+      DESIRED_PYTHON: "3.10"
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+              "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Clone pytorch/pytorch
+        uses: actions/checkout@v2
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download Build Artifacts
+        with:
+          name: manywheel-py3_10-rocm4_5_2
+          path: "${{ runner.temp }}/artifacts/"
+      - name: Set DRY_RUN (only for tagged pushes)
+        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
+        run: |
+          echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
+      - name: Set UPLOAD_CHANNEL (only for tagged pushes)
+        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
+        run: |
+          # reference ends with an RC suffix
+          if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
+            echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
+          fi
+      - name: Upload binaries
+        env:
+          PKG_DIR: "${{ runner.temp }}/artifacts"
+          UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
+          # When running these on pull_request events these should be blank
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
+          ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+        run: |
+          docker run --rm -i \
+            -e ANACONDA_API_TOKEN \
+            -e AWS_ACCESS_KEY_ID \
+            -e AWS_SECRET_ACCESS_KEY \
+            -e DRY_RUN \
+            -e PACKAGE_TYPE \
+            -e PKG_DIR=/artifacts \
+            -e UPLOAD_CHANNEL \
+            -e UPLOAD_SUBFOLDER \
+            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
+            -v "${GITHUB_WORKSPACE}:/v" \
+            -w /v \
+            308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
+            bash -c '.circleci/scripts/binary_upload.sh'
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af