mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
add rocm GHA workflow (#68552)
Summary: cc jeffdaily sunway513 jithunnair-amd ROCmSupport KyleCZH
Pull Request resolved: https://github.com/pytorch/pytorch/pull/68552
Reviewed By: bdhirsh
Differential Revision: D33569551
Pulled By: seemethere
fbshipit-source-id: cc7d68a22ad0eedd4d11eea3cf43a909e5b8616b
This commit is contained in:
parent
8cfd51d75f
commit
2bb701eb9d
6
.github/generated-ciflow-ruleset.json
generated
vendored
6
.github/generated-ciflow-ruleset.json
generated
vendored
|
|
@ -16,6 +16,7 @@
|
|||
"libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
|
||||
"linux-bionic-cuda10.2-py3.9-gcc7",
|
||||
"linux-bionic-py3.7-clang9",
|
||||
"linux-bionic-rocm4.5-py3.7",
|
||||
"linux-docs",
|
||||
"linux-docs-push",
|
||||
"linux-vulkan-bionic-py3.7-clang9",
|
||||
|
|
@ -150,6 +151,7 @@
|
|||
"libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
|
||||
"linux-bionic-cuda10.2-py3.9-gcc7",
|
||||
"linux-bionic-py3.7-clang9",
|
||||
"linux-bionic-rocm4.5-py3.7",
|
||||
"linux-docs",
|
||||
"linux-docs-push",
|
||||
"linux-vulkan-bionic-py3.7-clang9",
|
||||
|
|
@ -196,6 +198,9 @@
|
|||
"ciflow/onnx": [
|
||||
"linux-xenial-py3.7-clang7-onnx"
|
||||
],
|
||||
"ciflow/rocm": [
|
||||
"linux-bionic-rocm4.5-py3.7"
|
||||
],
|
||||
"ciflow/sanitizers": [
|
||||
"linux-xenial-py3.7-clang7-asan"
|
||||
],
|
||||
|
|
@ -231,6 +236,7 @@
|
|||
"libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
|
||||
"linux-bionic-cuda10.2-py3.9-gcc7",
|
||||
"linux-bionic-py3.7-clang9",
|
||||
"linux-bionic-rocm4.5-py3.7",
|
||||
"linux-docs",
|
||||
"linux-vulkan-bionic-py3.7-clang9",
|
||||
"linux-xenial-cuda11.3-py3.7-gcc7",
|
||||
|
|
|
|||
34
.github/scripts/generate_ci_workflows.py
vendored
34
.github/scripts/generate_ci_workflows.py
vendored
|
|
@ -29,9 +29,22 @@ WINDOWS_RUNNERS = {
|
|||
LINUX_CPU_TEST_RUNNER = "linux.2xlarge"
|
||||
# contains 1 gpu
|
||||
LINUX_CUDA_TEST_RUNNER = "linux.4xlarge.nvidia.gpu"
|
||||
# contains 4 gpus
|
||||
LINUX_ROCM_TEST_RUNNER = "linux.rocm.gpu"
|
||||
LINUX_RUNNERS = {
|
||||
LINUX_CPU_TEST_RUNNER,
|
||||
LINUX_CUDA_TEST_RUNNER,
|
||||
LINUX_ROCM_TEST_RUNNER,
|
||||
}
|
||||
|
||||
LINUX_DISTRIBUTED_GPU_RUNNERS = {
|
||||
LINUX_CUDA_TEST_RUNNER : "linux.8xlarge.nvidia.gpu",
|
||||
LINUX_ROCM_TEST_RUNNER : LINUX_ROCM_TEST_RUNNER,
|
||||
}
|
||||
|
||||
LINUX_MULTIGPU_RUNNERS = {
|
||||
LINUX_CUDA_TEST_RUNNER : "linux.16xlarge.nvidia.gpu",
|
||||
LINUX_ROCM_TEST_RUNNER : LINUX_ROCM_TEST_RUNNER,
|
||||
}
|
||||
|
||||
MACOS_TEST_RUNNER_10_15 = "macos-10.15"
|
||||
|
|
@ -46,6 +59,9 @@ CUDA_RUNNERS = {
|
|||
WINDOWS_CUDA_TEST_RUNNER,
|
||||
LINUX_CUDA_TEST_RUNNER,
|
||||
}
|
||||
ROCM_RUNNERS = {
|
||||
LINUX_ROCM_TEST_RUNNER,
|
||||
}
|
||||
CPU_RUNNERS = {
|
||||
WINDOWS_CPU_TEST_RUNNER,
|
||||
LINUX_CPU_TEST_RUNNER,
|
||||
|
|
@ -55,6 +71,7 @@ LABEL_CIFLOW_ALL = "ciflow/all"
|
|||
LABEL_CIFLOW_BAZEL = "ciflow/bazel"
|
||||
LABEL_CIFLOW_CPU = "ciflow/cpu"
|
||||
LABEL_CIFLOW_CUDA = "ciflow/cuda"
|
||||
LABEL_CIFLOW_ROCM = "ciflow/rocm"
|
||||
LABEL_CIFLOW_DOCS = "ciflow/docs"
|
||||
LABEL_CIFLOW_DEFAULT = "ciflow/default"
|
||||
LABEL_CIFLOW_LIBTORCH = "ciflow/libtorch"
|
||||
|
|
@ -164,6 +181,8 @@ class CIWorkflow:
|
|||
|
||||
# Optional fields
|
||||
test_runner_type: str = ''
|
||||
multigpu_runner_type: str = ''
|
||||
distributed_gpu_runner_type: str = ''
|
||||
ciflow_config: CIFlowConfig = field(default_factory=CIFlowConfig)
|
||||
cuda_version: str = ''
|
||||
docker_image_base: str = ''
|
||||
|
|
@ -205,6 +224,9 @@ class CIWorkflow:
|
|||
if self.fx2trt_test:
|
||||
self.enable_fx2trt_test = 1
|
||||
|
||||
self.multigpu_runner_type = LINUX_MULTIGPU_RUNNERS.get(self.test_runner_type, "linux.16xlarge.nvidia.gpu")
|
||||
self.distributed_gpu_runner_type = LINUX_DISTRIBUTED_GPU_RUNNERS.get(self.test_runner_type, "linux.8xlarge.nvidia.gpu")
|
||||
|
||||
# If num_test_shards_on_pull_request is not user-defined, default to num_test_shards unless we are
|
||||
# only running smoke tests on the pull request.
|
||||
if self.num_test_shards_on_pull_request == -1:
|
||||
|
|
@ -235,6 +257,8 @@ class CIWorkflow:
|
|||
assert self.test_runner_type != ''
|
||||
if self.test_runner_type in CUDA_RUNNERS:
|
||||
assert LABEL_CIFLOW_CUDA in self.ciflow_config.labels
|
||||
if self.test_runner_type in ROCM_RUNNERS:
|
||||
assert LABEL_CIFLOW_ROCM in self.ciflow_config.labels
|
||||
if self.test_runner_type in CPU_RUNNERS and not self.exclude_test:
|
||||
assert LABEL_CIFLOW_CPU in self.ciflow_config.labels
|
||||
if self.is_scheduled:
|
||||
|
|
@ -576,6 +600,16 @@ LINUX_WORKFLOWS = [
|
|||
labels=set([LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU]),
|
||||
),
|
||||
),
|
||||
CIWorkflow(
|
||||
arch="linux",
|
||||
build_environment="linux-bionic-rocm4.5-py3.7",
|
||||
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-rocm4.5-py3.7",
|
||||
test_runner_type=LINUX_ROCM_TEST_RUNNER,
|
||||
num_test_shards=2,
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels=set([LABEL_CIFLOW_LINUX, LABEL_CIFLOW_ROCM]),
|
||||
),
|
||||
),
|
||||
CIWorkflow(
|
||||
arch="linux",
|
||||
build_environment="libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
|
||||
|
|
|
|||
50
.github/templates/common.yml.j2
vendored
50
.github/templates/common.yml.j2
vendored
|
|
@ -104,6 +104,45 @@ concurrency:
|
|||
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
{%- endmacro -%}
|
||||
|
||||
{%- macro setup_rocm_linux() -%}
|
||||
- name: Clean workspace
|
||||
run: |
|
||||
rm -rf "${GITHUB_WORKSPACE}"
|
||||
mkdir "${GITHUB_WORKSPACE}"
|
||||
- name: Set DOCKER_HOST
|
||||
run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}"
|
||||
- name: Runner health check system info
|
||||
if: always()
|
||||
run: |
|
||||
cat /etc/os-release || true
|
||||
cat /etc/apt/sources.list.d/rocm.list || true
|
||||
cat /opt/rocm/.info/version || true
|
||||
whoami
|
||||
- name: Runner health check rocm-smi
|
||||
if: always()
|
||||
run: |
|
||||
rocm-smi
|
||||
- name: Runner health check rocminfo
|
||||
if: always()
|
||||
run: |
|
||||
rocminfo
|
||||
- name: Runner health check GPU count
|
||||
if: always()
|
||||
run: |
|
||||
ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
|
||||
if [[ "x$ngpu" != "x4" ]]; then
|
||||
echo "Failed to detect 4 GPUs on the runner"
|
||||
exit 1
|
||||
fi
|
||||
- name: Runner health check disconnect on failure
|
||||
if: ${{ failure() }}
|
||||
run: |
|
||||
killall runsvc.sh
|
||||
- name: Preserve github env variables for use in docker
|
||||
run: |
|
||||
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
{%- endmacro -%}
|
||||
|
||||
{%- macro teardown_ec2_linux(pytorch_directory="") -%}
|
||||
- name: Hold runner for 2 hours or until ssh sessions have drained
|
||||
{%- if pytorch_directory %}
|
||||
|
|
@ -127,6 +166,17 @@ concurrency:
|
|||
docker system prune -af
|
||||
{%- endmacro -%}
|
||||
|
||||
{%- macro teardown_rocm_linux() -%}
|
||||
- name: Kill containers, clean up images
|
||||
if: always()
|
||||
run: |
|
||||
# ignore expansion of "docker ps -q" since it could be empty
|
||||
# shellcheck disable=SC2046
|
||||
docker stop $(docker ps -q) || true
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
{%- endmacro -%}
|
||||
|
||||
{%- macro checkout_pytorch(submodules) -%}
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
|
|
|
|||
43
.github/templates/linux_ci_workflow.yml.j2
vendored
43
.github/templates/linux_ci_workflow.yml.j2
vendored
|
|
@ -162,8 +162,8 @@ jobs:
|
|||
ENABLE_XLA_TEST: !{{ enable_xla_test }}
|
||||
ENABLE_NOARCH_TEST: !{{ enable_noarch_test }}
|
||||
NUM_TEST_SHARDS: !{{ num_test_shards }}
|
||||
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
|
||||
DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
|
||||
MULTIGPU_RUNNER_TYPE: !{{ multigpu_runner_type }}
|
||||
DISTRIBUTED_GPU_RUNNER_TYPE: !{{ distributed_gpu_runner_type }}
|
||||
NOGPU_RUNNER_TYPE: linux.2xlarge
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
outputs:
|
||||
|
|
@ -196,17 +196,28 @@ jobs:
|
|||
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
|
||||
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
|
||||
steps:
|
||||
{%- if 'rocm' in test_runner_type %}
|
||||
!{{ common.setup_rocm_linux() }}
|
||||
{%- else %}
|
||||
!{{ common.setup_ec2_linux() }}
|
||||
{%- endif %}
|
||||
!{{ common.checkout_pytorch("recursive") }}
|
||||
- name: Pull Docker image
|
||||
run: |
|
||||
!{{ common.add_retry_to_env() }}
|
||||
retry docker pull "${DOCKER_IMAGE}"
|
||||
{%- if 'rocm' in test_runner_type %}
|
||||
- name: ROCm set GPU_FLAG
|
||||
if: ${{ contains(env.BUILD_ENVIRONMENT, 'rocm') && !contains(matrix.config, 'nogpu') }}
|
||||
run: |
|
||||
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
||||
{%- else %}
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
|
||||
run: |
|
||||
bash .github/scripts/install_nvidia_utils_linux.sh
|
||||
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
|
||||
{%- endif %}
|
||||
- name: Determine shm-size
|
||||
run: |
|
||||
shm_size="1g"
|
||||
|
|
@ -228,7 +239,11 @@ jobs:
|
|||
unzip -o artifacts.zip
|
||||
- name: Output disk space left
|
||||
run: |
|
||||
{%- if 'rocm' in test_runner_type %}
|
||||
df -H
|
||||
{%- else %}
|
||||
sudo df -H
|
||||
{%- endif %}
|
||||
!{{ common.parse_ref() }}
|
||||
- name: Test
|
||||
env:
|
||||
|
|
@ -246,6 +261,7 @@ jobs:
|
|||
else
|
||||
TEST_COMMAND=.jenkins/pytorch/test.sh
|
||||
fi
|
||||
{%- if 'rocm' not in test_runner_type %}
|
||||
PROXY_ENV=
|
||||
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
|
||||
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
|
||||
|
|
@ -254,6 +270,7 @@ jobs:
|
|||
# shellcheck disable=SC2089
|
||||
PROXY_ENV="-e http_proxy=!{{ common.squid_proxy }} -e https_proxy=!{{ common.squid_proxy }} -e no_proxy=!{{ common.squid_no_proxy }}"
|
||||
fi
|
||||
{%- endif %}
|
||||
# detached container should get cleaned up by teardown_ec2_linux (teardown_rocm_linux on ROCm runners)
|
||||
# TODO: Stop building test binaries as part of the build phase
|
||||
# Used for GPU_FLAG since that doesn't play nice
|
||||
|
|
@ -280,12 +297,16 @@ jobs:
|
|||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
|
||||
{%- if 'rocm' not in test_runner_type %}
|
||||
${PROXY_ENV} \
|
||||
{%- endif %}
|
||||
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
||||
--ulimit stack=10485760:83886080 \
|
||||
--security-opt seccomp=unconfined \
|
||||
--cap-add=SYS_PTRACE \
|
||||
{%- if 'rocm' not in test_runner_type %}
|
||||
--ipc=host \
|
||||
{%- endif %}
|
||||
--shm-size="${SHM_SIZE}" \
|
||||
--tty \
|
||||
--detach \
|
||||
|
|
@ -295,17 +316,35 @@ jobs:
|
|||
-w /var/lib/jenkins/workspace \
|
||||
"${DOCKER_IMAGE}"
|
||||
)
|
||||
{%- if 'rocm' in test_runner_type %}
|
||||
# jenkins user does not have write permission to mounted workspace; work-around by copying within container to jenkins home
|
||||
docker exec -t "${container_name}" sh -c "cd .. && cp -R workspace pytorch && cd pytorch && pip install dist/*.whl && ${TEST_COMMAND}"
|
||||
# copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
|
||||
docker exec -t "${container_name}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
|
||||
{%- else %}
|
||||
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
|
||||
{%- endif %}
|
||||
{%- if 'rocm' not in test_runner_type %}
|
||||
- name: Chown workspace
|
||||
if: always()
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
{%- endif %}
|
||||
!{{ common.render_test_results() }}
|
||||
{%- if 'rocm' in test_runner_type %}
|
||||
!{{ common.upload_downloaded_files(name='linux', use_s3=False) }}
|
||||
!{{ common.upload_test_reports(name='linux', artifact_name="test-reports", use_s3=False) }}
|
||||
{%- else %}
|
||||
!{{ common.upload_downloaded_files(name='linux') }}
|
||||
!{{ common.upload_test_reports(name='linux') }}
|
||||
{%- endif %}
|
||||
!{{ common.upload_test_statistics(build_environment) }}
|
||||
{%- if 'rocm' in test_runner_type %}
|
||||
!{{ common.teardown_rocm_linux() }}
|
||||
{%- else %}
|
||||
!{{ common.teardown_ec2_linux() }}
|
||||
{%- endif %}
|
||||
{% endblock %}
|
||||
{%- endif -%}
|
||||
{%- if enable_doc_jobs %}
|
||||
|
|
|
|||
517
.github/workflows/generated-linux-bionic-rocm4.5-py3.7.yml
generated
vendored
Normal file
517
.github/workflows/generated-linux-bionic-rocm4.5-py3.7.yml
generated
vendored
Normal file
|
|
@ -0,0 +1,517 @@
|
|||
# @generated DO NOT EDIT MANUALLY
|
||||
# Template is at: .github/templates/linux_ci_workflow.yml.j2
|
||||
# Generation script: .github/scripts/generate_ci_workflows.py
|
||||
name: linux-bionic-rocm4.5-py3.7
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, unassigned]
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- release/*
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
BUILD_ENVIRONMENT: linux-bionic-rocm4.5-py3.7
|
||||
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-rocm4.5-py3.7
|
||||
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
|
||||
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
|
||||
TORCH_CUDA_ARCH_LIST: 5.2
|
||||
IN_CI: 1
|
||||
IS_GHA: 1
|
||||
# This is used for the phase of adding wheel tests only, will be removed once completed
|
||||
IN_WHEEL_TEST: 1
|
||||
# Used for custom_operator, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
|
||||
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
|
||||
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
|
||||
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
PYTORCH_RETRY_TEST_CASES: 1
|
||||
concurrency:
|
||||
group: linux-bionic-rocm4.5-py3.7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
runs-on: linux.2xlarge
|
||||
timeout-minutes: 240
|
||||
if: ${{ (github.repository == 'pytorch/pytorch') && (
|
||||
(github.event_name == 'push') ||
|
||||
(github.event_name == 'schedule') ||
|
||||
(contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/rocm') || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk')) ||
|
||||
(false))
|
||||
}}
|
||||
env:
|
||||
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-build
|
||||
IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assigneed.login == 'pytorchbot') }}
|
||||
LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/rocm') || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk') }}
|
||||
outputs:
|
||||
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
|
||||
steps:
|
||||
- name: print labels
|
||||
run: echo "${PR_LABELS}"
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: Log in to ECR
|
||||
env:
|
||||
AWS_RETRY_MODE: standard
|
||||
AWS_MAX_ATTEMPTS: 5
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
|
||||
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
|
||||
- name: Chown workspace
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${ALPINE_IMAGE}"
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Clean workspace
|
||||
run: |
|
||||
rm -rf "${GITHUB_WORKSPACE}"
|
||||
mkdir "${GITHUB_WORKSPACE}"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Preserve github env variables for use in docker
|
||||
run: |
|
||||
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Calculate docker image tag
|
||||
id: calculate-tag
|
||||
run: |
|
||||
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
|
||||
echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
|
||||
echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
|
||||
echo "::set-output name=docker_tag::${DOCKER_TAG}"
|
||||
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
|
||||
- name: Check if image should be built
|
||||
id: check
|
||||
env:
|
||||
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
|
||||
run: |
|
||||
set -x
|
||||
# Check if image already exists, if it does then skip building it
|
||||
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
|
||||
exit 0
|
||||
fi
|
||||
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
|
||||
# if we're on the base branch then use the parent commit
|
||||
MERGE_BASE=$(git rev-parse HEAD~)
|
||||
else
|
||||
# otherwise we're on a PR, so use the most recent base commit
|
||||
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
|
||||
fi
|
||||
# Covers the case where a previous tag doesn't exist for the tree
|
||||
# this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
|
||||
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
|
||||
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
|
||||
exit 1
|
||||
fi
|
||||
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
|
||||
# If no image exists but the hash is the same as the previous hash then we should error out here
|
||||
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
|
||||
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
|
||||
echo " contact the PyTorch team to restore the original images"
|
||||
exit 1
|
||||
fi
|
||||
echo ::set-output name=rebuild::yes
|
||||
- name: Build and push docker image
|
||||
if: ${{ steps.check.outputs.rebuild }}
|
||||
env:
|
||||
DOCKER_SKIP_S3_UPLOAD: 1
|
||||
working-directory: .circleci/docker
|
||||
run: |
|
||||
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
|
||||
./build_docker.sh
|
||||
- name: Pull Docker image
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${DOCKER_IMAGE}"
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Build
|
||||
env:
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
run: |
|
||||
# detached container should get cleaned up by teardown_ec2_linux
|
||||
container_name=$(docker run \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e JOB_BASE_NAME \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e AWS_DEFAULT_REGION \
|
||||
-e IS_GHA \
|
||||
-e PR_NUMBER \
|
||||
-e SHA1 \
|
||||
-e BRANCH \
|
||||
-e GITHUB_RUN_ID \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
|
||||
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
|
||||
-e SKIP_SCCACHE_INITIALIZATION=1 \
|
||||
-e TORCH_CUDA_ARCH_LIST \
|
||||
-e PR_LABELS \
|
||||
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
|
||||
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
||||
--security-opt seccomp=unconfined \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--tty \
|
||||
--detach \
|
||||
--user jenkins \
|
||||
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
|
||||
-w /var/lib/jenkins/workspace \
|
||||
"${DOCKER_IMAGE}"
|
||||
)
|
||||
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
|
||||
- name: Display and upload binary build size statistics (Click Me)
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
WORKFLOW_ID: '${{ github.run_id }}'
|
||||
run: |
|
||||
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
|
||||
export COMMIT_TIME
|
||||
pip3 install requests==2.26 boto3==1.16.34
|
||||
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
|
||||
- name: Chown workspace
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Archive artifacts into zip
|
||||
run: |
|
||||
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store PyTorch Build Artifacts on S3
|
||||
with:
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path:
|
||||
artifacts.zip
|
||||
- name: Hold runner for 2 hours or until ssh sessions have drained
|
||||
# Always hold for active ssh sessions
|
||||
if: always()
|
||||
run: .github/scripts/wait_for_ssh_to_drain.sh
|
||||
- name: Chown workspace
|
||||
if: always()
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Kill containers, clean up images
|
||||
if: always()
|
||||
run: |
|
||||
# ignore expansion of "docker ps -q" since it could be empty
|
||||
# shellcheck disable=SC2046
|
||||
docker stop $(docker ps -q) || true
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
- name: Hold runner for 2 hours or until ssh sessions have drained
|
||||
# Always hold for active ssh sessions
|
||||
if: always()
|
||||
run: .github/scripts/wait_for_ssh_to_drain.sh
|
||||
- name: Clean up docker images
|
||||
if: always()
|
||||
run: |
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
|
||||
generate-test-matrix:
|
||||
needs: build
|
||||
runs-on: ubuntu-18.04
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
TEST_RUNNER_TYPE: linux.rocm.gpu
|
||||
ENABLE_DISTRIBUTED_TEST: 1
|
||||
ENABLE_JIT_LEGACY_TEST: ''
|
||||
ENABLE_FX2TRT_TEST: ''
|
||||
ENABLE_MULTIGPU_TEST: ''
|
||||
ENABLE_NOGPU_NO_AVX_TEST: ''
|
||||
ENABLE_NOGPU_NO_AVX2_TEST: ''
|
||||
ENABLE_SLOW_TEST: ''
|
||||
ENABLE_DOCS_TEST: ''
|
||||
ENABLE_BACKWARDS_COMPAT_TEST: ''
|
||||
ENABLE_XLA_TEST: ''
|
||||
ENABLE_NOARCH_TEST: ''
|
||||
NUM_TEST_SHARDS: 2
|
||||
MULTIGPU_RUNNER_TYPE: linux.rocm.gpu
|
||||
DISTRIBUTED_GPU_RUNNER_TYPE: linux.rocm.gpu
|
||||
NOGPU_RUNNER_TYPE: linux.2xlarge
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
|
||||
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
|
||||
container:
|
||||
image: python:3.9
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
run: pip install typing-extensions==3.10
|
||||
- name: Clone pytorch/pytorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
- name: Generating test matrix
|
||||
id: set-matrix
|
||||
run: .github/scripts/generate_pytorch_test_matrix.py
|
||||
|
||||
test:
|
||||
needs: [build, generate-test-matrix]
|
||||
strategy:
|
||||
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
|
||||
fail-fast: false
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
|
||||
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-test
|
||||
TEST_CONFIG: ${{ matrix.config }}
|
||||
SHARD_NUMBER: ${{ matrix.shard }}
|
||||
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
|
||||
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
|
||||
steps:
|
||||
- name: Clean workspace
|
||||
run: |
|
||||
rm -rf "${GITHUB_WORKSPACE}"
|
||||
mkdir "${GITHUB_WORKSPACE}"
|
||||
- name: Set DOCKER_HOST
|
||||
run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}"
|
||||
- name: Runner health check system info
|
||||
if: always()
|
||||
run: |
|
||||
cat /etc/os-release || true
|
||||
cat /etc/apt/sources.list.d/rocm.list || true
|
||||
cat /opt/rocm/.info/version || true
|
||||
whoami
|
||||
- name: Runner health check rocm-smi
|
||||
if: always()
|
||||
run: |
|
||||
rocm-smi
|
||||
- name: Runner health check rocminfo
|
||||
if: always()
|
||||
run: |
|
||||
rocminfo
|
||||
- name: Runner health check GPU count
|
||||
if: always()
|
||||
run: |
|
||||
ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
|
||||
if [[ "x$ngpu" != "x4" ]]; then
|
||||
echo "Failed to detect 4 GPUs on the runner"
|
||||
exit 1
|
||||
fi
|
||||
- name: Runner health check disconnect on failure
|
||||
if: ${{ failure() }}
|
||||
run: |
|
||||
killall runsvc.sh
|
||||
- name: Preserve github env variables for use in docker
|
||||
run: |
|
||||
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Pull Docker image
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${DOCKER_IMAGE}"
|
||||
- name: ROCm set GPU_FLAG
|
||||
if: ${{ contains(env.BUILD_ENVIRONMENT, 'rocm') && !contains(matrix.config, 'nogpu') }}
|
||||
run: |
|
||||
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
||||
- name: Determine shm-size
|
||||
run: |
|
||||
shm_size="1g"
|
||||
case "${BUILD_ENVIRONMENT}" in
|
||||
*cuda*)
|
||||
shm_size="2g"
|
||||
;;
|
||||
*rocm*)
|
||||
shm_size="8g"
|
||||
;;
|
||||
esac
|
||||
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
|
||||
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
|
||||
name: Download PyTorch Build Artifacts
|
||||
with:
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
- name: Unzip artifacts
|
||||
run: |
|
||||
unzip -o artifacts.zip
|
||||
- name: Output disk space left
|
||||
run: |
|
||||
df -H
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Test
|
||||
env:
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
# Time out the test phase after 240 minutes
|
||||
timeout-minutes: 240
|
||||
run: |
|
||||
set -x
|
||||
|
||||
if [[ $TEST_CONFIG == 'multigpu' ]]; then
|
||||
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
|
||||
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
|
||||
TEST_COMMAND=.jenkins/caffe2/test.sh
|
||||
else
|
||||
TEST_COMMAND=.jenkins/pytorch/test.sh
|
||||
fi
|
||||
# detached container should get cleaned up by teardown_ec2_linux
|
||||
# TODO: Stop building test binaries as part of the build phase
|
||||
# Used for GPU_FLAG since that doesn't play nice
|
||||
# shellcheck disable=SC2086,SC2090
|
||||
container_name=$(docker run \
|
||||
${GPU_FLAG:-} \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e PR_NUMBER \
|
||||
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
|
||||
-e GITHUB_ACTIONS \
|
||||
-e IN_CI \
|
||||
-e IS_GHA \
|
||||
-e BRANCH \
|
||||
-e SHA1 \
|
||||
-e AWS_DEFAULT_REGION \
|
||||
-e IN_WHEEL_TEST \
|
||||
-e SHARD_NUMBER \
|
||||
-e JOB_BASE_NAME \
|
||||
-e TEST_CONFIG \
|
||||
-e NUM_TEST_SHARDS \
|
||||
-e PYTORCH_IGNORE_DISABLED_ISSUES \
|
||||
-e PYTORCH_RETRY_TEST_CASES \
|
||||
-e PR_LABELS \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
|
||||
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
||||
--ulimit stack=10485760:83886080 \
|
||||
--security-opt seccomp=unconfined \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--shm-size="${SHM_SIZE}" \
|
||||
--tty \
|
||||
--detach \
|
||||
--name="${container_name}" \
|
||||
--user jenkins \
|
||||
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
|
||||
-w /var/lib/jenkins/workspace \
|
||||
"${DOCKER_IMAGE}"
|
||||
)
|
||||
# jenkins user does not have write permission to mounted workspace; work-around by copying within container to jenkins home
|
||||
docker exec -t "${container_name}" sh -c "cd .. && cp -R workspace pytorch && cd pytorch && pip install dist/*.whl && ${TEST_COMMAND}"
|
||||
# copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
|
||||
docker exec -t "${container_name}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
|
||||
- name: Install render_test_results dependencies
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install junitparser==2.1.1 rich==10.9.0
|
||||
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
|
||||
if: always()
|
||||
shell: bash
|
||||
# Encoding is weird on windows, just try to default to utf-8 if possible
|
||||
env:
|
||||
PYTHONIOENCODING: "utf-8"
|
||||
run: |
|
||||
python3 tools/render_junit.py test/
|
||||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
|
||||
run: |
|
||||
# Remove any previous test jsons if they exist
|
||||
rm -f test-jsons-*.zip
|
||||
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
|
||||
- uses: actions/upload-artifact@v2
|
||||
name: Store Test Downloaded JSONs on Github
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: warn
|
||||
path:
|
||||
test-jsons-*.zip
|
||||
- name: Zip test reports for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
|
||||
run: |
|
||||
# Remove any previous test reports if they exist
|
||||
rm -f test-reports-*.zip
|
||||
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
|
||||
- uses: actions/upload-artifact@v2
|
||||
name: Store Test Reports on Github
|
||||
if: always()
|
||||
with:
|
||||
name: test-reports
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path:
|
||||
test-reports-*.zip
|
||||
- name: Display and upload test statistics (Click Me)
|
||||
if: always()
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-test
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
WORKFLOW_ID: '${{ github.run_id }}'
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install boto3==1.19.12
|
||||
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
|
||||
- name: Kill containers, clean up images
|
||||
if: always()
|
||||
run: |
|
||||
# ignore expansion of "docker ps -q" since it could be empty
|
||||
# shellcheck disable=SC2046
|
||||
docker stop $(docker ps -q) || true
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
|
|
@ -148,23 +148,8 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
|||
export PYTORCH_ROCM_ARCH="gfx900;gfx906"
|
||||
fi
|
||||
|
||||
# hipify sources
|
||||
python tools/amd_build/build_amd.py
|
||||
python setup.py install
|
||||
|
||||
# remove sccache wrappers post-build; runtime compilation of MIOpen kernels does not yet fully support them
|
||||
sudo rm -f /opt/cache/bin/cc
|
||||
sudo rm -f /opt/cache/bin/c++
|
||||
sudo rm -f /opt/cache/bin/gcc
|
||||
sudo rm -f /opt/cache/bin/g++
|
||||
pushd /opt/rocm/llvm/bin
|
||||
if [[ -d original ]]; then
|
||||
sudo mv original/clang .
|
||||
sudo mv original/clang++ .
|
||||
fi
|
||||
sudo rm -rf original
|
||||
popd
|
||||
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# sccache will fail for CUDA builds if all cores are used for compiling
|
||||
|
|
@ -227,7 +212,7 @@ else
|
|||
# ppc64le build fails when WERROR=1
|
||||
# set only when building other architectures
|
||||
# only use for "python setup.py bdist_wheel" line
|
||||
if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" != *ppc64le* && "$BUILD_ENVIRONMENT" != *rocm* ]]; then
|
||||
WERROR=1 python setup.py bdist_wheel
|
||||
else
|
||||
python setup.py bdist_wheel
|
||||
|
|
@ -251,6 +236,25 @@ else
|
|||
cp build/.ninja_log dist
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
||||
# remove sccache wrappers post-build; runtime compilation of MIOpen kernels does not yet fully support them
|
||||
sudo rm -f /opt/cache/bin/cc
|
||||
sudo rm -f /opt/cache/bin/c++
|
||||
sudo rm -f /opt/cache/bin/gcc
|
||||
sudo rm -f /opt/cache/bin/g++
|
||||
pushd /opt/rocm/llvm/bin
|
||||
if [[ -d original ]]; then
|
||||
sudo mv original/clang .
|
||||
sudo mv original/clang++ .
|
||||
fi
|
||||
sudo rm -rf original
|
||||
popd
|
||||
|
||||
# exit before building custom test artifacts until we resolve cmake error:
|
||||
# static library kineto_LIBRARY-NOTFOUND not found.
|
||||
exit 0
|
||||
fi
|
||||
|
||||
CUSTOM_TEST_ARTIFACT_BUILD_DIR=${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-${PWD}/../}
|
||||
mkdir -pv "${CUSTOM_TEST_ARTIFACT_BUILD_DIR}"
|
||||
|
||||
|
|
|
|||
|
|
@ -80,8 +80,11 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
|||
rocminfo | grep -E 'Name:.*\sgfx|Marketing'
|
||||
|
||||
# Manually set NUM_TEST_SHARDS since Jenkins doesn't do it
|
||||
# TODO: Can remove this once ROCm migration from Jenkins to GHA is complete.
|
||||
if [[ -z "${GITHUB_ACTIONS}" ]]; then
|
||||
export NUM_TEST_SHARDS=2
|
||||
fi
|
||||
fi
|
||||
|
||||
# --user breaks ppc64le builds and these packages are already in ppc64le docker
|
||||
if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]] && [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user