mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Added DevOps PR and Nightly Build logic (#58007)
Summary:
This PR adds Azure DevOps support for running custom PyTorch unit tests on PyTorch PR and Nightly builds.
PR Builds on Azure DevOps:
- Ensures that the wheel artifacts for a given PR build are ready
- Once the wheels are ready, PyTorch custom tests are run against a torch installation from the built wheels
Nightly Builds on Azure DevOps:
- Queues 4 builds {Win,Linux}*{CPU,CUDA} to run PyTorch custom unit tests on nightly PyTorch builds.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/58007
Reviewed By: seemethere, mruberry
Differential Revision: D28342428
Pulled By: malfet
fbshipit-source-id: a454accf69163f9ba77845eeb54831ef91437981
This commit is contained in:
parent
7156168f71
commit
53bc6f79f3
51
.azure_pipelines/job_templates/pytorch-template-unix.yml
Normal file
51
.azure_pipelines/job_templates/pytorch-template-unix.yml
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# PyTorch build steps template with Unix images Azure DevOps Instances
|
||||
#
|
||||
# This build depends on 5 parameters set as environment variables in the pipeline:
|
||||
# - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
|
||||
# - AZURE_STORAGE_KEY: Secret var for authenticating to Azure Storage
|
||||
# - TS_CLONE_PASSWORD, TS_PAT, TS_SM_PAT: Secret vars for specific unit tests (passed to the test script as _TS_CLONE_P, _TS_P, _TS_SM_P)
|
||||
|
||||
parameters:
|
||||
name: ''
|
||||
pool: ''
|
||||
container_endpoint: ''
|
||||
customMatrixes: ''
|
||||
|
||||
jobs:
|
||||
- job: ${{parameters.name}}
|
||||
timeoutInMinutes: 600
|
||||
strategy:
|
||||
matrix:
|
||||
${{ insert }}: ${{parameters.customMatrixes}}
|
||||
pool:
|
||||
name: ${{ parameters.pool}}
|
||||
variables:
|
||||
DECODE_PERCENTS: false
|
||||
|
||||
steps:
|
||||
# Don't checkout repo contents to save time and CPU compute. Environment variables
|
||||
# related to checkout branch such as $(BUILD_SOURCEBRANCH) are still available.
|
||||
- checkout: none
|
||||
|
||||
# Delete pytorch_tests repo from previous builds if exists
|
||||
- bash: rm -rf pytorch_tests/
|
||||
displayName: Delete pytorch_tests repo from previous builds if exists
|
||||
|
||||
# Clone PyTorch Tests repository
|
||||
- bash: |
|
||||
B64_PAT=$(printf "%s"":$_ADOTOKEN" | base64)
|
||||
git -c http.extraHeader="Authorization: Basic ${B64_PAT}" clone $(AZURE_DEVOPS_PYTORCH_TESTS_REPO_URL)
|
||||
cd pytorch_tests
|
||||
git checkout $(PYTORCH_TESTS_CHECKOUT_BRANCH)
|
||||
env:
|
||||
_ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
|
||||
displayName: Clone PyTorch Tests repo
|
||||
|
||||
# Run PyTorch Unit Tests
|
||||
- bash: bash $(Build.SourcesDirectory)/pytorch_tests/scripts/linux/run.sh
|
||||
env:
|
||||
_AZURE_STORAGE_KEY: $(AZURE_STORAGE_KEY)
|
||||
_TS_CLONE_P: $(TS_CLONE_PASSWORD)
|
||||
_TS_P: $(TS_PAT)
|
||||
_TS_SM_P: $(TS_SM_PAT)
|
||||
displayName: Run PyTorch Unit Tests
|
||||
49
.azure_pipelines/job_templates/pytorch-template-win.yml
Normal file
49
.azure_pipelines/job_templates/pytorch-template-win.yml
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# PyTorch build steps template with Windows images Azure DevOps Instances
|
||||
#
|
||||
# This build depends on 5 parameters set as environment variables in the pipeline:
|
||||
# - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
|
||||
# - AZURE_STORAGE_KEY: Secret var for authenticating to Azure Storage
|
||||
# - TS_CLONE_PASSWORD, TS_PAT, TS_SM_PAT: Secret vars for specific unit tests (passed to the test script as _TS_CLONE_P, _TS_P, _TS_SM_P)
|
||||
|
||||
parameters:
|
||||
name: ''
|
||||
pool: ''
|
||||
customMatrixes: ''
|
||||
|
||||
jobs:
|
||||
- job: ${{parameters.name}}
|
||||
timeoutInMinutes: 600
|
||||
strategy:
|
||||
matrix:
|
||||
${{ insert }}: ${{parameters.customMatrixes}}
|
||||
pool:
|
||||
name: ${{ parameters.pool}}
|
||||
|
||||
steps:
|
||||
# Don't checkout repo contents to save time and CPU compute. Environment variables
|
||||
# related to checkout branch such as $(BUILD_SOURCEBRANCH) are still available.
|
||||
- checkout: none
|
||||
|
||||
# Delete pytorch_tests repo from previous builds if exists
|
||||
- script: if exist "pytorch_tests/" rmdir "pytorch_tests/" /q /s
|
||||
displayName: Delete pytorch_tests repo from previous builds if exists
|
||||
|
||||
# Clone PyTorch Tests repository
|
||||
- powershell: |
|
||||
$env:B64Pat = [Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes(":$env:_ADOTOKEN"))
|
||||
git -c http.extraHeader="Authorization: Basic $env:B64Pat" clone $env:AZURE_DEVOPS_pytorch_tests_REPO_URL
|
||||
cd pytorch_tests
|
||||
git checkout $(PYTORCH_TESTS_CHECKOUT_BRANCH)
|
||||
env:
|
||||
_ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
|
||||
displayName: Clone PyTorch Tests repo
|
||||
|
||||
# Run PyTorch Unit Tests
|
||||
- script: call $(Build.SourcesDirectory)\pytorch_tests\scripts\windows\run.bat
|
||||
env:
|
||||
_ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
|
||||
_AZURE_STORAGE_KEY: $(AZURE_STORAGE_KEY)
|
||||
_TS_CLONE_P: $(TS_CLONE_PASSWORD)
|
||||
_TS_P: $(TS_PAT)
|
||||
_TS_SM_P: $(TS_SM_PAT)
|
||||
displayName: Run PyTorch Unit Tests
|
||||
14
.azure_pipelines/job_templates/wheel-wait-job-template.yml
Normal file
14
.azure_pipelines/job_templates/wheel-wait-job-template.yml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Main logic to initiate wait for PR artifact to be ready
|
||||
|
||||
steps:
|
||||
- task: InvokeRESTAPI@1
|
||||
displayName: 'Wait for job success and wheel ready'
|
||||
timeoutInMinutes: 60
|
||||
inputs:
|
||||
connectionType: 'connectedServiceName'
|
||||
serviceConnection: circleciconn
|
||||
method: 'POST'
|
||||
headers: '{"Content-Type":"application/json", "BranchName":"$(TARGET_BRANCH_TO_CHECK_PR)", "JobName":"$(TARGET_CIRCLECI_PR)", "PlanUrl":"$(System.CollectionUri)", "ProjectId":"$(System.TeamProjectId)", "HubName":"$(System.HostType)", "PlanId":"$(System.PlanId)", "JobId":"$(System.JobId)", "TimelineId":"$(System.TimelineId)", "TaskInstanceId":"$(System.TaskInstanceId)", "AuthToken":"$(System.AccessToken)"}'
|
||||
body: ''
|
||||
urlSuffix: 'api/JobStatus'
|
||||
waitForCompletion: true
|
||||
49
.azure_pipelines/job_templates/wheel-wait-template.yml
Normal file
49
.azure_pipelines/job_templates/wheel-wait-template.yml
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# Initiate 5 agentless-server waiting jobs to check on the
|
||||
# status of PR artifact builds, for a maximum wait time of
|
||||
# 5 * 60 min = 300 minutes. These jobs will pass immediately
|
||||
# once the targeted CircleCI build is ready.
|
||||
|
||||
jobs:
|
||||
- job: checkjob1
|
||||
pool: server
|
||||
timeoutInMinutes: 60
|
||||
continueOnError: true
|
||||
|
||||
steps:
|
||||
- template: wheel-wait-job-template.yml
|
||||
|
||||
- job: checkjob2
|
||||
pool: server
|
||||
timeoutInMinutes: 60
|
||||
dependsOn: checkjob1
|
||||
continueOnError: true
|
||||
|
||||
steps:
|
||||
- template: wheel-wait-job-template.yml
|
||||
|
||||
- job: checkjob3
|
||||
pool: server
|
||||
timeoutInMinutes: 60
|
||||
dependsOn: checkjob2
|
||||
continueOnError: true
|
||||
|
||||
steps:
|
||||
- template: wheel-wait-job-template.yml
|
||||
|
||||
- job: checkjob4
|
||||
pool: server
|
||||
timeoutInMinutes: 60
|
||||
dependsOn: checkjob3
|
||||
continueOnError: true
|
||||
|
||||
steps:
|
||||
- template: wheel-wait-job-template.yml
|
||||
|
||||
- job: checkjob5
|
||||
pool: server
|
||||
timeoutInMinutes: 60
|
||||
dependsOn: checkjob4
|
||||
continueOnError: true
|
||||
|
||||
steps:
|
||||
- template: wheel-wait-job-template.yml
|
||||
50
.azure_pipelines/nightly-pytorch-tests-pipeline.yml
Normal file
50
.azure_pipelines/nightly-pytorch-tests-pipeline.yml
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
# PyTorch Nightly PyTorch Tests Builds Pipeline on Azure DevOps
|
||||
#
|
||||
# This pipeline runs custom PyTorch unit-tests on nightly
|
||||
# PyTorch wheels.
|
||||
|
||||
stages:
|
||||
- stage: 'NightlyCustomTests'
|
||||
displayName: 'Run custom unit tests on PyTorch wheels'
|
||||
jobs:
|
||||
- template: job_templates/pytorch-template-unix.yml
|
||||
parameters:
|
||||
name: ubuntu_1804_CPU_docker
|
||||
pool: $(BUILD_POOL_LIN_1)
|
||||
customMatrixes:
|
||||
Nightly_Custom_Tests:
|
||||
_DOCKER_IMAGE: $(DOCKER_IMAGE_LIN_1)
|
||||
_PYTHON_VERSION: $(PYTHON_VERSION_LIN_1)
|
||||
_CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_LIN_1)
|
||||
_RUN_TESTS: $(RUN_TESTS_LIN)
|
||||
|
||||
- template: job_templates/pytorch-template-unix.yml
|
||||
parameters:
|
||||
name: ubuntu_1804_GPU_docker
|
||||
pool: $(BUILD_POOL_LIN_2)
|
||||
customMatrixes:
|
||||
Nightly_Custom_Tests:
|
||||
_DOCKER_IMAGE: $(DOCKER_IMAGE_LIN_2)
|
||||
_PYTHON_VERSION: $(PYTHON_VERSION_LIN_2)
|
||||
_CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_LIN_2)
|
||||
_RUN_TESTS: $(RUN_TESTS_LIN)
|
||||
|
||||
- template: job_templates/pytorch-template-win.yml
|
||||
parameters:
|
||||
name: windows_2019_CPU
|
||||
pool: $(BUILD_POOL_WIN_1)
|
||||
customMatrixes:
|
||||
Nightly_Custom_Tests:
|
||||
_PYTHON_VERSION: $(PYTHON_VERSION_WIN_1)
|
||||
_CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_WIN_1)
|
||||
_RUN_TESTS: $(RUN_TESTS_WIN)
|
||||
|
||||
- template: job_templates/pytorch-template-win.yml
|
||||
parameters:
|
||||
name: windows_2019_GPU
|
||||
pool: $(BUILD_POOL_WIN_2)
|
||||
customMatrixes:
|
||||
Nightly_Custom_Tests:
|
||||
_PYTHON_VERSION: $(PYTHON_VERSION_WIN_2)
|
||||
_CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_WIN_2)
|
||||
_RUN_TESTS: $(RUN_TESTS_WIN)
|
||||
30
.azure_pipelines/pytorch-tests-pipeline.yml
Normal file
30
.azure_pipelines/pytorch-tests-pipeline.yml
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# PyTorch PR PyTorch Tests Builds Pipeline on Azure DevOps
|
||||
#
|
||||
# This pipeline:
|
||||
# 1) ensures that CircleCI builds for a given PR
|
||||
# have finished, and that its artifacts are
|
||||
# ready for download
|
||||
# 2) runs custom PyTorch unit-tests on PyTorch
|
||||
# wheels generated during PR builds.
|
||||
|
||||
stages:
|
||||
- stage: 'EnsureArtifactsReady'
|
||||
displayName: 'Ensure PyTorch PR Artifacts are ready'
|
||||
jobs:
|
||||
- template: job_templates/wheel-wait-template.yml
|
||||
|
||||
- stage: 'PRCustomTests'
|
||||
displayName: 'Run custom unit tests on PyTorch wheels'
|
||||
jobs:
|
||||
- template: job_templates/pytorch-template-unix.yml
|
||||
parameters:
|
||||
name: ubuntu_1804_GPU_docker
|
||||
pool: $(BUILD_POOL_PR)
|
||||
customMatrixes:
|
||||
PR_Custom_Tests:
|
||||
_PYTHON_VERSION: $(PYTHON_VERSION_PR)
|
||||
_CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_PR)
|
||||
_TARGET_CIRCLECI_BUILD: $(TARGET_CIRCLECI_PR)
|
||||
_TARGET_BRANCH_TO_CHECK: $(TARGET_BRANCH_TO_CHECK_PR)
|
||||
_DOCKER_IMAGE: $(DOCKER_IMAGE_PR)
|
||||
_RUN_TESTS: $(RUN_TESTS_PR)
|
||||
|
|
@ -49,8 +49,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
cmake \
|
||||
curl \
|
||||
git \
|
||||
git-lfs \
|
||||
libjpeg-dev \
|
||||
libpng-dev \
|
||||
openmpi-bin \
|
||||
wget && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
RUN /usr/sbin/update-ccache-symlinks
|
||||
|
|
@ -86,7 +88,10 @@ ARG TORCH_CUDA_ARCH_LIST_VAR
|
|||
RUN if [ -z "$TORCH_CUDA_ARCH_LIST_VAR" ] ; then \
|
||||
echo "Continuing CPU build ..."; \
|
||||
else \
|
||||
echo "Setting CUDA env vars ..."; \
|
||||
echo "Setting CUDA env vars and installing openmpi ..."; \
|
||||
# Set MPI links to avoid libmpi_cxx.so.1 not found error
|
||||
ln -s /usr/lib/x86_64-linux-gnu/libmpi_cxx.so.20 /usr/lib/x86_64-linux-gnu/libmpi_cxx.so.1; \
|
||||
ln -s /usr/lib/x86_64-linux-gnu/libmpi.so.20.10.1 /usr/lib/x86_64-linux-gnu/libmpi.so.12; \
|
||||
fi
|
||||
# If the build argument TORCH_CUDA_ARCH_LIST_VAR is given, container will be
|
||||
# set for GPU/CUDA build, else for CPU build.
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user