Enable multigpu_test in GHA (#60221)

Summary:
- [x] add to test matrix
- [x] enable on PRs for testing
- [x] modify the scripts so the workflow actually runs the multigpu tests
- [x] put `num_shards` after the `shard` number
- [x] use a separate test-reports artifact
- [x] run on `linux.16xlarge.nvidia.gpu`
- [x] validate that it works
- [x] disable on PRs before merging

Pull Request resolved: https://github.com/pytorch/pytorch/pull/60221

Test Plan: CI. Example run: https://github.com/pytorch/pytorch/actions/runs/984347177

Reviewed By: malfet

Differential Revision: D29430567

Pulled By: samestep

fbshipit-source-id: 09f8e208e524579b603611479ca00515c8a1b5aa
This commit is contained in:
Sam Estep 2021-06-30 08:51:06 -07:00 committed by Facebook GitHub Bot
parent 5576c7bdd1
commit 0b8a7daa2a
11 changed files with 217 additions and 128 deletions

View File

@ -4,6 +4,7 @@ from pathlib import Path
from typing import Any, Dict from typing import Any, Dict
import jinja2 import jinja2
from typing_extensions import Literal
DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com" DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"
@ -15,6 +16,8 @@ GITHUB_DIR = Path(__file__).parent.parent
# https://github.com/python/mypy/issues/4617 # https://github.com/python/mypy/issues/4617
PyTorchWorkflow = Dict[str, Any] PyTorchWorkflow = Dict[str, Any]
YamlShellBool = Literal["''", 1]
WINDOWS_CPU_TEST_RUNNER = "windows.4xlarge" WINDOWS_CPU_TEST_RUNNER = "windows.4xlarge"
WINDOWS_CUDA_TEST_RUNNER = "windows.8xlarge.nvidia.gpu" WINDOWS_CUDA_TEST_RUNNER = "windows.8xlarge.nvidia.gpu"
@ -49,6 +52,7 @@ def PyTorchLinuxWorkflow(
test_runner_type: str, test_runner_type: str,
on_pull_request: bool = False, on_pull_request: bool = False,
enable_doc_jobs: bool = False, enable_doc_jobs: bool = False,
enable_multigpu_test: YamlShellBool = "''",
num_test_shards: int = 1, num_test_shards: int = 1,
) -> PyTorchWorkflow: ) -> PyTorchWorkflow:
return { return {
@ -57,6 +61,7 @@ def PyTorchLinuxWorkflow(
"test_runner_type": test_runner_type, "test_runner_type": test_runner_type,
"on_pull_request": on_pull_request, "on_pull_request": on_pull_request,
"enable_doc_jobs": enable_doc_jobs, "enable_doc_jobs": enable_doc_jobs,
"enable_multigpu_test": enable_multigpu_test,
"num_test_shards": num_test_shards, "num_test_shards": num_test_shards,
} }
@ -147,6 +152,7 @@ LINUX_WORKFLOWS = [
build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7", build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7", docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER, test_runner_type=LINUX_CUDA_TEST_RUNNER,
enable_multigpu_test=1,
num_test_shards=2, num_test_shards=2,
), ),
PyTorchLinuxWorkflow( PyTorchLinuxWorkflow(

View File

@ -9,22 +9,47 @@ dictated by just sharding.
import json import json
import os import os
from typing import List from typing import Dict
from typing_extensions import TypedDict
NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1')) class Config(TypedDict):
num_shards: int
def generate_sharding_list() -> List[int]: runner: str
return list(range(1, NUM_TEST_SHARDS + 1))
def main() -> None: def main() -> None:
print(json.dumps( TEST_RUNNER_TYPE = os.getenv('TEST_RUNNER_TYPE')
{ NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))
'test_config': generate_sharding_list() MULTIGPU_RUNNER_TYPE = os.getenv('MULTIGPU_RUNNER_TYPE')
}, configs: Dict[str, Config] = {}
sort_keys=True, if MULTIGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_MULTIGPU_TEST'):
)) configs['multigpu'] = {'num_shards': 1, 'runner': MULTIGPU_RUNNER_TYPE}
matrix = {
'include': [
{
'config': 'default',
'shard': shard,
'num_shards': NUM_TEST_SHARDS,
'runner': TEST_RUNNER_TYPE,
}
for shard in range(1, NUM_TEST_SHARDS + 1)
] + [
{
'config': name,
'shard': shard,
'num_shards': config['num_shards'],
'runner': config['runner'],
}
for name, config in configs.items()
for shard in range(1, config['num_shards'] + 1)
]
}
render_matrix = {'config': list(dict.fromkeys(x['config'] for x in matrix['include']))}
print(json.dumps({'matrix': matrix, 'render-matrix': render_matrix}, indent=2))
print(f'::set-output name=matrix::{json.dumps(matrix)}')
print(f'::set-output name=render-matrix::{json.dumps(render_matrix)}')
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -195,24 +195,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
env: env:
TEST_RUNNER_TYPE: !{{ test_runner_type }}
ENABLE_MULTIGPU_TEST: !{{ enable_multigpu_test }}
NUM_TEST_SHARDS: !{{ num_test_shards }} NUM_TEST_SHARDS: !{{ num_test_shards }}
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container: container:
image: python:3.9 image: python:3.9
steps: steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch - name: Clone pytorch/pytorch
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Generating test matrix - name: Generating test matrix
id: set-matrix id: set-matrix
run: | run: .github/scripts/generate_pytorch_test_matrix.py
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
test: test:
runs-on: !{{ test_runner_type }}
needs: needs:
- calculate-docker-image - calculate-docker-image
- build - build
@ -220,11 +221,13 @@ jobs:
strategy: strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false
runs-on: ${{ matrix.runner }}
env: env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }} DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: !{{ build_environment }}-test JOB_BASE_NAME: !{{ build_environment }}-test
NUM_TEST_SHARDS: !{{ num_test_shards }} TEST_CONFIG: ${{ matrix.config }}
TEST_CONFIG: ${{ matrix.test_config }} SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps: steps:
- name: Log in to ECR - name: Log in to ECR
run: | run: |
@ -274,9 +277,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch - name: Test PyTorch
run: | run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then if [[ $TEST_CONFIG == 'multigpu' ]]; then
export SHARD_NUMBER=$TEST_CONFIG TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0 export SHARD_NUMBER=0
fi fi
# TODO: Stop building test binaries as part of the build phase # TODO: Stop building test binaries as part of the build phase
@ -300,7 +306,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \ -w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \ "${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh' sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace - name: Chown workspace
if: always() if: always()
run: | run: |
@ -316,7 +322,7 @@ jobs:
name: Store PyTorch Test Reports name: Store PyTorch Test Reports
if: always() if: always()
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
retention-days: 14 retention-days: 14
if-no-files-found: error if-no-files-found: error
path: path:
@ -337,8 +343,12 @@ jobs:
render_test_results: render_test_results:
if: always() if: always()
needs: needs:
- generate-test-matrix
- test - test
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -348,7 +358,7 @@ jobs:
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
name: Download PyTorch Test Reports name: Download PyTorch Test Reports
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
path: . path: .
- name: Unzip test reports - name: Unzip test reports
run: | run: |

View File

@ -92,37 +92,37 @@ jobs:
{%- endif %} {%- endif %}
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
env: env:
TEST_RUNNER_TYPE: !{{ test_runner_type }}
NUM_TEST_SHARDS: !{{ num_test_shards }} NUM_TEST_SHARDS: !{{ num_test_shards }}
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container: container:
image: python:3.9 image: python:3.9
steps: steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch - name: Clone pytorch/pytorch
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Generating test matrix - name: Generating test matrix
id: set-matrix id: set-matrix
run: | run: .github/scripts/generate_pytorch_test_matrix.py
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
test: test:
{%- if only_build_on_pull_request %} {%- if only_build_on_pull_request %}
if: ${{ github.event_name == 'push' }} if: ${{ github.event_name == 'push' }}
{%- endif %} {%- endif %}
runs-on: !{{ test_runner_type }}
env: env:
JOB_BASE_NAME: !{{ build_environment }}-test JOB_BASE_NAME: !{{ build_environment }}-test
NUM_TEST_SHARDS: !{{ num_test_shards }} SHARD_NUMBER: ${{ matrix.shard }}
TEST_CONFIG: ${{ matrix.test_config }} NUM_TEST_SHARDS: ${{ matrix.num_shards }}
needs: needs:
- build - build
- generate-test-matrix - generate-test-matrix
strategy: strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false
runs-on: ${{ matrix.runner }}
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -165,9 +165,7 @@ jobs:
env: env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/ PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: | run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
else
export SHARD_NUMBER=0 export SHARD_NUMBER=0
fi fi
if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@ -184,7 +182,7 @@ jobs:
name: Store PyTorch Test Reports name: Store PyTorch Test Reports
if: always() if: always()
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
retention-days: 14 retention-days: 14
if-no-files-found: error if-no-files-found: error
path: path:
@ -202,8 +200,12 @@ jobs:
if: always() if: always()
{%- endif %} {%- endif %}
needs: needs:
- generate-test-matrix
- test - test
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
# TODO: Make this into a composite step # TODO: Make this into a composite step
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
@ -214,7 +216,7 @@ jobs:
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
name: Download PyTorch Test Reports name: Download PyTorch Test Reports
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
path: . path: .
- name: Unzip test reports - name: Unzip test reports
run: | run: |

View File

@ -193,24 +193,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
env: env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_MULTIGPU_TEST: ''
NUM_TEST_SHARDS: 2 NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container: container:
image: python:3.9 image: python:3.9
steps: steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch - name: Clone pytorch/pytorch
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Generating test matrix - name: Generating test matrix
id: set-matrix id: set-matrix
run: | run: .github/scripts/generate_pytorch_test_matrix.py
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
test: test:
runs-on: linux.8xlarge.nvidia.gpu
needs: needs:
- calculate-docker-image - calculate-docker-image
- build - build
@ -218,11 +219,13 @@ jobs:
strategy: strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false
runs-on: ${{ matrix.runner }}
env: env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }} DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7-test JOB_BASE_NAME: pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7-test
NUM_TEST_SHARDS: 2 TEST_CONFIG: ${{ matrix.config }}
TEST_CONFIG: ${{ matrix.test_config }} SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps: steps:
- name: Log in to ECR - name: Log in to ECR
run: | run: |
@ -272,9 +275,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch - name: Test PyTorch
run: | run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then if [[ $TEST_CONFIG == 'multigpu' ]]; then
export SHARD_NUMBER=$TEST_CONFIG TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0 export SHARD_NUMBER=0
fi fi
# TODO: Stop building test binaries as part of the build phase # TODO: Stop building test binaries as part of the build phase
@ -298,7 +304,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \ -w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \ "${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh' sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace - name: Chown workspace
if: always() if: always()
run: | run: |
@ -314,7 +320,7 @@ jobs:
name: Store PyTorch Test Reports name: Store PyTorch Test Reports
if: always() if: always()
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
retention-days: 14 retention-days: 14
if-no-files-found: error if-no-files-found: error
path: path:
@ -335,8 +341,12 @@ jobs:
render_test_results: render_test_results:
if: always() if: always()
needs: needs:
- generate-test-matrix
- test - test
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -346,7 +356,7 @@ jobs:
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
name: Download PyTorch Test Reports name: Download PyTorch Test Reports
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
path: . path: .
- name: Unzip test reports - name: Unzip test reports
run: | run: |

View File

@ -193,24 +193,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
env: env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_MULTIGPU_TEST: 1
NUM_TEST_SHARDS: 2 NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container: container:
image: python:3.9 image: python:3.9
steps: steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch - name: Clone pytorch/pytorch
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Generating test matrix - name: Generating test matrix
id: set-matrix id: set-matrix
run: | run: .github/scripts/generate_pytorch_test_matrix.py
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
test: test:
runs-on: linux.8xlarge.nvidia.gpu
needs: needs:
- calculate-docker-image - calculate-docker-image
- build - build
@ -218,11 +219,13 @@ jobs:
strategy: strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false
runs-on: ${{ matrix.runner }}
env: env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }} DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7-test JOB_BASE_NAME: pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7-test
NUM_TEST_SHARDS: 2 TEST_CONFIG: ${{ matrix.config }}
TEST_CONFIG: ${{ matrix.test_config }} SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps: steps:
- name: Log in to ECR - name: Log in to ECR
run: | run: |
@ -272,9 +275,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch - name: Test PyTorch
run: | run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then if [[ $TEST_CONFIG == 'multigpu' ]]; then
export SHARD_NUMBER=$TEST_CONFIG TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0 export SHARD_NUMBER=0
fi fi
# TODO: Stop building test binaries as part of the build phase # TODO: Stop building test binaries as part of the build phase
@ -298,7 +304,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \ -w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \ "${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh' sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace - name: Chown workspace
if: always() if: always()
run: | run: |
@ -314,7 +320,7 @@ jobs:
name: Store PyTorch Test Reports name: Store PyTorch Test Reports
if: always() if: always()
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
retention-days: 14 retention-days: 14
if-no-files-found: error if-no-files-found: error
path: path:
@ -335,8 +341,12 @@ jobs:
render_test_results: render_test_results:
if: always() if: always()
needs: needs:
- generate-test-matrix
- test - test
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -346,7 +356,7 @@ jobs:
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
name: Download PyTorch Test Reports name: Download PyTorch Test Reports
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
path: . path: .
- name: Unzip test reports - name: Unzip test reports
run: | run: |

View File

@ -193,24 +193,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
env: env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_MULTIGPU_TEST: ''
NUM_TEST_SHARDS: 2 NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container: container:
image: python:3.9 image: python:3.9
steps: steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch - name: Clone pytorch/pytorch
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Generating test matrix - name: Generating test matrix
id: set-matrix id: set-matrix
run: | run: .github/scripts/generate_pytorch_test_matrix.py
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
test: test:
runs-on: linux.8xlarge.nvidia.gpu
needs: needs:
- calculate-docker-image - calculate-docker-image
- build - build
@ -218,11 +219,13 @@ jobs:
strategy: strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false
runs-on: ${{ matrix.runner }}
env: env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }} DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7-test JOB_BASE_NAME: pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7-test
NUM_TEST_SHARDS: 2 TEST_CONFIG: ${{ matrix.config }}
TEST_CONFIG: ${{ matrix.test_config }} SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps: steps:
- name: Log in to ECR - name: Log in to ECR
run: | run: |
@ -272,9 +275,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch - name: Test PyTorch
run: | run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then if [[ $TEST_CONFIG == 'multigpu' ]]; then
export SHARD_NUMBER=$TEST_CONFIG TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0 export SHARD_NUMBER=0
fi fi
# TODO: Stop building test binaries as part of the build phase # TODO: Stop building test binaries as part of the build phase
@ -298,7 +304,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \ -w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \ "${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh' sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace - name: Chown workspace
if: always() if: always()
run: | run: |
@ -314,7 +320,7 @@ jobs:
name: Store PyTorch Test Reports name: Store PyTorch Test Reports
if: always() if: always()
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
retention-days: 14 retention-days: 14
if-no-files-found: error if-no-files-found: error
path: path:
@ -335,8 +341,12 @@ jobs:
render_test_results: render_test_results:
if: always() if: always()
needs: needs:
- generate-test-matrix
- test - test
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -346,7 +356,7 @@ jobs:
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
name: Download PyTorch Test Reports name: Download PyTorch Test Reports
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
path: . path: .
- name: Unzip test reports - name: Unzip test reports
run: | run: |

View File

@ -194,24 +194,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
env: env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_MULTIGPU_TEST: ''
NUM_TEST_SHARDS: 2 NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container: container:
image: python:3.9 image: python:3.9
steps: steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch - name: Clone pytorch/pytorch
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Generating test matrix - name: Generating test matrix
id: set-matrix id: set-matrix
run: | run: .github/scripts/generate_pytorch_test_matrix.py
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
test: test:
runs-on: linux.2xlarge
needs: needs:
- calculate-docker-image - calculate-docker-image
- build - build
@ -219,11 +220,13 @@ jobs:
strategy: strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false
runs-on: ${{ matrix.runner }}
env: env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }} DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: pytorch-linux-xenial-py3.6-gcc5.4-test JOB_BASE_NAME: pytorch-linux-xenial-py3.6-gcc5.4-test
NUM_TEST_SHARDS: 2 TEST_CONFIG: ${{ matrix.config }}
TEST_CONFIG: ${{ matrix.test_config }} SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps: steps:
- name: Log in to ECR - name: Log in to ECR
run: | run: |
@ -273,9 +276,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch - name: Test PyTorch
run: | run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then if [[ $TEST_CONFIG == 'multigpu' ]]; then
export SHARD_NUMBER=$TEST_CONFIG TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0 export SHARD_NUMBER=0
fi fi
# TODO: Stop building test binaries as part of the build phase # TODO: Stop building test binaries as part of the build phase
@ -299,7 +305,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \ -w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \ "${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh' sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace - name: Chown workspace
if: always() if: always()
run: | run: |
@ -315,7 +321,7 @@ jobs:
name: Store PyTorch Test Reports name: Store PyTorch Test Reports
if: always() if: always()
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
retention-days: 14 retention-days: 14
if-no-files-found: error if-no-files-found: error
path: path:
@ -336,8 +342,12 @@ jobs:
render_test_results: render_test_results:
if: always() if: always()
needs: needs:
- generate-test-matrix
- test - test
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -347,7 +357,7 @@ jobs:
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
name: Download PyTorch Test Reports name: Download PyTorch Test Reports
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
path: . path: .
- name: Unzip test reports - name: Unzip test reports
run: | run: |

View File

@ -73,34 +73,34 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
env: env:
TEST_RUNNER_TYPE: windows.4xlarge
NUM_TEST_SHARDS: 2 NUM_TEST_SHARDS: 2
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container: container:
image: python:3.9 image: python:3.9
steps: steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch - name: Clone pytorch/pytorch
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Generating test matrix - name: Generating test matrix
id: set-matrix id: set-matrix
run: | run: .github/scripts/generate_pytorch_test_matrix.py
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
test: test:
runs-on: windows.4xlarge
env: env:
JOB_BASE_NAME: pytorch-win-vs2019-cpu-py3-test JOB_BASE_NAME: pytorch-win-vs2019-cpu-py3-test
NUM_TEST_SHARDS: 2 SHARD_NUMBER: ${{ matrix.shard }}
TEST_CONFIG: ${{ matrix.test_config }} NUM_TEST_SHARDS: ${{ matrix.num_shards }}
needs: needs:
- build - build
- generate-test-matrix - generate-test-matrix
strategy: strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false
runs-on: ${{ matrix.runner }}
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -133,9 +133,7 @@ jobs:
env: env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/ PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: | run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
else
export SHARD_NUMBER=0 export SHARD_NUMBER=0
fi fi
if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@ -152,7 +150,7 @@ jobs:
name: Store PyTorch Test Reports name: Store PyTorch Test Reports
if: always() if: always()
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
retention-days: 14 retention-days: 14
if-no-files-found: error if-no-files-found: error
path: path:
@ -166,8 +164,12 @@ jobs:
render_test_results: render_test_results:
if: always() if: always()
needs: needs:
- generate-test-matrix
- test - test
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
# TODO: Make this into a composite step # TODO: Make this into a composite step
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
@ -178,7 +180,7 @@ jobs:
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
name: Download PyTorch Test Reports name: Download PyTorch Test Reports
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
path: . path: .
- name: Unzip test reports - name: Unzip test reports
run: | run: |

View File

@ -83,34 +83,34 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
env: env:
TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
NUM_TEST_SHARDS: 2 NUM_TEST_SHARDS: 2
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container: container:
image: python:3.9 image: python:3.9
steps: steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch - name: Clone pytorch/pytorch
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Generating test matrix - name: Generating test matrix
id: set-matrix id: set-matrix
run: | run: .github/scripts/generate_pytorch_test_matrix.py
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
test: test:
runs-on: windows.8xlarge.nvidia.gpu
env: env:
JOB_BASE_NAME: pytorch-win-vs2019-cuda10-cudnn7-py3-test JOB_BASE_NAME: pytorch-win-vs2019-cuda10-cudnn7-py3-test
NUM_TEST_SHARDS: 2 SHARD_NUMBER: ${{ matrix.shard }}
TEST_CONFIG: ${{ matrix.test_config }} NUM_TEST_SHARDS: ${{ matrix.num_shards }}
needs: needs:
- build - build
- generate-test-matrix - generate-test-matrix
strategy: strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false
runs-on: ${{ matrix.runner }}
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -151,9 +151,7 @@ jobs:
env: env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/ PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: | run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
else
export SHARD_NUMBER=0 export SHARD_NUMBER=0
fi fi
if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@ -170,7 +168,7 @@ jobs:
name: Store PyTorch Test Reports name: Store PyTorch Test Reports
if: always() if: always()
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
retention-days: 14 retention-days: 14
if-no-files-found: error if-no-files-found: error
path: path:
@ -184,8 +182,12 @@ jobs:
render_test_results: render_test_results:
if: always() if: always()
needs: needs:
- generate-test-matrix
- test - test
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
# TODO: Make this into a composite step # TODO: Make this into a composite step
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
@ -196,7 +198,7 @@ jobs:
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
name: Download PyTorch Test Reports name: Download PyTorch Test Reports
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
path: . path: .
- name: Unzip test reports - name: Unzip test reports
run: | run: |

View File

@ -82,34 +82,34 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
env: env:
TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
NUM_TEST_SHARDS: 2 NUM_TEST_SHARDS: 2
outputs: outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }} matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container: container:
image: python:3.9 image: python:3.9
steps: steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch - name: Clone pytorch/pytorch
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Generating test matrix - name: Generating test matrix
id: set-matrix id: set-matrix
run: | run: .github/scripts/generate_pytorch_test_matrix.py
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
test: test:
runs-on: windows.8xlarge.nvidia.gpu
env: env:
JOB_BASE_NAME: pytorch-win-vs2019-cuda11-cudnn8-py3-test JOB_BASE_NAME: pytorch-win-vs2019-cuda11-cudnn8-py3-test
NUM_TEST_SHARDS: 2 SHARD_NUMBER: ${{ matrix.shard }}
TEST_CONFIG: ${{ matrix.test_config }} NUM_TEST_SHARDS: ${{ matrix.num_shards }}
needs: needs:
- build - build
- generate-test-matrix - generate-test-matrix
strategy: strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false
runs-on: ${{ matrix.runner }}
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -150,9 +150,7 @@ jobs:
env: env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/ PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: | run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
else
export SHARD_NUMBER=0 export SHARD_NUMBER=0
fi fi
if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@ -169,7 +167,7 @@ jobs:
name: Store PyTorch Test Reports name: Store PyTorch Test Reports
if: always() if: always()
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
retention-days: 14 retention-days: 14
if-no-files-found: error if-no-files-found: error
path: path:
@ -183,8 +181,12 @@ jobs:
render_test_results: render_test_results:
if: always() if: always()
needs: needs:
- generate-test-matrix
- test - test
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
# TODO: Make this into a composite step # TODO: Make this into a composite step
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
@ -195,7 +197,7 @@ jobs:
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
name: Download PyTorch Test Reports name: Download PyTorch Test Reports
with: with:
name: test-reports name: test-reports-${{ matrix.config }}
path: . path: .
- name: Unzip test reports - name: Unzip test reports
run: | run: |