Enable multigpu_test in GHA (#60221)

Summary:
- [x] add to test matrix
- [x] enable on PRs for testing
- [x] modify the scripts so it actually runs the multigpu tests
- [x] put `num_shards` after the `shard` number in each test matrix entry
- [x] use a separate test-reports artifact
- [x] run on `linux.16xlarge.nvidia.gpu`
- [x] validate that it works
- [x] disable on PRs before merging

Pull Request resolved: https://github.com/pytorch/pytorch/pull/60221

Test Plan: CI. Example run: https://github.com/pytorch/pytorch/actions/runs/984347177

Reviewed By: malfet

Differential Revision: D29430567

Pulled By: samestep

fbshipit-source-id: 09f8e208e524579b603611479ca00515c8a1b5aa
This commit is contained in:
Sam Estep 2021-06-30 08:51:06 -07:00 committed by Facebook GitHub Bot
parent 5576c7bdd1
commit 0b8a7daa2a
11 changed files with 217 additions and 128 deletions

View File

@ -4,6 +4,7 @@ from pathlib import Path
from typing import Any, Dict
import jinja2
from typing_extensions import Literal
DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"
@ -15,6 +16,8 @@ GITHUB_DIR = Path(__file__).parent.parent
# https://github.com/python/mypy/issues/4617
PyTorchWorkflow = Dict[str, Any]
YamlShellBool = Literal["''", 1]
WINDOWS_CPU_TEST_RUNNER = "windows.4xlarge"
WINDOWS_CUDA_TEST_RUNNER = "windows.8xlarge.nvidia.gpu"
@ -49,6 +52,7 @@ def PyTorchLinuxWorkflow(
test_runner_type: str,
on_pull_request: bool = False,
enable_doc_jobs: bool = False,
enable_multigpu_test: YamlShellBool = "''",
num_test_shards: int = 1,
) -> PyTorchWorkflow:
return {
@ -57,6 +61,7 @@ def PyTorchLinuxWorkflow(
"test_runner_type": test_runner_type,
"on_pull_request": on_pull_request,
"enable_doc_jobs": enable_doc_jobs,
"enable_multigpu_test": enable_multigpu_test,
"num_test_shards": num_test_shards,
}
@ -147,6 +152,7 @@ LINUX_WORKFLOWS = [
build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
enable_multigpu_test=1,
num_test_shards=2,
),
PyTorchLinuxWorkflow(

View File

@ -9,22 +9,47 @@ dictated by just sharding.
import json
import os
from typing import List
from typing import Dict
from typing_extensions import TypedDict
NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))
def generate_sharding_list() -> List[int]:
return list(range(1, NUM_TEST_SHARDS + 1))
class Config(TypedDict):
num_shards: int
runner: str
def main() -> None:
print(json.dumps(
{
'test_config': generate_sharding_list()
},
sort_keys=True,
))
TEST_RUNNER_TYPE = os.getenv('TEST_RUNNER_TYPE')
NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))
MULTIGPU_RUNNER_TYPE = os.getenv('MULTIGPU_RUNNER_TYPE')
configs: Dict[str, Config] = {}
if MULTIGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_MULTIGPU_TEST'):
configs['multigpu'] = {'num_shards': 1, 'runner': MULTIGPU_RUNNER_TYPE}
matrix = {
'include': [
{
'config': 'default',
'shard': shard,
'num_shards': NUM_TEST_SHARDS,
'runner': TEST_RUNNER_TYPE,
}
for shard in range(1, NUM_TEST_SHARDS + 1)
] + [
{
'config': name,
'shard': shard,
'num_shards': config['num_shards'],
'runner': config['runner'],
}
for name, config in configs.items()
for shard in range(1, config['num_shards'] + 1)
]
}
render_matrix = {'config': list(dict.fromkeys(x['config'] for x in matrix['include']))}
print(json.dumps({'matrix': matrix, 'render-matrix': render_matrix}, indent=2))
print(f'::set-output name=matrix::{json.dumps(matrix)}')
print(f'::set-output name=render-matrix::{json.dumps(render_matrix)}')
if __name__ == "__main__":

View File

@ -195,24 +195,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: !{{ test_runner_type }}
ENABLE_MULTIGPU_TEST: !{{ enable_multigpu_test }}
NUM_TEST_SHARDS: !{{ num_test_shards }}
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch
uses: actions/checkout@v2
- name: Generating test matrix
id: set-matrix
run: |
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
run: .github/scripts/generate_pytorch_test_matrix.py
test:
runs-on: !{{ test_runner_type }}
needs:
- calculate-docker-image
- build
@ -220,11 +221,13 @@ jobs:
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: !{{ build_environment }}-test
NUM_TEST_SHARDS: !{{ num_test_shards }}
TEST_CONFIG: ${{ matrix.test_config }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps:
- name: Log in to ECR
run: |
@ -274,9 +277,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch
run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# TODO: Stop building test binaries as part of the build phase
@ -300,7 +306,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace
if: always()
run: |
@ -316,7 +322,7 @@ jobs:
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
@ -337,8 +343,12 @@ jobs:
render_test_results:
if: always()
needs:
- generate-test-matrix
- test
runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
@ -348,7 +358,7 @@ jobs:
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
name: test-reports-${{ matrix.config }}
path: .
- name: Unzip test reports
run: |

View File

@ -92,37 +92,37 @@ jobs:
{%- endif %}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: !{{ test_runner_type }}
NUM_TEST_SHARDS: !{{ num_test_shards }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch
uses: actions/checkout@v2
- name: Generating test matrix
id: set-matrix
run: |
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
run: .github/scripts/generate_pytorch_test_matrix.py
test:
{%- if only_build_on_pull_request %}
if: ${{ github.event_name == 'push' }}
{%- endif %}
runs-on: !{{ test_runner_type }}
env:
JOB_BASE_NAME: !{{ build_environment }}-test
NUM_TEST_SHARDS: !{{ num_test_shards }}
TEST_CONFIG: ${{ matrix.test_config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
needs:
- build
- generate-test-matrix
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
@ -165,9 +165,7 @@ jobs:
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
else
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@ -184,7 +182,7 @@ jobs:
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
@ -202,8 +200,12 @@ jobs:
if: always()
{%- endif %}
needs:
- generate-test-matrix
- test
runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
# TODO: Make this into a composite step
steps:
- name: Checkout PyTorch
@ -214,7 +216,7 @@ jobs:
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
name: test-reports-${{ matrix.config }}
path: .
- name: Unzip test reports
run: |

View File

@ -193,24 +193,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_MULTIGPU_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch
uses: actions/checkout@v2
- name: Generating test matrix
id: set-matrix
run: |
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
run: .github/scripts/generate_pytorch_test_matrix.py
test:
runs-on: linux.8xlarge.nvidia.gpu
needs:
- calculate-docker-image
- build
@ -218,11 +219,13 @@ jobs:
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7-test
NUM_TEST_SHARDS: 2
TEST_CONFIG: ${{ matrix.test_config }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps:
- name: Log in to ECR
run: |
@ -272,9 +275,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch
run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# TODO: Stop building test binaries as part of the build phase
@ -298,7 +304,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace
if: always()
run: |
@ -314,7 +320,7 @@ jobs:
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
@ -335,8 +341,12 @@ jobs:
render_test_results:
if: always()
needs:
- generate-test-matrix
- test
runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
@ -346,7 +356,7 @@ jobs:
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
name: test-reports-${{ matrix.config }}
path: .
- name: Unzip test reports
run: |

View File

@ -193,24 +193,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_MULTIGPU_TEST: 1
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch
uses: actions/checkout@v2
- name: Generating test matrix
id: set-matrix
run: |
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
run: .github/scripts/generate_pytorch_test_matrix.py
test:
runs-on: linux.8xlarge.nvidia.gpu
needs:
- calculate-docker-image
- build
@ -218,11 +219,13 @@ jobs:
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7-test
NUM_TEST_SHARDS: 2
TEST_CONFIG: ${{ matrix.test_config }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps:
- name: Log in to ECR
run: |
@ -272,9 +275,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch
run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# TODO: Stop building test binaries as part of the build phase
@ -298,7 +304,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace
if: always()
run: |
@ -314,7 +320,7 @@ jobs:
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
@ -335,8 +341,12 @@ jobs:
render_test_results:
if: always()
needs:
- generate-test-matrix
- test
runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
@ -346,7 +356,7 @@ jobs:
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
name: test-reports-${{ matrix.config }}
path: .
- name: Unzip test reports
run: |

View File

@ -193,24 +193,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_MULTIGPU_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch
uses: actions/checkout@v2
- name: Generating test matrix
id: set-matrix
run: |
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
run: .github/scripts/generate_pytorch_test_matrix.py
test:
runs-on: linux.8xlarge.nvidia.gpu
needs:
- calculate-docker-image
- build
@ -218,11 +219,13 @@ jobs:
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7-test
NUM_TEST_SHARDS: 2
TEST_CONFIG: ${{ matrix.test_config }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps:
- name: Log in to ECR
run: |
@ -272,9 +275,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch
run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# TODO: Stop building test binaries as part of the build phase
@ -298,7 +304,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace
if: always()
run: |
@ -314,7 +320,7 @@ jobs:
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
@ -335,8 +341,12 @@ jobs:
render_test_results:
if: always()
needs:
- generate-test-matrix
- test
runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
@ -346,7 +356,7 @@ jobs:
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
name: test-reports-${{ matrix.config }}
path: .
- name: Unzip test reports
run: |

View File

@ -194,24 +194,25 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_MULTIGPU_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch
uses: actions/checkout@v2
- name: Generating test matrix
id: set-matrix
run: |
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
run: .github/scripts/generate_pytorch_test_matrix.py
test:
runs-on: linux.2xlarge
needs:
- calculate-docker-image
- build
@ -219,11 +220,13 @@ jobs:
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: pytorch-linux-xenial-py3.6-gcc5.4-test
NUM_TEST_SHARDS: 2
TEST_CONFIG: ${{ matrix.test_config }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
steps:
- name: Log in to ECR
run: |
@ -273,9 +276,12 @@ jobs:
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch
run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# TODO: Stop building test binaries as part of the build phase
@ -299,7 +305,7 @@ jobs:
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
- name: Chown workspace
if: always()
run: |
@ -315,7 +321,7 @@ jobs:
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
@ -336,8 +342,12 @@ jobs:
render_test_results:
if: always()
needs:
- generate-test-matrix
- test
runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
@ -347,7 +357,7 @@ jobs:
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
name: test-reports-${{ matrix.config }}
path: .
- name: Unzip test reports
run: |

View File

@ -73,34 +73,34 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: windows.4xlarge
NUM_TEST_SHARDS: 2
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch
uses: actions/checkout@v2
- name: Generating test matrix
id: set-matrix
run: |
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
run: .github/scripts/generate_pytorch_test_matrix.py
test:
runs-on: windows.4xlarge
env:
JOB_BASE_NAME: pytorch-win-vs2019-cpu-py3-test
NUM_TEST_SHARDS: 2
TEST_CONFIG: ${{ matrix.test_config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
needs:
- build
- generate-test-matrix
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
@ -133,9 +133,7 @@ jobs:
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
else
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@ -152,7 +150,7 @@ jobs:
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
@ -166,8 +164,12 @@ jobs:
render_test_results:
if: always()
needs:
- generate-test-matrix
- test
runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
# TODO: Make this into a composite step
steps:
- name: Checkout PyTorch
@ -178,7 +180,7 @@ jobs:
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
name: test-reports-${{ matrix.config }}
path: .
- name: Unzip test reports
run: |

View File

@ -83,34 +83,34 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
NUM_TEST_SHARDS: 2
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch
uses: actions/checkout@v2
- name: Generating test matrix
id: set-matrix
run: |
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
run: .github/scripts/generate_pytorch_test_matrix.py
test:
runs-on: windows.8xlarge.nvidia.gpu
env:
JOB_BASE_NAME: pytorch-win-vs2019-cuda10-cudnn7-py3-test
NUM_TEST_SHARDS: 2
TEST_CONFIG: ${{ matrix.test_config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
needs:
- build
- generate-test-matrix
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
@ -151,9 +151,7 @@ jobs:
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
else
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@ -170,7 +168,7 @@ jobs:
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
@ -184,8 +182,12 @@ jobs:
render_test_results:
if: always()
needs:
- generate-test-matrix
- test
runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
# TODO: Make this into a composite step
steps:
- name: Checkout PyTorch
@ -196,7 +198,7 @@ jobs:
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
name: test-reports-${{ matrix.config }}
path: .
- name: Unzip test reports
run: |

View File

@ -82,34 +82,34 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
NUM_TEST_SHARDS: 2
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions
- name: Clone pytorch/pytorch
uses: actions/checkout@v2
- name: Generating test matrix
id: set-matrix
run: |
# outputting for debugging purposes
MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
echo "${MATRIX}"
echo "::set-output name=matrix::${MATRIX}"
run: .github/scripts/generate_pytorch_test_matrix.py
test:
runs-on: windows.8xlarge.nvidia.gpu
env:
JOB_BASE_NAME: pytorch-win-vs2019-cuda11-cudnn8-py3-test
NUM_TEST_SHARDS: 2
TEST_CONFIG: ${{ matrix.test_config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
needs:
- build
- generate-test-matrix
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
@ -150,9 +150,7 @@ jobs:
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
export SHARD_NUMBER=$TEST_CONFIG
else
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@ -169,7 +167,7 @@ jobs:
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
@ -183,8 +181,12 @@ jobs:
render_test_results:
if: always()
needs:
- generate-test-matrix
- test
runs-on: ubuntu-18.04
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
fail-fast: false
# TODO: Make this into a composite step
steps:
- name: Checkout PyTorch
@ -195,7 +197,7 @@ jobs:
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
name: test-reports-${{ matrix.config }}
path: .
- name: Unzip test reports
run: |