diff --git a/.github/scripts/generate_ci_workflows.py b/.github/scripts/generate_ci_workflows.py
index a076cde1d63..8873aa54717 100755
--- a/.github/scripts/generate_ci_workflows.py
+++ b/.github/scripts/generate_ci_workflows.py
@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Any, Dict
 
 import jinja2
+from typing_extensions import Literal
 
 DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"
 
@@ -15,6 +16,8 @@ GITHUB_DIR = Path(__file__).parent.parent
 # https://github.com/python/mypy/issues/4617
 PyTorchWorkflow = Dict[str, Any]
 
+YamlShellBool = Literal["''", 1]  # "''" renders as an empty (falsy) YAML env string; 1 enables the flag
+
 WINDOWS_CPU_TEST_RUNNER = "windows.4xlarge"
 WINDOWS_CUDA_TEST_RUNNER = "windows.8xlarge.nvidia.gpu"
 
@@ -49,6 +52,7 @@ def PyTorchLinuxWorkflow(
     test_runner_type: str,
     on_pull_request: bool = False,
     enable_doc_jobs: bool = False,
+    enable_multigpu_test: YamlShellBool = "''",
     num_test_shards: int = 1,
 ) -> PyTorchWorkflow:
     return {
@@ -57,6 +61,7 @@ def PyTorchLinuxWorkflow(
         "test_runner_type": test_runner_type,
         "on_pull_request": on_pull_request,
         "enable_doc_jobs": enable_doc_jobs,
+        "enable_multigpu_test": enable_multigpu_test,
         "num_test_shards": num_test_shards,
     }
 
@@ -147,6 +152,7 @@ LINUX_WORKFLOWS = [
         build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7",
         docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
         test_runner_type=LINUX_CUDA_TEST_RUNNER,
+        enable_multigpu_test=1,
         num_test_shards=2,
     ),
     PyTorchLinuxWorkflow(
diff --git a/.github/scripts/generate_pytorch_test_matrix.py b/.github/scripts/generate_pytorch_test_matrix.py
index 7ab2099b8e7..4edb3e92eec 100755
--- a/.github/scripts/generate_pytorch_test_matrix.py
+++ b/.github/scripts/generate_pytorch_test_matrix.py
@@ -9,22 +9,47 @@ dictated by just sharding.
 
 import json
 import os
-from typing import List
+from typing import Dict
+
+from typing_extensions import TypedDict
 
 
-NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))
-
-def generate_sharding_list() -> List[int]:
-    return list(range(1, NUM_TEST_SHARDS + 1))
+class Config(TypedDict):  # settings for one extra (non-'default') test config
+    num_shards: int  # number of shard entries to emit for this config
+    runner: str  # value copied into each matrix entry's 'runner' key
 
 
 def main() -> None:
-    print(json.dumps(
-        {
-            'test_config': generate_sharding_list()
-        },
-        sort_keys=True,
-    ))
+    TEST_RUNNER_TYPE = os.getenv('TEST_RUNNER_TYPE')  # None if unset; then serialized as JSON null
+    NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))  # defaults to a single shard
+    MULTIGPU_RUNNER_TYPE = os.getenv('MULTIGPU_RUNNER_TYPE')
+    configs: Dict[str, Config] = {}  # extra configs beyond 'default', keyed by config name
+    if MULTIGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_MULTIGPU_TEST'):  # unset or '' disables multigpu
+        configs['multigpu'] = {'num_shards': 1, 'runner': MULTIGPU_RUNNER_TYPE}
+    matrix = {
+        'include': [  # first: one entry per shard of the 'default' config
+            {
+                'config': 'default',
+                'shard': shard,
+                'num_shards': NUM_TEST_SHARDS,
+                'runner': TEST_RUNNER_TYPE,
+            }
+            for shard in range(1, NUM_TEST_SHARDS + 1)  # shard numbers are 1-based
+        ] + [  # then: one entry per shard of every extra config
+            {
+                'config': name,
+                'shard': shard,
+                'num_shards': config['num_shards'],
+                'runner': config['runner'],
+            }
+            for name, config in configs.items()
+            for shard in range(1, config['num_shards'] + 1)
+        ]
+    }
+    render_matrix = {'config': list(dict.fromkeys(x['config'] for x in matrix['include']))}  # unique names, first-seen order
+    print(json.dumps({'matrix': matrix, 'render-matrix': render_matrix}, indent=2))  # pretty-printed copy on stdout
+    print(f'::set-output name=matrix::{json.dumps(matrix)}')  # single-line JSON after the ::set-output prefix
+    print(f'::set-output name=render-matrix::{json.dumps(render_matrix)}')  # same, for the render-matrix output
 
 
 if __name__ == "__main__":
diff --git a/.github/templates/linux_ci_workflow.yml.j2 b/.github/templates/linux_ci_workflow.yml.j2
index 5876a91fae8..d97af26540f 100644
--- a/.github/templates/linux_ci_workflow.yml.j2
+++ b/.github/templates/linux_ci_workflow.yml.j2
@@ -195,24 +195,25 @@ jobs:
     if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ubuntu-18.04
     env:
+      TEST_RUNNER_TYPE: !{{ test_runner_type }}
+      ENABLE_MULTIGPU_TEST: !{{ enable_multigpu_test }}
       NUM_TEST_SHARDS: !{{ num_test_shards }}
+      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
     container:
       image: python:3.9
     steps:
+      - name: Install dependencies
+        run: pip install typing-extensions
       - name: Clone pytorch/pytorch
         uses: actions/checkout@v2
       - name: Generating test matrix
         id: set-matrix
-        run: |
-          # outputting for debugging purposes
-          MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
-          echo "${MATRIX}"
-          echo "::set-output name=matrix::${MATRIX}"
+        run: .github/scripts/generate_pytorch_test_matrix.py
 
   test:
-    runs-on: !{{ test_runner_type }}
     needs:
       - calculate-docker-image
       - build
@@ -220,11 +221,13 @@ jobs:
     strategy:
       matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
       fail-fast: false
+    runs-on: ${{ matrix.runner }}
     env:
       DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
       JOB_BASE_NAME: !{{ build_environment }}-test
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      TEST_CONFIG: ${{ matrix.test_config }}
+      TEST_CONFIG: ${{ matrix.config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
     steps:
       - name: Log in to ECR
         run: |
@@ -274,9 +277,12 @@ jobs:
           env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
       - name: Test PyTorch
         run: |
-          if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
-            export SHARD_NUMBER=$TEST_CONFIG
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
           else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
             export SHARD_NUMBER=0
           fi
           # TODO: Stop building test binaries as part of the build phase
@@ -300,7 +306,7 @@ jobs:
             -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
             -w /var/lib/jenkins/workspace \
             "${DOCKER_IMAGE}" \
-            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
+            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
       - name: Chown workspace
         if: always()
         run: |
@@ -316,7 +322,7 @@ jobs:
         name: Store PyTorch Test Reports
         if: always()
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           retention-days: 14
           if-no-files-found: error
           path:
@@ -337,8 +343,12 @@ jobs:
   render_test_results:
     if: always()
     needs:
+      - generate-test-matrix
       - test
     runs-on: ubuntu-18.04
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
+      fail-fast: false
     steps:
       - name: Checkout PyTorch
         uses: actions/checkout@v2
@@ -348,7 +358,7 @@ jobs:
       - uses: actions/download-artifact@v2
         name: Download PyTorch Test Reports
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           path: .
       - name: Unzip test reports
         run: |
diff --git a/.github/templates/windows_ci_workflow.yml.j2 b/.github/templates/windows_ci_workflow.yml.j2
index e4108d36966..19311525374 100644
--- a/.github/templates/windows_ci_workflow.yml.j2
+++ b/.github/templates/windows_ci_workflow.yml.j2
@@ -92,37 +92,37 @@ jobs:
 {%- endif %}
     runs-on: ubuntu-18.04
     env:
+      TEST_RUNNER_TYPE: !{{ test_runner_type }}
       NUM_TEST_SHARDS: !{{ num_test_shards }}
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
     container:
       image: python:3.9
     steps:
+      - name: Install dependencies
+        run: pip install typing-extensions
       - name: Clone pytorch/pytorch
         uses: actions/checkout@v2
       - name: Generating test matrix
         id: set-matrix
-        run: |
-          # outputting for debugging purposes
-          MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
-          echo "${MATRIX}"
-          echo "::set-output name=matrix::${MATRIX}"
+        run: .github/scripts/generate_pytorch_test_matrix.py
 
   test:
 {%- if only_build_on_pull_request %}
     if: ${{ github.event_name == 'push' }}
 {%- endif %}
-    runs-on: !{{ test_runner_type }}
     env:
       JOB_BASE_NAME: !{{ build_environment }}-test
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      TEST_CONFIG: ${{ matrix.test_config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
     needs:
       - build
       - generate-test-matrix
     strategy:
       matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
       fail-fast: false
+    runs-on: ${{ matrix.runner }}
     steps:
       - name: Checkout PyTorch
         uses: actions/checkout@v2
@@ -165,9 +165,7 @@ jobs:
         env:
           PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
         run: |
-            if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
-              export SHARD_NUMBER=$TEST_CONFIG
-            else
+            if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
               export SHARD_NUMBER=0
             fi
             if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@@ -184,7 +182,7 @@ jobs:
         name: Store PyTorch Test Reports
         if: always()
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           retention-days: 14
           if-no-files-found: error
           path:
@@ -202,8 +200,12 @@ jobs:
     if: always()
 {%- endif %}
     needs:
+      - generate-test-matrix
       - test
     runs-on: ubuntu-18.04
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
+      fail-fast: false
     # TODO: Make this into a composite step
     steps:
       - name: Checkout PyTorch
@@ -214,7 +216,7 @@ jobs:
       - uses: actions/download-artifact@v2
         name: Download PyTorch Test Reports
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           path: .
       - name: Unzip test reports
         run: |
diff --git a/.github/workflows/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7.yml b/.github/workflows/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7.yml
index 38d5c5915ff..93c9c452315 100644
--- a/.github/workflows/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7.yml
+++ b/.github/workflows/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7.yml
@@ -193,24 +193,25 @@ jobs:
     if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ubuntu-18.04
     env:
+      TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
+      ENABLE_MULTIGPU_TEST: ''
       NUM_TEST_SHARDS: 2
+      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
     container:
       image: python:3.9
     steps:
+      - name: Install dependencies
+        run: pip install typing-extensions
       - name: Clone pytorch/pytorch
         uses: actions/checkout@v2
       - name: Generating test matrix
         id: set-matrix
-        run: |
-          # outputting for debugging purposes
-          MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
-          echo "${MATRIX}"
-          echo "::set-output name=matrix::${MATRIX}"
+        run: .github/scripts/generate_pytorch_test_matrix.py
 
   test:
-    runs-on: linux.8xlarge.nvidia.gpu
     needs:
       - calculate-docker-image
       - build
@@ -218,11 +219,13 @@ jobs:
     strategy:
       matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
       fail-fast: false
+    runs-on: ${{ matrix.runner }}
     env:
       DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
       JOB_BASE_NAME: pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7-test
-      NUM_TEST_SHARDS: 2
-      TEST_CONFIG: ${{ matrix.test_config }}
+      TEST_CONFIG: ${{ matrix.config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
     steps:
       - name: Log in to ECR
         run: |
@@ -272,9 +275,12 @@ jobs:
           env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
       - name: Test PyTorch
         run: |
-          if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
-            export SHARD_NUMBER=$TEST_CONFIG
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
           else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
             export SHARD_NUMBER=0
           fi
           # TODO: Stop building test binaries as part of the build phase
@@ -298,7 +304,7 @@ jobs:
             -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
             -w /var/lib/jenkins/workspace \
             "${DOCKER_IMAGE}" \
-            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
+            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
       - name: Chown workspace
         if: always()
         run: |
@@ -314,7 +320,7 @@ jobs:
         name: Store PyTorch Test Reports
         if: always()
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           retention-days: 14
           if-no-files-found: error
           path:
@@ -335,8 +341,12 @@ jobs:
   render_test_results:
     if: always()
     needs:
+      - generate-test-matrix
       - test
     runs-on: ubuntu-18.04
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
+      fail-fast: false
     steps:
       - name: Checkout PyTorch
         uses: actions/checkout@v2
@@ -346,7 +356,7 @@ jobs:
       - uses: actions/download-artifact@v2
         name: Download PyTorch Test Reports
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           path: .
       - name: Unzip test reports
         run: |
diff --git a/.github/workflows/pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7.yml b/.github/workflows/pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7.yml
index 38a48a1386b..563b4778cd0 100644
--- a/.github/workflows/pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7.yml
+++ b/.github/workflows/pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7.yml
@@ -193,24 +193,25 @@ jobs:
     if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ubuntu-18.04
     env:
+      TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
+      ENABLE_MULTIGPU_TEST: 1
       NUM_TEST_SHARDS: 2
+      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
     container:
       image: python:3.9
     steps:
+      - name: Install dependencies
+        run: pip install typing-extensions
       - name: Clone pytorch/pytorch
         uses: actions/checkout@v2
       - name: Generating test matrix
         id: set-matrix
-        run: |
-          # outputting for debugging purposes
-          MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
-          echo "${MATRIX}"
-          echo "::set-output name=matrix::${MATRIX}"
+        run: .github/scripts/generate_pytorch_test_matrix.py
 
   test:
-    runs-on: linux.8xlarge.nvidia.gpu
     needs:
       - calculate-docker-image
       - build
@@ -218,11 +219,13 @@ jobs:
     strategy:
       matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
       fail-fast: false
+    runs-on: ${{ matrix.runner }}
     env:
       DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
       JOB_BASE_NAME: pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7-test
-      NUM_TEST_SHARDS: 2
-      TEST_CONFIG: ${{ matrix.test_config }}
+      TEST_CONFIG: ${{ matrix.config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
     steps:
       - name: Log in to ECR
         run: |
@@ -272,9 +275,12 @@ jobs:
           env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
       - name: Test PyTorch
         run: |
-          if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
-            export SHARD_NUMBER=$TEST_CONFIG
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
           else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
             export SHARD_NUMBER=0
           fi
           # TODO: Stop building test binaries as part of the build phase
@@ -298,7 +304,7 @@ jobs:
             -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
             -w /var/lib/jenkins/workspace \
             "${DOCKER_IMAGE}" \
-            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
+            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
       - name: Chown workspace
         if: always()
         run: |
@@ -314,7 +320,7 @@ jobs:
         name: Store PyTorch Test Reports
         if: always()
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           retention-days: 14
           if-no-files-found: error
           path:
@@ -335,8 +341,12 @@ jobs:
   render_test_results:
     if: always()
     needs:
+      - generate-test-matrix
       - test
     runs-on: ubuntu-18.04
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
+      fail-fast: false
     steps:
       - name: Checkout PyTorch
         uses: actions/checkout@v2
@@ -346,7 +356,7 @@ jobs:
       - uses: actions/download-artifact@v2
         name: Download PyTorch Test Reports
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           path: .
       - name: Unzip test reports
         run: |
diff --git a/.github/workflows/pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7.yml b/.github/workflows/pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7.yml
index dbf337dfc2a..42204b08eb5 100644
--- a/.github/workflows/pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7.yml
+++ b/.github/workflows/pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7.yml
@@ -193,24 +193,25 @@ jobs:
     if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ubuntu-18.04
     env:
+      TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
+      ENABLE_MULTIGPU_TEST: ''
       NUM_TEST_SHARDS: 2
+      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
     container:
       image: python:3.9
     steps:
+      - name: Install dependencies
+        run: pip install typing-extensions
       - name: Clone pytorch/pytorch
         uses: actions/checkout@v2
       - name: Generating test matrix
         id: set-matrix
-        run: |
-          # outputting for debugging purposes
-          MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
-          echo "${MATRIX}"
-          echo "::set-output name=matrix::${MATRIX}"
+        run: .github/scripts/generate_pytorch_test_matrix.py
 
   test:
-    runs-on: linux.8xlarge.nvidia.gpu
     needs:
       - calculate-docker-image
       - build
@@ -218,11 +219,13 @@ jobs:
     strategy:
       matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
       fail-fast: false
+    runs-on: ${{ matrix.runner }}
     env:
       DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
       JOB_BASE_NAME: pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7-test
-      NUM_TEST_SHARDS: 2
-      TEST_CONFIG: ${{ matrix.test_config }}
+      TEST_CONFIG: ${{ matrix.config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
     steps:
       - name: Log in to ECR
         run: |
@@ -272,9 +275,12 @@ jobs:
           env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
       - name: Test PyTorch
         run: |
-          if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
-            export SHARD_NUMBER=$TEST_CONFIG
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
           else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
             export SHARD_NUMBER=0
           fi
           # TODO: Stop building test binaries as part of the build phase
@@ -298,7 +304,7 @@ jobs:
             -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
             -w /var/lib/jenkins/workspace \
             "${DOCKER_IMAGE}" \
-            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
+            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
       - name: Chown workspace
         if: always()
         run: |
@@ -314,7 +320,7 @@ jobs:
         name: Store PyTorch Test Reports
         if: always()
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           retention-days: 14
           if-no-files-found: error
           path:
@@ -335,8 +341,12 @@ jobs:
   render_test_results:
     if: always()
     needs:
+      - generate-test-matrix
       - test
     runs-on: ubuntu-18.04
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
+      fail-fast: false
     steps:
       - name: Checkout PyTorch
         uses: actions/checkout@v2
@@ -346,7 +356,7 @@ jobs:
       - uses: actions/download-artifact@v2
         name: Download PyTorch Test Reports
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           path: .
       - name: Unzip test reports
         run: |
diff --git a/.github/workflows/pytorch-linux-xenial-py3.6-gcc5.4.yml b/.github/workflows/pytorch-linux-xenial-py3.6-gcc5.4.yml
index 63abdd6cca4..3b6fabafb3e 100644
--- a/.github/workflows/pytorch-linux-xenial-py3.6-gcc5.4.yml
+++ b/.github/workflows/pytorch-linux-xenial-py3.6-gcc5.4.yml
@@ -194,24 +194,25 @@ jobs:
     if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ubuntu-18.04
     env:
+      TEST_RUNNER_TYPE: linux.2xlarge
+      ENABLE_MULTIGPU_TEST: ''
       NUM_TEST_SHARDS: 2
+      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
     container:
       image: python:3.9
     steps:
+      - name: Install dependencies
+        run: pip install typing-extensions
       - name: Clone pytorch/pytorch
         uses: actions/checkout@v2
       - name: Generating test matrix
         id: set-matrix
-        run: |
-          # outputting for debugging purposes
-          MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
-          echo "${MATRIX}"
-          echo "::set-output name=matrix::${MATRIX}"
+        run: .github/scripts/generate_pytorch_test_matrix.py
 
   test:
-    runs-on: linux.2xlarge
     needs:
       - calculate-docker-image
       - build
@@ -219,11 +220,13 @@ jobs:
     strategy:
       matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
       fail-fast: false
+    runs-on: ${{ matrix.runner }}
     env:
       DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
       JOB_BASE_NAME: pytorch-linux-xenial-py3.6-gcc5.4-test
-      NUM_TEST_SHARDS: 2
-      TEST_CONFIG: ${{ matrix.test_config }}
+      TEST_CONFIG: ${{ matrix.config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
     steps:
       - name: Log in to ECR
         run: |
@@ -273,9 +276,12 @@ jobs:
           env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
       - name: Test PyTorch
         run: |
-          if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
-            export SHARD_NUMBER=$TEST_CONFIG
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
           else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
             export SHARD_NUMBER=0
           fi
           # TODO: Stop building test binaries as part of the build phase
@@ -299,7 +305,7 @@ jobs:
             -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
             -w /var/lib/jenkins/workspace \
             "${DOCKER_IMAGE}" \
-            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
+            sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && '$TEST_COMMAND
       - name: Chown workspace
         if: always()
         run: |
@@ -315,7 +321,7 @@ jobs:
         name: Store PyTorch Test Reports
         if: always()
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           retention-days: 14
           if-no-files-found: error
           path:
@@ -336,8 +342,12 @@ jobs:
   render_test_results:
     if: always()
     needs:
+      - generate-test-matrix
       - test
     runs-on: ubuntu-18.04
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
+      fail-fast: false
     steps:
       - name: Checkout PyTorch
         uses: actions/checkout@v2
@@ -347,7 +357,7 @@ jobs:
       - uses: actions/download-artifact@v2
         name: Download PyTorch Test Reports
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           path: .
       - name: Unzip test reports
         run: |
diff --git a/.github/workflows/pytorch-win-vs2019-cpu-py3.yml b/.github/workflows/pytorch-win-vs2019-cpu-py3.yml
index 14b87215789..50a3060b13b 100644
--- a/.github/workflows/pytorch-win-vs2019-cpu-py3.yml
+++ b/.github/workflows/pytorch-win-vs2019-cpu-py3.yml
@@ -73,34 +73,34 @@ jobs:
     if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ubuntu-18.04
     env:
+      TEST_RUNNER_TYPE: windows.4xlarge
       NUM_TEST_SHARDS: 2
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
     container:
       image: python:3.9
     steps:
+      - name: Install dependencies
+        run: pip install typing-extensions
       - name: Clone pytorch/pytorch
         uses: actions/checkout@v2
       - name: Generating test matrix
         id: set-matrix
-        run: |
-          # outputting for debugging purposes
-          MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
-          echo "${MATRIX}"
-          echo "::set-output name=matrix::${MATRIX}"
+        run: .github/scripts/generate_pytorch_test_matrix.py
 
   test:
-    runs-on: windows.4xlarge
     env:
       JOB_BASE_NAME: pytorch-win-vs2019-cpu-py3-test
-      NUM_TEST_SHARDS: 2
-      TEST_CONFIG: ${{ matrix.test_config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
     needs:
       - build
       - generate-test-matrix
     strategy:
       matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
       fail-fast: false
+    runs-on: ${{ matrix.runner }}
     steps:
       - name: Checkout PyTorch
         uses: actions/checkout@v2
@@ -133,9 +133,7 @@ jobs:
         env:
           PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
         run: |
-            if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
-              export SHARD_NUMBER=$TEST_CONFIG
-            else
+            if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
               export SHARD_NUMBER=0
             fi
             if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@@ -152,7 +150,7 @@ jobs:
         name: Store PyTorch Test Reports
         if: always()
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           retention-days: 14
           if-no-files-found: error
           path:
@@ -166,8 +164,12 @@ jobs:
   render_test_results:
     if: always()
     needs:
+      - generate-test-matrix
       - test
     runs-on: ubuntu-18.04
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
+      fail-fast: false
     # TODO: Make this into a composite step
     steps:
       - name: Checkout PyTorch
@@ -178,7 +180,7 @@ jobs:
       - uses: actions/download-artifact@v2
         name: Download PyTorch Test Reports
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           path: .
       - name: Unzip test reports
         run: |
diff --git a/.github/workflows/pytorch-win-vs2019-cuda10-cudnn7-py3.yml b/.github/workflows/pytorch-win-vs2019-cuda10-cudnn7-py3.yml
index e061fb08b6f..4aa43661f3a 100644
--- a/.github/workflows/pytorch-win-vs2019-cuda10-cudnn7-py3.yml
+++ b/.github/workflows/pytorch-win-vs2019-cuda10-cudnn7-py3.yml
@@ -83,34 +83,34 @@ jobs:
     if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ubuntu-18.04
     env:
+      TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
       NUM_TEST_SHARDS: 2
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
     container:
       image: python:3.9
     steps:
+      - name: Install dependencies
+        run: pip install typing-extensions
       - name: Clone pytorch/pytorch
         uses: actions/checkout@v2
       - name: Generating test matrix
         id: set-matrix
-        run: |
-          # outputting for debugging purposes
-          MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
-          echo "${MATRIX}"
-          echo "::set-output name=matrix::${MATRIX}"
+        run: .github/scripts/generate_pytorch_test_matrix.py
 
   test:
-    runs-on: windows.8xlarge.nvidia.gpu
     env:
       JOB_BASE_NAME: pytorch-win-vs2019-cuda10-cudnn7-py3-test
-      NUM_TEST_SHARDS: 2
-      TEST_CONFIG: ${{ matrix.test_config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
     needs:
       - build
       - generate-test-matrix
     strategy:
       matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
       fail-fast: false
+    runs-on: ${{ matrix.runner }}
     steps:
       - name: Checkout PyTorch
         uses: actions/checkout@v2
@@ -151,9 +151,7 @@ jobs:
         env:
           PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
         run: |
-            if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
-              export SHARD_NUMBER=$TEST_CONFIG
-            else
+            if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
               export SHARD_NUMBER=0
             fi
             if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@@ -170,7 +168,7 @@ jobs:
         name: Store PyTorch Test Reports
         if: always()
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           retention-days: 14
           if-no-files-found: error
           path:
@@ -184,8 +182,12 @@ jobs:
   render_test_results:
     if: always()
     needs:
+      - generate-test-matrix
       - test
     runs-on: ubuntu-18.04
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
+      fail-fast: false
     # TODO: Make this into a composite step
     steps:
       - name: Checkout PyTorch
@@ -196,7 +198,7 @@ jobs:
       - uses: actions/download-artifact@v2
         name: Download PyTorch Test Reports
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           path: .
       - name: Unzip test reports
         run: |
diff --git a/.github/workflows/pytorch-win-vs2019-cuda11-cudnn8-py3.yml b/.github/workflows/pytorch-win-vs2019-cuda11-cudnn8-py3.yml
index 3325c1734b7..1a9af43943b 100644
--- a/.github/workflows/pytorch-win-vs2019-cuda11-cudnn8-py3.yml
+++ b/.github/workflows/pytorch-win-vs2019-cuda11-cudnn8-py3.yml
@@ -82,34 +82,34 @@ jobs:
     if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ubuntu-18.04
     env:
+      TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
       NUM_TEST_SHARDS: 2
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
     container:
       image: python:3.9
     steps:
+      - name: Install dependencies
+        run: pip install typing-extensions
       - name: Clone pytorch/pytorch
         uses: actions/checkout@v2
       - name: Generating test matrix
         id: set-matrix
-        run: |
-          # outputting for debugging purposes
-          MATRIX=$(python .github/scripts/generate_pytorch_test_matrix.py)
-          echo "${MATRIX}"
-          echo "::set-output name=matrix::${MATRIX}"
+        run: .github/scripts/generate_pytorch_test_matrix.py
 
   test:
-    runs-on: windows.8xlarge.nvidia.gpu
     env:
       JOB_BASE_NAME: pytorch-win-vs2019-cuda11-cudnn8-py3-test
-      NUM_TEST_SHARDS: 2
-      TEST_CONFIG: ${{ matrix.test_config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
     needs:
       - build
       - generate-test-matrix
     strategy:
       matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
       fail-fast: false
+    runs-on: ${{ matrix.runner }}
     steps:
       - name: Checkout PyTorch
         uses: actions/checkout@v2
@@ -150,9 +150,7 @@ jobs:
         env:
           PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
         run: |
-            if [[ $NUM_TEST_SHARDS -eq 2 ]]; then
-              export SHARD_NUMBER=$TEST_CONFIG
-            else
+            if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
               export SHARD_NUMBER=0
             fi
             if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
@@ -169,7 +167,7 @@ jobs:
         name: Store PyTorch Test Reports
         if: always()
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           retention-days: 14
           if-no-files-found: error
           path:
@@ -183,8 +181,12 @@ jobs:
   render_test_results:
     if: always()
     needs:
+      - generate-test-matrix
       - test
     runs-on: ubuntu-18.04
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.render-matrix) }}
+      fail-fast: false
     # TODO: Make this into a composite step
     steps:
       - name: Checkout PyTorch
@@ -195,7 +197,7 @@ jobs:
       - uses: actions/download-artifact@v2
         name: Download PyTorch Test Reports
         with:
-          name: test-reports
+          name: test-reports-${{ matrix.config }}
           path: .
       - name: Unzip test reports
         run: |