Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)

Commit f77783c374
Merge remote-tracking branch 'upstream/viable/strict' into mkl-spmmd

.circleci/config.yml (generated): 1 line changed

@@ -847,6 +847,7 @@ jobs:
    <<: *binary_mac_params
    macos:
      xcode: "12.0"
    resource_class: "large"
    steps:
      # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
      - checkout

@@ -161,6 +161,7 @@
    <<: *binary_mac_params
    macos:
      xcode: "12.0"
    resource_class: "large"
    steps:
      # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
      - checkout

.github/merge_rules.json (vendored): 54 lines changed

@@ -1,48 +1,54 @@
 [
     {
         "name": "ONNX exporter",
         "patterns": [
             "torch/onnx/**",
             "torch/csrc/jit/passes/onnx/**",
             "torch/csrc/jit/passes/onnx.*",
             "test/onnx/**",
             "docs/source/onnx.rst",
             "torch/csrc/jit/serialization/export.*",
             "torch/csrc/jit/serialization/onnx.*",
             "torch/_C/__init__.pyi.in",
             "torch/csrc/onnx/**"
         ],
         "approved_by": ["BowenBao", "garymm"],
         "mandatory_app_id": 12274
     },
     {
         "name": "NVFuser",
         "patterns": ["torch/csrc/jit/codegen/fuser/cuda/**", "torch/csrc/jit/codegen/cuda/**", "benchmarks/cpp/nvfuser/**"],
         "approved_by": ["csarofeen", "ngimel"],
         "mandatory_app_id": 12274
     },
     {
         "name": "OSS CI",
         "patterns": [".github/**", ".circleci/**", ".jenkins/**", "scripts/**", "tools/**"],
-        "approved_by": ["seemethere", "malfet", "suo", "janeyx99", "ezyang"],
+        "approved_by": ["janeyx99", "ezyang"],
         "mandatory_app_id": 12274
     },
     {
         "name": "Documentation",
         "patterns": ["docs/**", "torch/*docs.py"],
-        "approved_by": ["mruberry", "ngimel", "albanD", "janeyx99"],
+        "approved_by": ["mruberry", "ngimel", "janeyx99"],
         "mandatory_app_id": 12274
     },
     {
         "name": "Android",
         "patterns": ["android/**"],
-        "approved_by": ["linbinyu", "kit1980", "IvanKobzarev", "malfet"],
+        "approved_by": ["linbinyu", "kit1980", "IvanKobzarev"],
         "mandatory_app_id": 12274
     },
     {
         "name": "iOS",
         "patterns": ["ios/**"],
-        "approved_by": ["linbinyu", "kit1980", "xta0", "malfet", "hanton"],
+        "approved_by": ["linbinyu", "kit1980", "xta0", "hanton"],
         "mandatory_app_id": 12274
     },
     {
         "name": "superuser",
         "patterns": ["*"],
         "approved_by": ["albanD", "jbschlosser", "suo", "osalpekar", "malfet", "seemethere"],
         "mandatory_app_id": 12274
     }
 ]

.github/templates/common.yml.j2 (vendored): 11 lines changed

@@ -6,6 +6,10 @@
 {%- set squid_no_proxy = "localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" -%}
 {%- set timeout_minutes = 240 -%}
+
+# NOTE: If testing pytorch/builder changes you can change this variable to change what pytorch/builder reference
+# the binary builds will check out
+{%- set builder_branch = "main" -%}
 
 {%- macro concurrency(build_environment) -%}
 concurrency:
   group: !{{ build_environment }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}

@@ -191,7 +195,9 @@ concurrency:
     - name: Checkout !{{ 'PyTorch' if repository == "pytorch/pytorch" else repository }}
       uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
       with:
-{%- if checkout_pr_head %}
+{%- if branch %}
+        ref: !{{ branch }}
+{%- elif checkout_pr_head %}
         ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
 {%- endif %}
 {%- if deep_clone %}

@@ -202,9 +208,6 @@ concurrency:
 {%- if repository != "pytorch/pytorch" %}
         repository: !{{ repository }}
 {%- endif %}
-{%- if branch %}
-        ref: !{{ branch }}
-{%- endif %}
 {%- if directory %}
         path: !{{ directory }}
 {%- endif %}
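
The NOTE added above spells out the intended use of the new variable: to test a pytorch/builder change, point builder_branch at the branch under test and regenerate the workflows. A minimal sketch of that edit (the branch name here is hypothetical):

  {# in .github/templates/common.yml.j2 #}
  {%- set builder_branch = "my-builder-test-branch" -%}

  {# the workflow templates below then check builder out at that branch via #}
  !{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}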

Additional workflow templates under .github/templates/ (file names not preserved in this view):

Linux binary build/test jobs:

@@ -53,7 +53,7 @@ jobs:
     steps:
       !{{ common.setup_ec2_linux() }}
       !{{ common.checkout(deep_clone=False, directory="pytorch") }}
-      !{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", checkout_pr_head=False) }}
+      !{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
 {%- if config["gpu_arch_type"] == 'cuda' and config["gpu_arch_version"].startswith('11') %}
       - name: Set BUILD_SPLIT_CUDA
         run: |

@@ -119,16 +119,8 @@ jobs:
         with:
           name: !{{ config["build_name"] }}
           path: "${{ runner.temp }}/artifacts/"
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-        with:
-          path: pytorch
-          submodules: recursive
-      - name: Clone pytorch/builder
-        uses: actions/checkout@v2
-        with:
-          repository: pytorch/builder
-          path: builder
+      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+      !{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
 {%- if config["gpu_arch_type"] == "cuda" %}
       - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
         working-directory: pytorch/

macOS binary build jobs:

@@ -80,7 +80,7 @@ jobs:
           /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
           echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
       !{{ common.checkout(deep_clone=False, directory="pytorch") }}
-      !{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder") }}
+      !{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
       - name: Install sccache (only for non-forked PRs, and pushes to trunk)
         if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
         run: |

Windows binary build/test jobs:

@@ -60,16 +60,8 @@ jobs:
     steps:
       !{{ common.setup_ec2_windows() }}
       !{{ set_runner_specific_vars() }}
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-        with:
-          path: ${{ env.PYTORCH_ROOT }}
-          submodules: recursive
-      - name: Clone pytorch/builder
-        uses: actions/checkout@v2
-        with:
-          repository: pytorch/builder
-          path: ${{ env.BUILDER_ROOT }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+      !{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
       - name: Populate binary env
         shell: bash
         run: |

@@ -104,16 +96,8 @@ jobs:
         with:
           name: !{{ config["build_name"] }}
           path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-        with:
-          path: ${{ env.PYTORCH_ROOT }}
-          submodules: recursive
-      - name: Clone pytorch/builder
-        uses: actions/checkout@v2
-        with:
-          repository: pytorch/builder
-          path: ${{ env.BUILDER_ROOT }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+      !{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
       - name: Populate binary env
         shell: bash
         run: |
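
For orientation, the macro calls that replace the Clone steps above expand, in the regenerated workflows that follow, into checkout-plus-clean step pairs. A sketch of the rendered form of the builder checkout, taken from the generated workflow hunks below (indentation approximate):

      - name: Checkout pytorch/builder
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
      - name: Clean pytorch/builder checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: builder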

.github/workflows/generated-linux-binary-conda.yml (generated, vendored): 480 lines changed

The same two changes are applied once per build configuration (conda-py3_7, py3_8, py3_9 and py3_10, each for cpu, cuda10_2, cuda11_1, cuda11_3 and cuda11_5); the repetitions differ only in the artifact name and the hunk offsets.

In each configuration's build job, the pytorch/builder checkout is pinned to the template's builder_branch:

@@ -111,6 +111,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
+          ref: main
           submodules: recursive
           repository: pytorch/builder
           path: builder

(the same hunk repeats at -502, -900, -1301, -1702, -2102, -2493, -2891, -3292, -3693, -4093, -4484, -4882, -5283, -5684, -6084, -6475, -6873, -7274 and -7675)

In each configuration's test job, the plain actions/checkout@v2 clone steps are replaced by the rendered checkout-plus-clean steps; shown here for conda-py3_7-cpu:

@@ -248,16 +249,29 @@ jobs:
         with:
           name: conda-py3_7-cpu
           path: "${{ runner.temp }}/artifacts/"
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-        with:
-          path: pytorch
-          submodules: recursive
-      - name: Clone pytorch/builder
-        uses: actions/checkout@v2
-        with:
-          repository: pytorch/builder
-          path: builder
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          submodules: recursive
+          path: pytorch
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+        working-directory: pytorch
+      - name: Checkout pytorch/builder
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: main
+          submodules: recursive
+          repository: pytorch/builder
+          path: builder
+      - name: Clean pytorch/builder checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+        working-directory: builder
       - name: Pull Docker image
         run: |
           retry () {

(the same hunk repeats for conda-py3_7-cuda10_2 at -640, conda-py3_7-cuda11_1 at -1041, conda-py3_7-cuda11_3 at -1442, conda-py3_7-cuda11_5 at -1843, conda-py3_8-cpu at -2239, conda-py3_8-cuda10_2 at -2631, conda-py3_8-cuda11_1 at -3032, conda-py3_8-cuda11_3 at -3433, conda-py3_8-cuda11_5 at -3834, conda-py3_9-cpu at -4230, conda-py3_9-cuda10_2 at -4622, conda-py3_9-cuda11_1 at -5023, conda-py3_9-cuda11_3 at -5424, conda-py3_9-cuda11_5 at -5825, conda-py3_10-cpu at -6221, conda-py3_10-cuda10_2 at -6613, conda-py3_10-cuda11_1 at -7014, conda-py3_10-cuda11_3 at -7415 and conda-py3_10-cuda11_5 at -7816; in the CUDA test jobs the trailing context is the "Install nvidia driver, nvidia-docker runtime, set GPU_FLAG" step rather than "Pull Docker image")

.github/workflows/generated-linux-binary-libtorch-cxx11-abi.yml (generated, vendored): 480 lines changed

This generated workflow receives the same two per-configuration edits as generated-linux-binary-conda.yml above. Each build job's pytorch/builder checkout gains "ref: main" (hunks at -112, -505, -898, -1291, -1685, -2086, -2487, -2888, -3289, -3693, -4097, -4501, -4905 and -5309), and each test job's actions/checkout@v2 clone steps are replaced by the rendered checkout-plus-clean steps, differing only in the artifact name:

  libtorch-cpu-shared-with-deps-cxx11-abi          (@@ -250,16 +251,29 @@)
  libtorch-cpu-shared-without-deps-cxx11-abi       (@@ -643,16 +658,29 @@)
  libtorch-cpu-static-with-deps-cxx11-abi          (@@ -1036,16 +1065,29 @@)
  libtorch-cpu-static-without-deps-cxx11-abi       (@@ -1429,16 +1472,29 @@)
  libtorch-cuda10_2-shared-with-deps-cxx11-abi     (@@ -1824,16 +1881,29 @@)
  libtorch-cuda10_2-shared-without-deps-cxx11-abi  (@@ -2225,16 +2296,29 @@)
  libtorch-cuda10_2-static-with-deps-cxx11-abi     (@@ -2626,16 +2711,29 @@)
  libtorch-cuda10_2-static-without-deps-cxx11-abi  (@@ -3027,16 +3126,29 @@)
  libtorch-cuda11_1-shared-with-deps-cxx11-abi     (@@ -3431,16 +3544,29 @@)
  libtorch-cuda11_1-shared-without-deps-cxx11-abi  (@@ -3835,16 +3962,29 @@)
  libtorch-cuda11_1-static-with-deps-cxx11-abi     (@@ -4239,16 +4380,29 @@)
  libtorch-cuda11_1-static-without-deps-cxx11-abi  (@@ -4643,16 +4798,29 @@)
  libtorch-cuda11_3-shared-with-deps-cxx11-abi     (@@ -5047,16 +5216,29 @@)
  libtorch-cuda11_3-shared-without-deps-cxx11-abi  (@@ -5451,16 +5634,29 @@)

As in the conda workflow, the CPU test jobs keep "Pull Docker image" as trailing context and the CUDA test jobs keep "Install nvidia driver, nvidia-docker runtime, set GPU_FLAG".
|
||||
path: builder
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
working-directory: pytorch/
|
||||
run: |
|
||||
|
|
@ -5713,6 +5909,7 @@ jobs:
|
|||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
|
|
@ -5855,16 +6052,29 @@ jobs:
|
|||
with:
|
||||
name: libtorch-cuda11_3-static-with-deps-cxx11-abi
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Clone pytorch/pytorch
|
||||
uses: actions/checkout@v2
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
path: pytorch
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
- name: Clone pytorch/builder
|
||||
uses: actions/checkout@v2
|
||||
path: pytorch
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
working-directory: pytorch/
|
||||
run: |
|
||||
|
|
@ -6117,6 +6327,7 @@ jobs:
|
|||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
|
|
@ -6259,16 +6470,29 @@ jobs:
|
|||
with:
|
||||
name: libtorch-cuda11_3-static-without-deps-cxx11-abi
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Clone pytorch/pytorch
|
||||
uses: actions/checkout@v2
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
path: pytorch
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
- name: Clone pytorch/builder
|
||||
uses: actions/checkout@v2
|
||||
path: pytorch
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
working-directory: pytorch/
|
||||
run: |
|
||||
|
|
@ -6521,6 +6745,7 @@ jobs:
|
|||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
|
|
@ -6663,16 +6888,29 @@ jobs:
|
|||
with:
|
||||
name: libtorch-cuda11_5-shared-with-deps-cxx11-abi
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Clone pytorch/pytorch
|
||||
uses: actions/checkout@v2
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
path: pytorch
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
- name: Clone pytorch/builder
|
||||
uses: actions/checkout@v2
|
||||
path: pytorch
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
working-directory: pytorch/
|
||||
run: |
|
||||
|
|
@ -6925,6 +7163,7 @@ jobs:
|
|||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
|
|
@ -7067,16 +7306,29 @@ jobs:
|
|||
with:
|
||||
name: libtorch-cuda11_5-shared-without-deps-cxx11-abi
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Clone pytorch/pytorch
|
||||
uses: actions/checkout@v2
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
path: pytorch
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
- name: Clone pytorch/builder
|
||||
uses: actions/checkout@v2
|
||||
path: pytorch
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
working-directory: pytorch/
|
||||
run: |
|
||||
|
|
@ -7329,6 +7581,7 @@ jobs:
|
|||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
|
|
@ -7471,16 +7724,29 @@ jobs:
|
|||
with:
|
||||
name: libtorch-cuda11_5-static-with-deps-cxx11-abi
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Clone pytorch/pytorch
|
||||
uses: actions/checkout@v2
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
path: pytorch
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
- name: Clone pytorch/builder
|
||||
uses: actions/checkout@v2
|
||||
path: pytorch
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
working-directory: pytorch/
|
||||
run: |
|
||||
|
|
@ -7733,6 +7999,7 @@ jobs:
|
|||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
|
|
@ -7875,16 +8142,29 @@ jobs:
|
|||
with:
|
||||
name: libtorch-cuda11_5-static-without-deps-cxx11-abi
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Clone pytorch/pytorch
|
||||
uses: actions/checkout@v2
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
path: pytorch
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
- name: Clone pytorch/builder
|
||||
uses: actions/checkout@v2
|
||||
path: pytorch
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
working-directory: pytorch/
|
||||
run: |
|
||||
|
|
|
480  .github/workflows/generated-linux-binary-libtorch-pre-cxx11.yml  (generated, vendored)

This generated workflow receives the same set of hunks as the cxx11-abi file above. A context-only hunk at @@ -112,6 +112,7 @@ jobs: shows the existing "Checkout pytorch/builder" step (uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9, ref: main, submodules: recursive, repository: pytorch/builder, path: builder) gaining one line, and in each test job the "Clone pytorch/pytorch" / "Clone pytorch/builder" steps (actions/checkout@v2) are replaced by the "Checkout PyTorch", "Clean PyTorch checkout", "Checkout pytorch/builder", and "Clean pytorch/builder checkout" steps shown above.

For the CPU artifacts the new steps sit before the "Pull Docker image" step (run: | / retry () { ... }); for the CUDA artifacts they sit before the "Install nvidia driver, nvidia-docker runtime, set GPU_FLAG" step.

The hunks cover the artifacts
libtorch-cpu-{shared,static}-{with,without}-deps-pre-cxx11,
libtorch-cuda10_2-{shared,static}-{with,without}-deps-pre-cxx11,
libtorch-cuda11_1-{shared,static}-{with,without}-deps-pre-cxx11,
libtorch-cuda11_3-{shared,static}-{with,without}-deps-pre-cxx11, and
libtorch-cuda11_5-{shared,static}-{with,without}-deps-pre-cxx11.
672  .github/workflows/generated-linux-binary-manywheel.yml  (generated, vendored)
File diff suppressed because it is too large.

6  .github/workflows/generated-macos-arm64-binary-conda.yml  (generated, vendored)
8  .github/workflows/generated-macos-arm64-binary-wheel.yml  (generated, vendored)
8  .github/workflows/generated-macos-binary-conda.yml  (generated, vendored)
8  .github/workflows/generated-macos-binary-libtorch-cxx11-abi.yml  (generated, vendored)
8  .github/workflows/generated-macos-binary-libtorch-pre-cxx11.yml  (generated, vendored)
8  .github/workflows/generated-macos-binary-wheel.yml  (generated, vendored)

In each of these macOS workflows, every "Checkout pytorch/builder" step (uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9) has its ref changed. The line

    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

becomes

    ref: main

while submodules: recursive, repository: pytorch/builder, and path: builder stay unchanged. The hunks are at lines 87, 284, and 481 of the arm64 conda workflow; at 87, 284, 481, and 678 of the arm64 wheel workflow; at 85, 282, 479, and 676 of the macOS conda and wheel workflows; and at 90, 293, 496, and 699 of the two macOS libtorch workflows.
800  .github/workflows/generated-windows-binary-libtorch-cxx11-abi.yml  (generated, vendored)
File diff suppressed because it is too large.

800  .github/workflows/generated-windows-binary-libtorch-pre-cxx11.yml  (generated, vendored)
File diff suppressed because it is too large.

800  .github/workflows/generated-windows-binary-wheel.yml  (generated, vendored)
File diff suppressed because it is too large.
The following hunks update the Android Gradle dependency instructions:

@@ -14,9 +14,16 @@ repositories {
      jcenter()
  }

The single dependency block

  dependencies {
      implementation 'org.pytorch:pytorch_android:1.6.0'
      implementation 'org.pytorch:pytorch_android_torchvision:1.6.0'
  }

is split into separate lite-interpreter and full-JIT blocks:

  # lite interpreter build
  dependencies {
      implementation 'org.pytorch:pytorch_android_lite:1.10.0'
      implementation 'org.pytorch:pytorch_android_torchvision_lite:1.10.0'
  }

  # full jit build
  dependencies {
      implementation 'org.pytorch:pytorch_android:1.10.0'
      implementation 'org.pytorch:pytorch_android_torchvision:1.10.0'
  }

@@ -32,6 +39,15 @@ repositories {
      }
  }

The nightly (SNAPSHOT) instructions gain the same split:

  # lite interpreter build
  dependencies {
      ...
      implementation 'org.pytorch:pytorch_android_lite:1.12.0-SNAPSHOT'
      implementation 'org.pytorch:pytorch_android_torchvision_lite:1.12.0-SNAPSHOT'
      ...
  }

  # full jit build
  dependencies {
      ...
      implementation 'org.pytorch:pytorch_android:1.12.0-SNAPSHOT'

@@ -68,7 +84,7 @@ They are specified as environment variables:

  `ANDROID_HOME` - path to [Android SDK](https://developer.android.com/studio/command-line/sdkmanager.html)

The NDK line now recommends a version:

  `ANDROID_NDK` - path to [Android NDK](https://developer.android.com/studio/projects/install-ndk). It's recommended to use NDK 21.x.

  `GRADLE_HOME` - path to [gradle](https://gradle.org/releases/)

@@ -133,7 +149,7 @@ android {
  }

  dependencies {
      extractForNativeBuild('org.pytorch:pytorch_android:1.10.0')
  }

  task extractAARForNativeBuild {

(the extractForNativeBuild coordinate was previously 'org.pytorch:pytorch_android:1.6.0'.)
The Android build script widens the default ABI list in parse_abis_list():

@@ -29,7 +29,8 @@ check_gradle() {
  }

  parse_abis_list() {
    # sync with https://github.com/pytorch/pytorch/blob/0ca0e02685a9d033ac4f04e2fa5c8ba6dbc5ae50/android/gradle.properties#L1
    ABIS_LIST="armeabi-v7a,arm64-v8a,x86,x86_64"
    CUSTOM_ABIS_LIST=false
    if [ $# -gt 0 ]; then
      ABIS_LIST=$1

(the previous default was ABIS_LIST="x86"; the sync comment and the full ABI list replace it.)
|
|
@ -50,7 +50,17 @@ android {
}
androidTest {
java {
exclude 'org/pytorch/PytorchHostTests.java'
if(System.env.BUILD_LITE_INTERPRETER == '0') {
println 'Build test for full jit (pytorch_jni)'
exclude 'org/pytorch/PytorchHostTests.java'
exclude 'org/pytorch/PytorchLiteInstrumentedTests.java'
exclude 'org/pytorch/suite/PytorchLiteInstrumentedTestSuite.java'
} else {
println 'Build test for lite interpreter (pytorch_jni_lite)'
exclude 'org/pytorch/PytorchHostTests.java'
exclude 'org/pytorch/PytorchInstrumentedTests.java'
exclude 'org/pytorch/suite/PytorchInstrumentedTestSuite.java'
}
}
}
}
@ -1,4 +1,6 @@
import torch
from torch import Tensor
from typing import Dict, List, Tuple, Optional

OUTPUT_DIR = "src/androidTest/assets/"
@ -7,7 +9,8 @@ def scriptAndSave(module, fileName):
script_module = torch.jit.script(module)
print(script_module.graph)
outputFileName = OUTPUT_DIR + fileName
script_module.save(outputFileName)
# note that the lite interpreter model can also be used in full JIT
script_module._save_for_lite_interpreter(outputFileName)
print("Saved to " + outputFileName)
print('=' * 80)
@ -25,6 +25,7 @@ sourceSets {
java {
srcDir '../src/androidTest/java'
exclude '**/PytorchInstrumented*'
exclude '**/PytorchLiteInstrumented*'
}
resources.srcDirs = ["../src/androidTest/assets"]
}
Binary file not shown.
@ -10,7 +10,11 @@ import java.util.Objects;
public class PytorchHostTests extends PytorchTestBase {

@Override
protected String assetFilePath(String assetName) throws IOException {
protected Module loadModel(String path) throws IOException {
return Module.load(assetFilePath(path));
}

private String assetFilePath(String assetName) throws IOException {
Path tempFile = Files.createTempFile("test", ".pt");
try (InputStream resource =
Objects.requireNonNull(getClass().getClassLoader().getResourceAsStream("test.pt"))) {
@ -14,7 +14,11 @@ import org.junit.runner.RunWith;
public class PytorchInstrumentedTests extends PytorchTestBase {

@Override
protected String assetFilePath(String assetName) throws IOException {
protected Module loadModel(String path) throws IOException {
return Module.load(assetFilePath(path));
}

private String assetFilePath(String assetName) throws IOException {
final Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext();
File file = new File(appContext.getFilesDir(), assetName);
if (file.exists() && file.length() > 0) {
@ -35,4 +39,5 @@ public class PytorchInstrumentedTests extends PytorchTestBase {
throw e;
}
}

}
@ -0,0 +1,46 @@
package org.pytorch;

import android.content.Context;

import androidx.test.InstrumentationRegistry;
import androidx.test.runner.AndroidJUnit4;

import org.junit.runner.RunWith;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

@RunWith(AndroidJUnit4.class)
public class PytorchLiteInstrumentedTests extends PytorchTestBase {

@Override
protected Module loadModel(String path) throws IOException {
return LiteModuleLoader.load(assetFilePath(path));
}

private String assetFilePath(String assetName) throws IOException {
final Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext();
File file = new File(appContext.getFilesDir(), assetName);
if (file.exists() && file.length() > 0) {
return file.getAbsolutePath();
}

try (InputStream is = appContext.getAssets().open(assetName)) {
try (OutputStream os = new FileOutputStream(file)) {
byte[] buffer = new byte[4 * 1024];
int read;
while ((read = is.read(buffer)) != -1) {
os.write(buffer, 0, read);
}
os.flush();
}
return file.getAbsolutePath();
} catch (IOException e) {
throw e;
}
}

}
@ -16,7 +16,7 @@ public abstract class PytorchTestBase {

@Test
public void testForwardNull() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final IValue input = IValue.from(Tensor.fromBlob(Tensor.allocateByteBuffer(1), new long[] {1}));
assertTrue(input.isTensor());
final IValue output = module.forward(input);
@ -25,7 +25,7 @@ public abstract class PytorchTestBase {

@Test
public void testEqBool() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
for (boolean value : new boolean[] {false, true}) {
final IValue input = IValue.from(value);
assertTrue(input.isBool());
@ -38,7 +38,7 @@ public abstract class PytorchTestBase {

@Test
public void testEqInt() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
for (long value : new long[] {Long.MIN_VALUE, -1024, -1, 0, 1, 1024, Long.MAX_VALUE}) {
final IValue input = IValue.from(value);
assertTrue(input.isLong());
@ -51,7 +51,7 @@ public abstract class PytorchTestBase {

@Test
public void testEqFloat() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
double[] values =
new double[] {
-Double.MAX_VALUE,
@ -86,7 +86,7 @@ public abstract class PytorchTestBase {
}
final Tensor inputTensor = Tensor.fromBlob(inputTensorData, inputTensorShape);

final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final IValue input = IValue.from(inputTensor);
assertTrue(input.isTensor());
assertTrue(inputTensor == input.toTensor());
@ -103,7 +103,7 @@ public abstract class PytorchTestBase {

@Test
public void testEqDictIntKeyIntValue() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final Map<Long, IValue> inputMap = new HashMap<>();

inputMap.put(Long.MIN_VALUE, IValue.from(-Long.MIN_VALUE));
@ -127,7 +127,7 @@ public abstract class PytorchTestBase {

@Test
public void testEqDictStrKeyIntValue() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final Map<String, IValue> inputMap = new HashMap<>();

inputMap.put("long_min_value", IValue.from(Long.MIN_VALUE));
@ -151,7 +151,7 @@ public abstract class PytorchTestBase {

@Test
public void testListIntSumReturnTuple() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);

for (int n : new int[] {0, 1, 128}) {
long[] a = new long[n];
@ -178,7 +178,7 @@ public abstract class PytorchTestBase {

@Test
public void testOptionalIntIsNone() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);

assertFalse(module.runMethod("optionalIntIsNone", IValue.from(1l)).toBool());
assertTrue(module.runMethod("optionalIntIsNone", IValue.optionalNull()).toBool());
@ -186,7 +186,7 @@ public abstract class PytorchTestBase {

@Test
public void testIntEq0None() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);

assertTrue(module.runMethod("intEq0None", IValue.from(0l)).isNull());
assertTrue(module.runMethod("intEq0None", IValue.from(1l)).toLong() == 1l);
@ -194,7 +194,7 @@ public abstract class PytorchTestBase {

@Test(expected = IllegalArgumentException.class)
public void testRunUndefinedMethod() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
module.runMethod("test_undefined_method_throws_exception");
}

@ -241,7 +241,7 @@ public abstract class PytorchTestBase {

@Test
public void testEqString() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
String[] values =
new String[] {
"smoketest",
@ -260,7 +260,7 @@ public abstract class PytorchTestBase {

@Test
public void testStr3Concat() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
String[] values =
new String[] {
"smoketest",
@ -281,7 +281,7 @@ public abstract class PytorchTestBase {

@Test
public void testEmptyShape() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final long someNumber = 43;
final IValue input = IValue.from(Tensor.fromBlob(new long[] {someNumber}, new long[] {}));
final IValue output = module.runMethod("newEmptyShapeWithItem", input);
@ -293,7 +293,7 @@ public abstract class PytorchTestBase {

@Test
public void testAliasWithOffset() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final IValue output = module.runMethod("testAliasWithOffset");
assertTrue(output.isTensorList());
Tensor[] tensors = output.toTensorList();
@ -303,7 +303,7 @@ public abstract class PytorchTestBase {

@Test
public void testNonContiguous() throws IOException {
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final IValue output = module.runMethod("testNonContiguous");
assertTrue(output.isTensor());
Tensor value = output.toTensor();
@ -316,7 +316,7 @@ public abstract class PytorchTestBase {
long[] inputShape = new long[] {1, 3, 2, 2};
long[] data = new long[] {1, 11, 101, 2, 12, 102, 3, 13, 103, 4, 14, 104};
Tensor inputNHWC = Tensor.fromBlob(data, inputShape, MemoryFormat.CHANNELS_LAST);
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final IValue outputNCHW = module.runMethod("contiguous", IValue.from(inputNHWC));
assertIValueTensor(
outputNCHW,
@ -334,7 +334,7 @@ public abstract class PytorchTestBase {
long[] dataNHWDC = new long[] {1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16};

Tensor inputNHWDC = Tensor.fromBlob(dataNHWDC, shape, MemoryFormat.CHANNELS_LAST_3D);
final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final IValue outputNCHWD = module.runMethod("contiguous", IValue.from(inputNHWDC));
assertIValueTensor(outputNCHWD, MemoryFormat.CONTIGUOUS, shape, dataNCHWD);

@ -358,7 +358,7 @@ public abstract class PytorchTestBase {
long[] dataWeightOHWI = new long[] {2, 0, 0, 0, 1, 0, 0, 0, -1};
Tensor wNHWC = Tensor.fromBlob(dataWeightOHWI, weightShape, MemoryFormat.CHANNELS_LAST);

final Module module = Module.load(assetFilePath(TEST_MODULE_ASSET_NAME));
final Module module = loadModel(TEST_MODULE_ASSET_NAME);

final IValue outputNCHW =
module.runMethod("conv2d", IValue.from(inputNCHW), IValue.from(wNCHW), IValue.from(false));
@ -389,5 +389,5 @@ public abstract class PytorchTestBase {
assertArrayEquals(expectedData, t.getDataAsLongArray());
}

protected abstract String assetFilePath(String assetName) throws IOException;
protected abstract Module loadModel(String assetName) throws IOException;
}
@ -0,0 +1,9 @@
package org.pytorch.suite;

import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import org.pytorch.PytorchLiteInstrumentedTests;

@RunWith(Suite.class)
@Suite.SuiteClasses({PytorchLiteInstrumentedTests.class})
public class PytorchLiteInstrumentedTestSuite {}
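For context on the `BUILD_LITE_INTERPRETER` switch in the androidTest excludes shown earlier: it keeps exactly one of two suites in the build. The lite suite is added in this diff; its full-JIT counterpart (`org/pytorch/suite/PytorchInstrumentedTestSuite.java`) is not part of this change and is not shown here, but judging from the exclude paths it presumably mirrors the lite suite, roughly:

```java
package org.pytorch.suite;

import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import org.pytorch.PytorchInstrumentedTests;

// Presumed mirror of PytorchLiteInstrumentedTestSuite for the full JIT (pytorch_jni) build.
@RunWith(Suite.class)
@Suite.SuiteClasses({PytorchInstrumentedTests.class})
public class PytorchInstrumentedTestSuite {}
```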
@ -2,10 +2,18 @@
Provides the implementations of CUDA BLAS function templates.
*/

#include <ATen/ATen.h>
#include <ATen/cuda/CUDABlas.h>
#include <ATen/cuda/Exceptions.h>
#include <c10/util/irange.h>
#include <c10/cuda/CUDAFunctions.h>
#include <c10/macros/Export.h>
#include <c10/util/irange.h>

// cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also
// added bf16 support
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && !defined(_MSC_VER)
#include <cublasLt.h>
#endif

#define CUDABLAS_POSINT_CHECK(FD, X) \
TORCH_CHECK( \
@ -540,6 +548,256 @@ void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
|
|||
}
|
||||
#endif // defined(CUDA_VERSION) && CUDA_VERSION >= 11000
|
||||
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && !defined(_MSC_VER)
|
||||
|
||||
namespace {
|
||||
// Following the pattern of CuSparseDescriptor
|
||||
// Defined here for now because this is the only place cublas_lt interface is
|
||||
// used but can be moved to a header once cublas_lt interface is used in
|
||||
// multiple places.
|
||||
template <typename T, cublasStatus_t (*destructor)(T*)>
|
||||
struct CuBlasLtDeleter {
|
||||
void operator()(T* x) {
|
||||
if (x != nullptr) {
|
||||
TORCH_CUDABLAS_CHECK(destructor(x));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, cublasStatus_t (*destructor)(T*)>
|
||||
class CuBlasLtDescriptor {
|
||||
public:
|
||||
T* descriptor() const {
|
||||
return descriptor_.get();
|
||||
}
|
||||
T* descriptor() {
|
||||
return descriptor_.get();
|
||||
}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<T, CuBlasLtDeleter<T, destructor>> descriptor_;
|
||||
};
|
||||
|
||||
class CuBlasLtMatmulDescriptor : public CuBlasLtDescriptor<
|
||||
cublasLtMatmulDescOpaque_t,
|
||||
&cublasLtMatmulDescDestroy> {
|
||||
public:
|
||||
CuBlasLtMatmulDescriptor(
|
||||
cublasComputeType_t compute_type,
|
||||
cudaDataType_t scale_type) {
|
||||
cublasLtMatmulDesc_t raw_descriptor = nullptr;
|
||||
TORCH_CUDABLAS_CHECK(
|
||||
cublasLtMatmulDescCreate(&raw_descriptor, compute_type, scale_type));
|
||||
descriptor_.reset(raw_descriptor);
|
||||
}
|
||||
};
|
||||
|
||||
class CuBlasLtMatrixLayout : public CuBlasLtDescriptor<
|
||||
cublasLtMatrixLayoutOpaque_t,
|
||||
&cublasLtMatrixLayoutDestroy> {
|
||||
public:
|
||||
CuBlasLtMatrixLayout(
|
||||
cudaDataType_t type,
|
||||
uint64_t rows,
|
||||
uint64_t cols,
|
||||
int64_t ld) {
|
||||
cublasLtMatrixLayout_t raw_descriptor = nullptr;
|
||||
TORCH_CUDABLAS_CHECK(
|
||||
cublasLtMatrixLayoutCreate(&raw_descriptor, type, rows, cols, ld));
|
||||
descriptor_.reset(raw_descriptor);
|
||||
}
|
||||
};
|
||||
|
||||
class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
|
||||
cublasLtMatmulPreferenceOpaque_t,
|
||||
&cublasLtMatmulPreferenceDestroy> {
|
||||
public:
|
||||
CuBlasLtMatmulPreference() {
|
||||
cublasLtMatmulPreference_t raw_descriptor = nullptr;
|
||||
TORCH_CUDABLAS_CHECK(cublasLtMatmulPreferenceCreate(&raw_descriptor));
|
||||
descriptor_.reset(raw_descriptor);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
template <typename Dtype>
|
||||
void gemm_and_bias(
|
||||
bool transpose_mat1,
|
||||
bool transpose_mat2,
|
||||
int64_t m,
|
||||
int64_t n,
|
||||
int64_t k,
|
||||
at::opmath_type<Dtype> alpha_val,
|
||||
const Dtype* mat1_ptr,
|
||||
int64_t mat1_ld,
|
||||
const Dtype* mat2_ptr,
|
||||
int64_t mat2_ld,
|
||||
const Dtype* bias,
|
||||
Dtype* result_ptr,
|
||||
int64_t result_ld) {
|
||||
using opmath_t = at::opmath_type<Dtype>;
|
||||
opmath_t beta_val = 0; // bias is added in epilogue
|
||||
|
||||
cudaDataType_t abcType = CUDA_R_32F;
|
||||
cublasComputeType_t computeType = CUBLAS_COMPUTE_32F;
|
||||
cudaDataType_t scaleType = CUDA_R_32F;
|
||||
if (std::is_same<Dtype, double>::value) {
|
||||
abcType = CUDA_R_64F;
|
||||
computeType = CUBLAS_COMPUTE_64F;
|
||||
scaleType = CUDA_R_64F;
|
||||
} else if (std::is_same<Dtype, float>::value) {
|
||||
if (at::globalContext().allowTF32CuBLAS()) {
|
||||
computeType = CUBLAS_COMPUTE_32F_FAST_TF32;
|
||||
}
|
||||
abcType = CUDA_R_32F;
|
||||
} else if (std::is_same<Dtype, at::Half>::value) {
|
||||
abcType = CUDA_R_16F;
|
||||
} else if (std::is_same<Dtype, at::BFloat16>::value) {
|
||||
abcType = CUDA_R_16BF;
|
||||
}
|
||||
|
||||
CuBlasLtMatmulDescriptor computeDesc(computeType, scaleType);
|
||||
cublasOperation_t transa = transpose_mat1 ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
TORCH_CUDABLAS_CHECK(cublasLtMatmulDescSetAttribute(
|
||||
computeDesc.descriptor(),
|
||||
CUBLASLT_MATMUL_DESC_TRANSA,
|
||||
&transa,
|
||||
sizeof(transa)));
|
||||
cublasOperation_t transb = transpose_mat2 ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
TORCH_CUDABLAS_CHECK(cublasLtMatmulDescSetAttribute(
|
||||
computeDesc.descriptor(),
|
||||
CUBLASLT_MATMUL_DESC_TRANSB,
|
||||
&transb,
|
||||
sizeof(transb)));
|
||||
cublasLtEpilogue_t epilogue = CUBLASLT_EPILOGUE_BIAS;
|
||||
TORCH_CUDABLAS_CHECK(cublasLtMatmulDescSetAttribute(
|
||||
computeDesc.descriptor(),
|
||||
CUBLASLT_MATMUL_DESC_EPILOGUE,
|
||||
&epilogue,
|
||||
sizeof(epilogue)));
|
||||
TORCH_CUDABLAS_CHECK(cublasLtMatmulDescSetAttribute(
|
||||
computeDesc.descriptor(),
|
||||
CUBLASLT_MATMUL_DESC_BIAS_POINTER,
|
||||
&bias,
|
||||
sizeof(Dtype*)));
|
||||
|
||||
CuBlasLtMatrixLayout Adesc(
|
||||
abcType, transpose_mat1 ? k : m, transpose_mat1 ? m : k, mat1_ld);
|
||||
CuBlasLtMatrixLayout Bdesc(
|
||||
abcType, transpose_mat2 ? n : k, transpose_mat2 ? k : n, mat2_ld);
|
||||
CuBlasLtMatrixLayout Cdesc(abcType, m, n, result_ld);
|
||||
|
||||
CuBlasLtMatmulPreference preference;
|
||||
size_t workspaceSize = 0;
|
||||
TORCH_CUDABLAS_CHECK(cublasLtMatmulPreferenceSetAttribute(
|
||||
preference.descriptor(),
|
||||
CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
|
||||
&workspaceSize,
|
||||
sizeof(workspaceSize)));
|
||||
|
||||
auto workspace = at::empty(
|
||||
{static_cast<int64_t>(workspaceSize)},
|
||||
at::device({at::kCUDA, at::cuda::current_device()}).dtype(at::kByte));
|
||||
|
||||
cublasLtMatmulHeuristicResult_t heuristicResult = {};
|
||||
int returnedResult = 0;
|
||||
cublasLtHandle_t ltHandle =
|
||||
reinterpret_cast<cublasLtHandle_t>(at::cuda::getCurrentCUDABlasHandle());
|
||||
TORCH_CUDABLAS_CHECK(cublasLtMatmulAlgoGetHeuristic(
|
||||
ltHandle,
|
||||
computeDesc.descriptor(),
|
||||
Adesc.descriptor(),
|
||||
Bdesc.descriptor(),
|
||||
Cdesc.descriptor(),
|
||||
Cdesc.descriptor(),
|
||||
preference.descriptor(),
|
||||
1,
|
||||
&heuristicResult,
|
||||
&returnedResult));
|
||||
if (returnedResult == 0) {
|
||||
TORCH_CUDABLAS_CHECK(CUBLAS_STATUS_NOT_SUPPORTED);
|
||||
}
|
||||
|
||||
TORCH_CUDABLAS_CHECK(cublasLtMatmul(
|
||||
ltHandle,
|
||||
computeDesc.descriptor(),
|
||||
&alpha_val,
|
||||
mat1_ptr,
|
||||
Adesc.descriptor(),
|
||||
mat2_ptr,
|
||||
Bdesc.descriptor(),
|
||||
&beta_val,
|
||||
result_ptr,
|
||||
Cdesc.descriptor(),
|
||||
result_ptr,
|
||||
Cdesc.descriptor(),
|
||||
&heuristicResult.algo,
|
||||
workspace.data_ptr(),
|
||||
workspaceSize,
|
||||
at::cuda::getCurrentCUDAStream()));
|
||||
}
|
||||
|
||||
template void gemm_and_bias(
|
||||
bool transpose_mat1,
|
||||
bool transpose_mat2,
|
||||
int64_t m,
|
||||
int64_t n,
|
||||
int64_t k,
|
||||
at::opmath_type<double> alpha_val,
|
||||
const double* mat1_ptr,
|
||||
int64_t mat1_ld,
|
||||
const double* mat2_ptr,
|
||||
int64_t mat2_ld,
|
||||
const double* bias,
|
||||
double* result_ptr,
|
||||
int64_t result_ld);
|
||||
|
||||
template void gemm_and_bias(
|
||||
bool transpose_mat1,
|
||||
bool transpose_mat2,
|
||||
int64_t m,
|
||||
int64_t n,
|
||||
int64_t k,
|
||||
at::opmath_type<float> alpha_val,
|
||||
const float* mat1_ptr,
|
||||
int64_t mat1_ld,
|
||||
const float* mat2_ptr,
|
||||
int64_t mat2_ld,
|
||||
const float* bias,
|
||||
float* result_ptr,
|
||||
int64_t result_ld);
|
||||
|
||||
template void gemm_and_bias(
|
||||
bool transpose_mat1,
|
||||
bool transpose_mat2,
|
||||
int64_t m,
|
||||
int64_t n,
|
||||
int64_t k,
|
||||
at::opmath_type<at::Half> alpha_val,
|
||||
const at::Half* mat1_ptr,
|
||||
int64_t mat1_ld,
|
||||
const at::Half* mat2_ptr,
|
||||
int64_t mat2_ld,
|
||||
const at::Half* bias,
|
||||
at::Half* result_ptr,
|
||||
int64_t result_ld);
|
||||
|
||||
template void gemm_and_bias(
|
||||
bool transpose_mat1,
|
||||
bool transpose_mat2,
|
||||
int64_t m,
|
||||
int64_t n,
|
||||
int64_t k,
|
||||
at::opmath_type<at::BFloat16> alpha_val,
|
||||
const at::BFloat16* mat1_ptr,
|
||||
int64_t mat1_ld,
|
||||
const at::BFloat16* mat2_ptr,
|
||||
int64_t mat2_ld,
|
||||
const at::BFloat16* bias,
|
||||
at::BFloat16* result_ptr,
|
||||
int64_t result_ld);
|
||||
#endif // defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && !defined(_MSC_VER)
|
||||
|
||||
template <>
|
||||
void trsm<float>(CUDABLAS_TRSM_ARGTYPES(float)) {
|
||||
TORCH_CUDABLAS_CHECK(cublasStrsm(
|
||||
|
|
|
|||
|
|
@ -70,6 +70,24 @@ template <>
|
|||
void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
|
||||
#endif
|
||||
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && !defined(_MSC_VER)
|
||||
template <typename Dtype>
|
||||
void gemm_and_bias(
|
||||
bool transpose_mat1,
|
||||
bool transpose_mat2,
|
||||
int64_t m,
|
||||
int64_t n,
|
||||
int64_t k,
|
||||
at::opmath_type<Dtype> alpha_val,
|
||||
const Dtype* mat1_ptr,
|
||||
int64_t mat1_ld,
|
||||
const Dtype* mat2_ptr,
|
||||
int64_t mat2_ld,
|
||||
const Dtype* bias,
|
||||
Dtype* result_ptr,
|
||||
int64_t result_ld);
|
||||
#endif
|
||||
|
||||
#define CUDABLAS_BGEMM_ARGTYPES(Dtype) \
|
||||
char transa, char transb, int64_t m, int64_t n, int64_t k, at::opmath_type<Dtype> alpha, \
|
||||
const Dtype *a, int64_t lda, int64_t stridea, \
|
||||
|
|
|
|||
|
|
@ -1,339 +0,0 @@
|
|||
#include <type_traits>
|
||||
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/AccumulateType.h>
|
||||
#include <ATen/Dispatch.h>
|
||||
#include <ATen/NativeFunctions.h>
|
||||
#include <ATen/Parallel.h>
|
||||
#include <ATen/cpu/vec/vec256/vec256.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
namespace native {
|
||||
|
||||
namespace {
|
||||
|
||||
Tensor gemm_nt(const Tensor& a, const Tensor& b) {
|
||||
return at::native::matmul(a, b.t());
|
||||
}
|
||||
|
||||
template <typename scalar_t>
|
||||
void transform_bias_rescale_qkv_inner_loop(
|
||||
int64_t B,
|
||||
int64_t T,
|
||||
int64_t _3D,
|
||||
int64_t D,
|
||||
int64_t num_head,
|
||||
int64_t dim_per_head,
|
||||
scalar_t* qkv_data,
|
||||
scalar_t* qkv_bias_data,
|
||||
scalar_t* q_k_v_data,
|
||||
scalar_t sqrt_dim_per_head,
|
||||
int64_t begin,
|
||||
int64_t end) {
|
||||
for (auto i : c10::irange(begin, end)) {
|
||||
auto t = i % T;
|
||||
i /= T;
|
||||
auto nh = i % num_head;
|
||||
i /= num_head;
|
||||
auto b = i;
|
||||
using Vec = vec::Vectorized<scalar_t>;
|
||||
auto V = vec::Vectorized<scalar_t>::size();
|
||||
auto dh = 0;
|
||||
auto d = nh * dim_per_head;
|
||||
for (; dh + V <= dim_per_head; dh += V, d += V) {
|
||||
// load
|
||||
auto q_bias_data = Vec::loadu(&qkv_bias_data[d + 0 * D]);
|
||||
auto k_bias_data = Vec::loadu(&qkv_bias_data[d + 1 * D]);
|
||||
auto v_bias_data = Vec::loadu(&qkv_bias_data[d + 2 * D]);
|
||||
|
||||
auto q_data =
|
||||
Vec::loadu(&qkv_data[b * _3D * T + t * _3D + d + 0 * D]) +
|
||||
q_bias_data;
|
||||
auto k_data =
|
||||
Vec::loadu(&qkv_data[b * _3D * T + t * _3D + d + 1 * D]) +
|
||||
k_bias_data;
|
||||
auto v_data =
|
||||
Vec::loadu(&qkv_data[b * _3D * T + t * _3D + d + 2 * D]) +
|
||||
v_bias_data;
|
||||
|
||||
q_data = q_data / Vec(sqrt_dim_per_head);
|
||||
|
||||
q_data.store(&q_k_v_data
|
||||
[0 * B * num_head * T * dim_per_head +
|
||||
b * num_head * T * dim_per_head +
|
||||
nh * T * dim_per_head +
|
||||
t * dim_per_head + dh]);
|
||||
k_data.store(&q_k_v_data
|
||||
[1 * B * num_head * T * dim_per_head +
|
||||
b * num_head * T * dim_per_head +
|
||||
nh * T * dim_per_head +
|
||||
t * dim_per_head + dh]);
|
||||
v_data.store(&q_k_v_data
|
||||
[2 * B * num_head * T * dim_per_head +
|
||||
b * num_head * T * dim_per_head +
|
||||
nh * T * dim_per_head +
|
||||
t * dim_per_head + dh]);
|
||||
}
|
||||
for (; dh < dim_per_head; dh++) {
|
||||
auto d = nh * dim_per_head + dh;
|
||||
auto q_bias = qkv_bias_data[d + 0 * D];
|
||||
auto k_bias = qkv_bias_data[d + 1 * D];
|
||||
auto v_bias = qkv_bias_data[d + 2 * D];
|
||||
auto q_data = qkv_data[b * _3D * T + t * _3D + d + 0 * D] + q_bias;
|
||||
auto k_data = qkv_data[b * _3D * T + t * _3D + d + 1 * D] + k_bias;
|
||||
auto v_data = qkv_data[b * _3D * T + t * _3D + d + 2 * D] + v_bias;
|
||||
q_data = q_data / sqrt_dim_per_head;
|
||||
q_k_v_data[0 * B * num_head * T * dim_per_head +
|
||||
b * num_head * T * dim_per_head +
|
||||
nh * T * dim_per_head +
|
||||
t * dim_per_head + dh] = q_data;
|
||||
q_k_v_data[1 * B * num_head * T * dim_per_head +
|
||||
b * num_head * T * dim_per_head +
|
||||
nh * T * dim_per_head +
|
||||
t * dim_per_head + dh] = k_data;
|
||||
q_k_v_data[2 * B * num_head * T * dim_per_head +
|
||||
b * num_head * T * dim_per_head +
|
||||
nh * T * dim_per_head +
|
||||
t * dim_per_head + dh] = v_data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// compute q = (q + q_bias) / sqrt(dim_per_head), k = k + k_bias, v = v + v_bias
|
||||
std::tuple<Tensor, Tensor, Tensor> transform_bias_rescale_qkv(
|
||||
const Tensor& qkv,
|
||||
const Tensor& qkv_bias,
|
||||
const int64_t num_head) {
|
||||
auto B = qkv.size(0);
|
||||
auto T = qkv.size(1);
|
||||
auto _3D = qkv.size(2);
|
||||
auto D = _3D / 3;
|
||||
TORCH_CHECK(D % num_head == 0);
|
||||
TORCH_CHECK(_3D % 3 == 0);
|
||||
const auto dim_per_head = D / num_head;
|
||||
auto q_k_v = at::empty({3, B, num_head, T, dim_per_head}, qkv.options());
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(q_k_v.is_contiguous());
|
||||
|
||||
const auto qkv_contig = qkv.expect_contiguous();
|
||||
const auto qkv_bias_contig = qkv_bias.expect_contiguous();
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(
|
||||
ScalarType::Half,
|
||||
ScalarType::BFloat16,
|
||||
qkv.scalar_type(),
|
||||
"transform_bias_rescale_qkv",
|
||||
[&] {
|
||||
scalar_t* qkv_data = qkv_contig->data_ptr<scalar_t>();
|
||||
scalar_t* qkv_bias_data = qkv_bias_contig->data_ptr<scalar_t>();
|
||||
scalar_t* q_k_v_data = q_k_v.data_ptr<scalar_t>();
|
||||
const scalar_t sqrt_dim_per_head = std::sqrt(static_cast<scalar_t>(dim_per_head));
|
||||
|
||||
int64_t grain_size =
|
||||
std::max(internal::GRAIN_SIZE / (3 * dim_per_head), (int64_t)1);
|
||||
parallel_for(
|
||||
0, B * num_head * T, grain_size, [&](int64_t begin, int64_t end) {
|
||||
transform_bias_rescale_qkv_inner_loop(B, T, _3D, D, num_head, dim_per_head, qkv_data, qkv_bias_data, q_k_v_data, sqrt_dim_per_head, begin, end);
|
||||
});
|
||||
});
|
||||
auto q_k_v_s =
|
||||
at::native::split(q_k_v.view({3 * B, num_head, T, dim_per_head}), B, 0);
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(q_k_v_s.size() == 3);
|
||||
return std::make_tuple(q_k_v_s[0], q_k_v_s[1], q_k_v_s[2]);
|
||||
}
|
||||
|
||||
Tensor bmm_nt(const Tensor& a, const Tensor& b) {
|
||||
auto a_ = a.view({a.size(0) * a.size(1), a.size(2), a.size(3)});
|
||||
auto b_ = b.view({b.size(0) * b.size(1), b.size(2), b.size(3)});
|
||||
auto bt_ = b_.transpose(2, 1);
|
||||
// TODO: are these a single call to cublas batched matmul?
|
||||
auto c_ = at::matmul(a_, bt_);
|
||||
return c_.view({a.size(0), a.size(1), a.size(2), b.size(2)});
|
||||
}
|
||||
|
||||
void masked_softmax_dropout(
|
||||
Tensor& attn_scores,
|
||||
const c10::optional<Tensor>& attn_mask) {
|
||||
auto B = attn_scores.size(0);
|
||||
auto num_heads = attn_scores.size(1);
|
||||
auto T = attn_scores.size(2);
|
||||
if (attn_mask) {
|
||||
TORCH_CHECK(attn_mask->is_contiguous());
|
||||
} else {
|
||||
at::_softmax_out(attn_scores, attn_scores, 3, false);
|
||||
return;
|
||||
}
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(
|
||||
ScalarType::Half,
|
||||
ScalarType::BFloat16,
|
||||
attn_scores.scalar_type(),
|
||||
"masked_softmax_dropout",
|
||||
[&] {
|
||||
using accscalar_t = acc_type<scalar_t, false>;
|
||||
// TODO: proper implementation with masking.
|
||||
scalar_t* attn_scores_data = attn_scores.data_ptr<scalar_t>();
|
||||
int64_t grain_size = std::min(internal::GRAIN_SIZE / T, (int64_t)1);
|
||||
parallel_for(
|
||||
0, B * num_heads * T, grain_size, [&](int64_t begin, int64_t end) {
|
||||
for (const auto i : c10::irange(begin, end)) {
|
||||
using Vec = vec::Vectorized<scalar_t>;
|
||||
auto V = vec::Vectorized<scalar_t>::size();
|
||||
|
||||
scalar_t* input_data = attn_scores_data + i;
|
||||
auto max_input = Vec(std::numeric_limits<scalar_t>::lowest());
|
||||
// TODO: handle epilogue
|
||||
TORCH_CHECK(T % V == 0, "epilogue not implemented yet");
|
||||
for (auto t = 0; t < T; t += V) {
|
||||
auto v = Vec::loadu(&input_data[t]);
|
||||
max_input = vec::maximum(max_input, v);
|
||||
}
|
||||
|
||||
auto hmax = std::numeric_limits<scalar_t>::lowest();
|
||||
for (auto i = 0; i < V; ++i) {
|
||||
hmax = std::max(max_input[i], hmax);
|
||||
}
|
||||
accscalar_t hsum = 0;
|
||||
TORCH_CHECK(T % V == 0, "epilogue not implemented yet");
|
||||
for (auto t = 0; t < T; t += V) {
|
||||
auto v = Vec::loadu(&input_data[t]);
|
||||
// TODO: vectorize in accscalar_t?
|
||||
for (auto i = 0; i < V; ++i) {
|
||||
hsum += std::exp(static_cast<accscalar_t>(v[i]) - hmax);
|
||||
}
|
||||
}
|
||||
auto inv_denominator = 1.0 / hsum;
|
||||
TORCH_CHECK(T % V == 0, "epilogue not implemented yet");
|
||||
for (auto t = 0; t < T; t += V) {
|
||||
Vec v = Vec::loadu(&input_data[t]);
|
||||
|
||||
// TODO: vectorize in accscalar_t?
|
||||
// TODO this faster solution does not work on Android build
|
||||
/*
|
||||
for (auto i = 0; i < V; ++i) {
|
||||
v[i] = static_cast<scalar_t>(std::exp(static_cast<accscalar_t>(v[i]) - hmax) * inv_denominator);
|
||||
}
|
||||
v.store(&input_data[t]);
|
||||
*/
|
||||
for (auto i = 0; i < V; ++i) {
|
||||
input_data[t + i] = static_cast<scalar_t>(std::exp(static_cast<accscalar_t>(v[i]) - hmax) * inv_denominator);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
Tensor bmm_nn(const Tensor& a, const Tensor& b) {
|
||||
auto a_ = a.view({a.size(0) * a.size(1), a.size(2), a.size(3)});
|
||||
auto b_ = b.view({b.size(0) * b.size(1), b.size(2), b.size(3)});
|
||||
// TODO: are these a single call to cublas batched matmul?
|
||||
auto c_ = at::matmul(a_, b_);
|
||||
return c_.view({a.size(0), a.size(1), a.size(2), b.size(3)});
|
||||
}
|
||||
|
||||
Tensor transform_0213(const Tensor& a) {
|
||||
// TODO: check perf vs dedicated kernel.
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.size(1));
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.size(3));
|
||||
return a.permute({0, 2, 1, 3})
|
||||
.contiguous()
|
||||
.view({a.size(0), a.size(2), a.size(1) * a.size(3)});
|
||||
}
|
||||
|
||||
Tensor gemm_nt_bias(const Tensor& a, const Tensor& b, const Tensor& c) {
|
||||
auto a_ = a.view({a.size(0) * a.size(1), a.size(2)});
|
||||
auto r_ = at::native::linear(a_, b, c);
|
||||
return r_.view({a.size(0), a.size(1), r_.size(1)});
|
||||
}
|
||||
|
||||
void debug_assert_shape(const Tensor& t, c10::IntArrayRef shape) {
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY((size_t)t.dim() == shape.size(), "expected ", shape.size(), "-D tensor but got ", t.dim());
|
||||
for (auto idx : c10::irange(shape.size())) {
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(t.sizes()[idx] == shape[idx], "expected dim ", idx, " to be ", shape[idx], " but got ", t.sizes()[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::tuple<Tensor, Tensor, Tensor> transform_bias_rescale_qkv_op_cpu(
|
||||
const Tensor& qkv,
|
||||
const Tensor& qkv_bias,
|
||||
const int64_t num_head) {
|
||||
auto result = transform_bias_rescale_qkv(qkv, qkv_bias, num_head);
|
||||
return std::make_tuple(std::get<0>(result).clone(), std::get<1>(result).clone(), std::get<2>(result).clone());
|
||||
}
|
||||
|
||||
Tensor multi_head_self_attention_cpu(
|
||||
const Tensor& query,
|
||||
const Tensor& qkv_weight,
|
||||
const Tensor& qkv_bias,
|
||||
const Tensor& proj_weight,
|
||||
const Tensor& proj_bias,
|
||||
const int64_t num_head,
|
||||
const c10::optional<Tensor>& mask) {
|
||||
// query shape: [B, T, D]
|
||||
// qkv_weight shape: [3 * D, D]
|
||||
|
||||
const auto D = query.sizes()[2];
|
||||
|
||||
TORCH_CHECK(query.dim() == 3, "expected 3-dimensional query, got ", query.dim(), "-D tensor");
|
||||
TORCH_CHECK(qkv_weight.dim() == 2, "expected 2-dimensional qkv_weight, got ", qkv_weight.dim(), "-D tensor");
|
||||
TORCH_CHECK(D * 3 == qkv_weight.sizes()[0], "expected qkv_weight first dim to be 3x last dim of query");
|
||||
TORCH_CHECK(D == qkv_weight.sizes()[1], "expected qkv_weight second dim and last dim of query to be equal");
|
||||
TORCH_CHECK(qkv_bias.dim() == 1, "expected 2-dimensional qkv_bias, got ", qkv_bias.dim(), "-D tensor");
|
||||
TORCH_CHECK(qkv_bias.sizes()[0] == 3 * D, "expected qkv_bias first dim and first dim of query to be equal");
|
||||
TORCH_CHECK(D % num_head == 0, "D must divide evenly by num_head");
|
||||
|
||||
#ifndef NDEBUG
|
||||
const auto B = query.sizes()[0];
|
||||
const auto T = query.sizes()[1];
|
||||
const auto dim_per_head = D / num_head;
|
||||
#endif
|
||||
|
||||
// shape: [B, T, 3 x D]
|
||||
auto qkv = gemm_nt(query, qkv_weight);
|
||||
#ifndef NDEBUG
|
||||
debug_assert_shape(qkv, {B, T, 3 * D});
|
||||
#endif
|
||||
|
||||
// shape: 3 x [B, num_head, T, dim_per_head]
|
||||
auto q_k_v = transform_bias_rescale_qkv(qkv, qkv_bias, num_head);
|
||||
const auto& q = std::get<0>(q_k_v);
|
||||
const auto& k = std::get<1>(q_k_v);
|
||||
const auto& v = std::get<2>(q_k_v);
|
||||
#ifndef NDEBUG
|
||||
debug_assert_shape(q, {B, num_head, T, dim_per_head});
|
||||
debug_assert_shape(k, {B, num_head, T, dim_per_head});
|
||||
debug_assert_shape(v, {B, num_head, T, dim_per_head});
|
||||
#endif
|
||||
|
||||
// shape: [B, num_head, T, T]
|
||||
auto qkt = bmm_nt(q, k);
|
||||
#ifndef NDEBUG
|
||||
debug_assert_shape(qkt, {B, num_head, T, T});
|
||||
#endif
|
||||
|
||||
// shape: [B, num_head, T, T]
|
||||
masked_softmax_dropout(qkt, mask);
|
||||
|
||||
// shape: [B, num_head, T, dim_per_head]
|
||||
auto attn_ctx = bmm_nn(qkt, v);
|
||||
#ifndef NDEBUG
|
||||
debug_assert_shape(attn_ctx, {B, num_head, T, dim_per_head});
|
||||
#endif
|
||||
|
||||
// shape: [B, T, D]
|
||||
auto attn = transform_0213(attn_ctx);
|
||||
#ifndef NDEBUG
|
||||
debug_assert_shape(attn, {B, T, D});
|
||||
#endif
|
||||
|
||||
// shape: [B, T, D]
|
||||
auto proj = gemm_nt_bias(attn, proj_weight, proj_bias);
|
||||
#ifndef NDEBUG
|
||||
debug_assert_shape(proj, {B, T, D});
|
||||
#endif
|
||||
return proj;
|
||||
}
|
||||
|
||||
} // namespace native
|
||||
} // namespace at
|
||||
|
|
@ -102,9 +102,27 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
|||
IntArrayRef mat1_sizes = mat1.sizes();
|
||||
IntArrayRef mat2_sizes = mat2.sizes();
|
||||
IntArrayRef self__sizes;
|
||||
bool useLtInterface = false;
|
||||
at::ScalarType scalar_type = self.scalar_type();
|
||||
c10::MaybeOwned<Tensor> self_;
|
||||
if (&result != &self) {
|
||||
self_ = expand_size(self, {mat1_sizes[0], mat2_sizes[1]}, "addmm");
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && !defined(_MSC_VER)
|
||||
// Strangely, if mat2 has only 1 row or column, we get
|
||||
// CUBLAS_STATUS_INVALID_VALUE error from cublasLtMatmulAlgoGetHeuristic.
|
||||
// self.dim() == 1 && result.dim() == 2 && self.sizes()[0] == mat2_sizes[1]
|
||||
// is to use lt interface only when self is bias.
|
||||
useLtInterface = beta.toComplexDouble() == 1.0 && self.dim() == 1 &&
|
||||
result.dim() == 2 && self.sizes()[0] == mat2_sizes[1] &&
|
||||
self.is_contiguous() &&
|
||||
(scalar_type == at::ScalarType::Double ||
|
||||
scalar_type == at::ScalarType::Float ||
|
||||
scalar_type == at::ScalarType::Half ||
|
||||
scalar_type == at::ScalarType::BFloat16) &&
|
||||
mat2_sizes[0] > 1 && mat2_sizes[1] > 1;
|
||||
#endif
|
||||
if (!useLtInterface) {
|
||||
self_ = expand_size(self, {mat1_sizes[0], mat2_sizes[1]}, "addmm");
|
||||
}
|
||||
self__sizes = self_->sizes();
|
||||
} else {
|
||||
self_ = c10::MaybeOwned<Tensor>::borrowed(self);
|
||||
|
|
@ -115,8 +133,8 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
|||
}
|
||||
|
||||
if (&result != &self) {
|
||||
at::native::resize_output(result, self__sizes);
|
||||
if (beta.toComplexDouble() != 0.0) {
|
||||
at::native::resize_output(result, {mat1_sizes[0], mat2_sizes[1]});
|
||||
if (beta.toComplexDouble() != 0.0 && !useLtInterface) {
|
||||
at::native::copy_(result, *self_);
|
||||
}
|
||||
}
|
||||
|
|
@ -147,7 +165,6 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
|||
int64_t mat1_ld = mat1_->stride((transpose_mat1 == transpose_result) ? 1 : 0);
|
||||
int64_t mat2_ld = mat2_->stride((transpose_mat2 == transpose_result) ? 1 : 0);
|
||||
int64_t result_ld = result_->stride(transpose_result ? 0 : 1);
|
||||
at::ScalarType scalar_type = self_->scalar_type();
|
||||
|
||||
if (mat1.numel() == 0) {
|
||||
// By definition, when beta==0, values in self should be ignored. nans and infs
|
||||
|
|
@ -170,24 +187,61 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
|
|||
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!result_->is_conj());
|
||||
|
||||
AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, scalar_type, "addmm_cuda", [&] {
|
||||
using opmath_t = at::opmath_type<scalar_t>;
|
||||
opmath_t alpha_val = alpha.to<opmath_t>();
|
||||
opmath_t beta_val = beta.to<opmath_t>();
|
||||
scalar_t* mat1_ptr = mat1_->data_ptr<scalar_t>();
|
||||
scalar_t* mat2_ptr = mat2_->data_ptr<scalar_t>();
|
||||
scalar_t* result_ptr = result_->data_ptr<scalar_t>();
|
||||
at::cuda::blas::gemm<scalar_t>(
|
||||
transpose_mat1 ? mat1_->is_conj() ? 'c' : 't' : 'n',
|
||||
transpose_mat2 ? mat2_->is_conj() ? 'c' : 't' : 'n',
|
||||
m, n, k,
|
||||
alpha_val,
|
||||
mat1_ptr, mat1_ld,
|
||||
mat2_ptr, mat2_ld,
|
||||
beta_val,
|
||||
result_ptr, result_ld
|
||||
);
|
||||
});
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && !defined(_MSC_VER)
|
||||
if (useLtInterface) {
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(
|
||||
at::ScalarType::Half,
|
||||
at::ScalarType::BFloat16,
|
||||
scalar_type,
|
||||
"addmm_cuda_lt",
|
||||
[&] {
|
||||
at::cuda::blas::gemm_and_bias<scalar_t>(
|
||||
transpose_mat1,
|
||||
transpose_mat2,
|
||||
m,
|
||||
n,
|
||||
k,
|
||||
alpha.to<at::opmath_type<scalar_t>>(),
|
||||
mat1_->data_ptr<scalar_t>(),
|
||||
mat1_ld,
|
||||
mat2_->data_ptr<scalar_t>(),
|
||||
mat2_ld,
|
||||
self.data_ptr<scalar_t>(),
|
||||
result_->data_ptr<scalar_t>(),
|
||||
result_ld);
|
||||
});
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(
|
||||
at::ScalarType::Half,
|
||||
at::ScalarType::BFloat16,
|
||||
scalar_type,
|
||||
"addmm_cuda",
|
||||
[&] {
|
||||
using opmath_t = at::opmath_type<scalar_t>;
|
||||
opmath_t alpha_val = alpha.to<opmath_t>();
|
||||
opmath_t beta_val = beta.to<opmath_t>();
|
||||
scalar_t* mat1_ptr = mat1_->data_ptr<scalar_t>();
|
||||
scalar_t* mat2_ptr = mat2_->data_ptr<scalar_t>();
|
||||
scalar_t* result_ptr = result_->data_ptr<scalar_t>();
|
||||
at::cuda::blas::gemm<scalar_t>(
|
||||
transpose_mat1 ? mat1_->is_conj() ? 'c' : 't' : 'n',
|
||||
transpose_mat2 ? mat2_->is_conj() ? 'c' : 't' : 'n',
|
||||
m,
|
||||
n,
|
||||
k,
|
||||
alpha_val,
|
||||
mat1_ptr,
|
||||
mat1_ld,
|
||||
mat2_ptr,
|
||||
mat2_ld,
|
||||
beta_val,
|
||||
result_ptr,
|
||||
result_ld);
|
||||
});
|
||||
}
|
||||
|
||||
if (!result.is_same(*result_)) {
|
||||
result.copy_(*result_);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,89 +4,9 @@
|
|||
#include <ATen/cuda/CUDAConfig.h>
|
||||
#include <ATen/cuda/PinnedMemoryAllocator.h>
|
||||
|
||||
#if AT_MAGMA_ENABLED()
|
||||
#include <magma_types.h>
|
||||
#include <magma_v2.h>
|
||||
#endif
|
||||
|
||||
namespace at {
|
||||
namespace native {
|
||||
|
||||
#if AT_MAGMA_ENABLED()
|
||||
|
||||
// RAII for a MAGMA Queue
|
||||
struct MAGMAQueue {
|
||||
|
||||
// Default constructor without a device will cause
|
||||
// destroying a queue which has not been initialized.
|
||||
MAGMAQueue() = delete;
|
||||
|
||||
// Constructor
|
||||
explicit MAGMAQueue(int64_t device_id) {
|
||||
cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
|
||||
// Magma operations is numerically sensitive, so TF32 should be off
|
||||
// regardless of the global flag.
|
||||
TORCH_CUDABLAS_CHECK(cublasGetMathMode(handle, &original_math_mode));
|
||||
TORCH_CUDABLAS_CHECK(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH));
|
||||
#endif
|
||||
magma_queue_create_from_cuda(
|
||||
device_id,
|
||||
at::cuda::getCurrentCUDAStream(),
|
||||
handle,
|
||||
at::cuda::getCurrentCUDASparseHandle(),
|
||||
&magma_queue_);
|
||||
}
|
||||
|
||||
// Getter
|
||||
magma_queue_t get_queue() const { return magma_queue_; }
|
||||
|
||||
// Destructor
|
||||
~MAGMAQueue() {
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
|
||||
// We've manually set the math mode to CUBLAS_DEFAULT_MATH, now we
|
||||
// should restore the original math mode back
|
||||
cublasHandle_t handle = magma_queue_get_cublas_handle(magma_queue_);
|
||||
cublasSetMathMode(handle, original_math_mode);
|
||||
#endif
|
||||
magma_queue_destroy(magma_queue_);
|
||||
}
|
||||
|
||||
private:
|
||||
magma_queue_t magma_queue_;
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
|
||||
cublasMath_t original_math_mode;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline magma_int_t magma_int_cast(int64_t value, const char* varname) {
|
||||
auto result = static_cast<magma_int_t>(value);
|
||||
if (static_cast<int64_t>(result) != value) {
|
||||
AT_ERROR("magma: The value of ", varname, "(", (long long)value,
|
||||
") is too large to fit into a magma_int_t (", sizeof(magma_int_t), " bytes)");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// MAGMA functions that don't take a magma_queue_t aren't stream safe
|
||||
// Work around this by synchronizing with the default stream
|
||||
struct MagmaStreamSyncGuard {
|
||||
MagmaStreamSyncGuard() {
|
||||
auto stream = at::cuda::getCurrentCUDAStream();
|
||||
if (stream != at::cuda::getDefaultCUDAStream()) {
|
||||
at::cuda::stream_synchronize(stream);
|
||||
}
|
||||
}
|
||||
|
||||
~MagmaStreamSyncGuard() noexcept(false) {
|
||||
auto default_stream = at::cuda::getDefaultCUDAStream();
|
||||
if (at::cuda::getCurrentCUDAStream() != default_stream) {
|
||||
at::cuda::stream_synchronize(default_stream);
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
static inline int cuda_int_cast(int64_t value, const char* varname) {
|
||||
auto result = static_cast<int>(value);
|
||||
TORCH_CHECK(static_cast<int64_t>(result) == value,
|
||||
|
|
|
|||
|
|
@ -1,342 +0,0 @@
|
|||
#include <type_traits>
|
||||
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/AccumulateType.h>
|
||||
#include <ATen/Dispatch.h>
|
||||
#include <ATen/NativeFunctions.h>
|
||||
#include <ATen/TensorAccessor.h>
|
||||
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <ATen/cuda/detail/KernelUtils.h>
|
||||
#include <ATen/cuda/detail/IndexUtils.cuh>
|
||||
#include <ATen/native/cuda/Loops.cuh>
|
||||
#include <ATen/native/cuda/MemoryAccess.cuh>
|
||||
#include <ATen/native/cuda/block_reduce.cuh>
|
||||
#include <ATen/native/cuda/PersistentSoftmax.cuh>
|
||||
|
||||
#include <c10/cuda/CUDAMathCompat.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
namespace native {
|
||||
|
||||
namespace {
|
||||
|
||||
Tensor gemm_nt(const Tensor& a, const Tensor& b) {
|
||||
return at::native::matmul(a, b.t());
|
||||
}
|
||||
|
||||
static constexpr int TRANSFORM_BIAS_RESCALE_VEC = 4;
|
||||
|
||||
template <typename scalar_t, typename accscalar_t, bool assume_aligned>
|
||||
__global__ void transform_bias_rescale_qkv_kernel(
|
||||
// [B, T, 3 * D]
|
||||
const PackedTensorAccessor64<scalar_t, 3, RestrictPtrTraits> qkv,
|
||||
// [3 * D]
|
||||
const PackedTensorAccessor64<scalar_t, 1, RestrictPtrTraits> qkv_bias,
|
||||
// [3, B, NH, T, DH]
|
||||
PackedTensorAccessor64<scalar_t, 5, RestrictPtrTraits> q_k_v) {
|
||||
// warp per DH.
|
||||
// so launch B * NH * T warps.
|
||||
auto NH = q_k_v.size(2);
|
||||
auto T = q_k_v.size(3);
|
||||
auto DH = q_k_v.size(4);
|
||||
|
||||
auto t = blockIdx.x % T;
|
||||
auto b = blockIdx.x / T;
|
||||
|
||||
auto D = NH * DH;
|
||||
const scalar_t sqrt_dim_per_head = std::sqrt(static_cast<scalar_t>(DH));
|
||||
|
||||
if (assume_aligned) {
|
||||
constexpr int VEC = TRANSFORM_BIAS_RESCALE_VEC;
|
||||
using LoadT = memory::aligned_vector<scalar_t, VEC>;
|
||||
for (int32_t d_v = threadIdx.x; d_v < D / VEC; d_v += blockDim.x) {
|
||||
auto d = d_v * VEC;
|
||||
auto nh = d / DH;
|
||||
auto dh = d % DH;
|
||||
scalar_t qkv_bias_q[VEC];
|
||||
scalar_t qkv_bias_k[VEC];
|
||||
scalar_t qkv_bias_v[VEC];
|
||||
scalar_t qkv_q[VEC];
|
||||
scalar_t qkv_k[VEC];
|
||||
scalar_t qkv_v[VEC];
|
||||
|
||||
// Here we require D % VEC == 0 for these vectorized loads.
|
||||
*reinterpret_cast<LoadT*>(&qkv_bias_q) =
|
||||
*reinterpret_cast<const LoadT*>(&qkv_bias[d + 0 * D]);
|
||||
*reinterpret_cast<LoadT*>(&qkv_bias_k) =
|
||||
*reinterpret_cast<const LoadT*>(&qkv_bias[d + 1 * D]);
|
||||
*reinterpret_cast<LoadT*>(&qkv_bias_v) =
|
||||
*reinterpret_cast<const LoadT*>(&qkv_bias[d + 2 * D]);
|
||||
|
||||
*reinterpret_cast<LoadT*>(&qkv_q) =
|
||||
*reinterpret_cast<const LoadT*>(&qkv[b][t][d + 0 * D]);
|
||||
*reinterpret_cast<LoadT*>(&qkv_k) =
|
||||
*reinterpret_cast<const LoadT*>(&qkv[b][t][d + 1 * D]);
|
||||
*reinterpret_cast<LoadT*>(&qkv_v) =
|
||||
*reinterpret_cast<const LoadT*>(&qkv[b][t][d + 2 * D]);
|
||||
|
||||
#pragma unroll
|
||||
// TODO: specialize for float2half2/half2float2?
|
||||
for (auto ii = 0; ii < VEC; ++ii) {
|
||||
qkv_q[ii] = static_cast<scalar_t>(
|
||||
(static_cast<accscalar_t>(qkv_q[ii]) +
|
||||
static_cast<accscalar_t>(qkv_bias_q[ii])) /
|
||||
static_cast<accscalar_t>(sqrt_dim_per_head));
|
||||
qkv_k[ii] = static_cast<scalar_t>(
|
||||
(static_cast<accscalar_t>(qkv_k[ii]) +
|
||||
static_cast<accscalar_t>(qkv_bias_k[ii])));
|
||||
qkv_v[ii] = static_cast<scalar_t>(
|
||||
(static_cast<accscalar_t>(qkv_v[ii]) +
|
||||
static_cast<accscalar_t>(qkv_bias_v[ii])));
|
||||
}
|
||||
|
||||
// Here we require DH % VEC == 0 for these vectorized stores.
|
||||
*reinterpret_cast<LoadT*>(&q_k_v[0][b][nh][t][dh]) =
|
||||
*reinterpret_cast<const LoadT*>(&qkv_q);
|
||||
*reinterpret_cast<LoadT*>(&q_k_v[1][b][nh][t][dh]) =
|
||||
*reinterpret_cast<const LoadT*>(&qkv_k);
|
||||
*reinterpret_cast<LoadT*>(&q_k_v[2][b][nh][t][dh]) =
|
||||
*reinterpret_cast<const LoadT*>(&qkv_v);
|
||||
}
|
||||
} else {
|
||||
// Same as above, but we can't vectorize memory access.
|
||||
for (int32_t d = threadIdx.x; d < D; d += blockDim.x) {
|
||||
auto nh = d / DH;
|
||||
auto dh = d % DH;
|
||||
scalar_t qkv_bias_q = qkv_bias[d + 0 * D];
|
||||
scalar_t qkv_bias_k = qkv_bias[d + 1 * D];
|
||||
scalar_t qkv_bias_v = qkv_bias[d + 2 * D];
|
||||
scalar_t qkv_q = qkv[b][t][d + 0 * D];
|
||||
scalar_t qkv_k = qkv[b][t][d + 1 * D];
|
||||
scalar_t qkv_v = qkv[b][t][d + 2 * D];
|
||||
qkv_q = static_cast<scalar_t>(
|
||||
(static_cast<accscalar_t>(qkv_q) +
|
||||
static_cast<accscalar_t>(qkv_bias_q)) /
|
||||
static_cast<accscalar_t>(sqrt_dim_per_head));
|
||||
qkv_k = static_cast<scalar_t>(
|
||||
(static_cast<accscalar_t>(qkv_k) +
|
||||
static_cast<accscalar_t>(qkv_bias_k)));
|
||||
qkv_v = static_cast<scalar_t>(
|
||||
(static_cast<accscalar_t>(qkv_v) +
|
||||
static_cast<accscalar_t>(qkv_bias_v)));
|
||||
|
||||
q_k_v[0][b][nh][t][dh] = qkv_q;
|
||||
q_k_v[1][b][nh][t][dh] = qkv_k;
|
||||
q_k_v[2][b][nh][t][dh] = qkv_v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// compute q = (q + q_bias) / sqrt(dim_per_head), k = k + k_bias, v = v + v_bias
|
||||
std::tuple<Tensor, Tensor, Tensor> transform_bias_rescale_qkv(
|
||||
const Tensor& qkv,
|
||||
const Tensor& qkv_bias,
|
||||
const int64_t num_head) {
|
||||
auto B = qkv.size(0);
|
||||
auto T = qkv.size(1);
|
||||
auto _3D = qkv.size(2);
|
||||
auto D = _3D / 3;
|
||||
TORCH_CHECK(D % num_head == 0);
|
||||
const auto dim_per_head = D / num_head;
|
||||
auto q_k_v = at::empty({3, B, num_head, T, dim_per_head}, qkv.options());
|
||||
#define CALL_KERNEL(assume_aligned) \
|
||||
transform_bias_rescale_qkv_kernel<scalar_t, accscalar_t, assume_aligned> \
|
||||
<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( \
|
||||
qkv.packed_accessor64<scalar_t, 3, RestrictPtrTraits>(), \
|
||||
qkv_bias.packed_accessor64<scalar_t, 1, RestrictPtrTraits>(), \
|
||||
q_k_v.packed_accessor64<scalar_t, 5, RestrictPtrTraits>())
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(
|
||||
ScalarType::Half,
|
||||
ScalarType::BFloat16,
|
||||
qkv.scalar_type(),
|
||||
"transform_bias_rescale_qkv",
|
||||
[&] {
|
||||
using accscalar_t = acc_type<scalar_t, true>;
|
||||
auto threads = std::max(std::min<int32_t>(1024, D / TRANSFORM_BIAS_RESCALE_VEC), 1);
|
||||
auto blocks = B * T;
|
||||
if (dim_per_head % TRANSFORM_BIAS_RESCALE_VEC == 0) {
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
|
||||
D % TRANSFORM_BIAS_RESCALE_VEC == 0,
|
||||
"D = num_heads * dim_per_head, so we should have dim_per_head % "
|
||||
"TRANSFORM_BIAS_RESCALE_VEC == 0 => "
|
||||
"D % TRANSFORM_BIAS_RESCALE_VEC == 0");
|
||||
CALL_KERNEL(true);
|
||||
} else {
|
||||
CALL_KERNEL(false);
|
||||
}
|
||||
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
||||
});
|
||||
#undef CALL_KERNEL
|
||||
auto q_k_v_s =
|
||||
at::native::split(q_k_v.view({3 * B, num_head, T, dim_per_head}), B, 0);
|
||||
return std::make_tuple(q_k_v_s[0], q_k_v_s[1], q_k_v_s[2]);
|
||||
}
|
||||
|
||||
Tensor bmm_nt(const Tensor& a, const Tensor& b) {
|
||||
auto a_ = a.view({a.size(0) * a.size(1), a.size(2), a.size(3)});
|
||||
auto b_ = b.view({b.size(0) * b.size(1), b.size(2), b.size(3)});
|
||||
auto bt_ = b_.transpose(2, 1);
|
||||
// TODO: are these a single call to cublas batched matmul?
|
||||
auto c_ = at::matmul(a_, bt_);
|
||||
return c_.view({a.size(0), a.size(1), a.size(2), b.size(2)});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__inline__ __device__ T WarpReduceMax(T val) {
|
||||
#pragma unroll
|
||||
for (int offset = (C10_WARP_SIZE >> 1); offset > 0; offset >>= 1) {
|
||||
val = std::max(val, WARP_SHFL_DOWN(val, offset));
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__inline__ __device__ T WarpReduceSum(T val) {
|
||||
#pragma unroll
|
||||
for (int offset = (C10_WARP_SIZE >> 1); offset > 0; offset >>= 1) {
|
||||
val += WARP_SHFL_DOWN(val, offset);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
void masked_softmax_dropout(
|
||||
const Tensor& attn_scores,
|
||||
const c10::optional<Tensor>& attn_mask) {
|
||||
auto B = attn_scores.size(0);
|
||||
auto num_heads = attn_scores.size(1);
|
||||
auto T = attn_scores.size(2);
|
||||
if (attn_mask) {
|
||||
TORCH_CHECK(attn_mask->is_contiguous());
|
||||
}
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(
|
||||
ScalarType::Half,
|
||||
          ScalarType::BFloat16,
          attn_scores.scalar_type(),
          "masked_softmax_dropout",
          [&] {
            using accscalar_t = acc_type<scalar_t, true>;
            // TODO: proper implementation with masking.
            dispatch_softmax_forward<scalar_t, scalar_t, accscalar_t, false, false>(
                attn_scores.data_ptr<scalar_t>(),
                attn_scores.data_ptr<scalar_t>(),
                T,
                T,
                B * num_heads * T
            );
          });
}

Tensor bmm_nn(const Tensor& a, const Tensor& b) {
  auto a_ = a.view({a.size(0) * a.size(1), a.size(2), a.size(3)});
  auto b_ = b.view({b.size(0) * b.size(1), b.size(2), b.size(3)});
  // TODO: could these views plus matmul be a single call to cuBLAS batched matmul?
  auto c_ = at::matmul(a_, b_);
  return c_.view({a.size(0), a.size(1), a.size(2), b.size(3)});
}

Tensor transform_0213(const Tensor& a) {
  // TODO: check perf vs dedicated kernel.
  return a.permute({0, 2, 1, 3})
      .contiguous()
      .view({a.size(0), a.size(2), a.size(1) * a.size(3)});
}

Tensor gemm_nt_bias(const Tensor& a, const Tensor& b, const Tensor& c) {
  auto a_ = a.view({a.size(0) * a.size(1), a.size(2)});
  auto r_ = at::native::linear(a_, b, c);
  return r_.view({a.size(0), a.size(1), r_.size(1)});
}

void debug_assert_shape(const Tensor& t, c10::IntArrayRef shape) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY((size_t)t.dim() == shape.size(), "expected ", shape.size(), "-D tensor but got ", t.dim());
  for (auto idx : c10::irange(shape.size())) {
    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(t.sizes()[idx] == shape[idx], "expected dim ", idx, " to be ", shape[idx], " but got ", t.sizes()[idx]);
  }
}

} // namespace

std::tuple<Tensor, Tensor, Tensor> transform_bias_rescale_qkv_op_cuda(
    const Tensor& qkv,
    const Tensor& qkv_bias,
    const int64_t num_head) {
  auto result = transform_bias_rescale_qkv(qkv, qkv_bias, num_head);
  return std::make_tuple(std::get<0>(result).clone(), std::get<1>(result).clone(), std::get<2>(result).clone());
}

Tensor multi_head_self_attention_cuda(
    const Tensor& query,
    const Tensor& qkv_weight,
    const Tensor& qkv_bias,
    const Tensor& proj_weight,
    const Tensor& proj_bias,
    const int64_t num_head,
    const c10::optional<Tensor>& mask) {
  // query shape: [B, T, D]
  // qkv_weight shape: [3 * D, D]

  const auto D = query.sizes()[2];

  TORCH_CHECK(query.dim() == 3, "expected 3-dimensional query, got ", query.dim(), "-D tensor");
  TORCH_CHECK(qkv_weight.dim() == 2, "expected 2-dimensional qkv_weight, got ", qkv_weight.dim(), "-D tensor");
  TORCH_CHECK(D * 3 == qkv_weight.sizes()[0], "expected qkv_weight first dim to be 3x last dim of query");
  TORCH_CHECK(D == qkv_weight.sizes()[1], "expected qkv_weight second dim and last dim of query to be equal");
  TORCH_CHECK(D % num_head == 0, "D must divide evenly by num_head");

#ifndef NDEBUG
  const auto B = query.sizes()[0];
  const auto T = query.sizes()[1];
  const auto dim_per_head = D / num_head;
#endif

  // shape: [B, T, 3 x D]
  auto qkv = gemm_nt(query, qkv_weight);
#ifndef NDEBUG
  debug_assert_shape(qkv, {B, T, 3 * D});
#endif

  // shape: 3 x [B, num_head, T, dim_per_head]
  auto q_k_v = transform_bias_rescale_qkv(qkv, qkv_bias, num_head);
  const auto& q = std::get<0>(q_k_v);
  const auto& k = std::get<1>(q_k_v);
  const auto& v = std::get<2>(q_k_v);
#ifndef NDEBUG
  debug_assert_shape(q, {B, num_head, T, dim_per_head});
  debug_assert_shape(k, {B, num_head, T, dim_per_head});
  debug_assert_shape(v, {B, num_head, T, dim_per_head});
#endif

  // shape: [B, num_head, T, T]
  auto qkt = bmm_nt(q, k);
#ifndef NDEBUG
  debug_assert_shape(qkt, {B, num_head, T, T});
#endif

  // shape: [B, num_head, T, T]
  masked_softmax_dropout(qkt, mask);

  // shape: [B, num_head, T, dim_per_head]
  auto attn_ctx = bmm_nn(qkt, v);
#ifndef NDEBUG
  debug_assert_shape(attn_ctx, {B, num_head, T, dim_per_head});
#endif

  // shape: [B, T, D]
  auto attn = transform_0213(attn_ctx);
#ifndef NDEBUG
  debug_assert_shape(attn, {B, T, D});
#endif

  // shape: [B, T, D]
  auto proj = gemm_nt_bias(attn, proj_weight, proj_bias);
#ifndef NDEBUG
  debug_assert_shape(proj, {B, T, D});
#endif

  return proj;
}

} // namespace native
} // namespace at

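Note: the routine above is standard multi-head self-attention with a fused QKV projection; the mask argument is accepted but not yet applied and dropout is a placeholder (see the TODOs). A rough eager-mode sketch of the same math, for orientation only (the function and variable names below are illustrative, not part of this patch):

import math
import torch

def reference_mha(query, qkv_weight, qkv_bias, proj_weight, proj_bias, num_head):
    # query: [B, T, D]; qkv_weight: [3*D, D]; proj_weight: [D, D]
    B, T, D = query.shape
    dim_per_head = D // num_head
    qkv = torch.nn.functional.linear(query, qkv_weight, qkv_bias)   # gemm_nt + bias
    q, k, v = qkv.chunk(3, dim=-1)
    def split_heads(t):
        # [B, T, D] -> [B, num_head, T, dim_per_head]
        return t.view(B, T, num_head, dim_per_head).transpose(1, 2)
    q = split_heads(q) / math.sqrt(dim_per_head)                     # transform_bias_rescale_qkv
    k, v = split_heads(k), split_heads(v)
    attn = torch.softmax(q @ k.transpose(-2, -1), dim=-1)            # bmm_nt + softmax
    ctx = (attn @ v).transpose(1, 2).reshape(B, T, D)                # bmm_nn + transform_0213
    return torch.nn.functional.linear(ctx, proj_weight, proj_bias)   # gemm_nt_bias
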
@ -13,6 +13,7 @@
#include <ATen/native/LinearAlgebra.h>
#include <ATen/native/BatchLinearAlgebra.h>
#include <ATen/native/cuda/linalg/BatchLinearAlgebraLib.h>
#include <ATen/native/cuda/linalg/MagmaUtils.h>
#include <ATen/native/cpu/zmath.h>

#if AT_MAGMA_ENABLED()

88
aten/src/ATen/native/cuda/linalg/MagmaUtils.h
Normal file
@ -0,0 +1,88 @@
#pragma once
#include <ATen/cuda/CUDAConfig.h>

#if AT_MAGMA_ENABLED()
#include <magma_types.h>
#include <magma_v2.h>
#endif

namespace at {
namespace native {

#if AT_MAGMA_ENABLED()

// RAII for a MAGMA Queue
struct MAGMAQueue {

  // A default-constructed queue would be destroyed without ever having
  // been initialized, so disallow it.
  MAGMAQueue() = delete;

  // Constructor
  explicit MAGMAQueue(int64_t device_id) {
    cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
    // MAGMA operations are numerically sensitive, so TF32 should be off
    // regardless of the global flag.
    TORCH_CUDABLAS_CHECK(cublasGetMathMode(handle, &original_math_mode));
    TORCH_CUDABLAS_CHECK(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH));
#endif
    magma_queue_create_from_cuda(
        device_id,
        at::cuda::getCurrentCUDAStream(),
        handle,
        at::cuda::getCurrentCUDASparseHandle(),
        &magma_queue_);
  }

  // Getter
  magma_queue_t get_queue() const { return magma_queue_; }

  // Destructor
  ~MAGMAQueue() {
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
    // We manually set the math mode to CUBLAS_DEFAULT_MATH above, so
    // restore the original math mode here.
    cublasHandle_t handle = magma_queue_get_cublas_handle(magma_queue_);
    cublasSetMathMode(handle, original_math_mode);
#endif
    magma_queue_destroy(magma_queue_);
  }

 private:
  magma_queue_t magma_queue_;
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
  cublasMath_t original_math_mode;
#endif
};

static inline magma_int_t magma_int_cast(int64_t value, const char* varname) {
  auto result = static_cast<magma_int_t>(value);
  if (static_cast<int64_t>(result) != value) {
    AT_ERROR("magma: The value of ", varname, "(", (long long)value,
             ") is too large to fit into a magma_int_t (", sizeof(magma_int_t), " bytes)");
  }
  return result;
}

// MAGMA functions that don't take a magma_queue_t aren't stream safe.
// Work around this by synchronizing with the default stream.
struct MagmaStreamSyncGuard {
  MagmaStreamSyncGuard() {
    auto stream = at::cuda::getCurrentCUDAStream();
    if (stream != at::cuda::getDefaultCUDAStream()) {
      at::cuda::stream_synchronize(stream);
    }
  }

  ~MagmaStreamSyncGuard() noexcept(false) {
    auto default_stream = at::cuda::getDefaultCUDAStream();
    if (at::cuda::getCurrentCUDAStream() != default_stream) {
      at::cuda::stream_synchronize(default_stream);
    }
  }
};
#endif

} // namespace native
} // namespace at

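Note: the MAGMAQueue constructor pins the borrowed cuBLAS handle to CUBLAS_DEFAULT_MATH because MAGMA-backed factorizations are numerically sensitive. For orientation, the global switch it guards against is the usual TF32 flag; a hedged sketch follows (whether a particular linalg call actually routes through MAGMA depends on the build and backend selection):

import torch

torch.backends.cuda.matmul.allow_tf32 = True   # global TF32 opt-in for matmuls
a = torch.randn(512, 512, device="cuda")
# Calls that are routed through MAGMA should still run with full FP32
# accumulation, because MAGMAQueue resets the handle to CUBLAS_DEFAULT_MATH.
q, r = torch.linalg.qr(a)
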
@ -2549,16 +2549,6 @@
    CUDA: layer_norm_cuda
  CompositeImplicitAutograd: math_native_layer_norm

- func: _native_multi_head_self_attention(Tensor query, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, int num_head, Tensor? mask=None) -> Tensor
  dispatch:
    CPU: multi_head_self_attention_cpu
    CUDA: multi_head_self_attention_cuda

- func: _transform_bias_rescale_qkv(Tensor qkv, Tensor qkv_bias, int num_head) -> (Tensor, Tensor, Tensor)
  dispatch:
    CPU: transform_bias_rescale_qkv_op_cpu
    CUDA: transform_bias_rescale_qkv_op_cuda

- func: native_layer_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
  dispatch:
    CPU: layer_norm_backward_cpu

@ -6066,7 +6056,7 @@
- func: scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
  variants: function, method

- func: _scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *, int? output_size=None) -> Tensor
- func: scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *, int? output_size=None) -> Tensor
  variants: function, method
  dispatch:
    CPU: scatter_reduce_two_cpu

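Note: with the rename above, scatter_reduce becomes the public overload with the schema shown. A small usage sketch consistent with this revision's signature (the op's signature has since evolved upstream, so treat this as specific to the schema in this diff):

import torch

input = torch.tensor([1., 2., 3., 4., 5., 6.])
index = torch.tensor([0, 1, 0, 1, 2, 1])

# Reduce entries of `input` into `output_size` buckets selected by `index`.
out = torch.scatter_reduce(input, 0, index, "sum", output_size=3)
print(out)  # tensor([ 4., 12.,  5.])
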
@ -18,6 +18,10 @@ void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  if (all(lessThan(pos, uBlock.size.xyz))) {
    imageStore(uOutput, pos, tanh(texelFetch(uInput, pos, 0)));
    const vec4 intex = texelFetch(uInput, pos, 0);
    imageStore(
      uOutput,
      pos,
      tanh(clamp(intex, -15.0, 15.0)));
  }
}

@ -17,6 +17,10 @@ void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  if (all(lessThan(pos, uBlock.size.xyz))) {
    imageStore(uOutput, pos, tanh(imageLoad(uOutput, pos)));
    const vec4 intex = imageLoad(uOutput, pos);
    imageStore(
      uOutput,
      pos,
      tanh(clamp(intex, -15.0, 15.0)));
  }
}

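Note: the clamp to [-15, 15] added in both shaders only protects the exponentials inside tanh from overflowing in reduced precision; tanh already saturates to ±1 well before |x| = 15, so results are unchanged. A quick eager-mode check (illustrative only; the matching test change below multiplies its inputs by 30 to exercise this range):

import torch

x = torch.tensor([14.0, 15.0, 40.0])
print(torch.tanh(x))                          # all 1.0 to float32 precision
print(torch.tanh(torch.clamp(x, -15, 15)))    # identical: the clamp is numerically a no-op
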
@ -322,6 +322,13 @@ Tensor add_tensor(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const Scalar& alpha) {
  if (other_arg.sizes().size() == 0) {
    return arithmetic_scalar(
        self_arg,
        other_arg.item<float>(),
        c10::optional<Scalar>(alpha.to<float>()),
        VK_KERNEL(add_scalar));
  }
  return arithmetic_tensor(
      self_arg, other_arg, c10::optional<Scalar>(alpha), VK_KERNEL(add));
}

@ -354,6 +361,13 @@ Tensor sub_tensor(
    const Tensor& self_arg,
    const Tensor& other_arg,
    const Scalar& alpha) {
  if (other_arg.sizes().size() == 0) {
    return arithmetic_scalar(
        self_arg,
        other_arg.item<float>(),
        c10::optional<Scalar>(-1 * alpha.to<float>()),
        VK_KERNEL(add_scalar));
  }
  return arithmetic_tensor(
      self_arg, other_arg, c10::optional<Scalar>(alpha), VK_KERNEL(sub));
}

@ -374,6 +388,13 @@ Tensor& mul_scalar_(Tensor& self, const Scalar& other) {
}

Tensor mul_tensor(const Tensor& self_arg, const Tensor& other_arg) {
  if (other_arg.sizes().size() == 0) {
    return arithmetic_scalar(
        self_arg,
        other_arg.item<float>(),
        c10::optional<Scalar>(),
        VK_KERNEL(mul_scalar));
  }
  return arithmetic_tensor(
      self_arg, other_arg, c10::optional<Scalar>(), VK_KERNEL(mul));
}

@ -400,6 +421,13 @@ Tensor& div_scalar_(Tensor& self, const Scalar& other) {
}

Tensor div_tensor(const Tensor& self_arg, const Tensor& other_arg) {
  if (other_arg.sizes().size() == 0) {
    return arithmetic_scalar(
        self_arg,
        1.0 / other_arg.item<float>(),
        c10::optional<Scalar>(),
        VK_KERNEL(mul_scalar));
  }
  return arithmetic_tensor(
      self_arg, other_arg, c10::optional<Scalar>(), VK_KERNEL(div));
}

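Note: the four changes above share one idea: when `other` is a zero-dimensional tensor, fall back to the existing scalar kernels (division becomes multiplication by the reciprocal). From the Python side this is the case hit by expressions like the following sketch (requires a Vulkan-enabled build; illustrative only):

import torch

x = torch.rand(1, 3, 8, 8)
scale = torch.tensor(2.0)        # zero-dimensional tensor, not a Python float

# With this patch the division is lowered to the mul_scalar kernel with 1/scale.
y = (x.vulkan() / scale).cpu()
print(torch.allclose(y, x / scale, atol=1e-5))
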
@ -1551,7 +1551,7 @@ TEST(VulkanAPITest, tanh) {
    return;
  }

  const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
  const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat)) * 30;
  const auto in_vulkan = in_cpu.vulkan();

  const auto out_cpu = at::tanh(in_cpu);

@ -1570,7 +1570,7 @@ TEST(VulkanAPITest, tanh_) {
    return;
  }

  auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
  auto cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat)) * 30;
  auto vulkan = cpu.vulkan();

  at::tanh_(cpu);

@ -35,6 +35,58 @@ static void cat_op_channel_perf(benchmark::State& state) {
  }
}

static void gru_op_perf(benchmark::State& state) {
  // Guard
  if (!at::is_vulkan_available()) {
    return;
  }

  // Arrange
  const int H_in = static_cast<int>(state.range(0));   // input_size
  const int H_out = static_cast<int>(state.range(1));  // hidden_size
  const int num_layers = static_cast<int>(state.range(2));
  const double gru_dropout = .0;
  const bool has_biases = true;
  const bool train = false;
  const bool bidirectional = false;
  const bool batch_first = true;
  const auto in_cpu = at::rand({1, 1, H_in}, at::device(at::kCPU).dtype(at::kFloat));
  const auto h0_cpu = at::rand({num_layers, 1, H_out}, at::device(at::kCPU).dtype(at::kFloat));

  c10::List<at::Tensor> weight_ih_l; // shape (3 * hidden_size, input_size)
  c10::List<at::Tensor> weight_hh_l; // shape (3 * hidden_size, hidden_size)
  c10::List<at::Tensor> bias_ih_l;   // shape (3 * hidden_size)
  c10::List<at::Tensor> bias_hh_l;   // shape (3 * hidden_size)
  for (int i = 0; i < num_layers; ++i) {
    weight_ih_l.emplace_back(at::rand({3 * H_out, H_in}, at::device(at::kCPU).dtype(at::kFloat)));
    weight_hh_l.emplace_back(at::rand({3 * H_out, H_out}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_ih_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
    bias_hh_l.emplace_back(at::rand({3 * H_out}, at::device(at::kCPU).dtype(at::kFloat)));
  }

  // This guard runs inference instead of training, to avoid the following error:
  // C++ exception with description "0INTERNAL ASSERT FAILED at "xplat/caffe2/aten/src/ATen/core/boxing/KernelFunction.cpp":31, please report a bug to PyTorch. aten::gru.input has kernels registered to both CompositeImplicitAutograd and a backend mapped to AutogradOther. This makes the backend kernel unreachable; the dispatcher will always prefer the CompositeImplicitAutograd lowering (see Note [Ambiguity in AutogradOther kernel]). If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated Autograd dispatch key for the backend.
  // If you only want to run inference instead of training, add `c10::InferenceMode mode;` before model.forward(). Note this guard is only available in C++ but not Python at present.
  c10::InferenceMode mode;

  // Act
  while (state.KeepRunning()) {
    // weights/biases should always be on the CPU.
    const auto out_vulkan = at::gru(in_cpu.vulkan(), h0_cpu.vulkan(), { weight_ih_l.get(0), weight_hh_l.get(0), bias_ih_l.get(0), bias_hh_l.get(0),
        weight_ih_l.get(1), weight_hh_l.get(1), bias_ih_l.get(1), bias_hh_l.get(1) },
        has_biases, num_layers, gru_dropout, train, bidirectional, batch_first);

    auto vulkan_output = std::get<0>(out_vulkan);
    auto vulkan_hidden = std::get<1>(out_vulkan);

    // To avoid out-of-memory issues, release resources by waiting on and flushing all GPU operations.
    at::native::vulkan::api::context()->wait(vulkan_output);
    at::native::vulkan::api::context()->wait(vulkan_hidden);
    at::native::vulkan::api::context()->flush();
  }
}

static void CommonBenchmarkSettings(benchmark::internal::Benchmark* b) {
  b->Unit(benchmark::kMillisecond);
  b->ArgNames({"N", "C", "H", "W"});

@ -48,6 +100,7 @@ BENCHMARK(cat_op_channel_perf)->Apply(CommonBenchmarkSettings)->Threads(1)->Iter
BENCHMARK(cat_op_channel_perf)->Apply(CommonBenchmarkSettings)->Threads(1)->Iterations(5000)->Args({3, 4, 221, 193}); // small multiple of 4 channels
BENCHMARK(cat_op_channel_perf)->Apply(CommonBenchmarkSettings)->Threads(1)->Iterations(5000)->Args({3, 3, 221, 193}); // small non-multiple of 4 channels
BENCHMARK(cat_op_channel_perf)->Apply(CommonBenchmarkSettings)->Threads(3)->Iterations(1000)->Args({3, 40, 221, 193}); // big multiple of 4 channels (multi-thread)
BENCHMARK(gru_op_perf)->Apply(CommonBenchmarkSettings)->Threads(1)->Iterations(1000)->Args({384, 384, 2}); // McLaren Model inputs

BENCHMARK_MAIN();

#endif /* USE_VULKAN_API */

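Note: the benchmark drives at::gru directly with flat per-layer weights and Vulkan inputs. A rough eager-mode equivalent of the same workload, using the benchmark's arguments (input_size = hidden_size = 384, num_layers = 2, batch_first), for orientation only:

import torch

# CPU stand-in for the benchmark's workload; the benchmark itself moves the
# input and hidden state to Vulkan before calling at::gru.
gru = torch.nn.GRU(input_size=384, hidden_size=384, num_layers=2, batch_first=True)
x = torch.rand(1, 1, 384)
h0 = torch.rand(2, 1, 384)

with torch.inference_mode():    # mirrors the c10::InferenceMode guard above
    out, hn = gru(x, h0)
print(out.shape, hn.shape)      # torch.Size([1, 1, 384]) torch.Size([2, 1, 384])
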
83
benchmarks/static_runtime/test_cpu_fusion.cc
Normal file
@ -0,0 +1,83 @@
#include <gtest/gtest.h>
#include <torch/csrc/jit/runtime/static/impl.h>
#include <torch/torch.h>

#include "test_utils.h"

using namespace torch;
using namespace torch::jit;
using namespace torch::jit::test;

TEST(CpuFusion, Simple) {
  const auto simple_script = R"JIT(
  def forward(self, a, b):
    return (a + b).relu().tanh()
  )JIT";

  Module m("module");
  m.define(simple_script);

  StaticModuleOptions opts; // start with the defaults.
  opts.enable_tensorexpr_fusion = true;

  auto input1 = at::randn({2, 3});
  auto input2 = at::ones({2, 3});

  auto smodule = StaticModule(m, /* is_frozen */ false, opts, {input1, input2});
  StaticRuntime runtime(smodule);

  // Test with sample inputs
  {
    auto actual = runtime({input1, input2}, {});
    auto expect = at::tanh(at::relu(input1 + input2));
    EXPECT_TRUE(at::allclose(expect, actual.toTensor()));
  }

  // Test with different inputs
  {
    auto new_input1 = at::randn({5, 14});
    auto new_input2 = at::randn({5, 14});
    auto actual = runtime({new_input1, new_input2}, {});
    auto expect = at::tanh(at::relu(new_input1 + new_input2));
    EXPECT_TRUE(at::allclose(expect, actual.toTensor()));
  }
}

TEST(CpuFusion, FallbackGraph) {
  const auto simple_script = R"JIT(
  def forward(self, a, b):
    return (a + b).relu().tanh()
  )JIT";

  Module m("module");
  m.define(simple_script);

  StaticModuleOptions opts; // start with the defaults.
  opts.enable_tensorexpr_fusion = true;

  auto sample_input1 = at::randn({2, 3});
  auto sample_input2 = at::ones({2, 3});
  auto smodule = StaticModule(
      m, /* is_frozen */ false, opts, {sample_input1, sample_input2});

  StaticRuntime runtime(smodule);

  // The sample inputs above were contiguous. Now, use a strided input
  // to trigger running the fallback graph.
  {
    auto input1 = at::narrow(at::randn({2, 6}), 1, 0, 3);
    auto input2 = at::ones({2, 3});
    auto expect = at::tanh(at::relu(input1 + input2));
    auto actual = runtime({input1, input2}, {});
    EXPECT_TRUE(at::allclose(expect, actual.toTensor()));
  }

  // Test with strided inputs of different size.
  {
    auto input1 = at::narrow(at::randn({10, 30}), 1, 0, 25);
    auto input2 = at::randn({10, 25});
    auto expect = at::tanh(at::relu(input1 + input2));
    auto actual = runtime({input1, input2}, {});
    EXPECT_TRUE(at::allclose(expect, actual.toTensor()));
  }
}

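Note: both tests script the same three-op chain and compare the static runtime's output against eager. The static runtime itself has no Python entry point used here, but the equivalent eager/TorchScript check looks roughly like this (illustrative only):

import torch

class M(torch.nn.Module):
    def forward(self, a, b):
        return (a + b).relu().tanh()

scripted = torch.jit.script(M())
a, b = torch.randn(2, 3), torch.ones(2, 3)
expected = torch.tanh(torch.relu(a + b))
assert torch.allclose(scripted(a, b), expected)
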
@ -180,35 +180,48 @@ class vkRunner final : public Runner<T> {
  virtual c10::IValue run(
      T& module,
      const std::vector<c10::IValue>& inputs) override {
    // Upload the input tensor(s) to GPU memory.
    inputs_.clear();
    inputs_.reserve(inputs.size());
    for (const auto& input : inputs) {
      if (input.isTensor()) {
        inputs_.emplace_back(input.toTensor().vulkan());
      }
      else if (input.isList()) {
        const c10::List<c10::IValue> input_as_list = input.toList();
        c10::List<at::Tensor> input_vk_list;
        input_vk_list.reserve(input_as_list.size());
        for (int i=0; i < input_as_list.size(); ++i) {
          const c10::IValue element = input_as_list.get(i);
          if (element.isTensor()) {
            input_vk_list.emplace_back(element.toTensor().vulkan());
          }
          else {
            CAFFE_THROW("Input of type c10::List must only contain Tensors!");
          }

    if (inputs_.size() == 0) {
      // Upload the input tensor(s) to GPU memory.
      inputs_.clear();
      inputs_.reserve(inputs.size());
      for (const auto& input : inputs) {
        if (input.isTensor()) {
          inputs_.emplace_back(at::rand(input.toTensor().sizes()).vulkan());
        }
        else if (input.isTensorList()) {
          const c10::List<at::Tensor> input_as_list = input.toTensorList();
          c10::List<at::Tensor> input_vk_list;
          input_vk_list.reserve(input_as_list.size());
          for (int i=0; i < input_as_list.size(); ++i) {
            const at::Tensor element = input_as_list.get(i);
            input_vk_list.emplace_back(at::rand(element.sizes()).vulkan());
          }
          inputs_.emplace_back(c10::IValue(input_vk_list));
        }
        else {
          CAFFE_THROW("Inputs must only contain IValues of type c10::Tensor or c10::TensorList!");
        }
        inputs_.emplace_back(c10::IValue(input_vk_list));
      }
      else {
        CAFFE_THROW("Inputs must only contain IValues of type c10::Tensor or c10::List!");
      }
    }

    // Run, and download the output tensor to system memory.
    return module.forward(inputs_).toTensor().cpu();
    c10::IValue output = module.forward(inputs_);
    if (output.isTensor()) {
      return output.toTensor().cpu();
    }
    else if (output.isTensorList()) {
      return output.toTensorList().get(0).cpu();
    }
    else if (output.isList()) {
      return output.toList().get(0).toTensor().cpu();
    }
    else if (output.isTuple()) {
      return output.toTuple()->elements()[0].toTensor().cpu();
    }
    else {
      CAFFE_THROW("Outputs must only be either c10::Tensor or c10::TensorList!");
    };
  }

 private:

@ -44,7 +44,7 @@ class BisectPercentileOp final : public Operator<Context> {
        pct_upper_.size(),
        "Feature (raw) data and upper bound dimension should match.");
    n_features = pct_lens_.size();
    index.reserve(n_features + 1);
    index.resize(n_features + 1);
    index[0] = 0;
    for (int i = 1; i <= n_features; ++i) {
      index[i] = index[i - 1] + pct_lens_[i - 1];

@ -115,13 +115,10 @@ class BisectPercentileOp final : public Operator<Context> {
      int lo,
      int hi,
      float val) {
    int mid;
    bool low_cond, high_cond;

    while (lo < hi) {
      mid = (lo + hi) >> 1;
      low_cond = (data[mid] <= val);
      high_cond = (val < data[mid + 1]);
      const auto mid = lo + (hi - lo) / 2;
      const bool low_cond = (data[mid] <= val);
      const bool high_cond = (val < data[mid + 1]);
      if (low_cond && high_cond) {
        return mid;
      } else if (!low_cond) {

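Note: the binary-search change above replaces (lo + hi) >> 1 with lo + (hi - lo) / 2, which computes the same midpoint without the intermediate sum that can overflow a 32-bit int. A small illustration of the failure mode (Python, using ctypes to emulate 32-bit wraparound; values are illustrative):

import ctypes

lo, hi = 2_000_000_000, 2_100_000_000
bad = ctypes.c_int32(lo + hi).value >> 1   # 32-bit wraparound gives a negative "midpoint"
good = lo + (hi - lo) // 2                 # 2050000000, the intended midpoint
print(bad, good)
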
@ -1,13 +1,16 @@
import errno
import os
import shutil
import tempfile
import unittest
from collections import namedtuple
from typing import List

import caffe2.python.hypothesis_test_util as htu
import hypothesis.strategies as st
import numpy as np
import torch
from torch import Tensor
from caffe2.proto import caffe2_pb2
from caffe2.python import core, test_util, workspace, model_helper, brew
from hypothesis import given, settings

@ -783,8 +786,7 @@ class MyModule(torch.jit.ScriptModule):
        return x + y + z

    @torch.jit.script_method
    def multi_input_tensor_list(self, tensor_list):  # pyre-ignore: PT type annotations
        # type: (List[Tensor]) -> Tensor
    def multi_input_tensor_list(self, tensor_list: List[Tensor]) -> Tensor:
        return tensor_list[0] + tensor_list[1] + tensor_list[2]

    @torch.jit.script_method

@ -115,11 +115,13 @@ constexpr uint64_t kMinProducedFileFormatVersion = 0x3L;
// torchscript constant table. Also update tensor storage schema adapting to
// the unify format, the root key of tensor storage is updated from {index} to
// {the_pointer_value_the_tensor.storage}, for example:
// `140245072983168.storage` Forward-compatibility change. 0x6L: Implicit
// opereator versioning using number of specified argument. Refer to the
// summary of https://github.com/pytorch/pytorch/pull/56845 for details. 0x7L:
// Enable support for operators with default arguments plus out arguments.
// 0x8L: Emit promoted operators as instructions
// `140245072983168.storage` Forward-compatibility change.
// 0x6L: Implicit operator versioning using the number of specified arguments.
// Refer to the summary of https://github.com/pytorch/pytorch/pull/56845 for details.
// 0x7L: Enable support for operators with default arguments plus out arguments.
// See https://github.com/pytorch/pytorch/pull/63651 for details.
// 0x8L: Emit promoted operators as instructions.
// See https://github.com/pytorch/pytorch/pull/71662 for details.
constexpr uint64_t kProducedBytecodeVersion = 0x8L;

// static_assert(

@ -593,6 +593,7 @@ Tensor class reference
    Tensor.scatter_
    Tensor.scatter_add_
    Tensor.scatter_add
    Tensor.scatter_reduce
    Tensor.select
    Tensor.select_scatter
    Tensor.set_

@ -118,6 +118,7 @@ Indexing, Slicing, Joining, Mutating Ops
    select_scatter
    slice_scatter
    scatter_add
    scatter_reduce
    split
    squeeze
    stack

@ -5,6 +5,7 @@
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/ir/irparser.h>
#include <torch/csrc/jit/passes/tensorexpr_fuser.h>
#include <torch/csrc/jit/runtime/interpreter.h>
#include <torch/csrc/jit/testing/file_check.h>
#include <sstream>

@ -350,5 +351,52 @@ TEST(TEFuserPass, FuserPass_WhereList) {
  testing::FileCheck().check_not("prim::TensorExprGroup")->run(*g);
}

TEST(TEFuserPass, DynamicShapeFusion) {
  WithCPUFuser cf;
  const auto graph_string = R"IR(
    graph(%0 : Float(10, 5, strides=[5, 1], device=cpu),
          %1 : Float(10, 5, strides=[5, 1], device=cpu)):
      %2 : Float(10, 5, strides=[5, 1], device=cpu) = aten::mul(%0, %1)
      %3 : Float(10, 5, strides=[5, 1], device=cpu) = aten::mul(%2, %1)
      return (%3))IR";
  auto g = std::make_shared<Graph>();
  torch::jit::parseIR(graph_string, g.get());

  g->lint();
  FuseTensorExprs(
      g,
      /* min_group_size = */ 2,
      /* add_composed_op = */ true,
      /* fuse_to_dynamic_shapes = */ true);
  Code code(g, "");

  testing::FileCheck()
      .check("prim::TensorExprDynamicGroup_")
      ->check("prim::TensorExprDynamicGuard")
      ->check("prim::TensorExprGroup_")
      ->run(*g);

  auto run_and_compare = [&](const std::vector<at::Tensor>& inputs) {
    TORCH_INTERNAL_ASSERT(inputs.size() == 2);

    auto ref = at::mul(at::mul(inputs[0], inputs[1]), inputs[1]);

    InterpreterState interp(code);
    Stack stack(inputs.begin(), inputs.end());
    interp.run(stack);
    at::Tensor out = pop(stack).toTensor();
    ASSERT_TRUE(at::allclose(out, ref));
  };

  std::vector<at::Tensor> inputs = {at::rand({10, 5}), at::rand({10, 5})};
  run_and_compare(inputs);

  std::vector<at::Tensor> inputs2 = {at::rand({20, 5}), at::rand({20, 5})};
  run_and_compare(inputs2);

  std::vector<at::Tensor> inputs3 = {at::rand({25, 60}), at::rand({25, 60})};
  run_and_compare(inputs3);
}

} // namespace jit
} // namespace torch

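Note: the new test checks that one fused group behind a dynamic-shape guard serves several input shapes. An eager-mode sketch of the shape-polymorphic computation the test exercises (this only checks the math, not the fusion itself; illustrative only):

import torch

@torch.jit.script
def f(a, b):
    return (a * b) * b

# The same compiled graph should serve all of these shapes once fusion is
# generalized over dynamic shapes; here we just verify the reference math.
for shape in [(10, 5), (20, 5), (25, 60)]:
    a, b = torch.rand(shape), torch.rand(shape)
    assert torch.allclose(f(a, b), a * b * b)
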
@ -33,7 +33,7 @@ from torch.distributed.algorithms.join import Join, Joinable, JoinHook
|
|||
from torch.distributed.optim import ZeroRedundancyOptimizer
|
||||
from torch.distributed.optim.zero_redundancy_optimizer import _broadcast_object
|
||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||
from torch.optim import SGD
|
||||
from torch.optim import SGD, AdamW
|
||||
from torch.testing._internal import common_distributed, common_utils
|
||||
from torch.testing._internal.common_utils import (
|
||||
TEST_WITH_ASAN,
|
||||
|
|
@ -249,27 +249,54 @@ class TestZeroRedundancyOptimizerSingleRank(TestZeroRedundancyOptimizer):
|
|||
|
||||
def test_constructor(self):
|
||||
"""Check the robustness of the ZeroRedundancyOptimizer constructor by
|
||||
passing different values for `params`"""
|
||||
passing different values for the ``params`` argument."""
|
||||
self.dist_init(self.rank)
|
||||
|
||||
m = torch.nn.Linear(1, 1)
|
||||
# (input, expected error)
|
||||
inputs = [
|
||||
m = torch.nn.Sequential(
|
||||
torch.nn.Linear(5, 10),
|
||||
torch.nn.Linear(10, 10),
|
||||
torch.nn.Linear(10, 10),
|
||||
)
|
||||
|
||||
# Test various constructor inputs in the form: (input, expected error)
|
||||
ctor_inputs = [
|
||||
([], ValueError), # empty parameter list
|
||||
(torch.randn(1), TypeError), # non-iterable: `torch.Tensor`
|
||||
(1.2, TypeError), # non-iterable: `float`
|
||||
([{"params": m.parameters()}], TypeError), # iterable of dict
|
||||
(list(m.parameters()) + [42], TypeError), # iterable containing non-`torch.Tensor`
|
||||
([
|
||||
{"params": [l.weight for l in m]},
|
||||
{"params": [l.bias for l in m]},
|
||||
], None), # iterable of dict
|
||||
(list(m.parameters()) + [42], TypeError), # iterable containing invalid type
|
||||
(m.parameters(), None), # `params` as a generator
|
||||
(list(m.parameters()), None) # `params` as a list
|
||||
]
|
||||
|
||||
for input, error in inputs:
|
||||
if (error):
|
||||
for ctor_input, error in ctor_inputs:
|
||||
if error:
|
||||
with self.assertRaises(error):
|
||||
ZeroRedundancyOptimizer(input, optimizer_class=SGD, lr=0.1)
|
||||
ZeroRedundancyOptimizer(ctor_input, optimizer_class=SGD, lr=0.01)
|
||||
else:
|
||||
ZeroRedundancyOptimizer(input, optimizer_class=SGD, lr=0.1)
|
||||
ZeroRedundancyOptimizer(ctor_input, optimizer_class=SGD, lr=0.01)
|
||||
|
||||
# Test constructing with multiple parameter groups more thoroughly
|
||||
weight_decay = 0.01
|
||||
lr = 0.01
|
||||
betas = (0.9, 0.999)
|
||||
eps = 1e-8
|
||||
params = [
|
||||
{"params": [l.weight for l in m], "weight_decay": 0.},
|
||||
{"params": [l.bias for l in m], "weight_decay": weight_decay},
|
||||
]
|
||||
o = ZeroRedundancyOptimizer(
|
||||
params, optimizer_class=AdamW,
|
||||
lr=lr, betas=betas, eps=eps,
|
||||
)
|
||||
assert len(o.param_groups) == 2, \
|
||||
f"Expected 2 ZeRO param groups, but got {len(o.param_groups)}"
|
||||
assert len(o.optim.param_groups) == 2, \
|
||||
"Expected 2 local optimizer param groups, but got " \
|
||||
f"{len(o.optim.param_groups)}"
|
||||
|
||||
def test_same_dense_param_type(self):
|
||||
"""Check that ZeroRedundancyOptimizer raises an exception if the input
|
||||
|
|
@ -459,7 +486,76 @@ class TestZeroRedundancyOptimizerDistributed(TestZeroRedundancyOptimizer):
|
|||
all_trainable()
|
||||
some_trainable()
|
||||
|
||||
@common_distributed.skip_if_no_gpu
|
||||
def test_multiple_param_groups(self):
|
||||
"""
|
||||
Tests parity between constructing ZeRO with multiple parameter groups
|
||||
upfront versus adding parameter groups to ZeRO after construction
|
||||
versus a non-sharded optimizer.
|
||||
"""
|
||||
self.dist_init(self.rank)
|
||||
|
||||
model1 = torch.nn.Sequential(
|
||||
torch.nn.Linear(5, 10),
|
||||
torch.nn.Linear(10, 10),
|
||||
torch.nn.Linear(10, 5),
|
||||
)
|
||||
model2 = copy.deepcopy(model1)
|
||||
model3 = copy.deepcopy(model1)
|
||||
model1 = model1.to(self.device)
|
||||
model2 = model2.to(self.device)
|
||||
model3 = model3.to(self.device)
|
||||
|
||||
batch_size = 8
|
||||
num_iters = 3
|
||||
inputs = [
|
||||
torch.randn(batch_size, 5).to(self.device) for _ in range(num_iters)
|
||||
]
|
||||
wd = 0.01
|
||||
lr = 0.01
|
||||
# Construct `optim1` with both parameter groups upfront
|
||||
optim1 = ZeroRedundancyOptimizer(
|
||||
[
|
||||
{"params": [l.weight for l in model1], "weight_decay": 0.},
|
||||
{"params": [l.bias for l in model1], "weight_decay": wd},
|
||||
],
|
||||
optimizer_class=AdamW, lr=lr,
|
||||
)
|
||||
# Construct `optim2` by adding the second parameter after
|
||||
optim2 = ZeroRedundancyOptimizer(
|
||||
[l.weight for l in model2],
|
||||
optimizer_class=AdamW, lr=lr, weight_decay=0.,
|
||||
)
|
||||
optim2.add_param_group(
|
||||
{"params": [l.bias for l in model2], "weight_decay": wd}
|
||||
)
|
||||
# Construct `optim3` as a non-sharded optimizer
|
||||
optim3 = AdamW(
|
||||
[
|
||||
{"params": [l.weight for l in model3], "weight_decay": 0.},
|
||||
{"params": [l.bias for l in model3], "weight_decay": wd},
|
||||
], lr=lr,
|
||||
)
|
||||
|
||||
# Check parity over a few iterations
|
||||
for iter in range(num_iters):
|
||||
for model, optim in (
|
||||
(model1, optim1), (model2, optim2), (model3, optim3),
|
||||
):
|
||||
optim.zero_grad()
|
||||
out = model(inputs[iter])
|
||||
loss = out.sum()
|
||||
loss.backward()
|
||||
optim.step()
|
||||
|
||||
for layer1, layer2, layer3 in zip(model1, model2, model3):
|
||||
assert torch.allclose(layer1.weight, layer2.weight)
|
||||
assert torch.allclose(layer1.weight, layer3.weight)
|
||||
assert torch.allclose(layer1.bias, layer2.bias)
|
||||
assert torch.allclose(layer1.bias, layer3.bias)
|
||||
|
||||
@common_distributed.skip_if_lt_x_gpu(2)
|
||||
@common_distributed.skip_if_rocm
|
||||
def test_collect_shards(self):
|
||||
""" Check the state consolidation mechanism, and the state dict exposed by ZeroRedundancyOptimizer"""
|
||||
self.dist_init(self.rank)
|
||||
|
|
|
|||
|
|
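Note: the ZeroRedundancyOptimizer test changes above rely on the constructor accepting a list of parameter-group dicts, mirroring a plain optimizer. A condensed usage sketch grounded in that test (assumes an initialized process group, which the test's dist_init provides; illustrative only):

import torch
from torch.distributed.optim import ZeroRedundancyOptimizer
from torch.optim import AdamW

model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 10))
optim = ZeroRedundancyOptimizer(
    [
        {"params": [l.weight for l in model], "weight_decay": 0.0},
        {"params": [l.bias for l in model], "weight_decay": 0.01},
    ],
    optimizer_class=AdamW,
    lr=0.01,
)
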
@ -106,7 +106,8 @@ ALLOW_LIST = [
    ("aten::_scatter_reduce", datetime.date(2022, 1, 31)),
    ("aten::native_multi_head_self_attention", datetime.date(9999, 1, 1)),
    ("aten::_native_multi_head_self_attention", datetime.date(9999, 1, 1)),
    ("aten::scatter_reduce.two", datetime.date(2022, 3, 15)),
    ("aten::_transform_bias_rescale_qkv", datetime.date(9999, 1, 1)),
    ("aten::_scatter_reduce.two", datetime.date(9999, 1, 1)),
]

ALLOW_LIST_COMPILED = [

|||
|
|
@ -41,6 +41,7 @@ from collections import OrderedDict
|
|||
from torch.nn.utils.rnn import PackedSequence
|
||||
from torch.onnx import CheckerError, register_custom_op_symbolic, unregister_custom_op_symbolic
|
||||
from torch.onnx.symbolic_helper import _unimplemented
|
||||
from torch.onnx.utils import unpack_quantized_tensor
|
||||
|
||||
|
||||
def flatten_tuples(elem):
|
||||
|
|
@ -108,9 +109,16 @@ def inline_flatten_list(inputs, res_list):
|
|||
return res_list
|
||||
|
||||
|
||||
def unpack_to_numpy(value):
|
||||
value_unpacked = []
|
||||
for value_ in value:
|
||||
value_unpacked.extend(unpack_quantized_tensor(value_))
|
||||
value_final = [to_numpy(v) for v in value_unpacked]
|
||||
return value_final
|
||||
|
||||
|
||||
def run_ort(ort_sess, input):
|
||||
input = flatten_tuples(input)
|
||||
input = to_numpy(input)
|
||||
input = unpack_to_numpy(flatten_tuples(input))
|
||||
ort_inputs = dict((ort_sess.get_inputs()[i].name, input) for i, input in enumerate(input))
|
||||
ort_outs = ort_sess.run(None, ort_inputs)
|
||||
return inline_flatten_list(ort_outs, [])
|
||||
|
|
@ -118,7 +126,7 @@ def run_ort(ort_sess, input):
|
|||
|
||||
def ort_compare_with_pytorch(ort_outs, output, rtol, atol):
|
||||
output, _ = torch.jit._flatten(output)
|
||||
outputs = [to_numpy(outp) for outp in output]
|
||||
outputs = unpack_to_numpy(output)
|
||||
|
||||
# compare onnxruntime and PyTorch results
|
||||
assert len(outputs) == len(ort_outs), "number of outputs differ"
|
||||
|
|
@ -5895,7 +5903,24 @@ class TestONNXRuntime(unittest.TestCase):
|
|||
return torch.pixel_shuffle(x, upscale_factor=2)
|
||||
|
||||
x = torch.randn(2, 16, 4, 3, requires_grad=True)
|
||||
y = torch.randn(4, 32, 8, 4, requires_grad=True)
|
||||
self.run_test(PixelShuffle(), x)
|
||||
self.run_test(PixelShuffle(), x, input_names=["x"],
|
||||
dynamic_axes={"x": [0, 1, 2, 3]},
|
||||
test_with_inputs=[y])
|
||||
|
||||
@skipIfUnsupportedMinOpsetVersion(9)
|
||||
def test_pixel_unshuffle(self):
|
||||
class PixelUnshuffle(torch.nn.Module):
|
||||
def forward(self, x):
|
||||
return torch.pixel_unshuffle(x, downscale_factor=2)
|
||||
|
||||
x = torch.randn(2, 16, 4, 6, requires_grad=True)
|
||||
y = torch.randn(4, 32, 8, 4, requires_grad=True)
|
||||
self.run_test(PixelUnshuffle(), x)
|
||||
self.run_test(PixelUnshuffle(), x, input_names=["x"],
|
||||
dynamic_axes={"x": [0, 1, 2, 3]},
|
||||
test_with_inputs=[y])
|
||||
|
||||
@skipIfUnsupportedMinOpsetVersion(9)
|
||||
def test_reciprocal(self):
|
||||
|
|
@ -6924,6 +6949,128 @@ class TestONNXRuntime(unittest.TestCase):
|
|||
x = torch.randn(2, 3, 5, 5)
|
||||
self.run_test(Det(), x)
|
||||
|
||||
def test_linalg_norm(self):
|
||||
class LinalgSingleDimModel(torch.nn.Module):
|
||||
def __init__(self, ord_val):
|
||||
super(LinalgSingleDimModel, self).__init__()
|
||||
self.ord = ord_val
|
||||
|
||||
def forward(self, x):
|
||||
return torch.linalg.norm(x, ord=self.ord, dim=1)
|
||||
|
||||
x = torch.randn(2, 3, 5, 5)
|
||||
self.run_test(LinalgSingleDimModel(None), x)
|
||||
self.run_test(LinalgSingleDimModel(2), x)
|
||||
self.run_test(LinalgSingleDimModel(float('inf')), x)
|
||||
self.run_test(LinalgSingleDimModel(-float('inf')), x)
|
||||
self.run_test(LinalgSingleDimModel(-4), x)
|
||||
self.run_test(LinalgSingleDimModel(1.5), x)
|
||||
|
||||
class LinalgMultiDimModel(torch.nn.Module):
|
||||
def __init__(self, ord_val):
|
||||
super(LinalgMultiDimModel, self).__init__()
|
||||
self.ord = ord_val
|
||||
|
||||
def forward(self, x):
|
||||
return torch.linalg.norm(x, ord=self.ord, dim=(0, 2))
|
||||
|
||||
x = torch.randn(2, 3, 5, 5)
|
||||
self.run_test(LinalgMultiDimModel('fro'), x)
|
||||
self.run_test(LinalgMultiDimModel(float('inf')), x)
|
||||
self.run_test(LinalgMultiDimModel(-float('inf')), x)
|
||||
self.run_test(LinalgMultiDimModel(1), x)
|
||||
self.run_test(LinalgMultiDimModel(-1), x)
|
||||
|
||||
class LinalgNoDimNoOrdModel(torch.nn.Module):
|
||||
def forward(self, x):
|
||||
return torch.linalg.norm(x)
|
||||
|
||||
x = torch.randn(2, 3, 5, 5)
|
||||
self.run_test(LinalgNoDimNoOrdModel(), x)
|
||||
y = torch.randn(2, 3)
|
||||
self.run_test(LinalgNoDimNoOrdModel(), y)
|
||||
z = torch.randn(2)
|
||||
self.run_test(LinalgNoDimNoOrdModel(), z)
|
||||
|
||||
class LinalgNoDim1DModel(torch.nn.Module):
|
||||
def __init__(self, ord_val):
|
||||
super(LinalgNoDim1DModel, self).__init__()
|
||||
self.ord = ord_val
|
||||
|
||||
def forward(self, x):
|
||||
return torch.linalg.norm(x, ord=self.ord)
|
||||
|
||||
x = torch.randn(2)
|
||||
self.run_test(LinalgNoDim1DModel(None), x)
|
||||
self.run_test(LinalgNoDim1DModel(2), x)
|
||||
self.run_test(LinalgNoDim1DModel(float('inf')), x)
|
||||
self.run_test(LinalgNoDim1DModel(-float('inf')), x)
|
||||
self.run_test(LinalgNoDim1DModel(-4), x)
|
||||
self.run_test(LinalgNoDim1DModel(1.5), x)
|
||||
|
||||
class LinalgNoDim2DModel(torch.nn.Module):
|
||||
def __init__(self, ord_val):
|
||||
super(LinalgNoDim2DModel, self).__init__()
|
||||
self.ord = ord_val
|
||||
|
||||
def forward(self, x):
|
||||
return torch.linalg.norm(x, ord=self.ord)
|
||||
|
||||
x = torch.randn(2, 3)
|
||||
self.run_test(LinalgNoDim2DModel('fro'), x)
|
||||
self.run_test(LinalgNoDim2DModel(float('inf')), x)
|
||||
self.run_test(LinalgNoDim2DModel(-float('inf')), x)
|
||||
self.run_test(LinalgNoDim2DModel(1), x)
|
||||
self.run_test(LinalgNoDim2DModel(-1), x)
|
||||
|
||||
@skipIfUnsupportedMinOpsetVersion(11)
|
||||
def test_linalg_vector_norm_zero(self):
|
||||
class LinalgVectorNormModel(torch.nn.Module):
|
||||
def __init__(self, ord_val):
|
||||
super(LinalgVectorNormModel, self).__init__()
|
||||
self.ord = ord_val
|
||||
|
||||
def forward(self, x):
|
||||
return torch.linalg.vector_norm(x, ord=self.ord)
|
||||
|
||||
x = torch.randn(2, 3, 5, 5)
|
||||
self.run_test(LinalgVectorNormModel(0), x)
|
||||
|
||||
def test_linalg_vector_norm(self):
|
||||
class LinalgVectorNormModel(torch.nn.Module):
|
||||
def __init__(self, ord_val, dim_info):
|
||||
super(LinalgVectorNormModel, self).__init__()
|
||||
self.ord = ord_val
|
||||
self.dim, self.keepdim = dim_info
|
||||
|
||||
def forward(self, x):
|
||||
return torch.linalg.vector_norm(x, ord=self.ord, dim=self.dim, keepdim=self.keepdim)
|
||||
|
||||
x = torch.randn(2, 3, 5, 5)
|
||||
ord_options = [2, float('inf'), -float('inf'), -4, 1.5]
|
||||
dim_options = [(None, False), (1, False), ((1, 2), False), ((1, 2), True)]
|
||||
for ord_val in ord_options:
|
||||
for dim_info in dim_options:
|
||||
self.run_test(LinalgVectorNormModel(ord_val, dim_info), x)
|
||||
|
||||
def test_linalg_matrix_norm(self):
|
||||
class LinalgMatrixNormModel(torch.nn.Module):
|
||||
def __init__(self, ord_val, dim_val=(-2, -1), keepdim_val=False):
|
||||
super(LinalgMatrixNormModel, self).__init__()
|
||||
self.ord = ord_val
|
||||
self.dim = dim_val
|
||||
self.keepdim = keepdim_val
|
||||
|
||||
def forward(self, x):
|
||||
return torch.linalg.matrix_norm(x, ord=self.ord, dim=self.dim, keepdim=self.keepdim)
|
||||
|
||||
x = torch.randn(2, 3, 5, 5)
|
||||
ord_options = ['fro', float('inf'), -float('inf'), 1, -1]
|
||||
for ord_val in ord_options:
|
||||
self.run_test(LinalgMatrixNormModel(ord_val), x)
|
||||
self.run_test(LinalgMatrixNormModel(ord_val, (0, 2)), x)
|
||||
self.run_test(LinalgMatrixNormModel(ord_val, (0, 2), True), x)
|
||||
|
||||
# This test checks output scalar type in the ONNX graph should not be null
|
||||
# https://github.com/pytorch/pytorch/issues/28607
|
||||
@skipIfUnsupportedMinOpsetVersion(10)
|
||||
|
|
@ -10256,6 +10403,18 @@ class TestONNXRuntime(unittest.TestCase):
|
|||
loaded_model = onnx.load_from_string(f.getvalue())
|
||||
self.assertEqual(loaded_model.graph.output[0].type.tensor_type.shape.dim[1].dim_value, 128)
|
||||
|
||||
@skipIfUnsupportedMinOpsetVersion(10)
|
||||
def test_quantized_linear(self):
|
||||
model = torch.nn.quantized.Linear(1, 2)
|
||||
input = torch.rand(1, 1)
|
||||
input_tensor = torch.quantize_per_tensor(input, 1, 0, torch.quint8)
|
||||
# Currently, we need convert the model to ScriptModule before export.
|
||||
# The reason is that PackedParams contains int (not tensor).
|
||||
# Then it fails when the exporter calls _trace_and_get_graph_from_model().
|
||||
# TODO: https://msdata.visualstudio.com/Vienna/_workitems/edit/1547858
|
||||
self.run_test(torch.jit.trace(model, input_tensor), (input_tensor,))
|
||||
self.run_test(torch.jit.script(model), (input_tensor,))
|
||||
|
||||
def make_test(name, base, layer, bidirectional, initial_state,
|
||||
variable_length, dropout, script_test_min_opset_version,
|
||||
**extra_kwargs):
|
||||
|
|
|
|||
|
|
@ -114,5 +114,42 @@ class TestONNXShapeInference(unittest.TestCase):
|
|||
slice = g.op("Slice", input, start_input, end, axis, step)
|
||||
self.run_test(g, slice.node(), expect_tensor(None, shape=(None, None)))
|
||||
|
||||
def test_broadcast_matmul(self):
|
||||
g = self.create_empty_graph()
|
||||
constant = self.insert_tensor_constant(g, torch.ones(5, 1, 2))
|
||||
constant_2 = self.insert_tensor_constant(g, torch.ones(3, 1, 2, 1))
|
||||
shape = g.op("MatMul", constant, constant_2)
|
||||
self.run_test(g, shape.node(), expect_tensor("Float", shape=(3, 5, 1, 1)))
|
||||
|
||||
# test when first input is of rank 1
|
||||
g = self.create_empty_graph()
|
||||
constant = self.insert_tensor_constant(g, torch.ones(2))
|
||||
constant_2 = self.insert_tensor_constant(g, torch.ones(3, 1, 2, 1))
|
||||
shape = g.op("MatMul", constant, constant_2)
|
||||
self.run_test(g, shape.node(), expect_tensor("Float", shape=(3, 1, 1)))
|
||||
|
||||
# test when second input is of rank 1
|
||||
g = self.create_empty_graph()
|
||||
constant = self.insert_tensor_constant(g, torch.ones(5, 1, 2))
|
||||
constant_2 = self.insert_tensor_constant(g, torch.ones(2))
|
||||
shape = g.op("MatMul", constant, constant_2)
|
||||
self.run_test(g, shape.node(), expect_tensor("Float", shape=(5, 1)))
|
||||
|
||||
# test when both inputs are of rank 1
|
||||
g = self.create_empty_graph()
|
||||
constant = self.insert_tensor_constant(g, torch.ones(2))
|
||||
constant_2 = self.insert_tensor_constant(g, torch.ones(2))
|
||||
shape = g.op("MatMul", constant, constant_2)
|
||||
self.run_test(g, shape.node(), expect_tensor("Float", shape=()))
|
||||
|
||||
def test_expand(self):
|
||||
g = self.create_empty_graph()
|
||||
input = g.addInput()
|
||||
constant = self.insert_tensor_constant(g, torch.ones(2, 4))
|
||||
input.setType(constant.type().with_sizes([None, None]))
|
||||
shape = g.op("Shape", input)
|
||||
expand = g.op("Expand", constant, shape)
|
||||
self.run_test(g, expand.node(), expect_tensor("Float", shape=(None, None)))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -853,9 +853,10 @@ class TestQuantizeDBR(QuantizeDBRTestCase):
|
|||
qconfig = torch.quantization.default_qconfig
|
||||
self._test_auto_tracing(model_fp32, qconfig, (torch.randn(1, 1, 2, 2),))
|
||||
|
||||
@unittest.skip('this depends on unsupported syntax detection, currently disabled')
|
||||
def test_vovnet_sequential(self):
|
||||
|
||||
# We cannot quantize SequentialAppendList directly because
|
||||
# AutoQuantizationStateModuleDict would appear in self.items.
|
||||
# However, we can wrap it and quantize the wrapper.
|
||||
class SequentialAppendList(nn.Sequential):
|
||||
def __init__(self, *args):
|
||||
super(SequentialAppendList, self).__init__(*args)
|
||||
|
|
@ -870,7 +871,16 @@ class TestQuantizeDBR(QuantizeDBRTestCase):
|
|||
x = torch.cat(concat_list, dim=1)
|
||||
return x
|
||||
|
||||
m = SequentialAppendList(torch.nn.Conv2d(1, 1, 1)).eval()
|
||||
class Wrapper(nn.Module):
|
||||
def __init__(self, *args):
|
||||
super().__init__()
|
||||
self.append_list = SequentialAppendList(*args)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.append_list(x)
|
||||
return x
|
||||
|
||||
m = Wrapper(torch.nn.Conv2d(1, 1, 1)).eval()
|
||||
qconfig = torch.quantization.default_qconfig
|
||||
self._test_auto_tracing(m, qconfig, (torch.randn(1, 1, 1, 1),))
|
||||
|
||||
|
|
@ -922,10 +932,11 @@ class TestQuantizeDBR(QuantizeDBRTestCase):
|
|||
model_fp32, qconfig, (torch.randn(1, 1, 2, 2),),
|
||||
fuse_modules=False)
|
||||
|
||||
# this is broken because AutoQuantizationState appears in self.items
|
||||
@unittest.skip('TODO fix this')
|
||||
def test_module_calls_items(self):
|
||||
class M(torch.nn.ModuleDict):
|
||||
# We cannot quantize M1 directly because
|
||||
# AutoQuantizationStateModuleDict would appear in self.items.
|
||||
# However, we can wrap it and quantize the wrapper.
|
||||
class M1(torch.nn.ModuleDict):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
for i in range(2):
|
||||
|
|
@ -938,10 +949,22 @@ class TestQuantizeDBR(QuantizeDBRTestCase):
|
|||
layers.append(layer(x))
|
||||
return torch.cat(layers, dim=1)
|
||||
|
||||
model_fp32 = M().eval()
|
||||
class M2(torch.nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.m1 = M1()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.m1(x)
|
||||
return x
|
||||
|
||||
model_fp32 = M2().eval()
|
||||
qconfig = torch.quantization.default_qconfig
|
||||
self._test_auto_tracing(
|
||||
model_fp32, qconfig, (torch.randn(1, 1, 2, 2),))
|
||||
model_fp32, qconfig, (torch.randn(1, 1, 2, 2),),
|
||||
# TODO(future PR): implement observer sharing for torch.cat
|
||||
# in DBR quant, to ensure that numerical behavior matches
|
||||
do_fx_comparison=False)
|
||||
|
||||
def test_subclass_of_quantizeable_module(self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.quantized as nnq
|
||||
import torch.nn.quantized._reference as nnqr
|
||||
from torch.nn.utils.rnn import PackedSequence
|
||||
from torch.ao.quantization import (
|
||||
quantize,
|
||||
|
|
@ -75,130 +74,6 @@ import unittest
|
|||
import numpy as np
|
||||
|
||||
class TestQuantizeEagerOps(QuantizationTestCase):
|
||||
def _test_reference_module_impl(self,
|
||||
float_module_class,
|
||||
quantized_module_class,
|
||||
extra_module_kwargs,
|
||||
input_size):
|
||||
class M(torch.nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.conv = float_module_class(**extra_module_kwargs)
|
||||
self.quant = QuantStub()
|
||||
self.dequant = DeQuantStub()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.quant(x)
|
||||
x = self.conv(x)
|
||||
x = self.dequant(x)
|
||||
return x
|
||||
|
||||
class RefM(torch.nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.conv = float_module_class(**extra_module_kwargs)
|
||||
self.quant1 = QuantStub()
|
||||
self.dequant1 = DeQuantStub()
|
||||
self.quant2 = QuantStub()
|
||||
self.dequant2 = DeQuantStub()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.quant1(x)
|
||||
x = self.dequant1(x)
|
||||
x = self.conv(x)
|
||||
x = self.quant2(x)
|
||||
x = self.dequant2(x)
|
||||
return x
|
||||
|
||||
qengine = 'fbgemm'
|
||||
with override_quantized_engine(qengine):
|
||||
data = torch.randn(*input_size, dtype=torch.float)
|
||||
original_m = M()
|
||||
original_ref_m = RefM()
|
||||
|
||||
original_ref_m.conv.weight = torch.nn.Parameter(original_m.conv.weight.detach())
|
||||
original_ref_m.conv.bias = torch.nn.Parameter(original_m.conv.bias.detach())
|
||||
|
||||
original_m.qconfig = torch.quantization.default_qconfig
|
||||
|
||||
m = prepare(original_m)
|
||||
# calibration
|
||||
m(data)
|
||||
m = convert(m)
|
||||
# check if the module is properly quantized
|
||||
self.assertEqual(type(m.quant), nnq.Quantize)
|
||||
self.assertEqual(type(m.conv), quantized_module_class)
|
||||
self.assertEqual(type(m.dequant), nnq.DeQuantize)
|
||||
res = m(data)
|
||||
|
||||
# quantize the reference model
|
||||
original_ref_m.eval()
|
||||
original_ref_m.qconfig = torch.quantization.default_qconfig
|
||||
|
||||
ref_m = prepare(original_ref_m)
|
||||
ref_m(data)
|
||||
reference_module_mapping = {
|
||||
QuantStub: nnq.Quantize,
|
||||
DeQuantStub: nnq.DeQuantize,
|
||||
nn.Conv1d: nnqr.Conv1d,
|
||||
nn.Conv2d: nnqr.Conv2d,
|
||||
nn.Conv3d: nnqr.Conv3d,
|
||||
nn.ConvTranspose1d: nnqr.ConvTranspose1d,
|
||||
nn.ConvTranspose2d: nnqr.ConvTranspose2d,
|
||||
nn.ConvTranspose3d: nnqr.ConvTranspose3d,
|
||||
}
|
||||
ref_m = convert(ref_m, mapping=reference_module_mapping)
|
||||
ref_res = ref_m(data)
|
||||
self.assertEqual(res, ref_res)
|
||||
|
||||
def test_conv_1d(self):
|
||||
self._test_reference_module_impl(
|
||||
nn.Conv1d,
|
||||
nnq.Conv1d,
|
||||
{'in_channels': 1, 'out_channels': 1, 'kernel_size': 1},
|
||||
(16, 1, 1)
|
||||
)
|
||||
|
||||
def test_conv_2d(self):
|
||||
self._test_reference_module_impl(
|
||||
nn.Conv2d,
|
||||
nnq.Conv2d,
|
||||
{'in_channels': 1, 'out_channels': 1, 'kernel_size': 1},
|
||||
(16, 1, 10, 10)
|
||||
)
|
||||
|
||||
def test_conv_3d(self):
|
||||
self._test_reference_module_impl(
|
||||
nn.Conv3d,
|
||||
nnq.Conv3d,
|
||||
{'in_channels': 1, 'out_channels': 1, 'kernel_size': 1},
|
||||
(16, 1, 10, 10, 10)
|
||||
)
|
||||
|
||||
def test_conv_transpose_1d(self):
|
||||
self._test_reference_module_impl(
|
||||
nn.ConvTranspose1d,
|
||||
nnq.ConvTranspose1d,
|
||||
{'in_channels': 1, 'out_channels': 1, 'kernel_size': 1},
|
||||
(16, 1, 1)
|
||||
)
|
||||
|
||||
def test_conv_transpose_2d(self):
|
||||
self._test_reference_module_impl(
|
||||
nn.ConvTranspose2d,
|
||||
nnq.ConvTranspose2d,
|
||||
{'in_channels': 1, 'out_channels': 1, 'kernel_size': 1},
|
||||
(16, 1, 10, 10)
|
||||
)
|
||||
|
||||
def test_conv_transpose_3d(self):
|
||||
self._test_reference_module_impl(
|
||||
nn.ConvTranspose3d,
|
||||
nnq.ConvTranspose3d,
|
||||
{'in_channels': 1, 'out_channels': 1, 'kernel_size': 1},
|
||||
(16, 1, 10, 10, 10)
|
||||
)
|
||||
|
||||
def _test_activation_op_impl(
|
||||
self, float_module_class, quantized_module_class, extra_module_kwargs):
|
||||
""" Implementation for testing common activation ops like leaky relu
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# Owner(s): ["oncall: quantization"]
|
||||
|
||||
import copy
|
||||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
|
@ -10,6 +11,7 @@ from torch.nn.modules.utils import _pair
|
|||
import torch.nn.quantized as nnq
|
||||
import torch.nn.quantized.dynamic as nnqd
|
||||
import torch.nn.qat as nnqat
|
||||
import torch.nn.intrinsic.qat as nniqat
|
||||
import torch.nn.qat.dynamic as nnqatd
|
||||
from torch.ao.quantization import (
|
||||
prepare,
|
||||
|
|
@ -984,6 +986,43 @@ class TestQuantizeEagerQATNumerics(QuantizationTestCase):
|
|||
qat_op_optim.step()
|
||||
qat_ref_op_optim.step()
|
||||
|
||||
@override_qengines
|
||||
def test_linear_bn_numerics(self):
|
||||
qengine = torch.backends.quantized.engine
|
||||
m_ref = nn.Sequential(
|
||||
nn.Linear(4, 4),
|
||||
nn.BatchNorm1d(4),
|
||||
)
|
||||
m_ref_copy = copy.deepcopy(m_ref)
|
||||
m_ref_copy = torch.ao.quantization.fuse_modules_qat(m_ref_copy, [['0', '1']])
|
||||
qconfig = torch.ao.quantization.get_default_qat_qconfig(qengine)
|
||||
m_ref_copy[0].qconfig = qconfig
|
||||
m = nniqat.LinearBn1d.from_float(m_ref_copy[0])
|
||||
|
||||
# without fake_quants, fused QAT module should match fp32 module
|
||||
m.apply(torch.quantization.disable_fake_quant)
|
||||
data = torch.randn(4, 4)
|
||||
r1 = m_ref(data)
|
||||
r2 = m(data)
|
||||
self.assertTrue(torch.allclose(r1, r2))
|
||||
|
||||
@override_qengines
|
||||
def test_linear_bn_workflow(self):
|
||||
qengine = torch.backends.quantized.engine
|
||||
m = nn.Sequential(
|
||||
QuantStub(),
|
||||
nn.Linear(4, 4),
|
||||
nn.BatchNorm1d(4),
|
||||
)
|
||||
data = torch.randn(4, 4)
|
||||
m.qconfig = torch.ao.quantization.get_default_qat_qconfig(qengine)
|
||||
m = torch.ao.quantization.fuse_modules_qat(m, [['1', '2']])
|
||||
mp = prepare_qat(m)
|
||||
mp(data)
|
||||
mq = convert(mp)
|
||||
self.assertTrue(type(mq[1]) == nnq.Linear)
|
||||
self.assertTrue(type(mq[2]) == nn.Identity)
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise RuntimeError("This test file is not meant to be run directly, use:\n\n"
|
||||
"\tpython test/test_quantization.py TESTNAME\n\n"
|
||||
|
|
|
|||
|
|
@ -17533,51 +17533,12 @@
        )
        self.assertEqual(output_non_contig, output_contig)


    @onlyCUDA
    @dtypes(*itertools.product((torch.int, torch.long), (torch.int, torch.long)))
    def test_embedding_bag_bfloat16(self, device, dtypes):
        self._test_EmbeddingBag(device, 'sum', True, wdtype=torch.bfloat16, dtype=dtypes[0], odtype=dtypes[1], test_backward=True)
        self._test_EmbeddingBag(device, 'mean', True, wdtype=torch.bfloat16, dtype=dtypes[0], odtype=dtypes[1], test_backward=True)

    @dtypesIfCUDA(torch.float)
    @dtypes(torch.float)
    def test_transform_bias_rescale_qkv(self, device, dtype):
        # TODO: debug CPU test failure with settings (48, 4, 16, 8) and add that mode
        tests = [
            (64, 4, 16, 8),
            # dim_per_head = 12 does not divide evenly by CPU vectorization length of 8
            (24, 2, 4, 2),
            # Make sure CUDA can handle small input sizes
            (2, 2, 2, 2),
            # dim_per_head = 6 does not divide evenly by CUDA vectorization length of 4, causes alignment issues
            (24, 4, 4, 2)
        ]
        for (embed_dim, num_heads, sl, bs) in tests:
            x = torch.randn(sl, bs, embed_dim, device=device, dtype=dtype) * 10
            qkv = torch.nn.Linear(embed_dim, 3 * embed_dim, device=device, dtype=dtype)

            with torch.no_grad():
                (q, k, v) = torch._transform_bias_rescale_qkv(x @ qkv.weight.t(), qkv.bias, num_head=num_heads)

                def simple_transform_bias_rescale_qkv(qkv, bias):
                    (q, k, v) = torch.split(qkv, embed_dim, dim=-1)
                    (q_bias, k_bias, v_bias) = torch.split(bias, embed_dim, dim=-1)
                    return tuple(
                        x.reshape((sl, bs, num_heads, embed_dim // num_heads)).transpose(2, 1)
                        for x in (
                            (q + q_bias) / math.sqrt(embed_dim // num_heads),
                            (k + k_bias),
                            (v + v_bias)
                        )
                    )
                correct_q, correct_k, correct_v = simple_transform_bias_rescale_qkv(x @ qkv.weight.t(), qkv.bias)

                self.assertEqual(q.size(), correct_q.size())
                self.assertTrue(torch.allclose(q, correct_q))
                self.assertTrue(torch.allclose(k, correct_k))
                self.assertTrue(torch.allclose(v, correct_v))

    @onlyCUDA
    @dtypes(torch.half, torch.float, torch.double)
    def test_multihead_attention_dtype(self, device, dtype):

@ -5773,7 +5773,7 @@ class TestTorch(TestCase):

        for reduce in reduces:
            for dim in range(len(shape)):
                output = input._scatter_reduce(dim, index, reduce, output_size=output_size)
                output = input.scatter_reduce(dim, index, reduce, output_size=output_size)

                # Check that output is of the correct size
                output_shape = copy.copy(shape)

@ -5807,16 +5807,16 @@
        self.assertTrue(torch.allclose(output, expected))

        with self.assertRaisesRegex(RuntimeError, "Expected `dim` to be in range -3 to 2"):
            torch._scatter_reduce(input, 4, index, "sum")
            torch.scatter_reduce(input, 4, index, "sum")

        with self.assertRaisesRegex(RuntimeError, "Shape mismatch"):
            index2 = torch.randint(0, output_size, (10, ), dtype=torch.long, device=device)
            torch._scatter_reduce(input, 0, index2, "sum")
            torch.scatter_reduce(input, 0, index2, "sum")

        with self.assertRaisesRegex(RuntimeError, "Expected `index` values to be in range 0 to 2"):
            input2 = torch.randn(10, dtype=dtype, device=device)
            index2 = torch.tensor([0, 1, 0, 1, 2, 3, 3, 4, 4, 3])
            torch._scatter_reduce(input2, 0, index2, "sum", output_size=2)
            torch.scatter_reduce(input2, 0, index2, "sum", output_size=2)

    def test_structseq_repr(self):
        a = torch.arange(250).reshape(5, 5, 10)

||||
|
|
|
|||
2
third_party/fbgemm
vendored
2
third_party/fbgemm
vendored
|
|
@ -1 +1 @@
|
|||
Subproject commit 365abe3ee878b2592e9a33f937d96df0048d99dd
|
||||
Subproject commit ab3ca6647d3f4be25423c5f997256a8a219fb762
|
||||
|
|
@ -2595,6 +2595,6 @@
|
|||
- name: _efficientzerotensor(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
||||
output_differentiability: [False]
|
||||
|
||||
- name: _scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *, int? output_size=None) -> Tensor
|
||||
- name: scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *, int? output_size=None) -> Tensor
|
||||
self: scatter_reduce_backward(grad, self, dim, index, reduce, result)
|
||||
index: non_differentiable
|
||||
|
|
|
|||
|
|
@ -1176,7 +1176,6 @@ aten_native_source_non_codegen_list = [
|
|||
"aten/src/ATen/native/quantized/library.cpp",
|
||||
"aten/src/ATen/quantized/QTensorImpl.cpp",
|
||||
"aten/src/ATen/quantized/Quantizer.cpp",
|
||||
"aten/src/ATen/native/attention.cpp",
|
||||
"aten/src/ATen/native/Activation.cpp",
|
||||
"aten/src/ATen/native/AdaptiveAveragePooling.cpp",
|
||||
"aten/src/ATen/native/AdaptiveAveragePooling3d.cpp",
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
21ca53c291a88b53dac85751b7a0203ca610ac94b7adaff3c092cf30df4168f2
|
||||
e1c8b97b919541a99e0a355df5c3f9e8abebc64259dbee6f8c68e1ef90582856
|
||||
|
|
@ -1 +1 @@
|
|||
5fde7bccf65032da297dfb1f18e4a95e96e278fa397e9dcaf364dfe23ec46353
|
||||
1485a242a96c737ba7cdd9f259114f2201accdb46d87ac7a8650b1a814cd4d4d
|
||||
|
|
@ -193,50 +193,45 @@ In multiline mode, each line next includes the name of a CircleCI job,
|
|||
followed by the time of the specified test in that job at that commit.
|
||||
Example:
|
||||
|
||||
$ tools/stats/test_history.py --mode=multiline --ref=594a66 --sha-length=8 --test=test_set_dir \
|
||||
--job pytorch_linux_xenial_py3_6_gcc5_4_test --job pytorch_linux_xenial_py3_6_gcc7_test
|
||||
2021-02-10 11:13:34Z 594a66d7 pytorch_linux_xenial_py3_6_gcc5_4_test 0.36s
|
||||
2021-02-10 11:13:34Z 594a66d7 pytorch_linux_xenial_py3_6_gcc7_test 0.573s errored
|
||||
2021-02-10 10:13:25Z 9c0caf03 pytorch_linux_xenial_py3_6_gcc5_4_test 0.819s
|
||||
2021-02-10 10:13:25Z 9c0caf03 pytorch_linux_xenial_py3_6_gcc7_test 0.449s
|
||||
2021-02-10 10:09:14Z 602434bc pytorch_linux_xenial_py3_6_gcc5_4_test 0.361s
|
||||
2021-02-10 10:09:14Z 602434bc pytorch_linux_xenial_py3_6_gcc7_test 0.454s
|
||||
2021-02-10 10:09:10Z 2e35fe95 (no reports in S3)
|
||||
2021-02-10 10:09:07Z ff73be7e (no reports in S3)
|
||||
2021-02-10 10:05:39Z 74082f0d (no reports in S3)
|
||||
2021-02-10 07:42:29Z 0620c96f pytorch_linux_xenial_py3_6_gcc5_4_test 0.414s
|
||||
2021-02-10 07:42:29Z 0620c96f pytorch_linux_xenial_py3_6_gcc5_4_test 0.476s
|
||||
2021-02-10 07:42:29Z 0620c96f pytorch_linux_xenial_py3_6_gcc7_test 0.377s
|
||||
2021-02-10 07:42:29Z 0620c96f pytorch_linux_xenial_py3_6_gcc7_test 0.326s
|
||||
$ tools/stats/test_history.py --mode=multiline --ref=86a961af879 --sha-length=8 \
|
||||
--test=test_composite_compliance_dot_cpu_float32 \
|
||||
--job linux-xenial-py3.7-gcc5.4-test-default1 --job linux-xenial-py3.7-gcc7-test-default1
|
||||
2022-02-18 15:47:37Z 86a961af linux-xenial-py3.7-gcc5.4-test-default1 0.001s
|
||||
2022-02-18 15:47:37Z 86a961af linux-xenial-py3.7-gcc7-test-default1 0.001s
|
||||
2022-02-18 15:12:34Z f5e201e4 linux-xenial-py3.7-gcc5.4-test-default1 0.001s
|
||||
2022-02-18 15:12:34Z f5e201e4 linux-xenial-py3.7-gcc7-test-default1 0.001s
|
||||
2022-02-18 13:14:56Z 1c0df265 linux-xenial-py3.7-gcc5.4-test-default1 0.001s
|
||||
2022-02-18 13:14:56Z 1c0df265 linux-xenial-py3.7-gcc7-test-default1 0.001s
|
||||
2022-02-18 13:14:56Z e73eaffd (no reports in S3)
|
||||
2022-02-18 06:29:12Z 710f12f5 linux-xenial-py3.7-gcc5.4-test-default1 0.001s
|
||||
|
||||
Another multiline example, this time with the --all flag:
|
||||
|
||||
$ tools/stats/test_history.py --mode=multiline --all --ref=321b9 --delta=12 --sha-length=8 \
|
||||
--test=test_qr_square_many_batched_complex_cuda
|
||||
2021-01-07 10:04:56Z 321b9883 pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test2 424.284s
|
||||
2021-01-07 10:04:56Z 321b9883 pytorch_linux_xenial_cuda10_2_cudnn7_py3_slow_test 0.006s skipped
|
||||
2021-01-07 10:04:56Z 321b9883 pytorch_linux_xenial_cuda11_1_cudnn8_py3_gcc7_test 402.572s
|
||||
2021-01-07 10:04:56Z 321b9883 pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test 287.164s
|
||||
2021-01-06 20:58:28Z fcb69d2e pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test2 436.732s
|
||||
2021-01-06 20:58:28Z fcb69d2e pytorch_linux_xenial_cuda10_2_cudnn7_py3_slow_test 0.006s skipped
|
||||
2021-01-06 20:58:28Z fcb69d2e pytorch_linux_xenial_cuda11_1_cudnn8_py3_gcc7_test 407.616s
|
||||
2021-01-06 20:58:28Z fcb69d2e pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test 287.044s
|
||||
$ tools/stats/test_history.py --mode=multiline --all --ref=86a961af879 --delta=12 --sha-length=8 \
|
||||
--test=test_composite_compliance_dot_cuda_float32
|
||||
2022-02-18 03:49:46Z 69389fb5 linux-bionic-cuda10.2-py3.9-gcc7-test-default1 0.001s skipped
|
||||
2022-02-18 03:49:46Z 69389fb5 linux-bionic-cuda10.2-py3.9-gcc7-test-slow1 0.001s skipped
|
||||
2022-02-18 03:49:46Z 69389fb5 linux-xenial-cuda11.3-py3.7-gcc7-test-default1 0.001s skipped
|
||||
2022-02-18 03:49:46Z 69389fb5 periodic-linux-bionic-cuda11.5-py3.7-gcc7-test-default1 0.001s skipped
|
||||
2022-02-18 03:49:46Z 69389fb5 periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck-test-default1 0.001s skipped
|
||||
2022-02-18 03:49:46Z 69389fb5 periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test-default1 0.001s skipped
|
||||
|
||||
In columns mode, the name of the job isn't printed, but the order of the
|
||||
columns is guaranteed to match the order of the jobs passed on the
|
||||
command line. Example:
|
||||
|
||||
$ tools/stats/test_history.py --mode=columns --ref=3cf783 --sha-length=8 --test=test_set_dir \
|
||||
--job pytorch_linux_xenial_py3_6_gcc5_4_test --job pytorch_linux_xenial_py3_6_gcc7_test
|
||||
2021-02-10 12:18:50Z 3cf78395 0.644s 0.312s
|
||||
2021-02-10 11:13:34Z 594a66d7 0.360s errored
|
||||
2021-02-10 10:13:25Z 9c0caf03 0.819s 0.449s
|
||||
2021-02-10 10:09:14Z 602434bc 0.361s 0.454s
|
||||
2021-02-10 10:09:10Z 2e35fe95
|
||||
2021-02-10 10:09:07Z ff73be7e
|
||||
2021-02-10 10:05:39Z 74082f0d
|
||||
2021-02-10 07:42:29Z 0620c96f 0.414s 0.377s (2 job re-runs omitted)
|
||||
2021-02-10 07:27:53Z 33afb5f1 0.381s 0.294s
|
||||
$ tools/stats/test_history.py --mode=columns --ref=86a961af879 --sha-length=8 \
|
||||
--test=test_composite_compliance_dot_cpu_float32 \
|
||||
--job linux-xenial-py3.7-gcc5.4-test-default1 --job linux-xenial-py3.7-gcc7-test-default1
|
||||
2022-02-18 15:47:37Z 86a961af 0.001s 0.001s
|
||||
2022-02-18 15:12:34Z f5e201e4 0.001s 0.001s
|
||||
2022-02-18 13:14:56Z 1c0df265 0.001s 0.001s
|
||||
2022-02-18 13:14:56Z e73eaffd
|
||||
2022-02-18 06:29:12Z 710f12f5 0.001s 0.001s
|
||||
2022-02-18 05:20:30Z 51b04f27 0.001s 0.001s
|
||||
2022-02-18 03:49:46Z 69389fb5 0.001s 0.001s
|
||||
2022-02-18 00:19:12Z 056b6260 0.001s 0.001s
|
||||
2022-02-17 23:58:32Z 39fb7714 0.001s 0.001s
|
||||
|
||||
Minor note: in columns mode, a blank cell means that no report was found
|
||||
in S3, while the word "absent" means that a report was found but the
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ def parse_description(description: str) -> List[Example]:
|
|||
return examples
|
||||
|
||||
|
||||
@unittest.skip("Skipping as this test is fragile, issue #73083")
|
||||
class TestTestHistory(unittest.TestCase):
|
||||
maxDiff = None
|
||||
|
||||
|
|
|
|||
|
|
@ -326,7 +326,8 @@ def _jit_pass_onnx_remove_print(graph: Graph) -> None: ...
|
|||
def _jit_pass_onnx_preprocess_caffe2(graph: Graph) -> None: ...
|
||||
def _jit_pass_onnx_unpack_quantized_weights(
|
||||
graph: Graph,
|
||||
paramsDict: Dict[str, IValue]
|
||||
paramsDict: Dict[str, IValue],
|
||||
caffe2: _bool
|
||||
) -> Dict[str, IValue]: ...
|
||||
def _jit_pass_onnx_quantization_insert_permutes(
|
||||
graph: Graph,
|
||||
|
|
@ -409,7 +410,7 @@ def _import_ir_module_from_package(
|
|||
) -> ScriptModule: ...
|
||||
|
||||
def _assign_output_shapes(graph: Graph, inputs: List[Tensor]) -> Graph: ...
|
||||
def _check_onnx_proto(proto: str) -> None: ...
|
||||
def _check_onnx_proto(proto: str, full_check: _bool = False) -> None: ...
|
||||
def _propagate_and_assign_input_shapes(
|
||||
graph: Graph,
|
||||
inputs: Tuple[Tensor, ...],
|
||||
|
|
|
|||
|
|
@ -3374,6 +3374,12 @@ Example::
|
|||
|
||||
""".format(**reproducibility_notes))
|
||||
|
||||
add_docstr_all('scatter_reduce', r"""
|
||||
scatter_reduce(input, dim, index, reduce, *, output_size=None) -> Tensor
|
||||
|
||||
See :func:`torch.scatter_reduce`
|
||||
""")
|
||||
|
||||
add_docstr_all('select',
|
||||
r"""
|
||||
select(dim, index) -> Tensor
|
||||
|
|
|
|||
|
|
@ -8547,6 +8547,59 @@ scatter_add(input, dim, index, src) -> Tensor
|
|||
Out-of-place version of :meth:`torch.Tensor.scatter_add_`
|
||||
""")
|
||||
|
||||
add_docstr(torch.scatter_reduce, r"""
|
||||
scatter_reduce(input, dim, index, reduce, *, output_size=None) -> Tensor
|
||||
|
||||
Reduces all values from the :attr:`input` tensor to the indices specified in
|
||||
the :attr:`index` tensor. For each value in :attr:`input`, its output index is
|
||||
specified by its index in :attr:`input` for ``dimension != dim`` and by the
|
||||
corresponding value in :attr:`index` for ``dimension = dim``.
|
||||
The applied reduction for non-unique indices is defined via the :attr:`reduce`
|
||||
argument (:obj:`"sum"`, :obj:`"prod"`, :obj:`"mean"`, :obj:`"amax"`, :obj:`"amin"`).
|
||||
For non-existing indices, the output will be filled with the identity of the
|
||||
applied reduction (1 for :obj:`"prod"` and 0 otherwise).
|
||||
|
||||
It is also required that ``index.size(d) == input.size(d)`` for all dimensions ``d``.
|
||||
Moreover, if :attr:`output_size` is defined, the values of :attr:`index` must be
|
||||
between ``0`` and ``output_size - 1`` inclusive.
|
||||
|
||||
|
||||
For a 3-D tensor with :obj:`reduce="sum"`, the output is given as::
|
||||
|
||||
out[index[i][j][k]][j][k] += input[i][j][k] # if dim == 0
|
||||
out[i][index[i][j][k]][k] += input[i][j][k] # if dim == 1
|
||||
out[i][j][index[i][j][k]] += input[i][j][k] # if dim == 2
|
||||
|
||||
Note:
|
||||
This out-of-place operation is similar to the in-place versions of
|
||||
:meth:`~torch.Tensor.scatter_` and :meth:`~torch.Tensor.scatter_add_`,
|
||||
except that here the output tensor is automatically created according to the
|
||||
maximum values in :attr:`index` and filled based on the identity of the
|
||||
applied reduction.
|
||||
|
||||
Note:
|
||||
{forward_reproducibility_note}
|
||||
|
||||
Args:
|
||||
input (Tensor): the input tensor
|
||||
dim (int): the axis along which to index
|
||||
index (LongTensor): the indices of elements to scatter and reduce.
|
||||
reduce (str): the reduction operation to apply for non-unique indices
|
||||
(:obj:`"sum"`, :obj:`"prod"`, :obj:`"mean"`, :obj:`"amax"`, :obj:`"amin"`)
|
||||
output_size (int, optional): the size of the output at dimension :attr:`dim`.
|
||||
If set to :obj:`None`, will get automatically inferred according to
|
||||
:obj:`index.max() + 1`
|
||||
|
||||
Example::
|
||||
|
||||
>>> input = torch.tensor([1, 2, 3, 4, 5, 6])
|
||||
>>> index = torch.tensor([0, 1, 0, 1, 2, 1])
|
||||
>>> torch.scatter_reduce(input, 0, index, reduce="sum", output_size=3)
|
||||
tensor([4, 12, 5])
|
||||
|
||||
""".format(**reproducibility_notes))
|
||||
|
||||
add_docstr(torch.select,
|
||||
r"""
|
||||
select(input, dim, index) -> Tensor
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ def get_base_name_to_sets_of_related_ops() -> Dict[str, Set[NSNodeTargetType]]:
|
|||
nnqatd.Linear,
|
||||
nnqd.Linear,
|
||||
nniqat.LinearReLU,
|
||||
nniqat.LinearBn1d,
|
||||
nn.modules.linear.NonDynamicallyQuantizableLinear,
|
||||
]),
|
||||
# linear functionals
|
||||
|
|
@ -572,6 +573,7 @@ def get_node_type_to_io_type_map() -> Dict[str, Set[NSNodeTargetType]]:
|
|||
nniqat.ConvReLU2d,
|
||||
nniqat.ConvReLU3d,
|
||||
nniqat.LinearReLU,
|
||||
nniqat.LinearBn1d,
|
||||
nniqd.LinearReLU,
|
||||
])
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ from .utils import (
|
|||
get_torch_function_hook_type,
|
||||
get_module_hook_type,
|
||||
OpQuantizeabilityType,
|
||||
AutoQuantizationStateModuleDict,
|
||||
get_fqn_valid_for_module_dict_key,
|
||||
)
|
||||
from .model_utils import (
|
||||
pack_weights_for_functionals,
|
||||
|
|
@ -350,6 +352,8 @@ def add_auto_observation(
|
|||
for _, child_child in child.named_modules():
|
||||
leaves.add(child_child)
|
||||
|
||||
self._fqn_to_auto_quant_state_map = AutoQuantizationStateModuleDict()
|
||||
|
||||
for fqn, v in named_modules:
|
||||
|
||||
# fqn is the global FQN, i.e. 'foo.bar.baz'
|
||||
|
|
@ -366,14 +370,39 @@ def add_auto_observation(
|
|||
if v is self:
|
||||
# for the top level module only, specify input
|
||||
# and output dtypes
|
||||
v._auto_quant_state = AutoQuantizationState(
|
||||
auto_quant_state = AutoQuantizationState(
|
||||
qconfig_dict, fqn,
|
||||
input_dtypes, output_dtypes)
|
||||
pass
|
||||
else:
|
||||
v._auto_quant_state = AutoQuantizationState(
|
||||
auto_quant_state = AutoQuantizationState(
|
||||
qconfig_dict, fqn)
|
||||
|
||||
# The code below registers the auto_quant_state object
|
||||
# of the child in the module hierarchy of the parent,
|
||||
# and adds the auto_quant_state object to the child
|
||||
# with a raw __setattr__, without registering it in
|
||||
# the module hierarchy of the child.
|
||||
# This is solving the problem of both storing extra state
|
||||
# (observers) and not modifying the meaning of user
|
||||
# code in child modules which iterates over all module
|
||||
# children.
|
||||
#
|
||||
# This narrows down the issue of dynamically adding
|
||||
# children to only affect the top level module and not
|
||||
# the children.
|
||||
|
||||
# On the parent, register this module in the FQN map
|
||||
fqn_to_use_for_key = \
|
||||
get_fqn_valid_for_module_dict_key(fqn)
|
||||
self._fqn_to_auto_quant_state_map[fqn_to_use_for_key] = \
|
||||
auto_quant_state
|
||||
# On the child, manually set the attribute without
|
||||
# going through the `torch.nn.Module.__setattr__`
|
||||
# function, to prevent this object from appearing in
|
||||
# the child's module hierarchy.
|
||||
object.__setattr__(
|
||||
v, '_auto_quant_state', auto_quant_state)
|
||||
|
||||
global_op_idx[0] = 0
|
||||
|
||||
output = super().__call__(*new_args, **new_kwargs)
|
||||
|
|
@ -688,6 +717,6 @@ def add_auto_convert(module : torch.nn.Module) -> torch.nn.Module:
|
|||
# checking the fix into `torch.nn.Sequential` to avoid the patch.
|
||||
def _nn_sequential_patched_forward(cls, input):
|
||||
for module in cls:
|
||||
if not isinstance(module, AutoQuantizationState):
|
||||
if not isinstance(module, AutoQuantizationStateModuleDict):
|
||||
input = module(input)
|
||||
return input
|
||||
|
|
|
|||
|
|
@ -8,7 +8,10 @@ import torch
|
|||
import torch.fx
|
||||
from .mappings import conv_ops
|
||||
from .quantization_state import AutoQuantizationState
|
||||
from .utils import get_packable_arg_idxs
|
||||
from .utils import (
|
||||
get_packable_arg_idxs,
|
||||
AutoQuantizationStateModuleDict,
|
||||
)
|
||||
|
||||
class AllModuleTracer(torch.fx.Tracer):
|
||||
"""
|
||||
|
|
@ -207,7 +210,7 @@ class AllModuleTracer(torch.fx.Tracer):
|
|||
# class.
|
||||
# TODO(future): remove the hack
|
||||
def call_module(self, m: torch.nn.Module, forward: Callable[..., Any], args : Tuple[Any, ...], kwargs : Dict[str, Any]) -> Any:
|
||||
if isinstance(m, AutoQuantizationState):
|
||||
if isinstance(m, AutoQuantizationStateModuleDict):
|
||||
return args[0]
|
||||
return super().call_module(m, forward, args, kwargs)
|
||||
|
||||
|
|
|
|||
|
|
@ -583,10 +583,9 @@ def get_torch_function_hook_type(
|
|||
# the direct __dict__ accesses are for performance, because
|
||||
# the default `torch.nn.Module.__getattr__` has overhead.
|
||||
parent_module_has_qstate = parent_module is not None and \
|
||||
'_modules' in parent_module.__dict__ and \
|
||||
'_auto_quant_state' in parent_module.__dict__['_modules']
|
||||
'_auto_quant_state' in parent_module.__dict__
|
||||
needs_op_hooks = parent_module_has_qstate and \
|
||||
parent_module.__dict__['_modules']['_auto_quant_state'].cur_op_needs_hooks(func) # type: ignore[union-attr, operator]
|
||||
parent_module.__dict__['_auto_quant_state'].cur_op_needs_hooks(func) # type: ignore[union-attr, operator]
|
||||
|
||||
if needs_op_hooks:
|
||||
return HookType.OP_HOOKS
|
||||
|
|
@ -608,17 +607,15 @@ def get_module_hook_type(
|
|||
if cached_hook_type is not None:
|
||||
return cached_hook_type
|
||||
parent_module_has_qstate = parent_module is not None and \
|
||||
'_modules' in parent_module.__dict__ and \
|
||||
'_auto_quant_state' in parent_module.__dict__['_modules']
|
||||
'_auto_quant_state' in parent_module.__dict__
|
||||
needs_op_hooks = parent_module_has_qstate and \
|
||||
parent_module.__dict__['_modules']['_auto_quant_state'].cur_op_needs_hooks(cur_module) # type: ignore[union-attr, operator]
|
||||
parent_module.__dict__['_auto_quant_state'].cur_op_needs_hooks(cur_module) # type: ignore[union-attr, operator]
|
||||
# We need IO hooks if
|
||||
# * we are calling forward on a module (always True here)
|
||||
# * that module has quant state
|
||||
# * that module does not need op hooks for the parent
|
||||
needs_io_hooks = (
|
||||
'_modules' in cur_module.__dict__ and
|
||||
'_auto_quant_state' in cur_module.__dict__['_modules'] and
|
||||
'_auto_quant_state' in cur_module.__dict__ and
|
||||
(not needs_op_hooks)
|
||||
)
|
||||
needs_arg_dequants = parent_module_has_qstate and not needs_op_hooks
|
||||
|
|
@ -727,3 +724,18 @@ def get_cur_qconfig(
|
|||
qconfig_dict, cur_op_type, cur_fqn, global_qconfig)
|
||||
|
||||
return qconfig
|
||||
|
||||
|
||||
# We store quantization state for all children on the top level module in a
|
||||
# ModuleDict. In order to properly distinguish this module from other
|
||||
# ModuleDict instances, we create a marker class for it.
|
||||
class AutoQuantizationStateModuleDict(torch.nn.ModuleDict):
|
||||
pass
|
||||
|
||||
def get_fqn_valid_for_module_dict_key(fqn: str) -> str:
|
||||
"""
|
||||
Modifies `fqn` to make it a valid key to a ModuleDict.
|
||||
"""
|
||||
if fqn == '':
|
||||
fqn = ' '
|
||||
return fqn.replace('.', ':')
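A minimal sketch of the registration pattern the comments above describe
(StateDict, _state_map, and _state are illustrative stand-ins for the real
class and attribute names): the state is registered on the parent through a
ModuleDict whose keys contain no '.', and attached to the child with a raw
object.__setattr__ so it never enters the child's module hierarchy.

    import torch

    class StateDict(torch.nn.ModuleDict):
        # marker subclass, in the spirit of AutoQuantizationStateModuleDict
        pass

    root = torch.nn.Sequential(torch.nn.Linear(2, 2))
    root._state_map = StateDict()              # registered submodule of the parent

    fqn = "0"                                  # child FQN; '.' becomes ':', '' becomes ' '
    key = fqn.replace('.', ':') or ' '
    state = torch.nn.Identity()                # stand-in for AutoQuantizationState

    root._state_map[key] = state               # visible in root.named_modules()
    object.__setattr__(root[0], '_state', state)   # invisible to root[0].named_modules()

    assert any(name.startswith('_state_map') for name, _ in root.named_modules())
    assert all(name == '' for name, _ in root[0].named_modules())

Because the ModuleDict is itself a registered child, iterating the parent in
forward would hit it; that is why the patched _nn_sequential_patched_forward
earlier in this diff skips AutoQuantizationStateModuleDict instances.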
|
||||
|
|
|
|||
|
|
@ -82,6 +82,8 @@ def prepare(model, qconfig_dict, example_inputs, inplace=False, allow_list=None,
|
|||
for v in parents_to_delete_auto_quant_state:
|
||||
del v._auto_quant_state
|
||||
|
||||
del model._fqn_to_auto_quant_state_map
|
||||
|
||||
# the model hierarchy might have changed during fusion, so we
|
||||
# have to delete the cached module hook types
|
||||
for k, v in model.named_modules():
|
||||
|
|
|
|||
|
|
@ -114,7 +114,12 @@ def fuse_linear_bn(is_qat, linear, bn):
|
|||
if is_qat:
|
||||
# TODO: remove the assert later
|
||||
assert linear.training, "qat is only supported when linear.training is True currently"
|
||||
raise Exception("Fusing Linear+BatchNorm not yet supported in training.")
|
||||
assert bn.num_features == linear.out_features,\
|
||||
"Output features of Linear must match num_features of BatchNorm1d"
|
||||
assert bn.affine, "Only support fusing BatchNorm1d with affine set to True"
|
||||
assert bn.track_running_stats,\
|
||||
"Only support fusing BatchNorm1d with tracking_running_stats set to True"
|
||||
return nni.LinearBn1d(linear, bn)
|
||||
else:
|
||||
return nn.utils.fusion.fuse_linear_bn_eval(linear, bn)
|
||||
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ DEFAULT_STATIC_QUANT_MODULE_MAPPINGS : Dict[Callable, Any] = {
|
|||
nniqat.ConvReLU2d: nniq.ConvReLU2d,
|
||||
nniqat.ConvReLU3d: nniq.ConvReLU3d,
|
||||
nniqat.LinearReLU: nniq.LinearReLU,
|
||||
nniqat.LinearBn1d: nnq.Linear,
|
||||
# QAT modules:
|
||||
nnqat.Linear: nnq.Linear,
|
||||
nnqat.Conv2d: nnq.Conv2d,
|
||||
|
|
@ -99,6 +100,7 @@ DEFAULT_QAT_MODULE_MAPPINGS : Dict[Callable, Any] = {
|
|||
nni.ConvReLU2d: nniqat.ConvReLU2d,
|
||||
nni.ConvReLU3d: nniqat.ConvReLU3d,
|
||||
nni.LinearReLU: nniqat.LinearReLU,
|
||||
nni.LinearBn1d: nniqat.LinearBn1d,
|
||||
}
|
||||
|
||||
# Default map for swapping dynamic modules
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ from torch.ao.quantization.quantization_mappings import (
|
|||
_has_special_act_post_process,
|
||||
_get_special_act_post_process,
|
||||
)
|
||||
from .utils import get_qparam_dict
|
||||
|
||||
from torch.ao.quantization.stubs import DeQuantStub, QuantWrapper
|
||||
from torch.ao.quantization.qconfig import (
|
||||
add_module_to_qconfig_obs_ctr,
|
||||
|
|
@ -565,15 +565,7 @@ def swap_module(mod, mapping, custom_module_class_mapping):
|
|||
new_mod = custom_module_class_mapping[type(mod)].from_observed(mod)
|
||||
swapped = True
|
||||
elif type(mod) in mapping:
|
||||
qmod = mapping[type(mod)]
|
||||
if hasattr(qmod, '_IS_REFERENCE') and qmod._IS_REFERENCE:
|
||||
assert mod.qconfig is not None
|
||||
weight_post_process = mod.qconfig.weight()
|
||||
weight_post_process(mod.weight)
|
||||
weight_qparams = get_qparam_dict(weight_post_process)
|
||||
new_mod = qmod.from_float(mod, weight_qparams)
|
||||
else:
|
||||
new_mod = qmod.from_float(mod)
|
||||
new_mod = mapping[type(mod)].from_float(mod)
|
||||
swapped = True
|
||||
|
||||
if swapped:
|
||||
|
|
|
|||
|
|
@ -589,11 +589,10 @@ PyObject *THPModule_supportedQEngines(PyObject *_unused, PyObject *noargs)
|
|||
{
|
||||
auto qengines = at::globalContext().supportedQEngines();
|
||||
auto list = THPObjectPtr(PyList_New(qengines.size()));
|
||||
if (!list) return nullptr;
|
||||
for (const auto i : c10::irange(qengines.size())) {
|
||||
PyObject *i64 = THPUtils_packInt64(static_cast<int>(qengines[i]));
|
||||
if (!i64) {
|
||||
throw python_error();
|
||||
}
|
||||
if (!i64) return nullptr;
|
||||
PyList_SET_ITEM(list.get(), i, i64);
|
||||
}
|
||||
return list.release();
|
||||
|
|
@ -607,22 +606,18 @@ PyObject *THPModule_isEnabledXNNPACK(PyObject *_unused, PyObject *noargs)
|
|||
|
||||
PyObject *THPModule_setDefaultMobileCPUAllocator(PyObject *_unused, PyObject *noargs)
|
||||
{
|
||||
try {
|
||||
at::globalContext().setDefaultMobileCPUAllocator();
|
||||
} catch (c10::Error& e) {
|
||||
THPUtils_setError(e.what());
|
||||
}
|
||||
HANDLE_TH_ERRORS
|
||||
at::globalContext().setDefaultMobileCPUAllocator();
|
||||
Py_RETURN_NONE;
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject *THPModule_unsetDefaultMobileCPUAllocator(PyObject *_unused, PyObject *noargs)
|
||||
{
|
||||
try {
|
||||
at::globalContext().unsetDefaultMobileCPUAllocator();
|
||||
} catch (c10::Error& e) {
|
||||
THPUtils_setError(e.what());
|
||||
}
|
||||
HANDLE_TH_ERRORS
|
||||
at::globalContext().unsetDefaultMobileCPUAllocator();
|
||||
Py_RETURN_NONE;
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
static PyObject * THPModule_vmapmode_increment_nesting(PyObject* _unused, PyObject *arg) {
|
||||
|
|
|
|||
|
|
@ -155,6 +155,19 @@ struct OpEventData {
|
|||
torch::profiler::impl::CUDAEventStub cuda_event_end_ = nullptr;
|
||||
};
|
||||
|
||||
struct MemoryEventData {
|
||||
int64_t start_time;
|
||||
void* ptr;
|
||||
int64_t alloc_size;
|
||||
int64_t total_allocated;
|
||||
int64_t total_reserved;
|
||||
uint64_t threadID;
|
||||
torch::profiler::impl::kineto::DeviceAndResource kineto_info;
|
||||
c10::DeviceType device_type;
|
||||
c10::DeviceIndex device_index;
|
||||
};
|
||||
static_assert(std::is_pod<MemoryEventData>::value, "Non-POD member of MemoryEventData.");
|
||||
|
||||
// Assumption: the total number of threads will not exceed 2^16-1, and the
|
||||
// total number of ops will not exceed 2^48-1.
|
||||
static inline uint64_t getForwardThreadKey(uint64_t tid, uint64_t seqNr) {
|
||||
|
|
@ -204,29 +217,16 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalStateBase {
|
|||
int64_t total_reserved,
|
||||
c10::Device device) override {
|
||||
if (config_.profile_memory && config_.state != ProfilerState::Disabled) {
|
||||
std::lock_guard<std::mutex> guard(state_mutex_);
|
||||
auto start_time = getTimeUs();
|
||||
if (cpu_trace_) {
|
||||
torch::profiler::impl::kineto::recordThreadInfo();
|
||||
cpu_trace_.addMemoryUsageActivity(
|
||||
kMemoryEventName,
|
||||
torch::profiler::impl::kineto::kineto_ids(),
|
||||
start_time,
|
||||
device,
|
||||
ptr,
|
||||
alloc_size,
|
||||
total_allocated,
|
||||
total_reserved);
|
||||
}
|
||||
|
||||
kineto_events_.emplace_back();
|
||||
auto& evt = kineto_events_.back();
|
||||
evt.name(kMemoryEventName)
|
||||
.startUs(start_time)
|
||||
.deviceIndex(device.index())
|
||||
.deviceType(device.type())
|
||||
.nBytes(alloc_size)
|
||||
.startThreadId(at::RecordFunction::currentThreadId());
|
||||
memory_events_.push_back(
|
||||
{getTimeUs(),
|
||||
ptr,
|
||||
alloc_size,
|
||||
total_allocated,
|
||||
total_reserved,
|
||||
at::RecordFunction::currentThreadId(),
|
||||
torch::profiler::impl::kineto::kineto_ids(),
|
||||
device.type(),
|
||||
device.index()});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -264,6 +264,28 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalStateBase {
|
|||
|
||||
void materializeOpEvents() {
|
||||
std::lock_guard<std::mutex> guard(state_mutex_);
|
||||
|
||||
for (const auto& e : memory_events_) {
|
||||
cpu_trace_.addMemoryUsageActivity(
|
||||
kMemoryEventName,
|
||||
e.kineto_info,
|
||||
e.start_time,
|
||||
c10::Device(e.device_type, e.device_index),
|
||||
e.ptr,
|
||||
e.alloc_size,
|
||||
e.total_allocated,
|
||||
e.total_reserved);
|
||||
|
||||
kineto_events_.emplace_back();
|
||||
auto& evt = kineto_events_.back();
|
||||
evt.name(kMemoryEventName)
|
||||
.startUs(e.start_time)
|
||||
.deviceIndex(e.device_index)
|
||||
.deviceType(e.device_type)
|
||||
.nBytes(e.alloc_size)
|
||||
.startThreadId(e.threadID);
|
||||
}
|
||||
|
||||
for (const auto& e : op_events_) {
|
||||
if (e.end_us_ < e.start_us_) {
|
||||
// We initialize end_us_ to the smallest int64_t, so this means that
|
||||
|
|
@ -406,7 +428,7 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalStateBase {
|
|||
py_event_indices_{
|
||||
{ nullptr,
|
||||
std::string("null") }};
|
||||
for (size_t i = 0; i < py_events.size(); i++) {
|
||||
for (const auto i : c10::irange(py_events.size())) {
|
||||
py_event_indices_.insert({py_events[i].get(), std::to_string(i)});
|
||||
}
|
||||
|
||||
|
|
@ -585,6 +607,7 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalStateBase {
|
|||
uint64_t start_time_;
|
||||
std::set<torch::profiler::impl::ActivityType> activities_;
|
||||
std::deque<OpEventData> op_events_;
|
||||
std::deque<MemoryEventData> memory_events_;
|
||||
torch::profiler::impl::kineto::TraceWrapper cpu_trace_;
|
||||
std::vector<KinetoEvent> kineto_events_;
|
||||
// Optional, if event post-processing is enabled.
|
||||
|
|
|
|||
|
|
@ -833,8 +833,7 @@ void gather(
|
|||
|
||||
if (cur_rank == root)
|
||||
{
|
||||
for (int r = 0; r < numranks; r++)
|
||||
{
|
||||
for (const auto r : c10::irange(numranks)) {
|
||||
if (r != root) {
|
||||
auto* recvbuff = reinterpret_cast<char*>(outputs[r].data_ptr());
|
||||
NCCL_CHECK(ncclRecv(recvbuff, count, type, r, comm, stream));
|
||||
|
|
@ -874,8 +873,7 @@ void scatter(
|
|||
NCCL_CHECK(ncclGroupStart());
|
||||
if (cur_rank == root)
|
||||
{
|
||||
for (int r = 0; r < numranks; r++)
|
||||
{
|
||||
for (const auto r : c10::irange(numranks)) {
|
||||
if (r != root) {
|
||||
size_t send_count = inputs[r].numel();
|
||||
auto send_type = to_nccl_data_type(inputs[r]);
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include <c10/util/irange.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#define ERROR(msg_fmt, ...) \
|
||||
|
|
@ -47,7 +48,7 @@ int main(int argc, const char** argv) {
|
|||
auto program_headers = (Elf64_Phdr*)(data + header->e_phoff);
|
||||
auto n_program_headers = header->e_phnum;
|
||||
Elf64_Dyn* dynamic = nullptr;
|
||||
for (size_t i = 0; i < n_program_headers; ++i) {
|
||||
for (const auto i : c10::irange(n_program_headers)) {
|
||||
const Elf64_Phdr* phdr = &program_headers[i];
|
||||
if (phdr->p_type == PT_DYNAMIC) {
|
||||
dynamic = reinterpret_cast<Elf64_Dyn*>(data + phdr->p_offset);
|
||||
|
|
|
|||
|
|
@ -650,11 +650,13 @@ Example::
|
|||
.def(
|
||||
"get",
|
||||
[](::c10d::Store& store, const std::string& key) -> py::bytes {
|
||||
auto value = store.get(key);
|
||||
auto value = [&]() {
|
||||
py::gil_scoped_release guard;
|
||||
return store.get(key);
|
||||
}();
|
||||
return py::bytes(
|
||||
reinterpret_cast<char*>(value.data()), value.size());
|
||||
},
|
||||
py::call_guard<py::gil_scoped_release>(),
|
||||
R"(
|
||||
Retrieves the value associated with the given ``key`` in the store. If ``key`` is not
|
||||
present in the store, the function will wait for ``timeout``, which is defined
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ c10::optional<at::Tensor> runTorchSlice_opset10(
|
|||
return c10::nullopt;
|
||||
}
|
||||
auto axes_a = inputTensorValues[3].accessor<int64_t, 1>();
|
||||
axes.reserve(inputTensorValues[3].sizes()[0]);
|
||||
axes.resize(inputTensorValues[3].sizes()[0]);
|
||||
// ONNX slice accepts negative axis, fix this for aten op
|
||||
for (const auto i : c10::irange(inputTensorValues[3].sizes()[0])) {
|
||||
axes[i] = axes_a[i] < 0 ? axes_a[i] + inputTensorValues[0].sizes().size()
|
||||
|
|
|
|||
|
|
@ -61,5 +61,12 @@ Node* transformToONNXConcatNode(
|
|||
bool need_new_input,
|
||||
int opset_version);
|
||||
|
||||
class ScalarTypeHashFunction {
|
||||
public:
|
||||
size_t operator()(const c10::ScalarType& type) const {
|
||||
return static_cast<size_t>(type);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace jit
|
||||
} // namespace torch
|
||||
|
|
|
|||
|
|
@ -761,6 +761,25 @@ static void fuseListConstructListUnpack(Block* b) {
|
|||
}
|
||||
}
|
||||
|
||||
// https://github.com/pytorch/pytorch/wiki/PyTorch-ONNX-exporter#quantized-model-export
|
||||
static void eraseTupleConstruct(Block* block) {
|
||||
size_t index = 0;
|
||||
// TupleConstruct is generated from the symbolics in quantized domain, and
|
||||
// consumed by other quantized operators. The remaining TupleConstruct should
|
||||
// be at the output of the blocks.
|
||||
for (auto* output : block->outputs()) {
|
||||
auto output_node = output->node();
|
||||
if (output_node->kind() == prim::TupleConstruct) {
|
||||
block->eraseOutput(index);
|
||||
size_t input_index = 0;
|
||||
for (auto* input : output_node->inputs()) {
|
||||
block->insertOutput(index + (input_index++), input);
|
||||
}
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
void removeMaxPoolUnusedOutput(Block* b) {
|
||||
for (auto it = b->nodes().begin(), end = b->nodes().end(); it != end; ++it) {
|
||||
auto n = *it;
|
||||
|
|
@ -1025,6 +1044,7 @@ void PeepholeOptimizeONNX(
|
|||
fuseListConstructListUnpack(graph->block());
|
||||
fuseLogSoftmaxNllLoss(graph->block());
|
||||
eraseListConstruct(graph->block(), opset_version);
|
||||
eraseTupleConstruct(graph->block());
|
||||
EliminateDeadCode(
|
||||
graph->block(),
|
||||
true,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include <c10/util/irange.h>
|
||||
#include <torch/csrc/jit/jit_log.h>
|
||||
#include <torch/csrc/jit/passes/dead_code_elimination.h>
|
||||
#include <torch/csrc/jit/passes/onnx/helper.h>
|
||||
#include <torch/csrc/jit/passes/onnx/scalar_type_analysis.h>
|
||||
|
||||
namespace torch {
|
||||
|
|
@ -11,13 +12,6 @@ using namespace ::c10::onnx;
|
|||
}
|
||||
|
||||
namespace {
|
||||
class ScalarTypeHashFunction {
|
||||
public:
|
||||
size_t operator()(const c10::ScalarType& type) const {
|
||||
return static_cast<size_t>(type);
|
||||
}
|
||||
};
|
||||
|
||||
const int ONNX_OPSET_14 = 14;
|
||||
|
||||
static const std::unordered_map<c10::ScalarType, int, ScalarTypeHashFunction>
|
||||
|
|
|
|||
|
|
@ -702,54 +702,59 @@ void SetShapeValueFromListConstructNode(Node* lc_node) {
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<::c10::ShapeSymbol> Broadcast(const std::vector<::c10::ShapeSymbol> &input_shape_value_0,
|
||||
const std::vector<::c10::ShapeSymbol> &input_shape_value_1) {
|
||||
size_t rank_0 = input_shape_value_0.size();
|
||||
size_t rank_1 = input_shape_value_1.size();
|
||||
size_t rank_max = std::max(rank_0, rank_1);
|
||||
size_t rank_min = std::min(rank_0, rank_1);
|
||||
std::vector<::c10::ShapeSymbol> final_shape;
|
||||
final_shape.reserve(rank_max);
|
||||
for (auto idx = 0; idx < rank_max; idx++) {
|
||||
final_shape.emplace_back(::c10::ShapeSymbol::newSymbol());
|
||||
}
|
||||
for (auto idx = 0; idx < rank_min; idx++) {
|
||||
const c10::ShapeSymbol& ss_shape_0 =
|
||||
input_shape_value_0[rank_0 - 1 - idx];
|
||||
const c10::ShapeSymbol& ss_shape_1 =
|
||||
input_shape_value_1[rank_1 - 1 - idx];
|
||||
bool is_static_0 = ss_shape_0.is_static();
|
||||
bool is_static_1 = ss_shape_1.is_static();
|
||||
if (is_static_0 && is_static_1) {
|
||||
int64_t static_0_sz = ss_shape_0.static_size();
|
||||
int64_t static_1_sz = ss_shape_1.static_size();
|
||||
final_shape[rank_max - 1 - idx] = ::c10::ShapeSymbol::fromStaticSize(
|
||||
std::max(static_0_sz, static_1_sz));
|
||||
} else if (!is_static_0 && !is_static_1) {
|
||||
if (ss_shape_0.value() == ss_shape_1.value()) {
|
||||
final_shape[rank_max - 1 - idx] = ss_shape_0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rank_0 < rank_1) {
|
||||
for (size_t idx = rank_min; idx < rank_max; idx++) {
|
||||
size_t shape_idx = rank_max - 1 - idx;
|
||||
final_shape[shape_idx] = input_shape_value_1[shape_idx];
|
||||
}
|
||||
} else {
|
||||
for (size_t idx = rank_min; idx < rank_max; idx++) {
|
||||
size_t shape_idx = rank_max - 1 - idx;
|
||||
final_shape[shape_idx] = input_shape_value_0[shape_idx];
|
||||
}
|
||||
}
|
||||
return final_shape;
|
||||
}
|
||||
|
||||
void ProcessBroadcastNode(Node* n) {
|
||||
TORCH_INTERNAL_ASSERT(n->inputs().size() == 2);
|
||||
if (ConstantValueMap::HasShape(n->input(0)->debugName()) &&
|
||||
ConstantValueMap::HasShape(n->input(1)->debugName())) {
|
||||
auto input_shape_0 = ConstantValueMap::GetShape(n->input(0)->debugName());
|
||||
auto input_shape_value_0 = input_shape_0.value().sizes();
|
||||
auto input_shape_value_0 = input_shape_0.value().sizes().value();
|
||||
auto input_shape_1 = ConstantValueMap::GetShape(n->input(1)->debugName());
|
||||
auto input_shape_value_1 = input_shape_1.value().sizes();
|
||||
size_t rank_0 = input_shape_value_0.value().size();
|
||||
size_t rank_1 = input_shape_value_1.value().size();
|
||||
size_t rank_max = std::max(rank_0, rank_1);
|
||||
size_t rank_min = std::min(rank_0, rank_1);
|
||||
std::vector<::c10::ShapeSymbol> final_shape;
|
||||
final_shape.reserve(rank_max);
|
||||
for (auto idx = 0; idx < rank_max; idx++) {
|
||||
final_shape.emplace_back(::c10::ShapeSymbol::newSymbol());
|
||||
}
|
||||
for (auto idx = 0; idx < rank_min; idx++) {
|
||||
const c10::ShapeSymbol& ss_shape_0 =
|
||||
input_shape_value_0.value()[rank_0 - 1 - idx];
|
||||
const c10::ShapeSymbol& ss_shape_1 =
|
||||
input_shape_value_1.value()[rank_1 - 1 - idx];
|
||||
bool is_static_0 = ss_shape_0.is_static();
|
||||
bool is_static_1 = ss_shape_1.is_static();
|
||||
if (is_static_0 && is_static_1) {
|
||||
int64_t static_0_sz = ss_shape_0.static_size();
|
||||
int64_t static_1_sz = ss_shape_1.static_size();
|
||||
final_shape[rank_max - 1 - idx] = ::c10::ShapeSymbol::fromStaticSize(
|
||||
std::max(static_0_sz, static_1_sz));
|
||||
} else if (!is_static_0 && !is_static_1) {
|
||||
if (ss_shape_0.value() == ss_shape_1.value()) {
|
||||
final_shape[rank_max - 1 - idx] = ss_shape_0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rank_0 < rank_1) {
|
||||
for (auto idx = rank_min; idx < rank_max; idx++) {
|
||||
auto shape_idx = rank_max - 1 - idx;
|
||||
final_shape[shape_idx] = input_shape_value_1.value()[shape_idx];
|
||||
}
|
||||
} else {
|
||||
for (auto idx = rank_min; idx < rank_max; idx++) {
|
||||
auto shape_idx = rank_max - 1 - idx;
|
||||
final_shape[shape_idx] = input_shape_value_0.value()[shape_idx];
|
||||
}
|
||||
}
|
||||
|
||||
auto input_shape_value_1 = input_shape_1.value().sizes().value();
|
||||
auto final_shape = Broadcast(input_shape_value_0, input_shape_value_1);
|
||||
UpdateShape(n->output(0), c10::SymbolicShape(final_shape));
|
||||
}
|
||||
}
|
||||
|
|
@ -857,6 +862,8 @@ void ProcessMatMulNode(Node* n) {
|
|||
auto input_shape_value_1 = input_shape_1.sizes().value();
|
||||
size_t rank_0 = input_shape_value_0.size();
|
||||
size_t rank_1 = input_shape_value_1.size();
|
||||
// Handle inputs of rank 1 just like numpy.matmul:
|
||||
// https://numpy.org/doc/stable/reference/generated/numpy.matmul.html
|
||||
auto is_rank_0_1 = false;
|
||||
if (rank_0 == 1) {
|
||||
input_shape_value_0.insert(
|
||||
|
|
@ -870,25 +877,20 @@ void ProcessMatMulNode(Node* n) {
|
|||
rank_1 = 2;
|
||||
is_rank_1_1 = true;
|
||||
}
|
||||
size_t rank = std::max(rank_0, rank_1);
|
||||
std::vector<::c10::ShapeSymbol> final_shape;
|
||||
final_shape.reserve(rank);
|
||||
if (rank_0 >= rank_1) {
|
||||
for (auto idx = 0; idx < rank_0 - 2; idx++) {
|
||||
final_shape.emplace_back(input_shape_value_0[idx]);
|
||||
}
|
||||
} else {
|
||||
for (auto idx = 0; idx < rank_1 - 2; idx++) {
|
||||
final_shape.emplace_back(input_shape_value_1[idx]);
|
||||
}
|
||||
// Per https://pytorch.org/docs/stable/generated/torch.matmul.html
|
||||
// the broadcasting logic only applies to the batch dimensions, and not the matrix dimensions
|
||||
// so we remove the matrix dimensions, which are the last 2 dimensions, before broadcasting
|
||||
auto final_shape = Broadcast(
|
||||
std::vector<::c10::ShapeSymbol>(input_shape_value_0.begin(), input_shape_value_0.end() - 2),
|
||||
std::vector<::c10::ShapeSymbol>(input_shape_value_1.begin(), input_shape_value_1.end() - 2)
|
||||
);
|
||||
// add the last 2 dimensions back, unless they did not exist in the first place and were only inserted by this function
|
||||
// Then apply [n,k]X[k,m]=[n,m], where n=input_shape_value_0[rank_0 - 2], m=input_shape_value_1[rank_1 - 1]
|
||||
if (!is_rank_0_1) {
|
||||
final_shape.emplace_back(input_shape_value_0[rank_0 - 2]);
|
||||
}
|
||||
final_shape.emplace_back(input_shape_value_0[rank_0 - 2]);
|
||||
final_shape.emplace_back(input_shape_value_1[rank_1 - 1]);
|
||||
if (is_rank_0_1) {
|
||||
final_shape.erase(final_shape.begin());
|
||||
}
|
||||
if (is_rank_1_1) {
|
||||
final_shape.pop_back();
|
||||
if (!is_rank_1_1) {
|
||||
final_shape.emplace_back(input_shape_value_1[rank_1 - 1]);
|
||||
}
|
||||
UpdateShape(n->output(0), c10::SymbolicShape(final_shape));
|
||||
}
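A small Python sketch of the shape rule this pass implements for MatMul,
with -1 standing in for an unknown (symbolic) dimension; matmul_output_shape
and bcast are illustrative helpers, not part of the pass:

    def matmul_output_shape(shape_0, shape_1):
        is_rank_0_1 = len(shape_0) == 1
        is_rank_1_1 = len(shape_1) == 1
        if is_rank_0_1:
            shape_0 = [1] + shape_0            # [k] -> [1, k]
        if is_rank_1_1:
            shape_1 = shape_1 + [1]            # [k] -> [k, 1]

        def bcast(a, b):
            # mirrors Broadcast() above: max of two static sizes, otherwise a
            # fresh unknown unless the two symbols are identical
            out = []
            for x, y in zip([1] * (len(b) - len(a)) + a, [1] * (len(a) - len(b)) + b):
                out.append(max(x, y) if x > 0 and y > 0 else (x if x == y else -1))
            return out

        batch = bcast(shape_0[:-2], shape_1[:-2])     # broadcast batch dims only
        out = batch + [shape_0[-2], shape_1[-1]]      # [n, k] x [k, m] -> [n, m]
        if is_rank_0_1:
            out = out[:-2] + out[-1:]                 # drop the inserted n
        if is_rank_1_1:
            out = out[:-1]                            # drop the inserted m
        return out

    assert matmul_output_shape([2, 3, 4], [5, 1, 4, 6]) == [5, 2, 3, 6]
    assert matmul_output_shape([4], [4, 6]) == [6]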
|
||||
|
|
@ -1374,6 +1376,8 @@ void ComputeConstant(Node* n, int opset_version) {
|
|||
if (input0_shape_size.has_value()) {
|
||||
auto input0_shape_value = input0_shape_size.value();
|
||||
if (ConstantValueMap::HasValue(n->input(1)->debugName())) {
|
||||
// When value of `shape` is statically known,
|
||||
// output shape can be computed.
|
||||
auto shape_temp = ConstantValueMap::GetValueInto1DInt64Vector(
|
||||
n->input(1)->debugName());
|
||||
auto final_shape =
|
||||
|
|
@ -1381,6 +1385,23 @@ void ComputeConstant(Node* n, int opset_version) {
|
|||
if (final_shape.has_value()) {
|
||||
UpdateShape(n->output(), final_shape.value());
|
||||
}
|
||||
} else if (
|
||||
auto expand_shape =
|
||||
ConstantValueMap::GetShapeInto1DInt64VectorWithOneUnknown(
|
||||
n->input(1)->debugName())) {
|
||||
// When shape of `shape` is statically known,
|
||||
// output rank can be computed.
|
||||
TORCH_INTERNAL_ASSERT(
|
||||
expand_shape.value().size() == 1,
|
||||
"`Shape` input to `Expand` should be a 1-D tensor. Instead got rank ",
|
||||
expand_shape.value().size());
|
||||
if (expand_shape.value()[0] > 0) {
|
||||
std::vector<c10::ShapeSymbol> final_shape;
|
||||
for (const auto i : c10::irange(expand_shape.value()[0])) {
|
||||
final_shape.emplace_back(c10::ShapeSymbol::newSymbol());
|
||||
}
|
||||
UpdateShape(n->output(), c10::SymbolicShape(final_shape));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,12 +9,9 @@
|
|||
#include <torch/csrc/jit/passes/onnx/helper.h>
|
||||
#include <torch/csrc/jit/passes/subgraph_rewrite.h>
|
||||
|
||||
#ifndef AT_PER_OPERATOR_HEADERS
|
||||
// TODO: Switch to per operator headers after
|
||||
// https://github.com/pytorch/pytorch/pull/68693 is merged
|
||||
#include <ATen/Functions.h>
|
||||
#else
|
||||
#include <ATen/ops/quantize_per_tensor.h>
|
||||
#include <ATen/ops/zeros.h>
|
||||
#endif
|
||||
|
||||
#include <stack>
|
||||
|
||||
|
|
@ -104,7 +101,7 @@ double getScaleFromInput(Node* input_node) {
|
|||
input_name);
|
||||
}
|
||||
|
||||
Node* CreateQuantizedWeights(
|
||||
Node* CreateQuantizedWeightsCaffe2(
|
||||
std::string data,
|
||||
std::shared_ptr<Graph>& graph,
|
||||
std::vector<int64_t> shapes,
|
||||
|
|
@ -118,7 +115,7 @@ Node* CreateQuantizedWeights(
|
|||
return const_node;
|
||||
}
|
||||
|
||||
Node* CreateQuantizedBias(
|
||||
Node* CreateQuantizedBiasCaffe2(
|
||||
std::vector<int64_t> data,
|
||||
std::shared_ptr<Graph>& graph,
|
||||
std::vector<int64_t> shapes,
|
||||
|
|
@ -132,6 +129,62 @@ Node* CreateQuantizedBias(
|
|||
return const_node;
|
||||
}
|
||||
|
||||
std::vector<Node*> CreateQuantizedWeights(
|
||||
std::vector<float> data,
|
||||
std::shared_ptr<Graph>& graph,
|
||||
std::vector<int64_t> shapes,
|
||||
float scale,
|
||||
int64_t zero_point) {
|
||||
Node* const_node_1 = graph->create(prim::Constant);
|
||||
auto const_value =
|
||||
at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat)
|
||||
.to(at::kCPU);
|
||||
auto options = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU);
|
||||
at::Tensor const_value_copy = at::empty(c10::IntArrayRef(shapes), options);
|
||||
const_value_copy.copy_(const_value);
|
||||
const_node_1->t_(Symbol::attr("value"), const_value_copy);
|
||||
|
||||
Node* const_node_2 = graph->create(prim::Constant);
|
||||
std::vector<float> scale_v{scale};
|
||||
std::vector<int64_t> scale_shapes{1};
|
||||
auto const_shape =
|
||||
at::from_blob(scale_v.data(), c10::IntArrayRef(scale_shapes), at::kFloat)
|
||||
.to(at::kCPU);
|
||||
at::Tensor const_shape_copy =
|
||||
at::empty(c10::IntArrayRef(scale_shapes), options);
|
||||
const_shape_copy.copy_(const_shape);
|
||||
const_node_2->t_(Symbol::attr("value"), const_shape_copy);
|
||||
|
||||
Node* const_node_3 = graph->create(prim::Constant);
|
||||
std::vector<int64_t> zero_point_v{zero_point};
|
||||
std::vector<int64_t> zero_shapes{1};
|
||||
auto const_zero =
|
||||
at::from_blob(
|
||||
zero_point_v.data(), c10::IntArrayRef(zero_shapes), at::kInt)
|
||||
.to(at::kCPU);
|
||||
at::Tensor const_zero_copy =
|
||||
at::empty(c10::IntArrayRef(zero_shapes), options);
|
||||
const_zero_copy.copy_(const_zero);
|
||||
const_node_3->t_(Symbol::attr("value"), const_zero_copy);
|
||||
|
||||
return {const_node_1, const_node_2, const_node_3};
|
||||
}
|
||||
|
||||
Node* CreateQuantizedBias(
|
||||
std::vector<float> data,
|
||||
std::shared_ptr<Graph>& graph,
|
||||
std::vector<int64_t> shapes) {
|
||||
Node* const_node_1 = graph->create(prim::Constant);
|
||||
auto const_bias =
|
||||
at::from_blob(data.data(), c10::IntArrayRef(shapes), at::kFloat)
|
||||
.to(at::kCPU);
|
||||
auto options = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU);
|
||||
at::Tensor const_bias_copy = at::empty(c10::IntArrayRef(shapes), options);
|
||||
const_bias_copy.copy_(const_bias);
|
||||
const_node_1->t_(Symbol::attr("value"), const_bias_copy);
|
||||
return const_node_1;
|
||||
}
|
||||
|
||||
Node* createIntTuple(
|
||||
const std::vector<int64_t>& is,
|
||||
std::shared_ptr<Graph>& graph) {
|
||||
|
|
@ -158,7 +211,8 @@ void unpackQuantizedWeightsHelper(
|
|||
std::map<std::string, IValue>& paramsDict,
|
||||
const std::string& pattern,
|
||||
const std::string& unpack_fn,
|
||||
QuantizedParamsType params_type) {
|
||||
QuantizedParamsType params_type,
|
||||
bool caffe2 = true) {
|
||||
Graph pattern_graph;
|
||||
std::unordered_map<std::string, Value*> vmap;
|
||||
parseIR(pattern, &pattern_graph, vmap);
|
||||
|
|
@ -368,26 +422,47 @@ void unpackQuantizedWeightsHelper(
|
|||
const int64_t weight_zp = unpacked_weight.q_zero_point() + 128;
|
||||
const int64_t wt_numel = unpacked_weight.numel();
|
||||
|
||||
// Create caffe2::Int8GivenTensorFill node
|
||||
std::ostringstream os;
|
||||
for (const auto i : c10::irange(wt_numel)) {
|
||||
os << static_cast<char>(inp_data[i] + 128);
|
||||
if (caffe2) {
|
||||
// Create caffe2::Int8GivenTensorFill node
|
||||
std::ostringstream os;
|
||||
for (const auto i : c10::irange(wt_numel)) {
|
||||
os << static_cast<char>(inp_data[i] + 128);
|
||||
}
|
||||
Node* c2_weight = CreateQuantizedWeightsCaffe2(
|
||||
os.str(), graph, wt_sizes, unpacked_weight.q_scale(), weight_zp);
|
||||
graph->setInsertPoint(qlinear_node);
|
||||
c2_weight->insertBefore(qlinear_node);
|
||||
qlinear_node->insertInput(1, c2_weight->output());
|
||||
} else {
|
||||
std::vector<float> unpacked_weight_values;
|
||||
unpacked_weight_values.reserve(unpacked_weight.numel());
|
||||
auto unpacked_weight_data =
|
||||
reinterpret_cast<int8_t*>(unpacked_weight.data_ptr<c10::qint8>());
|
||||
for (const auto i : c10::irange(unpacked_weight.numel())) {
|
||||
unpacked_weight_values.push_back(
|
||||
static_cast<float>(unpacked_weight_data[i]));
|
||||
}
|
||||
std::vector<Node*> c2_weight = CreateQuantizedWeights(
|
||||
unpacked_weight_values,
|
||||
graph,
|
||||
wt_sizes,
|
||||
static_cast<float>(unpacked_weight.q_scale()),
|
||||
weight_zp);
|
||||
graph->setInsertPoint(qlinear_node);
|
||||
c2_weight[0]->insertBefore(qlinear_node);
|
||||
qlinear_node->insertInput(1, c2_weight[0]->output());
|
||||
c2_weight[1]->insertBefore(qlinear_node);
|
||||
qlinear_node->insertInput(2, c2_weight[1]->output());
|
||||
c2_weight[2]->insertBefore(qlinear_node);
|
||||
qlinear_node->insertInput(3, c2_weight[2]->output());
|
||||
}
|
||||
|
||||
Node* c2_weight = CreateQuantizedWeights(
|
||||
os.str(), graph, wt_sizes, unpacked_weight.q_scale(), weight_zp);
|
||||
graph->setInsertPoint(qlinear_node);
|
||||
c2_weight->insertBefore(qlinear_node);
|
||||
qlinear_node->insertInput(1, c2_weight->output());
|
||||
|
||||
// Add bias
|
||||
at::Tensor original_bias;
|
||||
if (bias.has_value()) {
|
||||
original_bias = bias.value();
|
||||
original_bias.set_requires_grad(false);
|
||||
} else {
|
||||
// Caffe2 ops always expect bias tensor so if not present create empty
|
||||
// tensor.
|
||||
int64_t bias_size = unpacked_weight.size(0);
|
||||
original_bias =
|
||||
at::zeros(bias_size, unpacked_weight.options().dtype(at::kFloat));
|
||||
|
|
@ -402,24 +477,41 @@ void unpackQuantizedWeightsHelper(
|
|||
input_val->type()->str());
|
||||
|
||||
auto input_node = match_vmap.at(vmap.at("r"))->node()->inputs()[0]->node();
|
||||
auto input_scale = getScaleFromInput(input_node);
|
||||
auto q_bias = at::quantize_per_tensor(
|
||||
original_bias, weight_scale * input_scale, 0, at::kQInt32);
|
||||
at::Tensor q_bias;
|
||||
|
||||
std::vector<int64_t> bias_values;
|
||||
bias_values.reserve(q_bias.numel());
|
||||
auto bias_data = (int32_t*)q_bias.data_ptr<c10::qint32>();
|
||||
for (const auto i : c10::irange(q_bias.numel())) {
|
||||
bias_values.push_back(bias_data[i]);
|
||||
if (caffe2) {
|
||||
auto input_scale = getScaleFromInput(input_node);
|
||||
q_bias = at::quantize_per_tensor(
|
||||
original_bias, weight_scale * input_scale, 0, at::kQInt32);
|
||||
std::vector<int64_t> bias_values;
|
||||
bias_values.reserve(q_bias.numel());
|
||||
auto bias_data = (int32_t*)q_bias.data_ptr<c10::qint32>();
|
||||
for (const auto i : c10::irange(q_bias.numel())) {
|
||||
bias_values.push_back(bias_data[i]);
|
||||
}
|
||||
Node* c2_bias = CreateQuantizedBiasCaffe2(
|
||||
bias_values,
|
||||
graph,
|
||||
q_bias.sizes().vec(),
|
||||
q_bias.q_scale(),
|
||||
q_bias.q_zero_point());
|
||||
c2_bias->insertBefore(qlinear_node);
|
||||
qlinear_node->insertInput(2, c2_bias->output());
|
||||
} else {
|
||||
std::vector<float> bias_values(original_bias.numel());
|
||||
auto bias_data = original_bias.data_ptr<float>();
|
||||
for (const auto i : c10::irange(original_bias.numel())) {
|
||||
bias_values[i] = bias_data[i];
|
||||
}
|
||||
Node* bias =
|
||||
CreateQuantizedBias(bias_values, graph, original_bias.sizes().vec());
|
||||
bias->insertBefore(qlinear_node);
|
||||
// For quantized_linear inputs, the order is input, weight, bias, ....
|
||||
// We unpack weight into 3 values. Then it is
|
||||
// input, weight_value, weight_scale, weight_zero_point, bias, ...
|
||||
// Therefore bias is at location 4.
|
||||
qlinear_node->insertInput(4, bias->output());
|
||||
}
|
||||
Node* c2_bias = CreateQuantizedBias(
|
||||
bias_values,
|
||||
graph,
|
||||
q_bias.sizes().vec(),
|
||||
q_bias.q_scale(),
|
||||
q_bias.q_zero_point());
|
||||
c2_bias->insertBefore(qlinear_node);
|
||||
qlinear_node->insertInput(2, c2_bias->output());
|
||||
|
||||
// add conv arguments: stride, padding, dilation, groups
|
||||
if (stride.has_value() && padding.has_value() && dilation.has_value() &&
|
||||
|
|
@ -444,9 +536,59 @@ void unpackQuantizedWeightsHelper(
|
|||
eraseUnusedValuesFromMap(valsToParamsMap);
|
||||
}
|
||||
}
|
||||
|
||||
static std::
|
||||
unordered_map<c10::ScalarType, c10::ScalarType, ScalarTypeHashFunction>
|
||||
qTypeToValType = {
|
||||
{c10::ScalarType::QInt8, c10::ScalarType::Char},
|
||||
{c10::ScalarType::QUInt8, c10::ScalarType::Byte},
|
||||
{c10::ScalarType::QInt32, c10::ScalarType::Int},
|
||||
{c10::ScalarType::QUInt4x2, c10::ScalarType::Byte},
|
||||
};
|
||||
|
||||
// Unpack quantized tensor inputs into {value, scale, zero_point},
|
||||
// Then create a prim::TupleConstruct node based on these three values.
|
||||
void UnpackQuantizedTensorInputs(std::shared_ptr<Graph>& graph) {
|
||||
for (size_t index = 0; index < graph->inputs().size();) {
|
||||
auto g_input = graph->inputs()[index];
|
||||
TensorTypePtr shape_type = g_input->type()->cast<TensorType>();
|
||||
if (!shape_type || !shape_type->scalarType().has_value()) {
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
auto scalar_type = shape_type->scalarType().value();
|
||||
if (qTypeToValType.find(scalar_type) == qTypeToValType.end()) {
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
std::string input_name = g_input->debugName();
|
||||
auto input_value =
|
||||
graph->insertInput(index, input_name + "_value")
|
||||
->setType(shape_type->withScalarType(qTypeToValType[scalar_type]));
|
||||
// scale and zero_point type can be found at torch/include/ATen/Operators.h
|
||||
auto input_scale =
|
||||
graph->insertInput(index + 1, input_name + "_scale")
|
||||
->setType(TensorType::create(
|
||||
at::kDouble, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
|
||||
auto input_zero_point =
|
||||
graph->insertInput(index + 2, input_name + "_zero_point")
|
||||
->setType(TensorType::create(
|
||||
at::kLong, at::kCPU, 0, /*requires_grad=*/c10::nullopt));
|
||||
std::vector<Value*> converted{input_value, input_scale, input_zero_point};
|
||||
auto input_tuple =
|
||||
graph->prependNode(graph->createTuple(converted))->output();
|
||||
g_input->replaceAllUsesWith(input_tuple);
|
||||
// Erase the original quantized tensor input.
|
||||
graph->eraseInput(index + converted.size());
|
||||
index += 3;
|
||||
}
|
||||
}
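A minimal eager-mode sketch of the {value, scale, zero_point} triple that
UnpackQuantizedTensorInputs produces for each quantized graph input (the
double/long dtypes mirror the scale and zero_point inputs inserted above):

    import torch

    x = torch.randn(4)
    qx = torch.quantize_per_tensor(x, scale=0.1, zero_point=128, dtype=torch.quint8)

    value = qx.int_repr()                                    # Byte tensor for quint8
    scale = torch.tensor(qx.q_scale(), dtype=torch.double)
    zero_point = torch.tensor(qx.q_zero_point(), dtype=torch.long)

    # dequantizing from the unpacked triple matches the quantized tensor
    recovered = (value.double() - zero_point) * scale
    assert torch.allclose(recovered.float(), qx.dequantize())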
|
||||
|
||||
// https://github.com/pytorch/pytorch/wiki/PyTorch-ONNX-exporter#quantized-model-export
|
||||
void UnpackQuantizedWeights(
|
||||
std::shared_ptr<Graph>& graph,
|
||||
std::map<std::string, IValue>& paramsDict) {
|
||||
std::map<std::string, IValue>& paramsDict,
|
||||
bool caffe2) {
|
||||
std::string qlinear = R"(
|
||||
graph(%input, %packed_weight, %w_scale, %w_zero_point):
|
||||
%r = quantized::linear(%input, %packed_weight, %w_scale, %w_zero_point)
|
||||
|
|
@ -472,31 +614,36 @@ void UnpackQuantizedWeights(
|
|||
paramsDict,
|
||||
qlinear,
|
||||
"quantized::linear_unpack",
|
||||
QuantizedParamsType::LINEAR);
|
||||
unpackQuantizedWeightsHelper(
|
||||
graph,
|
||||
paramsDict,
|
||||
qconv2d,
|
||||
"quantized::conv2d_unpack",
|
||||
QuantizedParamsType::CONV);
|
||||
unpackQuantizedWeightsHelper(
|
||||
graph,
|
||||
paramsDict,
|
||||
qconv2d_relu,
|
||||
"quantized::conv2d_unpack",
|
||||
QuantizedParamsType::CONV);
|
||||
unpackQuantizedWeightsHelper(
|
||||
graph,
|
||||
paramsDict,
|
||||
qconv3d,
|
||||
"quantized::conv3d_unpack",
|
||||
QuantizedParamsType::CONV);
|
||||
unpackQuantizedWeightsHelper(
|
||||
graph,
|
||||
paramsDict,
|
||||
qconv3d_relu,
|
||||
"quantized::conv3d_unpack",
|
||||
QuantizedParamsType::CONV);
|
||||
QuantizedParamsType::LINEAR,
|
||||
caffe2);
|
||||
if (caffe2) {
|
||||
unpackQuantizedWeightsHelper(
|
||||
graph,
|
||||
paramsDict,
|
||||
qconv2d,
|
||||
"quantized::conv2d_unpack",
|
||||
QuantizedParamsType::CONV);
|
||||
unpackQuantizedWeightsHelper(
|
||||
graph,
|
||||
paramsDict,
|
||||
qconv2d_relu,
|
||||
"quantized::conv2d_unpack",
|
||||
QuantizedParamsType::CONV);
|
||||
unpackQuantizedWeightsHelper(
|
||||
graph,
|
||||
paramsDict,
|
||||
qconv3d,
|
||||
"quantized::conv3d_unpack",
|
||||
QuantizedParamsType::CONV);
|
||||
unpackQuantizedWeightsHelper(
|
||||
graph,
|
||||
paramsDict,
|
||||
qconv3d_relu,
|
||||
"quantized::conv3d_unpack",
|
||||
QuantizedParamsType::CONV);
|
||||
} else {
|
||||
UnpackQuantizedTensorInputs(graph);
|
||||
}
|
||||
GRAPH_DUMP("After UnpackQuantizedWeights: ", graph);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include <torch/csrc/jit/api/module.h>
|
||||
#include <torch/csrc/jit/ir/ir.h>
|
||||
#include <torch/csrc/onnx/onnx.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
|
@ -10,7 +11,8 @@ namespace jit {
|
|||
|
||||
TORCH_API void UnpackQuantizedWeights(
|
||||
std::shared_ptr<Graph>& graph,
|
||||
std::map<std::string, IValue>& paramsDict);
|
||||
std::map<std::string, IValue>& paramsDict,
|
||||
bool caffe2);
|
||||
TORCH_API void insertPermutes(
|
||||
std::shared_ptr<Graph>& graph,
|
||||
std::map<std::string, IValue>& paramsDict);
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include <torch/csrc/jit/passes/tensorexpr_fuser.h>
|
||||
#include <torch/csrc/jit/passes/utils/subgraph_utils.h>
|
||||
#include <torch/csrc/jit/runtime/custom_operator.h>
|
||||
#include <torch/csrc/jit/runtime/graph_iterator.h>
|
||||
#include <torch/csrc/jit/runtime/jit_trace.h>
|
||||
#include <torch/csrc/jit/runtime/static/impl.h>
|
||||
#include <torch/csrc/jit/runtime/static/ops.h>
|
||||
|
|
@ -322,6 +323,17 @@ void createFusionGroups(Block* block, AliasDb* aliasDb, size_t min_size) {
|
|||
inlineSmallFusionGroups(block, min_size);
|
||||
}
|
||||
|
||||
void inlineFallbackGraphs(std::shared_ptr<Graph> graph) {
|
||||
DepthFirstGraphNodeIterator it(graph);
|
||||
|
||||
Node* n = nullptr;
|
||||
while ((n = it.next()) != nullptr) {
|
||||
if (n->kind() == prim::FallbackGraph) {
|
||||
SubgraphUtils::unmergeSubgraph(n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void performTensorExprFusion(
|
||||
std::shared_ptr<Graph> graph,
|
||||
std::vector<IValue> sample_inputs) {
|
||||
|
|
@ -335,6 +347,7 @@ void performTensorExprFusion(
|
|||
/*min_group_size*/ 2,
|
||||
/*add_composed_op*/ false,
|
||||
/*fuse_to_dynamic_shapes*/ true);
|
||||
inlineFallbackGraphs(traced_graph);
|
||||
graph->block()->clear();
|
||||
graph->block()->cloneFrom(traced_graph->block(), nullptr);
|
||||
GRAPH_DUMP("Graph after fusion: ", graph);
|
||||
|
|
|
|||
|
|
@ -157,10 +157,10 @@ void OptimizeGraph(
|
|||
// TODO: we can avoid this guard by moving operations
|
||||
// to exposed folders.
|
||||
#ifdef FBCODE_CAFFE2
|
||||
if (opts.use_copy_variants) {
|
||||
if (opts.use_copy_variants && !opts.enable_tensorexpr_fusion) {
|
||||
ReplaceWithCopy(graph);
|
||||
}
|
||||
if (opts.use_maybe_copy_variants) {
|
||||
if (opts.use_maybe_copy_variants && !opts.enable_tensorexpr_fusion) {
|
||||
ReplaceWithMaybeCopy(graph);
|
||||
}
|
||||
FuseListUnpack(graph);
|
||||
|
|
|
|||
|
|
@ -166,11 +166,18 @@ struct TORCH_API StaticModuleOptions {
|
|||
bool manage_output_tensors{false};
|
||||
// Gates the ReplaceWithCopy pass, which replaces ops that
|
||||
// sometimes alias their outputs with out variants that
|
||||
// always copy (so the output may participate in memory planning)
|
||||
// always copy (so the output may participate in memory planning).
|
||||
// Since replacing with copies is done after TensorExpr fusion, the
|
||||
// resulting graph does not conform to the assumptions made in the fuser.
|
||||
// So, even if this flag is turned on, the ReplaceWithCopy pass will not
|
||||
// be executed if TensorExpr fusion is enabled.
|
||||
bool use_copy_variants{true};
|
||||
// Gates the ReplaceWithMaybeCopy pass, which replaces ops that
|
||||
// sometimes alias their outputs with subgraphs that include an out
|
||||
// variant.
|
||||
// For the same reason as `use_copy_variants`, the ReplaceWithMaybeCopy pass
|
||||
// will not be executed if TensorExpr fusion is enabled, even if this flag
|
||||
// is turned on.
|
||||
bool use_maybe_copy_variants{true};
|
||||
// enable TensorExpr fusion of ops at model loading time
|
||||
bool enable_tensorexpr_fusion{false};
|
||||
|
|
|
|||
|
@@ -21,6 +21,7 @@
#include <atomic>

#include <onnx/checker.h>
#include <onnx/shape_inference/implementation.h>
#include <onnx/onnx_pb.h>
#include <onnx/proto_utils.h>
@@ -1248,13 +1249,18 @@ std::string serialize_model_proto_to_string(
  return model_proto->SerializeAsString();
}

void check_onnx_proto(const std::string& proto_string) {
void check_onnx_proto(const std::string& proto_string, bool full_check) {
  onnx::ModelProto model;
  if (!ParseProtoFromBytes(&model, proto_string.c_str(), proto_string.size())) {
    throw std::runtime_error("Invalid ONNX proto string.");
    return;
  }
  onnx::checker::check_model(model);

  if (full_check) {
    onnx::shape_inference::InferShapes(model);
  }
}

} // namespace jit
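With the new parameter, callers can opt into ONNX shape inference on top of the structural checker while existing call sites keep their old behavior. A small usage sketch; the model_proto variable is an assumption standing in for whatever the exporter produced:

    std::string proto_str = torch::jit::serialize_model_proto_to_string(model_proto);
    torch::jit::check_onnx_proto(proto_str);                       // structural validation only
    torch::jit::check_onnx_proto(proto_str, /*full_check=*/true);  // also runs onnx::shape_inference::InferShapes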
@@ -61,7 +61,7 @@ export_onnx(
TORCH_API std::string serialize_model_proto_to_string(
    const std::shared_ptr<::ONNX_NAMESPACE::ModelProto>& model_proto);

TORCH_API void check_onnx_proto(const std::string& proto_string);
TORCH_API void check_onnx_proto(const std::string& proto_string, bool full_check=false);

// Serializer for both old-style and unified format TorchScript serialization
class TORCH_API ScriptModuleSerializer {
@@ -85,9 +85,6 @@ class TORCH_API ScriptModuleSerializer {
  void convertNamedType(const c10::NamedTypePtr& class_type);
  void convertTypes(const at::NamedTypePtr& root_type);
  void writeExtraFiles(const Module& module, const ExtraFilesMap& extra_files);
  void writeMobileMetadata(
      const Module& module,
      const ExtraFilesMap& extra_files);
  void writeByteCode(const Module& module, bool save_mobile_debug_info);
  void writeArchive(
      const IValue& value,
@@ -946,6 +946,10 @@ std::shared_ptr<LazyGraphExecutor::Async> LazyGraphExecutor::
  VLOG(3) << "Executing IR graph hash " << HashToString(hash)
          << " on device " << async->device << " done!";

  TORCH_CHECK(async->tensors_data.size() == results.size(),
      "Expected number of outputs does not match TorchScript Stack size: ",
      async->tensors_data.size(), " != ", results.size());

  for (const auto i : c10::irange(results.size())) {
    if (async->tensors_data[i] != nullptr) {
      async->tensors_data[i]->Assign(*results[i]);
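The added TORCH_CHECK turns a silent size mismatch between the computed results and the destination tensor data into a hard error. As a reminder of the macro's semantics, a self-contained sketch with made-up names: when the condition is false, the trailing arguments are stringified and concatenated into the message of the c10::Error that is thrown.

    #include <c10/util/Exception.h>
    #include <vector>

    void checkOutputCount(const std::vector<int>& outputs, size_t expected) {
      // Throws c10::Error with the concatenated message when the condition fails.
      TORCH_CHECK(outputs.size() == expected,
          "Expected number of outputs does not match: ",
          outputs.size(), " != ", expected);
    }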
@@ -3,6 +3,7 @@
#include <c10/core/ScalarType.h>
#include <c10/core/impl/DeviceGuardImplInterface.h>
#include <c10/macros/Macros.h>
#include <c10/util/irange.h>
#include <torch/csrc/lazy/core/tensor_util.h>

namespace torch {
@@ -144,7 +145,7 @@ void LTCTensorImpl::setup_size_properties() {
  // We can't call empty_tensor_restride(c10::MemoryFormat::Contiguous) given we override sizes() too.
  std::vector<int64_t> updated_strides;
  updated_strides = ComputeArrayStrides(shape.Get().sizes());
  for (int i = 0; i < updated_strides.size(); i++) {
  for (const auto i : c10::irange(updated_strides.size())) {
    sizes_and_strides_.stride_at_unchecked(i) = updated_strides[i];
  }
  generation_ = generation;
@@ -1,3 +1,4 @@
#include <c10/util/irange.h>
#include <torch/csrc/lazy/core/view_ops/squeeze.h>
#include <torch/csrc/lazy/ts_backend/ts_lowering_context.h>
@@ -9,7 +10,7 @@ namespace lazy {
std::vector<int64_t> BuildSqueezedDimensions(c10::ArrayRef<int64_t> dimensions,
                                             int64_t squeeze_dim) {
  std::vector<int64_t> output_dimensions;
  for (int64_t i = 0; i < dimensions.size(); ++i) {
  for (const auto i : c10::irange(dimensions.size())) {
    int64_t dim = dimensions[i];
    if (dim != 1 || (i != squeeze_dim && squeeze_dim >= 0)) {
      output_dimensions.push_back(dim);
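The two loop rewrites above follow the same c10::irange migration: the index type is deduced from the loop bound, so the usual signed/unsigned comparison warnings from `int i < container.size()` go away and the intent reads more clearly. A standalone sketch, not tied to any file in this diff:

    #include <c10/util/irange.h>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int64_t> strides = {12, 4, 1};
      // Equivalent to: for (size_t i = 0; i < strides.size(); ++i)
      for (const auto i : c10::irange(strides.size())) {
        std::printf("stride[%zu] = %lld\n", i, static_cast<long long>(strides[i]));
      }
      return 0;
    }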
Some files were not shown because too many files have changed in this diff.