mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[PyTorch] Add codegen unboxing ability (#69881)
Summary: RFC: https://github.com/pytorch/rfcs/pull/40

This PR (re)introduces Python codegen for unboxing wrappers. Given an entry of `native_functions.yaml`, the codegen generates the corresponding C++ code to convert ivalues from the stack to their proper types. To trigger the codegen, run
```
tools/jit/gen_unboxing.py -d cg/torch/share/ATen
```

CI test changes are merged in. In https://github.com/pytorch/pytorch/issues/71782 I added an e2e test for static dispatch + codegen unboxing. The test exports a mobile model of MobileNetV2, then loads and runs it on a new binary for the lite interpreter: `test/mobile/custom_build/lite_predictor.cpp`.

## Lite predictor build specifics

1. Codegen: `gen.py` generates `RegisterCPU.cpp` and `RegisterSchema.cpp`. With this PR, once `static_dispatch` mode is enabled, `gen.py` no longer generates `TORCH_LIBRARY` API calls in those cpp files, which avoids any interaction with the dispatcher. Once `USE_LIGHTWEIGHT_DISPATCH` is turned on, `cmake/Codegen.cmake` calls `gen_unboxing.py`, which generates `UnboxingFunctions.h`, `UnboxingFunctions_[0-4].cpp` and `RegisterCodegenUnboxedKernels_[0-4].cpp`.
2. Build: `USE_LIGHTWEIGHT_DISPATCH` adds the generated sources into `all_cpu_cpp` in `aten/src/ATen/CMakeLists.txt`. All other files remain unchanged. Strictly speaking the `Operators_[0-4].cpp` files are not necessary, but we can rely on the linker to strip them out.

## Current CI job test coverage update

Created a new CI job `linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build` that enables the following build options:
* `USE_LIGHTWEIGHT_DISPATCH=1`
* `BUILD_LITE_INTERPRETER=1`
* `STATIC_DISPATCH_BACKEND=CPU`

This job triggers `test/mobile/lightweight_dispatch/build.sh`, which builds `libtorch` and then runs the C++ tests written in `test_lightweight_dispatch.cpp` and `test_codegen_unboxing.cpp`. Recent commits added tests to cover as many C++ argument types as possible: in `build.sh` we install the PyTorch Python API so that we can export test models in `tests_setup.py`, then run the C++ test binary to execute these models on the lightweight-dispatch-enabled runtime.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/69881

Reviewed By: iseeyuan

Differential Revision: D33692299

Pulled By: larryliu0820

fbshipit-source-id: 211e59f2364100703359b4a3d2ab48ca5155a023
(cherry picked from commit 58e1c9a25e3d1b5b656282cf3ac2f548d98d530b)
This commit is contained in:
parent 6396547f9e
commit 9ce9803abe
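For readers skimming the diff below: the generated unboxing wrappers follow a stack-based peek/drop/pack pattern (documented in the new `tools/codegen/api/unboxing.py`). As a rough, hypothetical sketch only — the operator, wrapper name, and stack offsets here are assumptions for illustration, not actual generator output — a wrapper for a simple schema has this shape:

```cpp
#include <ATen/ATen.h>
#include <ATen/core/ivalue.h>
#include <ATen/core/stack.h>

using torch::jit::Stack;

// Hypothetical generated wrapper for:
//   aten::add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
void add_Tensor(Stack & stack) {
  // Convert each IValue on the stack into its unboxed C++ type.
  at::Tensor self = (std::move(torch::jit::peek(stack, 0, 3))).to<at::Tensor>();
  at::Tensor other = (std::move(torch::jit::peek(stack, 1, 3))).to<at::Tensor>();
  at::Scalar alpha = (std::move(torch::jit::peek(stack, 2, 3))).to<at::Scalar>();
  // Call the statically dispatched unboxed kernel.
  auto result_ = at::add(self, other, alpha);
  // Pop the consumed inputs and push the result back onto the stack.
  torch::jit::drop(stack, 3);
  torch::jit::pack(stack, std::move(result_));
}
```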
.github/generated-ciflow-ruleset.json (generated, vendored): 9 lines changed
@@ -26,6 +26,7 @@
       "linux-xenial-py3.7-clang7-asan",
       "linux-xenial-py3.7-clang7-onnx",
       "linux-xenial-py3.7-gcc5.4",
+      "linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build",
       "linux-xenial-py3.7-gcc7",
       "linux-xenial-py3.7-gcc7-no-ops",
       "macos-10-15-py3-arm64",
@@ -96,6 +97,7 @@
       "linux-xenial-py3.7-clang7-asan",
       "linux-xenial-py3.7-clang7-onnx",
       "linux-xenial-py3.7-gcc5.4",
+      "linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build",
       "linux-xenial-py3.7-gcc7",
       "linux-xenial-py3.7-gcc7-no-ops",
       "parallelnative-linux-xenial-py3.7-gcc5.4",
@@ -134,6 +136,7 @@
       "linux-xenial-py3.7-clang7-asan",
       "linux-xenial-py3.7-clang7-onnx",
       "linux-xenial-py3.7-gcc5.4",
+      "linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build",
       "linux-xenial-py3.7-gcc7",
       "linux-xenial-py3.7-gcc7-no-ops",
       "macos-arm64-binary-conda",
@@ -164,6 +167,7 @@
     "ciflow/libtorch": [
       "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
       "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
+      "linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build",
       "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7"
     ],
     "ciflow/linux": [
@@ -184,6 +188,7 @@
       "linux-xenial-py3.7-clang7-asan",
       "linux-xenial-py3.7-clang7-onnx",
       "linux-xenial-py3.7-gcc5.4",
+      "linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build",
       "linux-xenial-py3.7-gcc7",
       "linux-xenial-py3.7-gcc7-no-ops",
       "parallelnative-linux-xenial-py3.7-gcc5.4",
@@ -209,7 +214,8 @@
     ],
     "ciflow/mobile": [
       "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static"
+      "linux-xenial-py3-clang5-mobile-custom-build-static",
+      "linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build"
     ],
     "ciflow/noarch": [
       "linux-bionic-py3.7-clang9"
@@ -262,6 +268,7 @@
       "linux-xenial-py3.7-clang7-asan",
       "linux-xenial-py3.7-clang7-onnx",
       "linux-xenial-py3.7-gcc5.4",
+      "linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build",
       "linux-xenial-py3.7-gcc7",
       "linux-xenial-py3.7-gcc7-no-ops",
       "macos-10-15-py3-arm64",
.github/scripts/generate_ci_workflows.py (vendored): 11 lines changed
@@ -527,6 +527,17 @@ LINUX_WORKFLOWS = [
             labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE, LABEL_CIFLOW_DEFAULT},
         ),
     ),
+    CIWorkflow(
+        arch="linux",
+        build_environment="linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build",
+        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.7-gcc5.4",
+        test_runner_type=LINUX_CPU_TEST_RUNNER,
+        build_generates_artifacts=False,
+        exclude_test=True,
+        ciflow_config=CIFlowConfig(
+            labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE, LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LIBTORCH, LABEL_CIFLOW_CPU},
+        ),
+    ),
     CIWorkflow(
         arch="linux",
         build_environment="linux-xenial-py3.7-clang7-asan",
.github/workflows/generated-linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build.yml (new file, generated, vendored): 243 lines
@@ -0,0 +1,243 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build

on:
  pull_request:
  push:
    tags:
      - 'ciflow/all/*'
      - 'ciflow/cpu/*'
      - 'ciflow/libtorch/*'
      - 'ciflow/linux/*'
      - 'ciflow/mobile/*'
      - 'ciflow/trunk/*'
    branches:
      - master
      - main
      - release/*
  workflow_dispatch:

env:
  BUILD_ENVIRONMENT: linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build
  DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4
  SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
  XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
  TORCH_CUDA_ARCH_LIST: 5.2
  IN_CI: 1
  IS_GHA: 1
  # This is used for the phase of adding wheel tests only, will be removed once completed
  IN_WHEEL_TEST: 1
  # Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
  CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  AWS_DEFAULT_REGION: us-east-1
  PR_NUMBER: ${{ github.event.pull_request.number }}
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  PYTORCH_RETRY_TEST_CASES: 1
concurrency:
  group: linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

jobs:

  build:
    runs-on: linux.2xlarge
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: linux-xenial-py3.7-gcc5.4-mobile-lightweight-dispatch-build-build
    outputs:
      docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
    steps:
      - name: print labels
        run: echo "${PR_LABELS}"
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
          submodules: recursive
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
      - name: Calculate docker image tag
        id: calculate-tag
        run: |
          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
          echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
          echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
          echo "::set-output name=docker_tag::${DOCKER_TAG}"
          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
      - name: Check if image should be built
        id: check
        env:
          BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
        run: |
          set -x
          # Check if image already exists, if it does then skip building it
          if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
            exit 0
          fi
          if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
            # if we're on the base branch then use the parent commit
            MERGE_BASE=$(git rev-parse HEAD~)
          else
            # otherwise we're on a PR, so use the most recent base commit
            MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
          fi
          # Covers the case where a previous tag doesn't exist for the tree
          # this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
          if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
            echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
            exit 1
          fi
          PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
          # If no image exists but the hash is the same as the previous hash then we should error out here
          if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
            echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
            echo "       contact the PyTorch team to restore the original images"
            exit 1
          fi
          echo ::set-output name=rebuild::yes
      - name: Build and push docker image
        if: ${{ steps.check.outputs.rebuild }}
        env:
          DOCKER_SKIP_S3_UPLOAD: 1
        working-directory: .circleci/docker
        run: |
          export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
          ./build_docker.sh
      - name: Pull Docker image
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Parse ref
        shell: bash
        id: parse-ref
        run: ./.github/scripts/parse_ref.py
      - name: Build
        env:
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        run: |
          # detached container should get cleaned up by teardown_ec2_linux
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e JOB_BASE_NAME \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e AWS_DEFAULT_REGION \
            -e IS_GHA \
            -e PR_NUMBER \
            -e SHA1 \
            -e BRANCH \
            -e GITHUB_RUN_ID \
            -e SCCACHE_BUCKET \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e SKIP_SCCACHE_INITIALIZATION=1 \
            -e TORCH_CUDA_ARCH_LIST \
            -e PR_LABELS \
            -e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --tty \
            --detach \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
      - name: Display and upload binary build size statistics (Click Me)
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: '${{ github.run_id }}'
        run: |
          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
          export COMMIT_TIME
          pip3 install requests==2.26 boto3==1.16.34
          python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
      - name: Chown workspace
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Clean up docker images
        if: always()
        run: |
          # Prune all of the docker images
          docker system prune -af
@@ -26,6 +26,8 @@ retry pip install --pre torch torchvision \
 # binary, and running forward pass with a real model.
 if [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-static* ]]; then
   TEST_CUSTOM_BUILD_STATIC=1 test/mobile/custom_build/build.sh
+elif [[ "$BUILD_ENVIRONMENT" == *-mobile-lightweight-dispatch* ]]; then
+  test/mobile/lightweight_dispatch/build.sh
 else
   TEST_DEFAULT_BUILD=1 test/mobile/custom_build/build.sh
 fi
@ -570,6 +570,8 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
|
|||
elif [[ "${BUILD_ENVIRONMENT}" == *distributed* || "${JOB_BASE_NAME}" == *distributed* ]]; then
|
||||
test_distributed
|
||||
test_rpc
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *-mobile-lightweight-dispatch* ]]; then
|
||||
test_libtorch
|
||||
elif [[ "${TEST_CONFIG}" = docs_test ]]; then
|
||||
test_docs_test
|
||||
else
|
||||
|
|
|
|||
|
|
@@ -435,8 +435,14 @@ else()
 endif()
 set(SELECTED_OP_LIST "" CACHE STRING
     "Path to the yaml file that contains the list of operators to include for custom build. Include all operators by default.")
-option(
-    STATIC_DISPATCH_BACKEND
-    "Name of the backend for which static dispatch code is generated, e.g.: CPU."
-    "")
+set(STATIC_DISPATCH_BACKEND "" CACHE STRING
+    "Name of the backend for which static dispatch code is generated, e.g.: CPU.")
+option(USE_LIGHTWEIGHT_DISPATCH "Enable codegen unboxing for ATen ops, need to work with static dispatch in order to work properly." OFF)
+if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND)
+  message(FATAL_ERROR "Need to enable static dispatch after enabling USE_LIGHTWEIGHT_DISPATCH.")
+endif()
 option(
     TRACING_BASED
     "Master flag to build Lite Interpreter with tracing build option"
@@ -162,6 +162,9 @@ else()
   )
 endif()

+if(USE_LIGHTWEIGHT_DISPATCH)
+  set(all_cpu_cpp ${all_cpu_cpp} ${generated_unboxing_sources})
+endif()
 if(AT_MKL_ENABLED)
   set(all_cpu_cpp ${all_cpu_cpp} ${mkl_cpp})
 endif()
aten/src/ATen/templates/RegisterCodegenUnboxedKernels.cpp (new file): 41 lines
@@ -0,0 +1,41 @@
#include <torch/csrc/jit/runtime/operator.h>
#include <torch/csrc/jit/runtime/custom_operator.h>
#include <torch/csrc/jit/runtime/register_ops_utils.h>

#include <ATen/UnboxingFunctions.h>

// ${generated_comment}

// NOTE [Sharded File]: This file is generated in a sharded fashion to speed up
// incremental rebuilds. See the comment at the top of
// templates/VariableType.cpp for an analogous, in-depth discussion.
//
// Generated by tools/jit/gen_unboxing.py. This file registers all ATen ops into JIT op registry instead of c10
// dispatcher. JIT op registry only takes boxed kernels, so we are calling unboxing functions in UnboxingFunctions.h
// to cast arguments into C++ types (instead of IValue) and delegate to unboxed kernels.

namespace torch { namespace jit {

using autograd::Variable;
using autograd::variable_list;
using at::Scalar;
using at::ScalarType;
using at::Tensor;
using at::TensorOptions;
using at::DeviceGuard;

using ::c10::fmap;
using ::c10::filter;

namespace {

RegisterOperators reg({

    // Generated operators
    ${unboxed_ops}
});

} // anon namespace


}} // namespace torch::jit
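For context, a hypothetical sketch of what a single `${unboxed_ops}` entry might expand to; the schema string and the `at::unboxing::add_Tensor` function name are assumptions for illustration, not actual generator output:

```cpp
namespace torch { namespace jit {
namespace {
RegisterOperators reg({
    // A boxed kernel that forwards the JIT stack to the codegen'd unboxing
    // function declared in UnboxingFunctions.h.
    Operator(
        "aten::add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor",
        [](Stack& stack) {
            at::unboxing::add_Tensor(stack);
        },
        aliasAnalysisFromSchema()),
});
} // namespace
}} // namespace torch::jit
```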
aten/src/ATen/templates/UnboxingFunctions.cpp (new file): 35 lines
@@ -0,0 +1,35 @@
#include <ATen/UnboxingFunctions.h>
#include <ATen/Functions.h>

#include <ATen/Tensor.h>
#include <ATen/core/functional.h>
#include <ATen/core/interned_strings.h>
#include <ATen/core/ivalue.h>
#include <ATen/core/stack.h>

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstring>
#include <sstream>
#include <stdexcept>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
namespace at {
namespace unboxing {

using ::c10::fmap;
using ::c10::filter;
using torch::jit::peek;
using torch::jit::drop;
using torch::jit::pack;
using torch::jit::pop;

// Generated function declaration
${definitions}

} // namespace unboxing
} // namespace at
aten/src/ATen/templates/UnboxingFunctions.h (new file): 32 lines
@@ -0,0 +1,32 @@
// ${generated_comment}

// Generated by tools/jit/gen_unboxing.py. This file declares code-generated boxed C++ functions for operators,
// based off of native_functions.yaml (or a similar yaml file with the same syntax). The definition of such a boxed
// function pops IValues from the stack and then converts them into the correct C++ types based on the given schema.
// This unboxing logic is an alternative to template-based metaprogramming unboxing.

#pragma once

#include <ATen/ATen.h>
namespace at {
namespace unboxing {
namespace {

template<typename T, size_t N>
std::array<T, N> as_array(const c10::List<c10::IValue>& list) {
  std::array<T, N> res;
  AT_ASSERT(list.size() == N);
  std::vector<T> vec;
  for (c10::IValue elem : list) {
    vec.push_back(elem.to<T>());
  }
  std::copy(vec.begin(), vec.end(), res.begin());
  return res;
}
} // namespace <anonymous>
using Stack = std::vector<c10::IValue>;
// Generated function declaration
${declarations}

} // namespace unboxing
} // namespace at
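A short sketch of how generated code inside `at::unboxing` (where the anonymous-namespace `as_array` above is visible) might use the helper for a fixed-size argument such as `bool[3]`; the argument name and stack offsets are assumptions for illustration:

```cpp
// Convert an IValue holding a bool[3] schema argument (e.g. an output mask)
// into a std::array<bool, 3>; as_array asserts the list really has 3 elements.
c10::IValue output_mask_iv = std::move(torch::jit::peek(stack, 6, 7));
const c10::List<c10::IValue> output_mask_list_in = output_mask_iv.toList();
std::array<bool, 3> output_mask = as_array<bool, 3>(output_mask_list_in);
```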
@@ -1124,13 +1124,16 @@ endif()
       DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch)
 endif()


 if(BUILD_TEST)
   if(BUILD_LITE_INTERPRETER)
     add_subdirectory(
       ${TORCH_ROOT}/test/cpp/lite_interpreter_runtime
       ${CMAKE_BINARY_DIR}/test_lite_interpreter_runtime
     )
+    add_subdirectory(
+      ${TORCH_ROOT}/test/mobile/lightweight_dispatch
+      ${CMAKE_BINARY_DIR}/test_codegen_unboxing
+    )
   else()
     add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit)
     add_subdirectory(
@@ -103,6 +103,44 @@ if(INTERN_BUILD_ATEN_OPS)
       --static_dispatch_backend ${STATIC_DISPATCH_BACKEND})
   endif()

+  # Codegen unboxing
+  if(USE_LIGHTWEIGHT_DISPATCH)
+    file(GLOB_RECURSE all_unboxing_script "${CMAKE_CURRENT_LIST_DIR}/../tools/jit/*.py")
+    list(APPEND CUSTOM_BUILD_FLAGS --skip_dispatcher_op_registration)
+    set(GEN_UNBOXING_COMMAND
+        "${PYTHON_EXECUTABLE}" -m tools.jit.gen_unboxing
+        --source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
+        --install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
+    )
+    set("GEN_UNBOXING_COMMAND_sources"
+        ${GEN_UNBOXING_COMMAND}
+        --output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
+    )
+    message(STATUS "Generating sources for lightweight dispatch")
+    execute_process(
+        COMMAND ${GEN_UNBOXING_COMMAND_sources} --dry-run
+        RESULT_VARIABLE RETURN_VALUE
+        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
+    )
+    if(NOT RETURN_VALUE EQUAL 0)
+      message(FATAL_ERROR "Failed to get generated_unboxing_sources list")
+    endif()
+
+    include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake")
+    add_custom_command(
+        COMMENT "Generating ATen unboxing sources"
+        OUTPUT
+        ${generated_unboxing_sources}
+        ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
+        COMMAND ${GEN_UNBOXING_COMMAND_sources}
+        DEPENDS ${all_unboxing_script} ${sources_templates}
+        ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
+        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
+    )
+  else() # Otherwise do not generate or include sources into build.
+    set(generated_unboxing_sources "")
+  endif()
+
   set(GEN_PER_OPERATOR_FLAG)
   if(USE_PER_OPERATOR_HEADERS)
     list(APPEND GEN_PER_OPERATOR_FLAG "--per-operator-headers")
@@ -182,7 +220,7 @@ if(INTERN_BUILD_ATEN_OPS)
   add_custom_target(ATEN_CPU_FILES_GEN_TARGET DEPENDS
     ${generated_headers} ${core_generated_headers} ${cpu_vec_generated_headers} ${ops_generated_headers}
     ${generated_sources} ${core_generated_sources} ${cpu_vec_generated_sources} ${ops_generated_sources}
-    ${generated_declarations_yaml})
+    ${generated_declarations_yaml} ${generated_unboxing_sources})
   add_custom_target(ATEN_CUDA_FILES_GEN_TARGET DEPENDS
     ${cuda_generated_headers} ${cuda_generated_sources})
   add_library(ATEN_CPU_FILES_GEN_LIB INTERFACE)
@ -117,6 +117,13 @@ if [ "${TRACING_BASED}" == 1 ]; then
|
|||
else
|
||||
CMAKE_ARGS+=("-DTRACING_BASED=OFF")
|
||||
fi
|
||||
if [ "${USE_LIGHTWEIGHT_DISPATCH}" == 1 ]; then
|
||||
CMAKE_ARGS+=("-DUSE_LIGHTWEIGHT_DISPATCH=ON")
|
||||
CMAKE_ARGS+=("-DSTATIC_DISPATCH_BACKEND=CPU")
|
||||
else
|
||||
CMAKE_ARGS+=("-DUSE_LIGHTWEIGHT_DISPATCH=OFF")
|
||||
fi
|
||||
|
||||
CMAKE_ARGS+=("-DBUILD_MOBILE_BENCHMARK=$BUILD_MOBILE_BENCHMARK")
|
||||
CMAKE_ARGS+=("-DBUILD_MOBILE_TEST=$BUILD_MOBILE_TEST")
|
||||
CMAKE_ARGS+=("-DBUILD_PYTHON=OFF")
|
||||
|
|
|
|||
|
|
@ -88,6 +88,12 @@ if [ "${TRACING_BASED}" == 1 ]; then
|
|||
else
|
||||
CMAKE_ARGS+=("-DTRACING_BASED=OFF")
|
||||
fi
|
||||
if [ "${USE_LIGHTWEIGHT_DISPATCH}" == 1 ]; then
|
||||
CMAKE_ARGS+=("-DUSE_LIGHTWEIGHT_DISPATCH=ON")
|
||||
CMAKE_ARGS+=("-DSTATIC_DISPATCH_BACKEND=CPU")
|
||||
else
|
||||
CMAKE_ARGS+=("-DUSE_LIGHTWEIGHT_DISPATCH=OFF")
|
||||
fi
|
||||
|
||||
CMAKE_ARGS+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
|
||||
|
||||
|
|
|
|||
setup.py: 4 lines changed
@@ -506,6 +506,10 @@ class build_ext(setuptools.command.build_ext.build_ext):
                 report('   -- USE_MPI={}'.format(cmake_cache_vars['USE_OPENMPI']))
             else:
                 report('-- Building without distributed package')
+        if cmake_cache_vars['STATIC_DISPATCH_BACKEND']:
+            report('-- Using static dispatch with backend {}'.format(cmake_cache_vars['STATIC_DISPATCH_BACKEND']))
+        if cmake_cache_vars['USE_LIGHTWEIGHT_DISPATCH']:
+            report('-- Using lightweight dispatch')

         # Do not use clang to compile extensions if `-fstack-clash-protection` is defined
         # in system CFLAGS
@@ -23,6 +23,10 @@ target_include_directories(

 target_link_libraries(test_lite_interpreter_runtime PRIVATE torch gtest backend_with_compiler_runtime)

+if(LINUX)
+  target_link_libraries(test_lite_interpreter_runtime PRIVATE "-Wl,--no-as-needed,$<TARGET_FILE:backend_with_compiler_runtime>,--as-needed")
+endif()
+
 if(INSTALL_TEST)
   install(TARGETS test_lite_interpreter_runtime DESTINATION bin)
   # Install PDB files for MSVC builds
test/mobile/lightweight_dispatch/CMakeLists.txt (new file): 23 lines
@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.1)

set(TORCH_ROOT ${CMAKE_CURRENT_LIST_DIR}/../../..)
set(TEST_ROOT ${TORCH_ROOT}/test/mobile/lightweight_dispatch)

add_executable(test_codegen_unboxing
  ${TEST_ROOT}/test_lightweight_dispatch.cpp
  ${TEST_ROOT}/test_codegen_unboxing.cpp
)

target_include_directories(test_codegen_unboxing PRIVATE ${ATen_CPU_INCLUDE})

target_compile_definitions(test_codegen_unboxing PRIVATE USE_GTEST)

set(TEST_UNBOXING_DEPENDENCIES torch gtest)

target_link_libraries(test_codegen_unboxing PRIVATE
  ${TEST_UNBOXING_DEPENDENCIES}
)

if(INSTALL_TEST)
  install(TARGETS test_codegen_unboxing DESTINATION bin)
endif()
test/mobile/lightweight_dispatch/build.sh (new executable file): 55 lines
@@ -0,0 +1,55 @@
#!/bin/bash
# This script should be called from .jenkins/pytorch/build.sh. Assuming we are at pytorch source root directory.

# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.)

set -ex -o pipefail

# shellcheck disable=SC2034
echo "Build lite interpreter with lightweight dispatch."

CUSTOM_TEST_ARTIFACT_BUILD_DIR=${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-${PWD}/../}
mkdir -pv "${CUSTOM_TEST_ARTIFACT_BUILD_DIR}"

BUILD_LIBTORCH_PY="$PWD/tools/build_libtorch.py"
TEST_SRC_ROOT="$PWD/test/mobile/lightweight_dispatch"

pushd "$CUSTOM_TEST_ARTIFACT_BUILD_DIR"

# prepare test
python "$TEST_SRC_ROOT/tests_setup.py" setup

export USE_DISTRIBUTED=0
export USE_LIGHTWEIGHT_DISPATCH=1
export STATIC_DISPATCH_BACKEND="CPU"
export BUILD_LITE_INTERPRETER=1

python "${BUILD_LIBTORCH_PY}"
ret=$?

if [ "$ret" -ne 0 ]; then
  echo "Lite interpreter build failed!"
  exit "$ret"
fi


# run test
if ! build/bin/test_codegen_unboxing; then
  echo "test_codegen_unboxing has failure!"
  exit 1
fi

# shutdown test
python "$TEST_SRC_ROOT/tests_setup.py" shutdown

# run lite interpreter tests
if ! build/bin/test_lite_interpreter_runtime; then
  echo "test_lite_interpreter_runtime has failure!"
  exit 1
fi

popd

exit 0
test/mobile/lightweight_dispatch/test_codegen_unboxing.cpp (new file): 195 lines
@@ -0,0 +1,195 @@
#include <gtest/gtest.h>
#include <test/cpp/jit/test_utils.h>
#include <torch/csrc/jit/api/module.h>
#include <torch/csrc/jit/frontend/resolver.h>
#include <torch/csrc/jit/mobile/import.h>
#include <torch/csrc/jit/mobile/module.h>
// Cover codegen'd unboxing logic for these types:
//   'Device',
//   'Device?',
//   'Dimname',
//   'Dimname[1]',
//   'Dimname[]',
//   'Dimname[]?',
//   'Generator?',
//   'Layout?',
//   'MemoryFormat',
//   'MemoryFormat?',
//   'Scalar',
//   'Scalar?',
//   'ScalarType',
//   'ScalarType?',
//   'Scalar[]',
//   'Storage',
//   'Stream',
//   'Tensor',
//   'Tensor(a!)',
//   'Tensor(a!)[]',
//   'Tensor(a)',
//   'Tensor(b!)',
//   'Tensor(c!)',
//   'Tensor(d!)',
//   'Tensor?',
//   'Tensor?[]',
//   'Tensor[]',
//   'bool',
//   'bool?',
//   'bool[2]',
//   'bool[3]',
//   'bool[4]',
//   'float',
//   'float?',
//   'float[]?',
//   'int',
//   'int?',
//   'int[1]',
//   'int[1]?',
//   'int[2]',
//   'int[2]?',
//   'int[3]',
//   'int[4]',
//   'int[5]',
//   'int[6]',
//   'int[]',
//   'int[]?',
//   'str',
//   'str?'
namespace torch {
namespace jit {
namespace mobile {
// covers int[], ScalarType?, Layout?, Device?, bool?
TEST(LiteInterpreterTest, Ones) {
  // Load check in model: ones.ptl
  auto testModelFile = "ones.ptl";

  // class Model(torch.nn.Module):
  //   def forward(self, x: int):
  //     a = torch.ones([3, x], dtype=torch.int64, layout=torch.strided, device="cpu")
  //     return a
  Module bc = _load_for_mobile(testModelFile);
  std::vector<c10::IValue> input{c10::IValue(4)};
  const auto result = bc.forward(input);
  ASSERT_EQ(result.toTensor().size(0), 3);
  ASSERT_EQ(result.toTensor().size(1), 4);
}

TEST(LiteInterpreterTest, Index) {
  // Load check in model: index.ptl
  auto testModelFile = "index.ptl";

  // class Model(torch.nn.Module):
  //   def forward(self, index):
  //     a = torch.zeros(2, 2)
  //     a[0][1] = 1
  //     a[1][0] = 2
  //     a[1][1] = 3
  //     return a[index]
  Module bc = _load_for_mobile(testModelFile);
  int64_t ind_1 = 0;

  const auto result_1 = bc.forward({at::tensor(ind_1)});

  at::Tensor expected = at::empty({1, 2}, c10::TensorOptions(c10::ScalarType::Float));
  expected[0][0] = 0;
  expected[0][1] = 1;

  AT_ASSERT(result_1.toTensor().equal(expected));
}

TEST(LiteInterpreterTest, Gradient) {
  // Load check in model: gradient.ptl
  auto testModelFile = "gradient.ptl";

  // class Model(torch.nn.Module):
  //   def forward(self, a: int):
  //     values = torch.tensor([4., 1., 1., 16.], )
  //     if a == 0:
  //       return torch.gradient(values, spacing=torch.scalar_tensor(2., dtype=torch.float64))
  //     elif a == 1:
  //       return torch.gradient(values, spacing=[torch.tensor(1.).item()])
  Module bc = _load_for_mobile(testModelFile);

  const auto result_1 = bc.forward({0});
  at::Tensor expected_1 = at::tensor({-1.5, -0.75, 3.75, 7.5}, c10::TensorOptions(c10::ScalarType::Float));
  AT_ASSERT(result_1.toList().get(0).toTensor().equal(expected_1));

  const auto result_2 = bc.forward({1});
  at::Tensor expected_2 = at::tensor({-3.0, -1.5, 7.5, 15.0}, c10::TensorOptions(c10::ScalarType::Float));
  AT_ASSERT(result_2.toList().get(0).toTensor().equal(expected_2));
}

TEST(LiteInterpreterTest, Upsample) {
  // Load check in model: upsample.ptl
  auto testModelFile = "upsample.ptl";

  // model = torch.nn.Upsample(scale_factor=(2.0,), mode="linear")
  Module bc = _load_for_mobile(testModelFile);

  const auto result_1 = bc.forward({at::ones({1, 2, 3})});
  at::Tensor expected_1 = at::ones({1, 2, 6}, c10::TensorOptions(c10::ScalarType::Float));
  AT_ASSERT(result_1.toTensor().equal(expected_1));
}

TEST(LiteInterpreterTest, IndexTensor) {
  // Load check in model: index_Tensor.ptl
  auto testModelFile = "index_Tensor.ptl";

  // class Model(torch.nn.Module):
  //   def forward(self, index):
  //     values = torch.tensor([4., 1., 1., 16.], )
  //     return values[[index, torch.tensor(0)]]
  Module bc = _load_for_mobile(testModelFile);
  const auto result_1 = bc.forward({at::tensor({1}, c10::TensorOptions(c10::ScalarType::Long))});

  at::Tensor expected_1 = at::tensor({1.}, c10::TensorOptions(c10::ScalarType::Float));
  AT_ASSERT(result_1.toTensor().equal(expected_1));
}

TEST(LiteInterpreterTest, Conv2d) {
  // Load check in model: conv2d.ptl
  auto testModelFile = "conv2d.ptl";

  // model = torch.nn.Conv2d(1, 2, (2, 2), stride=(1, 1), padding=(1, 1))
  Module bc = _load_for_mobile(testModelFile);
  const auto result_1 = bc.forward({at::ones({1, 1, 1, 1})});

  ASSERT_EQ(result_1.toTensor().sizes(), c10::IntArrayRef({1, 2, 2, 2}));
}

TEST(LiteInterpreterTest, AddTensor) {
  // Load check in model: add_Tensor.ptl
  auto testModelFile = "add_Tensor.ptl";

  // class Model(torch.nn.Module):
  //   def forward(self, a):
  //     values = torch.ones(size=[2, 3], names=['N', 'C'])
  //     values[0][0] = a[0]
  //     return values
  Module bc = _load_for_mobile(testModelFile);
  const auto result_1 = bc.forward({at::tensor({1, 2, 3}, c10::TensorOptions(c10::ScalarType::Long))});

  at::Tensor expected_1 = at::tensor({2, 3, 4}, c10::TensorOptions(c10::ScalarType::Long));
  AT_ASSERT(result_1.toTensor().equal(expected_1));
}

TEST(LiteInterpreterTest, DivideTensor) {
  // Load check in model: divide_Tensor.ptl
  auto testModelFile = "divide_Tensor.ptl";

  // class Model(torch.nn.Module):
  //   def forward(self, b):
  //     a = torch.tensor(3, dtype=torch.int64)
  //     out = torch.empty(size=[1], dtype=torch.float)
  //     torch.div(b, a, out=out)
  //     return [torch.div(b, a, rounding_mode='trunc'), out]
  Module bc = _load_for_mobile(testModelFile);
  const auto result_1 = bc.forward({at::tensor({-12}, c10::TensorOptions(c10::ScalarType::Long))});

  at::Tensor expected_1 = at::tensor({-4}, c10::TensorOptions(c10::ScalarType::Long));
  at::Tensor expected_2 = at::tensor({-4.}, c10::TensorOptions(c10::ScalarType::Float));
  AT_ASSERT(result_1.toList().get(0).toTensor().equal(expected_1));
  AT_ASSERT(result_1.toList().get(1).toTensor().equal(expected_2));
}
} // namespace mobile
} // namespace jit
} // namespace torch
test/mobile/lightweight_dispatch/test_lightweight_dispatch.cpp (new file): 18 lines
@@ -0,0 +1,18 @@
#include <gtest/gtest.h>

std::string add_negative_flag(const std::string& flag) {
  std::string filter = ::testing::GTEST_FLAG(filter);
  if (filter.find('-') == std::string::npos) {
    filter.push_back('-');
  } else {
    filter.push_back(':');
  }
  filter += flag;
  return filter;
}
int main(int argc, char* argv[]) {
  ::testing::InitGoogleTest(&argc, argv);
  ::testing::GTEST_FLAG(filter) = add_negative_flag("*_CUDA:*_MultiCUDA");

  return RUN_ALL_TESTS();
}
test/mobile/lightweight_dispatch/tests_setup.py (new file): 180 lines
@@ -0,0 +1,180 @@
import os
import sys

import torch


class Setup(object):
    def setup(self):
        raise NotImplementedError()

    def shutdown(self):
        raise NotImplementedError()


class FileSetup(object):
    path = None

    def shutdown(self):
        if os.path.exists(self.path):
            os.remove(self.path)
            pass


class ModelWithDTypeDeviceLayoutPinMemory(FileSetup):
    path = 'ones.ptl'

    def setup(self):
        class Model(torch.nn.Module):
            def forward(self, x: int):
                a = torch.ones(size=[3, x], dtype=torch.int64, layout=torch.strided, device="cpu", pin_memory=False)
                return a

        model = Model()

        # Script the model and save
        script_model = torch.jit.script(model)
        script_model._save_for_lite_interpreter(self.path)


class ModelWithTensorOptional(FileSetup):
    path = 'index.ptl'

    def setup(self):
        class Model(torch.nn.Module):
            def forward(self, index):
                a = torch.zeros(2, 2)
                a[0][1] = 1
                a[1][0] = 2
                a[1][1] = 3
                return a[index]

        model = Model()

        # Script the model and save
        script_model = torch.jit.script(model)
        script_model._save_for_lite_interpreter(self.path)


# gradient.scalarrayint(Tensor self, *, Scalar[] spacing, int? dim=None, int edge_order=1) -> Tensor[]
class ModelWithScalarList(FileSetup):
    path = 'gradient.ptl'

    def setup(self):

        class Model(torch.nn.Module):
            def forward(self, a: int):
                values = torch.tensor([4., 1., 1., 16.], )
                if a == 0:
                    return torch.gradient(values, spacing=torch.scalar_tensor(2., dtype=torch.float64))
                elif a == 1:
                    return torch.gradient(values, spacing=[torch.tensor(1.).item()])

        model = Model()

        # Script the model and save
        script_model = torch.jit.script(model)
        script_model._save_for_lite_interpreter(self.path)


# upsample_linear1d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
class ModelWithFloatList(FileSetup):
    path = 'upsample.ptl'

    def setup(self):
        model = torch.nn.Upsample(scale_factor=(2.0,), mode="linear", align_corners=False, recompute_scale_factor=True)

        # Script the model and save
        script_model = torch.jit.script(model)
        script_model._save_for_lite_interpreter(self.path)


# index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
class ModelWithListOfOptionalTensors(FileSetup):
    path = 'index_Tensor.ptl'

    def setup(self):
        class Model(torch.nn.Module):
            def forward(self, index):
                values = torch.tensor([[4., 1., 1., 16.]])
                return values[torch.tensor(0), index]

        model = Model()
        # Script the model and save
        script_model = torch.jit.script(model)
        script_model._save_for_lite_interpreter(self.path)


# conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1,
# int groups=1) -> Tensor
class ModelWithArrayOfInt(FileSetup):
    path = 'conv2d.ptl'

    def setup(self):
        model = torch.nn.Conv2d(1, 2, (2, 2), stride=(1, 1), padding=(1, 1))
        # Script the model and save
        script_model = torch.jit.script(model)
        script_model._save_for_lite_interpreter(self.path)


# add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
# ones_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None,
# MemoryFormat? memory_format=None) -> Tensor
class ModelWithTensors(FileSetup):
    path = 'add_Tensor.ptl'

    def setup(self):
        class Model(torch.nn.Module):
            def forward(self, a):
                b = torch.ones_like(a)
                return a + b
        model = Model()
        # Script the model and save
        script_model = torch.jit.script(model)
        script_model._save_for_lite_interpreter(self.path)


class ModelWithStringOptional(FileSetup):
    path = 'divide_Tensor.ptl'

    def setup(self):
        class Model(torch.nn.Module):
            def forward(self, b):
                a = torch.tensor(3, dtype=torch.int64)
                out = torch.empty(size=[1], dtype=torch.float)
                torch.div(b, a, out=out)
                return [torch.div(b, a, rounding_mode='trunc'), out]
        model = Model()
        # Script the model and save
        script_model = torch.jit.script(model)
        script_model._save_for_lite_interpreter(self.path)


tests = [
    ModelWithDTypeDeviceLayoutPinMemory(),
    ModelWithTensorOptional(),
    ModelWithScalarList(),
    ModelWithFloatList(),
    ModelWithListOfOptionalTensors(),
    ModelWithArrayOfInt(),
    ModelWithTensors(),
    ModelWithStringOptional(),
]


def setup():
    for test in tests:
        test.setup()


def shutdown():
    for test in tests:
        test.shutdown()


if __name__ == "__main__":
    command = sys.argv[1]
    if command == "setup":
        setup()
    elif command == "shutdown":
        shutdown()
@@ -209,7 +209,6 @@ Check this module for more information.
             return f"c10::impl::check_tensor_options_and_extract_memory_format({options}, {memory_format})"
         except UnsatError:
             return memory_format
-
     elif goal == NamedCType("options", BaseCType(tensorOptionsT)):
         dtype = direct_solve(NamedCType("dtype", OptionalCType(BaseCType(scalarTypeT))))
         pin_memory = direct_solve(NamedCType("pin_memory", OptionalCType(BaseCType(boolT))))
tools/codegen/api/unboxing.py (new file): 208 lines
@@ -0,0 +1,208 @@
from typing import List, Tuple

from tools.codegen.api import cpp
from tools.codegen.api.types import Binding, CType, CppSignatureGroup
from tools.codegen.model import (
    Argument,
    NativeFunction,
    Type,
    BaseType,
    OptionalType,
    ListType,
    BaseTy,
)

# This file generates the code for unboxing wrappers, i.e., the glue logic to unbox a boxed operator and convert the
# ivalues from stack to correct arguments to the unboxed kernel, based on corresponding JIT schema. This codegen is
# an alternative way to generate unboxing wrappers similar to the existing C++ metaprogramming approach but gets the
# job done statically. These generated unboxing wrappers will be useful under the scenario where we need to register
# a fixed set of operators known at compile time and thus can save some time in runtime initialization phase.
#
# Here's an example on how the codegen works:
#
# - Function Schema (source of truth)
#
#      aten::empty.names(int[] size, *, Dimname[]? names,
#                        ScalarType? dtype=None, Layout? layout=None,
#                        Device? device=None, bool? pin_memory=None,
#                        MemoryFormat? memory_format=None) -> Tensor
# - Argument Conversion
#       Generates C++ code to convert an ivalue (from stack) to its underlying C++ type.
#    - int[] size
#        ```cpp
#           const c10::List<c10::IValue> size_list_in = (std::move(peek(stack, 0, 7))).toList();
#
#           std::vector<int64_t> size_vec;
#           for (c10::IValue size_elem: size_list_in) {
#               int64_t size_base = size_elem.to<int64_t>();
#               size_vec.push_back(size_base);
#           }
#           at::ArrayRef<int64_t> size_list_out(size_vec);
#                 ~~~~~~~~~~~~~ <-- The converted argument from ivalues in the stack.
#                                   Will be passed to unboxed kernel.
#        ```
#    - Dimname[]? names
#        ```cpp
#           c10::optional<c10::IValue> names_opt = (std::move(peek(stack, 1, 7))).toOptional<c10::IValue>();
#           c10::optional<at::ArrayRef<at::Dimname>> names_opt_out;
#           if (names_opt.has_value()) {
#                 ~~~~~~~~~~~ <-- Unwrapping optional shell
#               const c10::IValue names_opt_in = names_opt.value();
#               const c10::List<c10::IValue> names_list_in = names_opt_in.toList();
#
#               std::vector<at::Dimname> names_vec;
#               for (c10::IValue names_elem: names_list_in) {
#                     ~~~~~~~~~~~~~~~~~~~~~~~~~ <-- Unrolling list, then convert elements one by one.
#                   at::Dimname names_base = names_elem.to<at::Dimname>();
#                   names_vec.push_back(names_base);
#               }
#               at::ArrayRef<at::Dimname> names_list_out(names_vec);
#
#               names_opt_out = c10::optional<at::ArrayRef<at::Dimname>>(names_list_out);
#           } else {
#               names_opt_out = c10::optional<at::ArrayRef<at::Dimname>>();
#           }
#        ```
#    - ScalarType? dtype (similarly for the rest of the arguments)
#        ```cpp
#           c10::optional<c10::IValue> dtype_opt = (std::move(peek(stack, 2, 7))).toOptional<c10::IValue>();
#           c10::optional<at::ScalarType> dtype_opt_out;
#           if (dtype_opt.has_value()) {
#               const c10::IValue dtype_opt_in = dtype_opt.value();
#               at::ScalarType dtype_base = dtype_opt_in.to<at::ScalarType>();
#                     ~~~~~~~~~~~~~~~~~~~~ <-- For base types, convert ivalue to it
#                                               directly using ".to<T>()" API.
#               dtype_opt_out = c10::optional<at::ScalarType>(dtype_base);
#           } else {
#               dtype_opt_out = c10::optional<at::ScalarType>();
#           }
#        ```
#
# - Unboxed Kernel Call
#    ```cpp
#       auto result_ = torch::empty(
#           size_list_out,
#           names_opt_out,
#           options,
#           memory_format_opt_out
#       );
#    ```
#
# - Push Result Back to Stack
#    ```cpp
#       drop(stack, 7);
#       pack(stack, std::move(result_));
#    ```
connector = "\n\t"


# Return unboxing function name for a NativeFunction
def name(f: NativeFunction) -> str:
    return f.func.name.unambiguous_name()


# Convert all the arguments in a NativeFunction to C++ code
def convert_arguments(f: NativeFunction) -> Tuple[List[Binding], List[str]]:
    # we need the 'self' argument so method needs to be False
    args = CppSignatureGroup.from_native_function(f, method=False).most_faithful_signature().arguments()
    code_list = [f"c10::IValue {args[i].name} = std::move(peek(stack, {i}, {len(args)}));" for i in
                 range(len(args))] + [""]
    binding_list = []
    for i, arg in enumerate(args):
        # expecting only Argument
        if not isinstance(arg.argument, Argument):
            raise Exception(f"Unexpected argument type, expecting `Argument` but got {arg}")
        argument: Argument = arg.argument
        unboxed_name, _, code, decl = argumenttype_ivalue_convert(
            argument.type, argument.name, mutable=argument.is_write
        )
        code_list.extend(decl)
        code_list.extend(code)
        binding_list.append(arg.with_name(unboxed_name))
    return binding_list, code_list


# Takes in the type, name and mutability corresponding to an argument, and generates a tuple of:
# (1) the C++ code necessary to unbox the argument
# (2) A Binding corresponding to the newly created unboxed variable, including variable name and its CType
def argumenttype_ivalue_convert(t: Type, arg_name: str, *, mutable: bool = False) -> Tuple[str, CType, List[str], List[str]]:
    ctype = cpp.argumenttype_type(t=t, mutable=mutable, binds=arg_name).type

    if isinstance(t, BaseType):
        out_name = f"{arg_name}_base"
        code, decl = _gen_code_base_type(arg_name=arg_name, out_name=out_name, ctype=ctype)
    elif isinstance(t, OptionalType):
        out_name = f"{arg_name}_opt_out"
        code, decl = _gen_code_optional_type(arg_name=arg_name, out_name=out_name, t=t, ctype=ctype)
    elif isinstance(t, ListType):
        out_name = f"{arg_name}_list_out"
        code, decl = _gen_code_list_type(arg_name=arg_name, out_name=out_name, t=t, ctype=ctype)
    else:
        raise Exception(f"Cannot handle type {t}. arg_name: {arg_name}")
    return out_name, ctype, code, decl


def _gen_code_base_type(arg_name: str, out_name: str, ctype: CType) -> Tuple[List[str], List[str]]:
    return [f"{ctype.cpp_type(strip_ref=True)} {out_name} = {arg_name}.to<{ctype.cpp_type(strip_ref=True)}>();"], []


def _gen_code_optional_type(arg_name: str, out_name: str, t: OptionalType, ctype: CType) -> Tuple[List[str], List[str]]:
    in_name = f"{arg_name}_opt_in"
    res_name, _, res_code, decl = argumenttype_ivalue_convert(t.elem, in_name)
    return f"""
c10::optional<c10::IValue> {arg_name}_opt = {arg_name}.toOptional<c10::IValue>();
{ctype.cpp_type(strip_ref=True)} {out_name};
if ({arg_name}_opt.has_value()) {{
    const c10::IValue {in_name} = {arg_name}_opt.value();
    {connector.join(res_code)}
    {out_name} = {ctype.cpp_type(strip_ref=True)}({res_name});
}} else {{
    {out_name} = {ctype.cpp_type(strip_ref=True)}();
}}
""".split("\n"), decl


def _gen_code_list_type(arg_name: str, out_name: str, t: ListType, ctype: CType) -> Tuple[List[str], List[str]]:
    in_name = f"{arg_name}_list_in"
    elem_name = f"{arg_name}_elem"
    code = [f"const c10::List<c10::IValue> {in_name} = {arg_name}.toList();"]
    res_name, res_ctype, res_code, decl = argumenttype_ivalue_convert(t.elem, elem_name)
    # handle list type with size, e.g., bool[4]
    if isinstance(t.elem, BaseType) and t.elem.name == BaseTy.bool and t.size:
        code.extend(
            f"""
{ctype.cpp_type(strip_ref=True)} {out_name} = as_array<{res_ctype.cpp_type(strip_ref=True)}, {t.size}>({in_name});
""".split(
                "\n"
            )
        )
    # we have to use c10::List for optional element. e.g., Tensor?[] -> c10::List<c10::optional<at::Tensor>>
    elif isinstance(t.elem, OptionalType):
        code.extend(
            f"""
{ctype.cpp_type(strip_ref=True)} {out_name};
for (c10::IValue {elem_name}: {in_name}) {{
    {connector.join(res_code)}
    {out_name}.push_back({res_name});
}}
""".split(
                "\n"
            )
        )
    else:
        # use ArrayRef as default.
        vec_name = arg_name + "_vec"
        # need to bring vector instantiation out of scope so that ArrayRef has valid data
        decl.append(f"std::vector<{res_ctype.cpp_type(strip_ref=True)}> {vec_name};")
        code.extend(
            f"""
for (c10::IValue {elem_name}: {in_name}) {{
    {connector.join(res_code)}
    {vec_name}.push_back({res_name});
}}
{ctype.cpp_type(strip_ref=True)} {out_name}({vec_name});
""".split(
                "\n"
            )
        )
    return code, decl
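To connect the helpers above to concrete output: for an `int[] size` argument, the default ArrayRef branch of `_gen_code_list_type` emits C++ of roughly the following shape (mirroring the worked example in the module comment; the surrounding stack bookkeeping is elided):

```cpp
// decl: hoisted so the ArrayRef's backing storage outlives the conversion block
std::vector<int64_t> size_vec;
// code: unroll the boxed list, converting one element at a time
const c10::List<c10::IValue> size_list_in = size.toList();
for (c10::IValue size_elem: size_list_in) {
    int64_t size_base = size_elem.to<int64_t>();
    size_vec.push_back(size_base);
}
at::ArrayRef<int64_t> size_list_out(size_vec);
```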
@@ -28,7 +28,7 @@ import tools.codegen.api.structured as structured
 from tools.codegen.api.translate import translate
 from tools.codegen.selective_build.selector import SelectiveBuilder
 from tools.codegen.utils import (
-    Target, concatMap, context, mapMaybe, YamlDumper, YamlLoader, FileManager, assert_never
+    Target, concatMap, context, mapMaybe, YamlDumper, YamlLoader, FileManager, assert_never, make_file_manager
 )
 from tools.codegen.context import (method_with_native_function,
                                    native_function_manager,
@@ -250,8 +250,8 @@ def static_dispatch_extra_headers(backend: Optional[BackendIndex], skip_tensor_i


 def static_dispatch(
-    f: NativeFunction, cpp_sig: CppSignature,
-    *, method: bool, backend_index: Optional[BackendIndex]
+        f: NativeFunction, cpp_sig: CppSignature,
+        *, method: bool, backend_index: Optional[BackendIndex]
 ) -> Optional[str]:
     if backend_index is None or f.manual_kernel_registration:
         return None
@@ -369,7 +369,7 @@ static C10_NOINLINE c10::TypedOperatorHandle<{name}::schema> create_{name}_typed
         assert_never(self.target)


-# Generates Function.h, which provides the functional public C++ API,
+# Generates Functions.h, which provides the functional public C++ API,
 # and the scaffolding to call into the dispatcher from these functions.
 @dataclass(frozen=True)
 class ComputeFunction:
@@ -952,7 +952,8 @@ def compute_registration_declarations(f: NativeFunction, backend_indices: Dict[D
     comment_data : Dict[str, str] = {
         'schema': f'aten::{f.func}',
         # TODO: What exactly is the semantics of the 'dispatch' field?
-        'dispatch': str({k for k, v in backend_indices.items() if v.has_kernel(f)} != {DispatchKey.CompositeImplicitAutograd}),
+        'dispatch': str(
+            {k for k, v in backend_indices.items() if v.has_kernel(f)} != {DispatchKey.CompositeImplicitAutograd}),
         'default': str(f.has_composite_kernel or has_autogenerated_composite_kernel(f))
     }
     return f"""{returns_type} {name}({args_str}); // {json.dumps(comment_data)}
@@ -1350,7 +1351,6 @@ def gen_source_files(
         native_functions: Sequence[NativeFunction],
         grouped_native_functions: Sequence[Union[NativeFunction, NativeFunctionsGroup]],
         structured_native_functions: Sequence[NativeFunctionsGroup],
-        static_dispatch_idx: Optional[BackendIndex],
         selector: SelectiveBuilder,
         backend_indices: Dict[DispatchKey, BackendIndex],
         core_fm: FileManager,
@@ -1362,6 +1362,7 @@ def gen_source_files(
         rocm: bool,
         force_schema_registration: bool,
         per_operator_headers: bool,
+        skip_dispatcher_op_registration: bool,
 ) -> None:
     extra_cuda_headers = '''\
 #include <c10/cuda/CUDAGuard.h>
@@ -1446,7 +1447,7 @@ def gen_source_files(
                 class_method_name=None),
             grouped_native_functions
         )),
-        'dispatch_registrations': list(concatMap(
+        'dispatch_registrations': [] if skip_dispatcher_op_registration else list(concatMap(
             dest.RegisterDispatchKey(
                 backend_index,
                 Target.REGISTRATION,

@@ -1507,7 +1508,8 @@ def gen_source_files(
     if force_schema_registration:
         schema_selector = SelectiveBuilder.get_nop_selector()
     cpu_fm.write('RegisterSchema.cpp', lambda: {
-        'schema_registrations': list(mapMaybe(RegisterSchema(schema_selector), native_functions)),
+        'schema_registrations': [] if skip_dispatcher_op_registration
+        else list(mapMaybe(RegisterSchema(schema_selector), native_functions)),
     })

 def key_func(fn: Union[NativeFunction, NativeFunctionsGroup]) -> str:

@@ -1630,6 +1632,10 @@ def main() -> None:
     parser.add_argument(
         '--static_dispatch_backend',
         help='generate static dispatch code for the specific backend (if set)')
+    parser.add_argument(
+        '--skip_dispatcher_op_registration',
+        action='store_true',
+        help='Avoid registering operators into the dispatcher.')
     parser.add_argument(
         '--force_schema_registration',
         action='store_true',

@@ -1656,8 +1662,6 @@ def main() -> None:
     structured_native_functions = [g for g in grouped_native_functions
                                    if isinstance(g, NativeFunctionsGroup)]

-    template_dir = os.path.join(options.source_path, "templates")
-
     # NB: It is mandatory to NOT use os.path.join here, as the install directory
     # will eventually be ingested by cmake, which does not respect Windows style
     # path slashes. If you switch this to use os.path.join, you'll get an error

@@ -1673,18 +1677,11 @@ def main() -> None:
     ops_install_dir = f'{options.install_dir}/ops'
     pathlib.Path(ops_install_dir).mkdir(parents=True, exist_ok=True)

-    def make_file_manager(install_dir: str) -> FileManager:
-        return FileManager(
-            install_dir=install_dir,
-            template_dir=template_dir,
-            dry_run=options.dry_run
-        )
-
-    core_fm = make_file_manager(core_install_dir)
-    cpu_fm = make_file_manager(options.install_dir)
-    cpu_vec_fm = make_file_manager(options.install_dir)
-    cuda_fm = make_file_manager(options.install_dir)
-    ops_fm = make_file_manager(ops_install_dir)
+    core_fm = make_file_manager(options=options, install_dir=core_install_dir)
+    cpu_fm = make_file_manager(options=options)
+    cpu_vec_fm = make_file_manager(options=options)
+    cuda_fm = make_file_manager(options=options)
+    ops_fm = make_file_manager(options=options, install_dir=ops_install_dir)

     extra_cuda_headers = '''\
 #include <c10/cuda/CUDAGuard.h>

@@ -1721,7 +1718,6 @@ def main() -> None:
         native_functions=native_functions,
         grouped_native_functions=grouped_native_functions,
         structured_native_functions=structured_native_functions,
         static_dispatch_idx=static_dispatch_idx,
         selector=selector,
         backend_indices=backend_indices,
         core_fm=core_fm,

@@ -1733,6 +1729,7 @@ def main() -> None:
         rocm=options.rocm,
         force_schema_registration=options.force_schema_registration,
         per_operator_headers=options.per_operator_headers,
+        skip_dispatcher_op_registration=options.skip_dispatcher_op_registration,
     )

     if 'headers' in options.generate:

@@ -4,6 +4,7 @@ import hashlib
 import os
 import re
 import textwrap
+from argparse import Namespace
 from typing import Tuple, List, Iterable, Iterator, Callable, Sequence, TypeVar, Optional, Dict, Any, Union, Set, NoReturn
 from enum import Enum

@@ -235,3 +236,10 @@ class FileManager:
         content = 'set({}\n    {})'.format(
             variable_name, '\n    '.join('"' + name + '"' for name in sorted(self.filenames)))
         self._write_if_changed(filename, content)
+
+
+# Helper function to generate file manager
+def make_file_manager(options: Namespace, install_dir: Optional[str] = None) -> FileManager:
+    template_dir = os.path.join(options.source_path, "templates")
+    install_dir = install_dir if install_dir else options.install_dir
+    return FileManager(install_dir=install_dir, template_dir=template_dir, dry_run=options.dry_run)

tools/jit/gen_unboxing.py (new file, 182 lines)
@@ -0,0 +1,182 @@
# Generates RegisterCodegenUnboxedKernels.cpp, UnboxingFunctions.h and UnboxingFunctions.cpp.
import argparse
import os
import pathlib
from dataclasses import dataclass
from tools.codegen.api import unboxing
from tools.codegen.api.translate import translate
from tools.codegen.api.types import CppSignatureGroup
from tools.codegen.api.unboxing import convert_arguments
from tools.codegen.context import method_with_native_function
from tools.codegen.gen import parse_native_yaml, cpp_string
from tools.codegen.model import NativeFunction, NativeFunctionsGroup, Variant
from tools.codegen.utils import Target, FileManager, mapMaybe, make_file_manager
from typing import Union, Sequence
from typing_extensions import Literal


# Generates UnboxingFunctions.h & UnboxingFunctions.cpp.
@dataclass(frozen=True)
class ComputeUnboxingFunctions:
    target: Union[Literal[Target.DECLARATION], Literal[Target.DEFINITION]]

    @method_with_native_function
    def __call__(self, f: NativeFunction) -> str:

        if self.target is Target.DECLARATION:
            # Note [The ATen Codegen Unboxing API]
            # Similar to the ATen Operators API, ATen Codegen Unboxing API lives in the at::unboxing namespace, and
            # will be used by codegen unboxing wrappers (CodegenUnboxingWrappers.cpp).
            # The Wrappers will be registered into torch::jit::OperatorRegistry using RegisterOperators API.
            #
            # Important characteristics about the Codegen Unboxing API:
            # (1) It follows the OperatorRegistry API.
            #     This is kind of necessary to avoid overhead.
            #     For example: if it followed the C++ API, then all of the faithful C++ factory functions
            #     would need to wrap their arguments into TensorOptions only to unwrap them again.
            # (2) Under the hood it calls C++ API.
            return f"""
// aten::{f.func}
TORCH_API void {f.func.name.unambiguous_name()}(Stack & stack);
"""
        else:
            sig_group = CppSignatureGroup.from_native_function(
                f, method=(Variant.method in f.variants)
            )
            sig = sig_group.most_faithful_signature()
            # parse arguments into C++ code
            binding_list, code_list = convert_arguments(f)

            # for each C++ argument, generate the conversion code
            code_connector = "\n\t"
            arg_connector = ", "
            # function call and push back to stack
            prefix = "self_base." if sig.method else "at::"
            translated_args = translate(binding_list, sig.arguments(), method=sig.method)
            args_str = f"{arg_connector.join(e.expr for e in translated_args)}"
            if len(f.func.returns) == 0:
                ret_str = ""
                push_str = ""
            else:
                ret_str = "auto result_ = "
                push_str = """
    pack(stack, std::move(result_));
"""
            return f"""
// aten::{f.func}
TORCH_API void {f.func.name.unambiguous_name()}(Stack & stack) {{
    {code_connector.join(code_list)}

    drop(stack, {len(binding_list)});

    {ret_str}{prefix}{sig.name()}({args_str});
    {push_str}
}}
"""


# Generates RegisterCodegenUnboxedKernels.cpp.
@dataclass(frozen=True)
class ComputeCodegenUnboxedKernels:
    @method_with_native_function
    def __call__(self, f: NativeFunction) -> str:
        # We unconditionally generate function wrappers,
        sig_group = CppSignatureGroup.from_native_function(
            f, method=(Variant.method in f.variants)
        )

        sig = sig_group.most_faithful_signature()

        # escape double quote in schema, get rid of extra double quotes
        schema = cpp_string(str(sig.func))[1:-1]

        return f"""
OperatorGenerator(
    TORCH_SELECTIVE_SCHEMA("aten::{schema}"),
    [](Stack & stack) {{
        RECORD_FUNCTION("{sig.name()}", std::vector<c10::IValue>());
        at::unboxing::{unboxing.name(f)}(stack);
    }},
    aliasAnalysisFromSchema()
),
"""


def gen_unboxing(
    *,
    native_functions: Sequence[NativeFunction],
    cpu_fm: FileManager,
) -> None:
    def key_func(fn: Union[NativeFunction, NativeFunctionsGroup]) -> str:
        return fn.root_name

    cpu_fm.write_sharded(
        "UnboxingFunctions.cpp",
        native_functions,
        key_fn=key_func,
        env_callable=lambda fn: {
            "definitions": [ComputeUnboxingFunctions(Target.DEFINITION)(fn)]
        },
        num_shards=5,
        sharded_keys={"definitions"},
    )
    cpu_fm.write(
        "UnboxingFunctions.h",
        lambda: {
            "declarations": list(
                mapMaybe(ComputeUnboxingFunctions(Target.DECLARATION), native_functions)
            ),
        },
    )
    cpu_fm.write_sharded(
        "RegisterCodegenUnboxedKernels.cpp",
        native_functions,
        key_fn=key_func,
        env_callable=lambda fn: {"unboxed_ops": [ComputeCodegenUnboxedKernels()(fn)]},
        num_shards=5,
        sharded_keys={"unboxed_ops"},
    )


def main() -> None:
    parser = argparse.ArgumentParser(description="Generate unboxing source files")
    parser.add_argument(
        "-s",
        "--source-path",
        help="path to source directory for ATen",
        default="aten/src/ATen",
    )
    parser.add_argument(
        "-d", "--install_dir", help="output directory", default="build/aten/src/ATen"
    )
    parser.add_argument(
        '-o',
        '--output-dependencies',
        help='output a list of dependencies into the given file and exit')
    parser.add_argument(
        '--dry-run', action='store_true',
        help='run without writing any files (still updates outputs)')

    options = parser.parse_args()

    native_yaml_path = os.path.join(options.source_path, "native/native_functions.yaml")
    parsed_yaml = parse_native_yaml(native_yaml_path)
    native_functions, backend_indices = (
        parsed_yaml.native_functions,
        parsed_yaml.backend_indices,
    )

    cpu_fm = make_file_manager(options=options)
    gen_unboxing(native_functions=native_functions, cpu_fm=cpu_fm)

    if options.output_dependencies:
        depfile_path = pathlib.Path(options.output_dependencies).resolve()
        depfile_name = depfile_path.name
        depfile_stem = depfile_path.stem

        path = depfile_path.parent / depfile_name
        cpu_fm.write_outputs(depfile_stem, str(path))


if __name__ == "__main__":
    main()

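To make the two generators above concrete, here is roughly what they emit for a simple illustrative schema such as `aten::cos(Tensor self) -> Tensor`. First, `ComputeUnboxingFunctions` with `Target.DEFINITION` produces an unboxing wrapper; the argument-conversion line below is a sketch of what `convert_arguments` yields, not verbatim output:

```
// aten::cos(Tensor self) -> Tensor
TORCH_API void cos(Stack & stack) {
    // sketched conversion; the real line comes from convert_arguments
    auto self_base = (std::move(peek(stack, 0, 1))).to<at::Tensor>();

    drop(stack, 1);

    auto result_ = at::cos(self_base);

    pack(stack, std::move(result_));
}
```

`ComputeCodegenUnboxedKernels` then registers that wrapper through an entry shaped like this (same illustrative schema):

```
OperatorGenerator(
    TORCH_SELECTIVE_SCHEMA("aten::cos(Tensor self) -> Tensor"),
    [](Stack & stack) {
        RECORD_FUNCTION("cos", std::vector<c10::IValue>());
        at::unboxing::cos(stack);
    },
    aliasAnalysisFromSchema()
),
```
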
@@ -278,7 +278,8 @@ class CMake:
                 'ONNX_NAMESPACE',
                 'ATEN_THREADING',
                 'WERROR',
-                'OPENSSL_ROOT_DIR')
+                'OPENSSL_ROOT_DIR',
+                'STATIC_DISPATCH_BACKEND')
             })

         # Aliases which are lower priority than their canonical option

@@ -289,7 +290,6 @@ class CMake:
             'CMAKE_CUDA_COMPILER': 'CUDA_NVCC_EXECUTABLE',
             'CUDACXX': 'CUDA_NVCC_EXECUTABLE'
         }

         for var, val in my_env.items():
             # We currently pass over all environment variables that start with "BUILD_", "USE_", and "CMAKE_". This is
             # because we currently have no reliable way to get the list of all build options we have specified in

tools/setup_helpers/gen_unboxing.py (new file, 11 lines)
@@ -0,0 +1,11 @@
# Little stub file to get BUILD.bazel to play along

import os.path
import sys

root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.insert(0, root)

import tools.jit.gen_unboxing

tools.jit.gen_unboxing.main()