mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[Intel GPU] xpu-ops codegen via backend whitelist (#130082)
# Motivation This PR intends to enhance the codegen to allow generating code for the XPU backend. XPU operators currently need to be registered in a hand-written way. Developers have no chance to take advantage of shared code to handle tensor meta setting (like strides, proxy output, structured kernels). Manually porting code is error-prone and may lead to high maintenance effort. We utilize the backend_whitelist argument in `gen.py` to generate the XPU-needed headers and source code. # Usage XPU ops lie in `third_party/torch-xpu-ops`; the codegen process is triggered before the compilation of `torch-xpu-ops`. We use the following command to generate XPU operators ` python -m torchgen.gen --source-path path/to/yaml/of/xpu --install-dir build/xpu --per-operator-headers --static-dispatch-backend --backend-whitelist=XPU` The diff lies at `backend-whitelist=XPU`. The backend-whitelist key is an existing argument in torchgen. The inputs of `gen.py` are code templates and an operators yaml. We share the same templates as `aten`. A simplified yaml lies in `third_party/torch-xpu-ops`, which only includes the supported XPU operators. This yaml is a copy-and-modify of `native_functions.yaml`. No extra entry is added; the format is the same as the one in `aten`. # Result All operator headers are generated in `build/xpu/ATen/ops` independently, which would not affect operators declared/defined by CPU/CUDA or any other backend. XPU operators only include headers in this folder. # Verification * In `third_party/torch-xpu-ops`, we migrate all supported kernels to the structured-kernels style, where they are registered through `REGISTER_XPU_DISPATCH` or `TORCH_IMPL_FUNC`, and we have UT verification based on `test_ops.py` Pull Request resolved: https://github.com/pytorch/pytorch/pull/130082 Approved by: https://github.com/EikanWang, https://github.com/gujinghui, https://github.com/atalman ghstack dependencies: #130019
This commit is contained in:
parent
aec8bc5e4c
commit
fe4f8e97cd
|
|
@ -1047,6 +1047,7 @@ if(USE_XPU)
|
|||
# 1. Sources in torch-xpu-ops depend on generated ATen headers.
|
||||
# 2. Using add_custom_command in torch-xpu-ops to define sycl device sources
|
||||
# compilation. add_custom_command requires an explicit dependency.
|
||||
# Collect torch-xpu-ops' ATen sources dir into the XPU include list.
# NOTE: list(APPEND) takes the list VARIABLE NAME, not its dereferenced
# value — writing ${Caffe2_XPU_INCLUDE} expands to the current list
# contents, so the append lands in a variable named after the first
# element (or is a no-op when the list is empty) instead of in
# Caffe2_XPU_INCLUDE itself.
list(APPEND Caffe2_XPU_INCLUDE ${TORCH_XPU_OPS_DIR}/src/ATen/)
|
||||
set(TORCH_XPU_OPS_PYTORCH_DEPS ATEN_CPU_FILES_GEN_TARGET)
|
||||
|
||||
add_subdirectory(${TORCH_ROOT}/third_party/torch-xpu-ops
|
||||
|
|
|
|||
|
|
@ -62,6 +62,9 @@ def gen_registration_headers(
|
|||
headers.append("#include <ATen/cuda/EmptyTensor.h>")
|
||||
elif backend_index.dispatch_key == DispatchKey.MPS:
|
||||
headers.append("#include <ATen/mps/EmptyTensor.h>")
|
||||
elif backend_index.dispatch_key == DispatchKey.XPU:
|
||||
# XPU specific, this header resides in third_party/torch-xpu-ops
|
||||
headers.append("#include <ATen/xpu/EmptyTensor.h>")
|
||||
elif per_operator_headers:
|
||||
headers += [
|
||||
"#include <ATen/ops/empty.h>",
|
||||
|
|
@ -87,6 +90,7 @@ def gen_empty_impl_names(
|
|||
DispatchKey.CPU,
|
||||
DispatchKey.CUDA,
|
||||
DispatchKey.MPS,
|
||||
DispatchKey.XPU,
|
||||
):
|
||||
dispatch = str(backend_index.dispatch_key).lower()
|
||||
empty_impl = f"at::detail::empty_{dispatch}"
|
||||
|
|
@ -95,6 +99,7 @@ def gen_empty_impl_names(
|
|||
DispatchKey.CompositeExplicitAutogradNonFunctional,
|
||||
DispatchKey.QuantizedCPU,
|
||||
DispatchKey.QuantizedCUDA,
|
||||
DispatchKey.XPU,
|
||||
):
|
||||
empty_impl = "at::empty"
|
||||
empty_strided_impl = "at::empty_strided"
|
||||
|
|
@ -639,6 +644,7 @@ if (C10_UNLIKELY(maybe_proxy.has_value())) {
|
|||
DispatchKey.CPU,
|
||||
DispatchKey.CUDA,
|
||||
DispatchKey.MPS,
|
||||
DispatchKey.XPU,
|
||||
DispatchKey.CompositeExplicitAutogradNonFunctional,
|
||||
)
|
||||
return f"""{maybe_set_guard_line}
|
||||
|
|
|
|||
|
|
@ -262,7 +262,12 @@ for fk in FUNCTIONALITY_KEYS:
|
|||
)
|
||||
|
||||
|
||||
STRUCTURED_DISPATCH_KEYS = {DispatchKey.MPS, DispatchKey.CUDA, DispatchKey.CPU}
|
||||
STRUCTURED_DISPATCH_KEYS = {
|
||||
DispatchKey.MPS,
|
||||
DispatchKey.CUDA,
|
||||
DispatchKey.CPU,
|
||||
DispatchKey.XPU,
|
||||
}
|
||||
UFUNC_DISPATCH_KEYS = {DispatchKey.CUDA, DispatchKey.CPU}
|
||||
|
||||
# Set of supported dispatch keys
|
||||
|
|
@ -273,6 +278,7 @@ dispatch_keys = [
|
|||
DispatchKey.MkldnnCPU,
|
||||
DispatchKey.CUDA,
|
||||
DispatchKey.MPS,
|
||||
DispatchKey.XPU,
|
||||
DispatchKey.SparseCUDA,
|
||||
DispatchKey.SparseCsrCUDA,
|
||||
DispatchKey.QuantizedCPU,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user