// Mirror of https://github.com/zebrajr/pytorch.git
// Synced 2025-12-06 00:20:18 +01:00
//
// Provenance (from the upstream pull request):
// Original author: @YifanShenSZ; original PR: https://github.com/pytorch/pytorch/pull/82754
// Summary: the previous reshape (https://github.com/pytorch/pytorch/pull/80981)
// was ok for forward, but needed improvement for backward: it had to handle
// "sometimes view, sometimes copy" behavior. That pull request fixed it by:
//   1. adding a new alias dispatch key `CompositeImplicitAutogradNestedTensor`,
//      which ideally works as the nested-tensor version of
//      `CompositeImplicitAutograd`;
//   2. registering `reshape_nested` to `reshape` via
//      `CompositeImplicitAutogradNestedTensor`.
// Side changes:
//   * add contiguous memory format support to `clone_nested`
//   * add `view_nested`
//   * add `reshape_as_nested`
// Fixes https://github.com/pytorch/pytorch/issues/83041
// Pull Request resolved: https://github.com/pytorch/pytorch/pull/82754
// Reviewed By: albanD. Differential Revision: D39023822. Pulled By: drisspg.
// Pull Request resolved: https://github.com/pytorch/pytorch/pull/84154
// Approved by: https://github.com/bdhirsh, https://github.com/albanD
#include <c10/core/DispatchKey.h>
|
|
#include <c10/core/DispatchKeySet.h>
|
|
|
|
#include <unordered_map>
|
|
|
|
namespace c10 {
|
|
|
|
// Returns a human-readable name for a BackendComponent bit, for use in
// error messages and debug printing. Unrecognized values map to
// "UNKNOWN_BACKEND_BIT" rather than failing.
const char* toString(BackendComponent t) {
  switch (t) {
    case BackendComponent::CPUBit:
      return "CPUBit";
    case BackendComponent::CUDABit:
      return "CUDABit";
    case BackendComponent::HIPBit:
      return "HIPBit";
    case BackendComponent::XLABit:
      return "XLABit";
    case BackendComponent::LazyBit:
      return "LazyBit";
    case BackendComponent::MetaBit:
      return "MetaBit";
    case BackendComponent::XPUBit:
      return "XPUBit";
    case BackendComponent::IPUBit:
      return "IPUBit";
    case BackendComponent::MPSBit:
      return "MPSBit";
    case BackendComponent::HPUBit:
      return "HPUBit";
    case BackendComponent::VEBit:
      return "VEBit";
    case BackendComponent::PrivateUse1Bit:
      return "PrivateUse1Bit";
    case BackendComponent::PrivateUse2Bit:
      return "PrivateUse2Bit";
    case BackendComponent::PrivateUse3Bit:
      return "PrivateUse3Bit";
    case BackendComponent::InvalidBit:
      return "InvalidBit";
    default:
      return "UNKNOWN_BACKEND_BIT";
  }
}
|
|
|
|
// Maps a DeviceType (e.g. DeviceType::CUDA) to its corresponding
// BackendComponent bit. The case list is generated from the
// C10_FORALL_BACKEND_DEVICE_TYPES X-macro, which pairs each backend
// device type with a DispatchKey of the same name; the actual mapping is
// delegated to toBackendComponent(DispatchKey). Device types with no
// backend bit (and unknown values) map to BackendComponent::InvalidBit.
BackendComponent toBackendComponent(DeviceType device_type) {
  switch (device_type) {
#define DO_CASE(device, _)                          \
  case DeviceType::device: {                        \
    return toBackendComponent(DispatchKey::device); \
  }
    C10_FORALL_BACKEND_DEVICE_TYPES(DO_CASE, unused)
#undef DO_CASE
    default:
      return BackendComponent::InvalidBit;
  }
}
|
|
|
|
// Returns a human-readable name for a DispatchKey, for use in error
// messages and debug printing.
//
// Functionality keys, other "building block" keys, and alias keys are
// handled by the explicit cases below. Per-backend runtime instance keys
// (e.g. CPU, SparseCUDA, AutogradXPU) have no explicit case: they fall
// through to the default branch, which decomposes the key into its
// functionality key and backend component and stitches the printable
// name back together from the two halves.
const char* toString(DispatchKey t) {
  switch (t) {
    case DispatchKey::Undefined:
      return "Undefined";

    case DispatchKey::Dense:
      return "Dense";
    case DispatchKey::FPGA:
      return "FPGA";
    case DispatchKey::ORT:
      return "ORT";
    case DispatchKey::Vulkan:
      return "Vulkan";
    case DispatchKey::Metal:
      return "Metal";

    case DispatchKey::Lazy:
      return "Lazy";
    case DispatchKey::MPS:
      return "MPS";
    case DispatchKey::HPU:
      return "HPU";

    case DispatchKey::Quantized:
      return "Quantized";
    case DispatchKey::CustomRNGKeyId:
      return "CustomRNGKeyId";
    case DispatchKey::MkldnnCPU:
      return "MkldnnCPU";

    case DispatchKey::Sparse:
      return "Sparse";
    case DispatchKey::SparseCsrCPU:
      return "SparseCsrCPU";
    case DispatchKey::SparseCsrCUDA:
      return "SparseCsrCUDA";

    case DispatchKey::NestedTensor:
      return "NestedTensor";

    case DispatchKey::BackendSelect:
      return "BackendSelect";

    case DispatchKey::Python:
      return "Python";

    case DispatchKey::Fake:
      return "Fake";
    case DispatchKey::FuncTorchDynamicLayerBackMode:
      return "FuncTorchDynamicLayerBackMode";

    case DispatchKey::Functionalize:
      return "Functionalize";

    case DispatchKey::Named:
      return "Named";

    case DispatchKey::Conjugate:
      return "Conjugate";
    case DispatchKey::Negative:
      return "Negative";
    case DispatchKey::ZeroTensor:
      return "ZeroTensor";

    case DispatchKey::ADInplaceOrView:
      return "ADInplaceOrView";

    case DispatchKey::AutogradOther:
      return "AutogradOther";
    case DispatchKey::AutogradFunctionality:
      return "AutogradFunctionality";
    case DispatchKey::AutogradNestedTensor:
      return "AutogradNestedTensor";

    case DispatchKey::Tracer:
      return "Tracer";

    case DispatchKey::AutocastCPU:
      return "AutocastCPU";
    case DispatchKey::AutocastXPU:
      return "AutocastXPU";
    case DispatchKey::AutocastCUDA:
      return "AutocastCUDA";

    case DispatchKey::FuncTorchBatched:
      return "FuncTorchBatched";
    case DispatchKey::FuncTorchVmapMode:
      return "FuncTorchVmapMode";

    case DispatchKey::Batched:
      return "Batched";
    case DispatchKey::VmapMode:
      return "VmapMode";

    case DispatchKey::FuncTorchGradWrapper:
      return "FuncTorchGradWrapper";

    case DispatchKey::DeferredInit:
      return "DeferredInit";
    case DispatchKey::PythonTLSSnapshot:
      return "PythonTLSSnapshot";

    // Note [Out-of-tree vmap+grad prototype]
    // The following keys are used in the implementation of the out-of-tree
    // composable functions transforms (vmap+grad) prototype that lives at
    // https://github.com/zou3519/functorch
    // We plan on eventually upstreaming the prototype into core, at which
    // point it will have a different design that should use fewer keys.
    case DispatchKey::FuncTorchDynamicLayerFrontMode:
      return "FuncTorchDynamicLayerFrontMode";

    case DispatchKey::TESTING_ONLY_GenericWrapper:
      return "TESTING_ONLY_GenericWrapper";

    case DispatchKey::TESTING_ONLY_GenericMode:
      return "TESTING_ONLY_GenericMode";

    // Aliases

    case DispatchKey::Autograd:
      return "Autograd";
    case DispatchKey::CompositeImplicitAutograd:
      return "CompositeImplicitAutograd";
    case DispatchKey::CompositeImplicitAutogradNestedTensor:
      return "CompositeImplicitAutogradNestedTensor";
    case DispatchKey::CompositeExplicitAutograd:
      return "CompositeExplicitAutograd";
    case DispatchKey::CompositeExplicitAutogradNonFunctional:
      return "CompositeExplicitAutogradNonFunctional";

    // Per-backend dispatch keys

    default:
      // Per-backend instance key: split it into a backend component bit
      // and a functionality key, then concatenate their names
      // (e.g. Sparse + CUDABit -> "SparseCUDA").
      auto bc = toBackendComponent(t);
      auto fk = toFunctionalityKey(t);

      switch (fk) {
// ENTRY(backend, functionality): one case per backend bit, returning the
// concatenated string literal #functionality #backend.
#define ENTRY(backend, functionality) \
  case BackendComponent::backend##Bit:        \
    return #functionality #backend;

// FORALL_BC(dkname, prefix): one case per per-backend functionality key,
// expanding to an inner switch over the backend component. Unknown
// backends print as "<prefix>Unknown".
#define FORALL_BC(dkname, prefix)                  \
  case DispatchKey::dkname:                        \
    switch (bc) {                                  \
      C10_FORALL_BACKEND_COMPONENTS(ENTRY, prefix) \
      default:                                     \
        return #prefix "Unknown";                  \
    }

        C10_FORALL_FUNCTIONALITY_KEYS(FORALL_BC)

        default:
          // Unrecognized functionality: still try to name the backend.
          switch (bc) {
            C10_FORALL_BACKEND_COMPONENTS(ENTRY, Unknown)
            default:
              return "UnknownUnknown";
          }

#undef FORALL_BC
#undef ENTRY
      }
  }
}
|
|
|
|
std::ostream& operator<<(std::ostream& str, DispatchKey rhs) {
|
|
return str << toString(rhs);
|
|
}
|
|
std::ostream& operator<<(std::ostream& str, BackendComponent rhs) {
|
|
return str << toString(rhs);
|
|
}
|
|
|
|
// Maps a backend component bit to its corresponding Autograd dispatch
// key (e.g. CPUBit -> AutogradCPU).
//
// Implementation note: the canonical backend -> autograd mapping lives
// in getAutogradRelatedKeySetFromBackend rather than here, because that
// function is on a hot path and must not construct DispatchKeySets at
// runtime. It returns the autograd key together with ADInplaceOrView;
// since the autograd key has higher precedence, taking the
// highest-priority id of that set yields exactly the autograd key.
DispatchKey getAutogradKeyFromBackend(BackendComponent k) {
  const auto autograd_related = getAutogradRelatedKeySetFromBackend(k);
  return autograd_related.highestPriorityTypeId();
}
|
|
|
|
// Parses a human-readable dispatch key name (as produced by
// toString(DispatchKey)) back into a c10::DispatchKey.
//
// Only the names listed in the table below are recognized. The table is
// built once, on first call, and reused afterwards.
//
// Fix over the previous revision: the {"MPS", DispatchKey::MPS} entry
// appeared twice in the initializer list (once in the functionality
// group, once in the per-backend group); std::unordered_map keeps only
// the first occurrence of a duplicate key, so the second entry was dead
// weight. It is now listed exactly once.
//
// @param k dispatch key name, e.g. "CPU" or "AutogradCUDA".
// @return the corresponding DispatchKey.
// @throws c10::Error (via TORCH_CHECK) if `k` is not a recognized name.
c10::DispatchKey parseDispatchKey(const std::string& k) {
  static const std::unordered_map<std::string, c10::DispatchKey> key_map = {
      // Functionality / "building block" keys
      {"Undefined", c10::DispatchKey::Undefined},
      {"Dense", c10::DispatchKey::Dense},
      {"FPGA", c10::DispatchKey::FPGA},
      {"ORT", c10::DispatchKey::ORT},
      {"Vulkan", c10::DispatchKey::Vulkan},
      {"Metal", c10::DispatchKey::Metal},
      {"VE", c10::DispatchKey::VE},
      {"Meta", c10::DispatchKey::Meta},
      {"Quantized", c10::DispatchKey::Quantized},
      {"CustomRNGKeyId", c10::DispatchKey::CustomRNGKeyId},
      {"MkldnnCPU", c10::DispatchKey::MkldnnCPU},
      {"Sparse", c10::DispatchKey::Sparse},
      {"SparseCsrCPU", c10::DispatchKey::SparseCsrCPU},
      {"SparseCsrCUDA", c10::DispatchKey::SparseCsrCUDA},
      {"BackendSelect", c10::DispatchKey::BackendSelect},
      {"Python", c10::DispatchKey::Python},
      {"PythonTLSSnapshot", c10::DispatchKey::PythonTLSSnapshot},
      {"Fake", c10::DispatchKey::Fake},
      {"Named", c10::DispatchKey::Named},
      {"Conjugate", c10::DispatchKey::Conjugate},
      {"Negative", c10::DispatchKey::Negative},
      {"ZeroTensor", c10::DispatchKey::ZeroTensor},
      {"FuncTorchDynamicLayerBackMode",
       c10::DispatchKey::FuncTorchDynamicLayerBackMode},
      {"ADInplaceOrView", c10::DispatchKey::ADInplaceOrView},
      {"AutogradOther", c10::DispatchKey::AutogradOther},
      {"AutogradFunctionality", c10::DispatchKey::AutogradFunctionality},
      {"AutogradNestedTensor", c10::DispatchKey::AutogradNestedTensor},
      {"Tracer", c10::DispatchKey::Tracer},
      {"AutocastCPU", c10::DispatchKey::AutocastCPU},
      {"AutocastXPU", c10::DispatchKey::AutocastXPU},
      {"AutocastCUDA", c10::DispatchKey::AutocastCUDA},
      {"FuncTorchBatched", c10::DispatchKey::FuncTorchBatched},
      {"FuncTorchVmapMode", c10::DispatchKey::FuncTorchVmapMode},
      {"Batched", c10::DispatchKey::Batched},
      {"VmapMode", c10::DispatchKey::VmapMode},
      {"DeferredInit", c10::DispatchKey::DeferredInit},
      {"FuncTorchGradWrapper", c10::DispatchKey::FuncTorchGradWrapper},
      {"FuncTorchDynamicLayerFrontMode",
       c10::DispatchKey::FuncTorchDynamicLayerFrontMode},
      {"TESTING_ONLY_GenericWrapper",
       c10::DispatchKey::TESTING_ONLY_GenericWrapper},
      {"TESTING_ONLY_GenericMode", c10::DispatchKey::TESTING_ONLY_GenericMode},

      // Per-backend instance keys
      {"CPU", c10::DispatchKey::CPU},
      {"CUDA", c10::DispatchKey::CUDA},
      {"HIP", c10::DispatchKey::HIP},
      {"XLA", c10::DispatchKey::XLA},
      {"MPS", c10::DispatchKey::MPS},
      {"XPU", c10::DispatchKey::XPU},
      {"IPU", c10::DispatchKey::IPU},
      {"HPU", c10::DispatchKey::HPU},
      {"Lazy", c10::DispatchKey::Lazy},
      {"NestedTensor", c10::DispatchKey::NestedTensor},
      {"NestedTensorCPU", c10::DispatchKey::NestedTensorCPU},
      {"NestedTensorCUDA", c10::DispatchKey::NestedTensorCUDA},
      {"PrivateUse1", c10::DispatchKey::PrivateUse1},
      {"PrivateUse2", c10::DispatchKey::PrivateUse2},
      {"PrivateUse3", c10::DispatchKey::PrivateUse3},

      {"QuantizedCPU", c10::DispatchKey::QuantizedCPU},
      {"QuantizedCUDA", c10::DispatchKey::QuantizedCUDA},
      {"QuantizedXPU", c10::DispatchKey::QuantizedXPU},

      {"SparseCPU", c10::DispatchKey::SparseCPU},
      {"SparseCUDA", c10::DispatchKey::SparseCUDA},
      {"SparseHIP", c10::DispatchKey::SparseHIP},
      {"SparseXPU", c10::DispatchKey::SparseXPU},
      {"SparseVE", c10::DispatchKey::SparseVE},

      {"AutogradCPU", c10::DispatchKey::AutogradCPU},
      {"AutogradCUDA", c10::DispatchKey::AutogradCUDA},
      {"AutogradXLA", c10::DispatchKey::AutogradXLA},
      {"AutogradLazy", c10::DispatchKey::AutogradLazy},
      {"AutogradMeta", c10::DispatchKey::AutogradMeta},
      {"AutogradIPU", c10::DispatchKey::AutogradIPU},
      {"AutogradXPU", c10::DispatchKey::AutogradXPU},
      {"AutogradMPS", c10::DispatchKey::AutogradMPS},
      {"AutogradHPU", c10::DispatchKey::AutogradHPU},
      {"AutogradPrivateUse1", c10::DispatchKey::AutogradPrivateUse1},
      {"AutogradPrivateUse2", c10::DispatchKey::AutogradPrivateUse2},
      {"AutogradPrivateUse3", c10::DispatchKey::AutogradPrivateUse3},

      // Alias keys
      {"Autograd", c10::DispatchKey::Autograd},
      {"CompositeImplicitAutograd",
       c10::DispatchKey::CompositeImplicitAutograd},
      {"CompositeImplicitAutogradNestedTensor",
       c10::DispatchKey::CompositeImplicitAutogradNestedTensor},
      {"CompositeExplicitAutograd",
       c10::DispatchKey::CompositeExplicitAutograd},
      {"CompositeExplicitAutogradNonFunctional",
       c10::DispatchKey::CompositeExplicitAutogradNonFunctional},
  };
  auto it = key_map.find(k);
  TORCH_CHECK(it != key_map.end(), "could not parse dispatch key: ", k);
  return it->second;
}
|
|
|
|
} // namespace c10
|