mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/46112

### Summary

This PR adds support for running TorchScript models on the iOS GPU via Metal (inference only). The feature is currently in a prototype state; API changes are expected. The tutorial and the documents will be added once it goes to beta.

allow-large-files

- Users API

```
auto module = torch::jit::load(model);
module.eval();
at::Tensor input = at::ones({1,3,224,224}, at::ScalarType::Float).metal();
auto output = module.forward({input}).toTensor().cpu();
```

- Supported Models
    - Person Segmentation v106 (FB Internal)
    - Mobilenetv2
- Supported Operators
    - aten::conv2d
    - aten::addmm
    - aten::add.Tensor
    - aten::sub.Tensor
    - aten::mul.Tensor
    - aten::relu
    - aten::hardtanh
    - aten::hardtanh_
    - aten::sigmoid
    - aten::max_pool2d
    - aten::adaptive_avg_pool2d
    - aten::reshape
    - aten::t
    - aten::view
    - aten::log_softmax.int
    - aten::upsample_nearest2d.vec
- Supported Devices
    - Apple A9 and above
    - iOS 10.2 and above
- CMake scripts
    - `IOS_ARCH=arm64 ./scripts/build_ios.sh -DUSE_METAL=ON`

### Test Plan

- Circle CI

ghstack-source-id: 114155638

Test Plan:
1. Sandcastle CI
2. Circle CI

Reviewed By: dreiss

Differential Revision: D23236555

fbshipit-source-id: 98ffc48b837e308bc678c37a9a5fd8ae72d11625
140 lines
3.4 KiB
C++
140 lines
3.4 KiB
C++
#include <c10/core/DispatchKey.h>
|
|
|
|
namespace c10 {
|
|
|
|
// Returns a string literal holding the name of the given dispatch key.
//
// Every key handled below maps to a literal spelled exactly like its
// enumerator. Any value without an explicit case yields the sentinel
// "UNKNOWN_TENSOR_TYPE_ID". The returned pointer refers to a string
// literal, so callers must not free or modify it.
const char* toString(DispatchKey t) {
  switch (t) {
    case DispatchKey::Undefined: return "Undefined";

    case DispatchKey::CPU: return "CPU";
    case DispatchKey::CUDA: return "CUDA";
    case DispatchKey::HIP: return "HIP";
    case DispatchKey::FPGA: return "FPGA";
    case DispatchKey::MSNPU: return "MSNPU";
    case DispatchKey::XLA: return "XLA";
    case DispatchKey::Vulkan: return "Vulkan";
    case DispatchKey::Metal: return "Metal";
    case DispatchKey::MKLDNN: return "MKLDNN";
    case DispatchKey::OpenGL: return "OpenGL";
    case DispatchKey::OpenCL: return "OpenCL";
    case DispatchKey::IDEEP: return "IDEEP";
    case DispatchKey::QuantizedCPU: return "QuantizedCPU";
    case DispatchKey::QuantizedCUDA: return "QuantizedCUDA";

    case DispatchKey::ComplexCPU: return "ComplexCPU";
    case DispatchKey::ComplexCUDA: return "ComplexCUDA";

    case DispatchKey::CustomRNGKeyId: return "CustomRNGKeyId";

    case DispatchKey::MkldnnCPU: return "MkldnnCPU";
    case DispatchKey::SparseCPU: return "SparseCPU";
    case DispatchKey::SparseCUDA: return "SparseCUDA";
    case DispatchKey::SparseHIP: return "SparseHIP";

    case DispatchKey::PrivateUse1: return "PrivateUse1";
    case DispatchKey::PrivateUse2: return "PrivateUse2";
    case DispatchKey::PrivateUse3: return "PrivateUse3";

    case DispatchKey::Meta: return "Meta";

    case DispatchKey::Autograd: return "Autograd";
    case DispatchKey::AutogradCPU: return "AutogradCPU";
    case DispatchKey::AutogradCUDA: return "AutogradCUDA";
    case DispatchKey::AutogradXLA: return "AutogradXLA";
    case DispatchKey::AutogradPrivateUse1: return "AutogradPrivateUse1";
    case DispatchKey::AutogradPrivateUse2: return "AutogradPrivateUse2";
    case DispatchKey::AutogradPrivateUse3: return "AutogradPrivateUse3";
    case DispatchKey::AutogradOther: return "AutogradOther";
    case DispatchKey::BackendSelect: return "BackendSelect";
    case DispatchKey::Named: return "Named";

    case DispatchKey::Tracer: return "Tracer";

    case DispatchKey::Autocast: return "Autocast";

    case DispatchKey::Batched: return "Batched";

    case DispatchKey::VmapMode: return "VmapMode";

    case DispatchKey::Math: return "Math";

    case DispatchKey::DefaultBackend: return "DefaultBackend";

    case DispatchKey::TESTING_ONLY_GenericWrapper:
      return "TESTING_ONLY_GenericWrapper";

    case DispatchKey::TESTING_ONLY_GenericMode:
      return "TESTING_ONLY_GenericMode";

    // Any key not listed above falls through to this sentinel name.
    default: return "UNKNOWN_TENSOR_TYPE_ID";
  }
}
|
|
|
|
std::ostream& operator<<(std::ostream& str, DispatchKey rhs) {
|
|
return str << toString(rhs);
|
|
}
|
|
|
|
// Maps a backend dispatch key to the Autograd dispatch key paired with it.
//
// CPU, CUDA, XLA and PrivateUse1..3 each have a dedicated Autograd key.
// Every other input maps to DispatchKey::AutogradOther.
DispatchKey getAutogradKeyFromBackend(DispatchKey t) {
  switch (t) {
    case DispatchKey::CPU: return DispatchKey::AutogradCPU;
    case DispatchKey::CUDA: return DispatchKey::AutogradCUDA;
    case DispatchKey::XLA: return DispatchKey::AutogradXLA;
    case DispatchKey::PrivateUse1: return DispatchKey::AutogradPrivateUse1;
    case DispatchKey::PrivateUse2: return DispatchKey::AutogradPrivateUse2;
    case DispatchKey::PrivateUse3: return DispatchKey::AutogradPrivateUse3;
    // Keys without a dedicated Autograd key share the catch-all one.
    default: return DispatchKey::AutogradOther;
  }
}
|
|
|
|
} // namespace c10
|