Support to ONNXIFI op (#8749)
Summary: This PR adds basic support for the ONNXIFI op. Closes https://github.com/pytorch/pytorch/pull/8749

Reviewed By: Maratyszcza

Differential Revision: D8665739

Pulled By: yinghai

fbshipit-source-id: 961916f9e1a4a26390b73c4b648d177883143a22
This commit is contained in:
parent 37e526e1a8
commit c2a89b69b9
caffe2/onnx/onnxifi_init.cc (new file, 22 lines)

#include "caffe2/onnx/onnxifi_init.h"

#include <mutex>

#include "caffe2/core/logging.h"

namespace caffe2 {
namespace onnx {
onnxifi_library* initOnnxifiLibrary() {
  static std::once_flag once;
  static onnxifi_library core{};
  std::call_once(once, []() {
    auto ret =
        onnxifi_load(ONNXIFI_LOADER_FLAG_VERSION_1_0, nullptr, nullptr, &core);
    if (!ret) {
      CAFFE_THROW("Cannot load onnxifi lib");
    }
  });
  return &core;
}
} // namespace onnx
} // namespace caffe2
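
The loader above resolves the ONNXIFI function table exactly once and throws if no implementation can be found. As a minimal sketch (not part of this commit; it assumes an ONNXIFI implementation is installed and discoverable at runtime), a caller could probe for backends like this:

// Sketch: probe the lazily-loaded ONNXIFI library for available backends.
// initOnnxifiLibrary() is thread-safe thanks to the std::call_once above.
#include <cstdio>

#include "caffe2/onnx/onnxifi_init.h"

int main() {
  onnxifi_library* lib = caffe2::onnx::initOnnxifiLibrary();
  size_t num_backends = 0;
  // A first call with a null buffer just reports how many backend IDs exist.
  lib->onnxGetBackendIDs(nullptr, &num_backends);
  std::printf("ONNXIFI backends available: %zu\n", num_backends);
  return 0;
}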
caffe2/onnx/onnxifi_init.h (new file, 9 lines)

#pragma once

#include "onnx/onnxifi_loader.h"

namespace caffe2 {
namespace onnx {
onnxifi_library* initOnnxifiLibrary();
} // namespace onnx
} // namespace caffe2
caffe2/operators/onnxifi_op.cc (new file)

#include "caffe2/operators/onnxifi_op.h"

namespace caffe2 {

namespace {

void BlobToTensorDescriptor(
    const std::string& name,
    Workspace* ws,
    onnxTensorDescriptor* desc,
    std::vector<std::vector<uint64_t>>* shapes) {
  const Blob* blob = ws->GetBlob(name);
  CAFFE_ENFORCE(blob, "Blob ", name, " doesn't exist");

  // Memory type
  // We only allow weights to be CPU tensors for now
  CAFFE_ENFORCE(
      blob->template IsType<TensorCPU>(),
      "Initialization blob ",
      name,
      " needs to be TensorCPU");
  desc->memoryType = ONNXIFI_MEMORY_TYPE_CPU;

  // Data type
  const auto& cpu_tensor = blob->template Get<TensorCPU>();
  if (cpu_tensor.template IsType<float>()) {
    desc->dataType = ONNXIFI_DATATYPE_FLOAT32;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<float>());
  } else if (cpu_tensor.template IsType<int64_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_INT64;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int64_t>());
  } else if (cpu_tensor.template IsType<int32_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_INT32;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int32_t>());
  }

  // Set dims
  const auto& shape = cpu_tensor.dims();
  desc->dimensions = shape.size();
  shapes->emplace_back(shape.cbegin(), shape.cend());
  desc->shape = shapes->back().data();
}
} // namespace

template <>
std::vector<onnxTensorDescriptor>
OnnxifiOp<float, CPUContext>::BuildInitializationList(
    Workspace* ws,
    std::unordered_set<std::string>* initialization_list,
    std::vector<std::string>* weight_names,
    std::vector<std::vector<uint64_t>>* weight_shapes) {
  const std::vector<string>& ws_blobs = ws->Blobs();
  std::vector<onnxTensorDescriptor> descs;
  for (const auto& s : ws_blobs) {
    auto it = initialization_list->find(s);
    if (it != initialization_list->end()) {
      weight_names->emplace_back(s);
      onnxTensorDescriptor tensor_desc;
      tensor_desc.name = weight_names->back().c_str();
      BlobToTensorDescriptor(s, ws, &tensor_desc, weight_shapes);
      descs.push_back(tensor_desc);
      initialization_list->erase(it);
    }
  }
  CAFFE_ENFORCE(
      initialization_list->empty(), "Unfulfilled initialization list");
  return descs;
}

template <>
bool OnnxifiOp<float, CPUContext>::RunOnDevice() {
  // Reset the per-run shape storage and pre-reserve it so that the
  // emplace_back calls below cannot reallocate and invalidate shape
  // pointers already handed to earlier descriptors.
  input_shapes_.clear();
  input_shapes_.reserve(InputSize());
  output_shapes_.clear();
  output_shapes_.reserve(OutputSize());
  for (unsigned i = 0U; i < InputSize(); ++i) {
    const auto& input_tensor = Input(i);
    const auto& tensor_dims = input_tensor.dims();
    auto& tensor_descriptor = input_desc_.at(i);
    tensor_descriptor.dataType = ONNXIFI_DATATYPE_FLOAT32;
    tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
    tensor_descriptor.dimensions = tensor_dims.size();
    input_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
    tensor_descriptor.shape = input_shapes_.back().data();
    tensor_descriptor.buffer =
        reinterpret_cast<onnxPointer>(input_tensor.data<float>());
  }

  for (unsigned i = 0U; i < OutputSize(); ++i) {
    auto* output_tensor = Output(i);
    std::vector<TIndex> tensor_dims;
    SetOutputShape(i, &tensor_dims);
    output_tensor->Resize(tensor_dims);
    auto& tensor_descriptor = output_desc_.at(i);
    tensor_descriptor.dataType = ONNXIFI_DATATYPE_FLOAT32;
    tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
    tensor_descriptor.dimensions = tensor_dims.size();
    output_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
    tensor_descriptor.shape = output_shapes_.back().data();
    tensor_descriptor.buffer =
        reinterpret_cast<onnxPointer>(output_tensor->mutable_data<float>());
  }

  CAFFE_ENFORCE_EQ(
      lib_->onnxSetGraphIO(
          graph_,
          input_desc_.size(),
          input_desc_.data(),
          output_desc_.size(),
          output_desc_.data()),
      ONNXIFI_STATUS_SUCCESS);

  onnxMemoryFence input_fence;
  input_fence.event = nullptr;
  input_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
  CAFFE_ENFORCE_EQ(
      lib_->onnxInitEvent(backend_, &input_fence.event),
      ONNXIFI_STATUS_SUCCESS);
  onnxMemoryFence output_fence;
  output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
  output_fence.event = nullptr;

  // Call the async run on the backend, signal the event on the input fence
  // and wait for the event on the output fence
  CAFFE_ENFORCE_EQ(
      lib_->onnxRunGraph(graph_, &input_fence, &output_fence),
      ONNXIFI_STATUS_SUCCESS);
  CAFFE_ENFORCE_EQ(
      lib_->onnxSignalEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
  CAFFE_ENFORCE_EQ(
      lib_->onnxWaitEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);

  // Destroy the event objects
  CAFFE_ENFORCE_EQ(
      lib_->onnxReleaseEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
  CAFFE_ENFORCE_EQ(
      lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);

  return true;
}

REGISTER_CPU_OPERATOR(Onnxifi, OnnxifiOp<float, CPUContext>);
OPERATOR_SCHEMA(Onnxifi)
    .NumInputs(0, INT_MAX)
    .NumOutputs(0, INT_MAX)
    .SetDoc(R"DOC(
The Onnxifi operator is a black-box operator that lowers the computation to an ONNXIFI backend
)DOC")
    .Arg(
        "onnx_model",
        "(string default=\"\") Serialized ONNX model to be converted to backend representation")
    .Arg(
        "initializers",
        "Initialization pairs mapping the names of initializers between the NetDef and the ONNX model");
} // namespace caffe2
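
Given the schema above, a client packs the serialized ONNX model and the flat list of (NetDef name, ONNX name) initializer pairs into operator arguments. A hedged sketch follows; the blob names "X", "Y", "W" and the helper RunOnnxifiOnce are illustrative placeholders, not part of this commit:

// Sketch: build an OperatorDef for the Onnxifi op and run it once.
#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

void RunOnnxifiOnce(caffe2::Workspace* ws, const std::string& onnx_model_str) {
  caffe2::OperatorDef def;
  def.set_type("Onnxifi");
  def.add_input("X"); // graph input blob, assumed to already exist in *ws
  def.add_output("Y"); // graph output blob
  // "onnx_model": the serialized ONNX ModelProto.
  auto* model_arg = def.add_arg();
  model_arg->set_name("onnx_model");
  model_arg->set_s(onnx_model_str);
  // "initializers": flat list of (NetDef name, ONNX name) pairs.
  auto* init_arg = def.add_arg();
  init_arg->set_name("initializers");
  init_arg->add_strings("W"); // NetDef weight blob name
  init_arg->add_strings("W"); // corresponding initializer name in the ONNX model
  auto op = caffe2::CreateOperator(def, ws);
  CAFFE_ENFORCE(op->Run());
}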
caffe2/operators/onnxifi_op.h (new file)

#pragma once

#include <unordered_map>
#include <unordered_set>

#include "onnx/onnx_pb.h"

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/onnx/onnxifi_init.h"
#include "caffe2/utils/string_utils.h"

namespace caffe2 {

template <typename T, typename Context>
class OnnxifiOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  OnnxifiOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {
    lib_ = onnx::initOnnxifiLibrary();
    CAFFE_ENFORCE(lib_, "Cannot initialize ONNXIFI library");
    auto onnx_model_str =
        OperatorBase::GetSingleArgument<std::string>("onnx_model", "");
    CAFFE_ENFORCE(!onnx_model_str.empty(), "onnx_model cannot be empty");

    // Setup input/output descriptor templates
    for (const auto& input : operator_def.input()) {
      input_desc_.push_back(onnxTensorDescriptor());
      input_desc_.back().name = input.c_str();
    }
    int output_idx = 0;
    for (const auto& output : operator_def.output()) {
      output_desc_.push_back(onnxTensorDescriptor());
      output_desc_.back().name = output.c_str();

      // For each output, we try to get its output size hint
      const std::string key = MakeString("output_size_hint_", output_idx);
      auto output_size_hint = OperatorBase::GetRepeatedArgument<int>(key);
      if (!output_size_hint.empty()) {
        std::vector<TIndex> dims;
        for (const auto v : output_size_hint) {
          dims.push_back(v);
        }
        output_size_hints_.emplace(output_idx, std::move(dims));
      }
      ++output_idx;
    }

    // Encode arguments starting with "custom_" to the backend
    std::vector<uint64_t> property_pointers;
    std::vector<int64_t> int_args;
    std::vector<float> float_args;
    BuildPropertyList(operator_def, &property_pointers, &int_args, &float_args);

    // Pull the weights from the workspace and feed them to the backend
    // through setGraphIO. Notice that since we may have rewritten the net,
    // we need to map the weight names
    auto initializers =
        OperatorBase::GetRepeatedArgument<std::string>("initializers");
    CAFFE_ENFORCE_EQ(
        initializers.size() % 2, 0, "initializers should come in pairs");
    std::unordered_set<std::string> initializer_set;
    std::unordered_map<std::string, std::string> input_mapping;
    for (auto it = initializers.begin(); it != initializers.end(); ++it) {
      auto key = *it++;
      input_mapping.emplace(key, *it);
      initializer_set.emplace(key);
    }
    Workspace mapped_ws(ws, input_mapping);
    std::vector<std::string> weight_names;
    std::vector<std::vector<uint64_t>> weight_shapes;
    auto weight_descs = BuildInitializationList(
        &mapped_ws, &initializer_set, &weight_names, &weight_shapes);

    ::ONNX_NAMESPACE::ModelProto onnx_model;
    ParseProtoFromLargeString(onnx_model_str, &onnx_model);
    onnx_model_str.clear();
    onnx_model.SerializeToString(&onnx_model_str);

    // Build the Onnxifi engine
    // TODO: In the spec, backends are hot-pluggable, so two calls to
    // onnxGetBackendIDs may report different numbers of backends, and we
    // should retry until the result is consistent. For now, we don't do that.
    CAFFE_ENFORCE_EQ(
        lib_->onnxGetBackendIDs(nullptr, &num_backends_),
        ONNXIFI_STATUS_SUCCESS);
    backend_ids_.resize(num_backends_);
    // The second call fills backend_ids_; num_backends passes the buffer
    // capacity in and receives the actual count back.
    size_t num_backends = num_backends_;
    CAFFE_ENFORCE_EQ(
        lib_->onnxGetBackendIDs(backend_ids_.data(), &num_backends),
        ONNXIFI_STATUS_SUCCESS);

    CAFFE_ENFORCE_GT(
        num_backends_, 0, "At least 1 onnxifi backend should be available");

    // TODO: choose backend id
    CAFFE_ENFORCE_EQ(
        lib_->onnxInitBackend(
            backend_ids_[0], property_pointers.data(), &backend_),
        ONNXIFI_STATUS_SUCCESS);
    CAFFE_ENFORCE_EQ(
        lib_->onnxInitGraph(
            backend_,
            onnx_model_str.size(),
            (void*)(onnx_model_str.c_str()),
            weight_descs.size(),
            weight_descs.data(),
            &graph_),
        ONNXIFI_STATUS_SUCCESS);
  }

  ~OnnxifiOp() {
    if (graph_) {
      if (lib_->onnxReleaseGraph(graph_) != ONNXIFI_STATUS_SUCCESS) {
        LOG(ERROR) << "Error when calling onnxReleaseGraph";
      }
      graph_ = nullptr;
    }
    if (backend_) {
      if (lib_->onnxReleaseBackend(backend_) != ONNXIFI_STATUS_SUCCESS) {
        LOG(ERROR) << "Error when calling onnxReleaseBackend";
      }
      backend_ = nullptr;
    }
    for (unsigned i = 0; i < num_backends_; ++i) {
      if (lib_->onnxReleaseBackendID(backend_ids_[i]) !=
          ONNXIFI_STATUS_SUCCESS) {
        LOG(ERROR) << "Error when calling onnxReleaseBackendID";
      }
    }
  }

  bool RunOnDevice() override;

 private:
  void SetOutputShape(int output_idx, std::vector<TIndex>* dims) {
    const auto it = output_size_hints_.find(output_idx);
    if (it != output_size_hints_.end()) {
      *dims = it->second;
    }
  }

  void BuildPropertyList(
      const OperatorDef& /* unused */,
      std::vector<uint64_t>* property_list,
      std::vector<int64_t>* /* unused */,
      std::vector<float>* /* unused */) {
    property_list->push_back(ONNXIFI_BACKEND_PROPERTY_NONE);
  }

  std::vector<onnxTensorDescriptor> BuildInitializationList(
      Workspace* ws,
      std::unordered_set<std::string>* initialization_list,
      std::vector<std::string>* weight_names,
      std::vector<std::vector<uint64_t>>* weight_shapes);

  // pointer to the loaded onnxifi library
  onnxifi_library* lib_{nullptr};

  std::vector<onnxBackendID> backend_ids_;
  onnxBackend backend_{nullptr};
  onnxGraph graph_{nullptr};
  size_t num_backends_{0};

  // input/output descriptors
  std::vector<onnxTensorDescriptor> input_desc_;
  std::vector<onnxTensorDescriptor> output_desc_;

  std::vector<std::vector<uint64_t>> input_shapes_;
  std::vector<std::vector<uint64_t>> output_shapes_;

  // output shape hints
  std::unordered_map<int, std::vector<TIndex>> output_size_hints_;
};

} // namespace caffe2
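
Because output shapes are not inferred from the model here, callers can pass static per-output hints via the output_size_hint_<idx> arguments parsed in the constructor above. An illustrative sketch (the {1, 1000} shape is a placeholder, not from this commit):

// Sketch: attach a static shape hint for output 0 of an Onnxifi OperatorDef.
// The key "output_size_hint_0" matches MakeString("output_size_hint_", 0)
// in the constructor above.
#include "caffe2/core/operator.h"

void AddOutputSizeHint(caffe2::OperatorDef* def) {
  auto* arg = def->add_arg();
  arg->set_name("output_size_hint_0");
  arg->add_ints(1);    // batch dimension
  arg->add_ints(1000); // e.g. number of classes
}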
caffe2/opt/backend_cutting_test.cc

@@ -1,9 +1,12 @@
#include "caffe2/core/common.h"
#include "caffe2/opt/backend_cutting.h"
+#include "caffe2/utils/string_utils.h"

#include <gtest/gtest.h>

namespace {
+using caffe2::StartsWith;

void AddConv(caffe2::NetDef* net, int tick) {
  auto* op = net->add_op();
  op->set_type("MyConv");
@@ -13,11 +16,6 @@ namespace {
  op->add_output("N" + caffe2::to_string(tick+1));
}

-bool StartsWith(const std::string& str, const std::string& prefix) {
-  return std::mismatch(prefix.begin(), prefix.end(), str.begin()).first ==
-      prefix.end();
-}
-
bool Supports(const caffe2::OperatorDef& op) {
  return StartsWith(op.type(), "MyConv") || StartsWith(op.type(), "MyRelu") ||
      StartsWith(op.type(), "Concat");
@@ -66,7 +64,7 @@ TEST(BackendCuttingTest, line)

// X0 -> CopyIn -> MyConv -\
//                          > Concat -> CopyOut -> Y
// N2 -> MyConv -> MyRelu -/
TEST(BackendCuttingTest, convergedPaths) {
  caffe2::NetDef net;
  net.add_external_input("X0");
@@ -100,7 +98,7 @@ TEST(BackendCuttingTest, convergedPaths)

//      -> Random -> Relu -> MyConv4
//     /                            \
// N0 -> MyConv -> MyRelu -> MyConv2 ---------- > Concat -> CopyOut -> Y
TEST(BackendCuttingTest, skipPath) {
  caffe2::NetDef net;
  net.add_external_input("N0");
caffe2/utils/string_utils.h

@@ -1,5 +1,6 @@
#pragma once

+#include <algorithm>
#include <memory>
#include <string>
#include <vector>
@@ -13,6 +14,11 @@ std::string trim(const std::string& str);
size_t editDistance(
    const std::string& s1, const std::string& s2, size_t max_distance = 0);

+inline bool StartsWith(const std::string& str, const std::string& prefix) {
+  return std::mismatch(prefix.begin(), prefix.end(), str.begin()).first ==
+      prefix.end();
+}
+
int32_t editDistanceHelper(const char* s1,
    size_t s1_len,
    const char* s2,
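
With StartsWith now living in caffe2/utils/string_utils.h, the test above and other call sites can share it. A small hedged usage check (standalone and illustrative, not from this commit):

// Sketch: exercise the relocated helper. Note that the two-iterator
// std::mismatch above assumes prefix.size() <= str.size(), so prefixes
// longer than the string must not be passed.
#include <cassert>

#include "caffe2/utils/string_utils.h"

int main() {
  assert(caffe2::StartsWith("MyConvTranspose", "MyConv"));
  assert(!caffe2::StartsWith("MyRelu6", "MyConv"));
  return 0;
}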
@@ -732,6 +732,7 @@ if (CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
  caffe2_interface_library(onnx onnx_library)
endif()
list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS onnx_library)
+list(APPEND Caffe2_DEPENDENCY_LIBS onnxifi_loader)
# Recover the build shared libs option.
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
endif()