Support for ONNXIFI op (#8749)

Summary:
This PR adds basic support for the ONNXIFI op, a black-box Caffe2 operator that lowers a serialized ONNX model to an ONNXIFI backend.
Closes https://github.com/pytorch/pytorch/pull/8749

Reviewed By: Maratyszcza

Differential Revision: D8665739

Pulled By: yinghai

fbshipit-source-id: 961916f9e1a4a26390b73c4b648d177883143a22
Yinghai Lu 2018-06-29 08:54:05 -07:00 committed by Facebook Github Bot
parent 37e526e1a8
commit c2a89b69b9
7 changed files with 368 additions and 7 deletions
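For orientation, here is a minimal, hypothetical sketch of how a NetDef operator could be wired up to use the new op. The operator type and argument names (onnx_model, initializers, output_size_hint_<idx>) come from the schema added in this PR; the MakeOnnxifiOp helper, blob names, and shapes are invented for illustration.

#include <string>

#include "caffe2/core/operator.h"
#include "caffe2/proto/caffe2.pb.h"

caffe2::OperatorDef MakeOnnxifiOp(const std::string& serialized_onnx_model) {
  caffe2::OperatorDef op;
  op.set_type("Onnxifi");
  op.add_input("X");   // run-time input tensor
  op.add_output("Y");  // output tensor
  // Serialized ONNX ModelProto handed to the backend.
  auto* model_arg = op.add_arg();
  model_arg->set_name("onnx_model");
  model_arg->set_s(serialized_onnx_model);
  // Weight names come in pairs mapping between the NetDef and the ONNX
  // model; here the same name is used on both sides.
  auto* init_arg = op.add_arg();
  init_arg->set_name("initializers");
  init_arg->add_strings("W");
  init_arg->add_strings("W");
  // Optional shape hint for output 0, consumed by SetOutputShape below.
  auto* hint_arg = op.add_arg();
  hint_arg->set_name("output_size_hint_0");
  hint_arg->add_ints(1);
  hint_arg->add_ints(10);
  return op;
}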

caffe2/onnx/onnxifi_init.cc

@@ -0,0 +1,22 @@
#include "caffe2/onnx/onnxifi_init.h"
#include <mutex>
#include "caffe2/core/logging.h"
namespace caffe2 {
namespace onnx {
onnxifi_library* initOnnxifiLibrary() {
static std::once_flag once;
static onnxifi_library core{};
std::call_once(once, []() {
auto ret =
onnxifi_load(ONNXIFI_LOADER_FLAG_VERSION_1_0, nullptr, nullptr, &core);
if (!ret) {
CAFFE_THROW("Cannot load onnxifi lib");
}
});
return &core;
}
} // namespace onnx
} // namespace caffe2
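The loader is guarded by std::call_once, so the ONNXIFI shared library is loaded at most once per process and every caller shares the same function table. A small hypothetical usage sketch (CountOnnxifiBackends is made up; onnxGetBackendIDs is a member of the onnxifi_library struct from the loader):

#include "caffe2/onnx/onnxifi_init.h"

size_t CountOnnxifiBackends() {
  onnxifi_library* lib = caffe2::onnx::initOnnxifiLibrary();
  size_t num_backends = 0;
  // With a null buffer, onnxGetBackendIDs only reports the installed count.
  lib->onnxGetBackendIDs(nullptr, &num_backends);
  return num_backends;
}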

caffe2/onnx/onnxifi_init.h

@@ -0,0 +1,9 @@
#pragma once
#include "onnx/onnxifi_loader.h"
namespace caffe2 {
namespace onnx {
onnxifi_library* initOnnxifiLibrary();
}
} // namespace caffe2

caffe2/operators/onnxifi_op.cc

@@ -0,0 +1,150 @@
#include "caffe2/operators/onnxifi_op.h"
namespace caffe2 {
namespace {
void BlobToTensorDescriptor(
const std::string& name,
Workspace* ws,
onnxTensorDescriptor* desc,
std::vector<std::vector<uint64_t>>* shapes) {
const Blob* blob = ws->GetBlob(name);
CAFFE_ENFORCE(blob, "Blob ", name, " doesn't exist");
// Memory type
// We only allow weights to be CPU tensors for now
CAFFE_ENFORCE(
blob->template IsType<TensorCPU>(),
"Initialization blob ",
name,
" needs to be TensorCPU");
desc->memoryType = ONNXIFI_MEMORY_TYPE_CPU;
// Data type
const auto& cpu_tensor = blob->template Get<TensorCPU>();
if (cpu_tensor.template IsType<float>()) {
desc->dataType = ONNXIFI_DATATYPE_FLOAT32;
desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<float>());
} else if (cpu_tensor.template IsType<int64_t>()) {
desc->dataType = ONNXIFI_DATATYPE_INT64;
desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int64_t>());
} else if (cpu_tensor.template IsType<int32_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_INT32;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int32_t>());
  } else {
    CAFFE_THROW("Unsupported tensor data type in blob ", name);
  }
// Set dims
const auto& shape = cpu_tensor.dims();
desc->dimensions = shape.size();
shapes->emplace_back(shape.cbegin(), shape.cend());
desc->shape = shapes->back().data();
}
} // namespace
template <>
std::vector<onnxTensorDescriptor>
OnnxifiOp<float, CPUContext>::BuildInitializationList(
Workspace* ws,
std::unordered_set<std::string>* initialization_list,
std::vector<std::string>* weight_names,
std::vector<std::vector<uint64_t>>* weight_shapes) {
const std::vector<string>& ws_blobs = ws->Blobs();
std::vector<onnxTensorDescriptor> descs;
for (const auto& s : ws_blobs) {
auto it = initialization_list->find(s);
if (it != initialization_list->end()) {
weight_names->emplace_back(s);
onnxTensorDescriptor tensor_desc;
tensor_desc.name = weight_names->back().c_str();
BlobToTensorDescriptor(s, ws, &tensor_desc, weight_shapes);
descs.push_back(tensor_desc);
initialization_list->erase(it);
}
}
CAFFE_ENFORCE(
initialization_list->empty(), "Unfulfilled initialization list");
return descs;
}
template <>
bool OnnxifiOp<float, CPUContext>::RunOnDevice() {
  // Clear the cached shape vectors so repeated runs don't accumulate stale
  // entries; the tensor descriptors below point into these vectors.
  input_shapes_.clear();
  output_shapes_.clear();
  for (unsigned i = 0U; i < InputSize(); ++i) {
const auto& input_tensor = Input(i);
const auto& tensor_dims = input_tensor.dims();
auto& tensor_descriptor = input_desc_.at(i);
tensor_descriptor.dataType = ONNXIFI_DATATYPE_FLOAT32;
tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
tensor_descriptor.dimensions = tensor_dims.size();
input_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
tensor_descriptor.shape = input_shapes_.back().data();
tensor_descriptor.buffer =
reinterpret_cast<onnxPointer>(input_tensor.data<float>());
}
for (unsigned i = 0U; i < OutputSize(); ++i) {
auto* output_tensor = Output(i);
std::vector<TIndex> tensor_dims;
SetOutputShape(i, &tensor_dims);
output_tensor->Resize(tensor_dims);
auto& tensor_descriptor = output_desc_.at(i);
tensor_descriptor.dataType = ONNXIFI_DATATYPE_FLOAT32;
tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
tensor_descriptor.dimensions = tensor_dims.size();
output_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
tensor_descriptor.shape = output_shapes_.back().data();
tensor_descriptor.buffer =
reinterpret_cast<onnxPointer>(output_tensor->mutable_data<float>());
}
CAFFE_ENFORCE_EQ(
lib_->onnxSetGraphIO(
graph_,
input_desc_.size(),
input_desc_.data(),
output_desc_.size(),
output_desc_.data()),
ONNXIFI_STATUS_SUCCESS);
onnxMemoryFence input_fence;
  input_fence.event = nullptr;
  input_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
  CAFFE_ENFORCE_EQ(
      lib_->onnxInitEvent(backend_, &input_fence.event),
      ONNXIFI_STATUS_SUCCESS);
  onnxMemoryFence output_fence;
  output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
  output_fence.event = nullptr;
  // Call the async run on the backend, signal the event on the input fence,
  // and wait for the event on the output fence
CAFFE_ENFORCE_EQ(
lib_->onnxRunGraph(graph_, &input_fence, &output_fence),
ONNXIFI_STATUS_SUCCESS);
CAFFE_ENFORCE_EQ(
lib_->onnxSignalEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
CAFFE_ENFORCE_EQ(
lib_->onnxWaitEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
// Destroy the event objects
CAFFE_ENFORCE_EQ(
lib_->onnxReleaseEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
CAFFE_ENFORCE_EQ(
lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
return true;
}
REGISTER_CPU_OPERATOR(Onnxifi, OnnxifiOp<float, CPUContext>);
OPERATOR_SCHEMA(Onnxifi)
.NumInputs(0, INT_MAX)
.NumOutputs(0, INT_MAX)
.SetDoc(R"DOC(
The Onnxifi operator is a black-box operator to lower the computation to Onnxifi backend
)DOC")
.Arg(
"onnx_model",
"(string default=\"\") Serialized ONNX model to be converted to backend representation")
.Arg(
"initializers",
"Initialization pair indicating the mapping of the name between NetDef and ONNX model");
} // namespace caffe2
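Tying the pieces together, a hypothetical end-to-end sketch that instantiates and runs the registered op once (it assumes an ONNXIFI backend is actually installed; MakeOnnxifiOp is the illustrative helper from the summary above, and the blob names and shapes are invented):

#include <string>

#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

void RunOnnxifiOnce(const std::string& serialized_onnx_model) {
  caffe2::Workspace ws;
  // Weights must already live in the workspace as CPU tensors
  // (see BlobToTensorDescriptor above).
  auto* w = ws.CreateBlob("W")->GetMutable<caffe2::TensorCPU>();
  w->Resize(4, 4);
  w->mutable_data<float>();
  auto* x = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  x->Resize(1, 4);
  x->mutable_data<float>();
  auto op = caffe2::CreateOperator(MakeOnnxifiOp(serialized_onnx_model), &ws);
  op->Run();  // ends up in OnnxifiOp<float, CPUContext>::RunOnDevice()
}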

caffe2/operators/onnxifi_op.h

@@ -0,0 +1,175 @@
#pragma once
#include <unordered_map>
#include "onnx/onnx_pb.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/onnx/onnxifi_init.h"
#include "caffe2/utils/string_utils.h"
namespace caffe2 {
template <typename T, typename Context>
class OnnxifiOp final : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
OnnxifiOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws) {
lib_ = onnx::initOnnxifiLibrary();
CAFFE_ENFORCE(lib_, "Cannot initialize ONNXIFI library");
auto onnx_model_str =
OperatorBase::GetSingleArgument<std::string>("onnx_model", "");
CAFFE_ENFORCE(!onnx_model_str.empty(), "onnx_model cannot be empty");
// Setup input/output descriptor templates
for (const auto& input : operator_def.input()) {
input_desc_.push_back(onnxTensorDescriptor());
input_desc_.back().name = input.c_str();
}
int output_idx = 0;
for (const auto& output : operator_def.output()) {
output_desc_.push_back(onnxTensorDescriptor());
output_desc_.back().name = output.c_str();
// For each output, we try to get its output size hint
const std::string key = MakeString("output_size_hint_", output_idx);
auto output_size_hint = OperatorBase::GetRepeatedArgument<int>(key);
if (!output_size_hint.empty()) {
std::vector<TIndex> dims;
for (const auto v : output_size_hint) {
dims.push_back(v);
}
output_size_hints_.emplace(output_idx, std::move(dims));
}
++output_idx;
}
// Encode arguments starting with "custom_" to the backend (for now the
    // property list only carries the ONNXIFI_BACKEND_PROPERTY_NONE terminator)
std::vector<uint64_t> property_pointers;
std::vector<int64_t> int_args;
std::vector<float> float_args;
BuildPropertyList(operator_def, &property_pointers, &int_args, &float_args);
// Pull the weights from the workspace and feed them to the backend
    // through onnxInitGraph below. Note that since we may have rewritten the
    // net, we need to map the weight names
auto initializers =
OperatorBase::GetRepeatedArgument<std::string>("initializers");
CAFFE_ENFORCE_EQ(
initializers.size() % 2, 0, "initializers should come in pairs");
std::unordered_set<std::string> initializer_set;
std::unordered_map<std::string, std::string> input_mapping;
for (auto it = initializers.begin(); it != initializers.end(); ++it) {
auto key = *it++;
input_mapping.emplace(key, *it);
initializer_set.emplace(key);
}
Workspace mapped_ws(ws, input_mapping);
std::vector<std::string> weight_names;
std::vector<std::vector<uint64_t>> weight_shapes;
auto weight_descs = BuildInitializationList(
&mapped_ws, &initializer_set, &weight_names, &weight_shapes);
::ONNX_NAMESPACE::ModelProto onnx_model;
ParseProtoFromLargeString(onnx_model_str, &onnx_model);
onnx_model_str.clear();
onnx_model.SerializeToString(&onnx_model_str);
// Build the Onnxifi engine
// TODO: In spec, backends are hot-pluggable, so two calls to
    // onnxGetBackendIDs may result in a different number of backends. We
    // should retry until it gets consistent. For now, we don't do that.
// With a null buffer, onnxGetBackendIDs reports the number of installed
    // backends and returns ONNXIFI_STATUS_FALLBACK.
    CAFFE_ENFORCE_EQ(
        lib_->onnxGetBackendIDs(nullptr, &num_backends_),
        ONNXIFI_STATUS_FALLBACK);
    CAFFE_ENFORCE_GT(
        num_backends_, 0, "At least 1 onnxifi backend should be available");
    backend_ids_.resize(num_backends_);
    CAFFE_ENFORCE_EQ(
        lib_->onnxGetBackendIDs(backend_ids_.data(), &num_backends_),
        ONNXIFI_STATUS_SUCCESS);
// TODO: choose backend id
CAFFE_ENFORCE_EQ(
lib_->onnxInitBackend(
backend_ids_[0], property_pointers.data(), &backend_),
ONNXIFI_STATUS_SUCCESS);
CAFFE_ENFORCE_EQ(
lib_->onnxInitGraph(
backend_,
onnx_model_str.size(),
(void*)(onnx_model_str.c_str()),
weight_descs.size(),
weight_descs.data(),
&graph_),
ONNXIFI_STATUS_SUCCESS);
}
~OnnxifiOp() {
if (graph_) {
if (lib_->onnxReleaseGraph(graph_) != ONNXIFI_STATUS_SUCCESS) {
LOG(ERROR) << "Error when calling onnxReleaseGraph";
}
graph_ = nullptr;
}
if (backend_) {
if (lib_->onnxReleaseBackend(backend_) != ONNXIFI_STATUS_SUCCESS) {
LOG(ERROR) << "Error when calling onnxReleaseBackend";
}
backend_ = nullptr;
}
for (unsigned i = 0; i < num_backends_; ++i) {
if (lib_->onnxReleaseBackendID(backend_ids_[i]) != ONNXIFI_STATUS_SUCCESS) {
LOG(ERROR) << "Error when calling onnxReleaseBackendID";
}
}
}
bool RunOnDevice() override;
private:
void SetOutputShape(int output_idx, std::vector<TIndex>* dims) {
const auto it = output_size_hints_.find(output_idx);
if (it != output_size_hints_.end()) {
*dims = it->second;
}
}
void BuildPropertyList(
const OperatorDef& /* unused */,
std::vector<uint64_t>* property_list,
std::vector<int64_t>* /* unused */,
std::vector<float>* /* unused */) {
property_list->push_back(ONNXIFI_BACKEND_PROPERTY_NONE);
}
std::vector<onnxTensorDescriptor> BuildInitializationList(
Workspace* ws,
std::unordered_set<std::string>* initialization_list,
std::vector<std::string>* weight_names,
std::vector<std::vector<uint64_t>>* weight_shapes);
// pointer to loaded onnxifi library
onnxifi_library* lib_{nullptr};
std::vector<onnxBackendID> backend_ids_;
onnxBackend backend_{nullptr};
onnxGraph graph_{nullptr};
size_t num_backends_{0};
// input/output descriptors
std::vector<onnxTensorDescriptor> input_desc_;
std::vector<onnxTensorDescriptor> output_desc_;
std::vector<std::vector<uint64_t>> input_shapes_;
std::vector<std::vector<uint64_t>> output_shapes_;
// output shape hints
std::unordered_map<int, std::vector<TIndex>> output_size_hints_;
};
} // namespace caffe2

caffe2/opt/backend_cutting_test.cc

@@ -1,9 +1,12 @@
#include "caffe2/core/common.h"
#include "caffe2/opt/backend_cutting.h"
#include "caffe2/utils/string_utils.h"
#include <gtest/gtest.h>
namespace {
using caffe2::StartsWith;
void AddConv(caffe2::NetDef* net, int tick) {
auto* op = net->add_op();
op->set_type("MyConv");
@@ -13,11 +16,6 @@ namespace {
op->add_output("N" + caffe2::to_string(tick+1));
}
bool StartsWith(const std::string& str, const std::string& prefix) {
return std::mismatch(prefix.begin(), prefix.end(), str.begin()).first ==
prefix.end();
}
bool Supports(const caffe2::OperatorDef& op) {
return StartsWith(op.type(), "MyConv") || StartsWith(op.type(), "MyRelu") ||
StartsWith(op.type(), "Concat");
@@ -66,7 +64,7 @@ TEST(BackendCuttingTest, line) {
// X0 -> CopyIn -> MyConv -\
//                          > Concat -> CopyOut -> Y
// N2 -> MyConv -> MyRelu -/
TEST(BackendCuttingTest, convergedPaths) {
caffe2::NetDef net;
net.add_external_input("X0");
@@ -100,7 +98,7 @@ TEST(BackendCuttingTest, convergedPaths) {
//        -> Random -> Relu -> MyConv4
//       /                            \
// N0 -> MyConv -> MyRelu -> MyConv2 ---------- > Concat -> CopyOut -> Y
TEST(BackendCuttingTest, skipPath) {
caffe2::NetDef net;
net.add_external_input("N0");

caffe2/utils/string_utils.h

@@ -1,5 +1,6 @@
#pragma once
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
@@ -13,6 +14,11 @@ std::string trim(const std::string& str);
size_t editDistance(
const std::string& s1, const std::string& s2, size_t max_distance = 0);
inline bool StartsWith(const std::string& str, const std::string& prefix) {
  // Guard the length first: std::mismatch requires the first range to be no
  // longer than the second.
  return str.size() >= prefix.size() &&
      std::mismatch(prefix.begin(), prefix.end(), str.begin()).first ==
      prefix.end();
}
int32_t editDistanceHelper(const char* s1,
size_t s1_len,
const char* s2,
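Now that StartsWith is shared from string_utils.h, other call sites can use it too; a hypothetical example:

#include <string>

#include "caffe2/utils/string_utils.h"

bool IsCopyOp(const std::string& op_type) {
  // e.g. matches the CopyIn/CopyOut ops in the backend-cutting tests
  return caffe2::StartsWith(op_type, "Copy");
}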

cmake/Dependencies.cmake

@@ -732,6 +732,7 @@ if (CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
caffe2_interface_library(onnx onnx_library)
endif()
list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS onnx_library)
list(APPEND Caffe2_DEPENDENCY_LIBS onnxifi_loader)
# Recover the build shared libs option.
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
endif()