mirror of
https://github.com/zebrajr/opencv.git
synced 2025-12-06 12:19:50 +01:00
Extended Reduce layer support in new DNN engine #27816 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
2682 lines
103 KiB
C++
2682 lines
103 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
|
|
#include "../precomp.hpp"
|
|
#include "../net_impl.hpp"
|
|
|
|
#include <opencv2/dnn/shape_utils.hpp>
|
|
#include <opencv2/dnn/layer_reg.private.hpp>
|
|
#include <opencv2/core/utils/filesystem.hpp>
|
|
#include <opencv2/core/utils/fp_control_utils.hpp>
|
|
#include <opencv2/core/utils/logger.defines.hpp>
|
|
#undef CV_LOG_STRIP_LEVEL
|
|
#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
|
|
#include <opencv2/core/utils/logger.hpp>
|
|
|
|
#include <opencv2/core/utils/configuration.private.hpp>
|
|
|
|
#ifdef HAVE_PROTOBUF
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <limits>
|
|
#include <set>
|
|
#include <string>
|
|
|
|
#if defined _MSC_VER && _MSC_VER < 1910/*MSVS 2017*/
|
|
#pragma warning(push)
|
|
#pragma warning(disable: 4503) // decorated name length exceeded, name was truncated
|
|
#endif
|
|
|
|
#if defined(__GNUC__) && __GNUC__ >= 5
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wsuggest-override"
|
|
#endif
|
|
#include "opencv-onnx.pb.h"
|
|
#if defined(__GNUC__) && __GNUC__ >= 5
|
|
#pragma GCC diagnostic pop
|
|
#endif
|
|
|
|
#include "onnx_graph_simplifier.hpp"
|
|
#endif
|
|
|
|
namespace cv {
|
|
namespace dnn {
|
|
CV__DNN_INLINE_NS_BEGIN
|
|
|
|
extern bool DNN_DIAGNOSTICS_RUN;
|
|
|
|
#ifdef HAVE_PROTOBUF
|
|
|
|
template <typename T>
|
|
static T getScalarFromMat(Mat m)
|
|
{
|
|
CV_Assert(m.total() == 1);
|
|
return m.at<T>(0);
|
|
}
|
|
|
|
|
|
|
|
static int dataType2cv(int dt)
|
|
{
|
|
return
|
|
dt == opencv_onnx::TensorProto_DataType_UINT8 ? CV_8U :
|
|
dt == opencv_onnx::TensorProto_DataType_INT8 ? CV_8S :
|
|
dt == opencv_onnx::TensorProto_DataType_UINT16 ? CV_16U :
|
|
dt == opencv_onnx::TensorProto_DataType_INT16 ? CV_16S :
|
|
dt == opencv_onnx::TensorProto_DataType_UINT32 ? CV_32U :
|
|
dt == opencv_onnx::TensorProto_DataType_INT32 ? CV_32S :
|
|
dt == opencv_onnx::TensorProto_DataType_UINT64 ? CV_64U :
|
|
dt == opencv_onnx::TensorProto_DataType_INT64 ? CV_64S :
|
|
dt == opencv_onnx::TensorProto_DataType_FLOAT ? CV_32F :
|
|
dt == opencv_onnx::TensorProto_DataType_DOUBLE ? CV_64F :
|
|
dt == opencv_onnx::TensorProto_DataType_FLOAT16 ? CV_16F :
|
|
dt == opencv_onnx::TensorProto_DataType_BFLOAT16 ? CV_16BF :
|
|
dt == opencv_onnx::TensorProto_DataType_COMPLEX64 ? CV_32FC2 :
|
|
dt == opencv_onnx::TensorProto_DataType_COMPLEX128 ? CV_64FC2 :
|
|
dt == opencv_onnx::TensorProto_DataType_BOOL ? CV_Bool : -1;
|
|
}
|
|
|
|
|
|
static std::string dataType2str(int dt)
|
|
{
|
|
const char* str =
|
|
dt == opencv_onnx::TensorProto_DataType_UNDEFINED ? "UNDEFINED" :
|
|
dt == opencv_onnx::TensorProto_DataType_STRING ? "STRING" :
|
|
dt == opencv_onnx::TensorProto_DataType_UINT8 ? "UINT8" :
|
|
dt == opencv_onnx::TensorProto_DataType_INT8 ? "INT8" :
|
|
dt == opencv_onnx::TensorProto_DataType_UINT16 ? "UINT16" :
|
|
dt == opencv_onnx::TensorProto_DataType_INT16 ? "INT16" :
|
|
dt == opencv_onnx::TensorProto_DataType_UINT32 ? "UINT32" :
|
|
dt == opencv_onnx::TensorProto_DataType_INT32 ? "INT32" :
|
|
dt == opencv_onnx::TensorProto_DataType_UINT64 ? "UINT64" :
|
|
dt == opencv_onnx::TensorProto_DataType_INT64 ? "INT64" :
|
|
dt == opencv_onnx::TensorProto_DataType_FLOAT ? "FLOAT" :
|
|
dt == opencv_onnx::TensorProto_DataType_FLOAT16 ? "FLOAT16" :
|
|
dt == opencv_onnx::TensorProto_DataType_BFLOAT16 ? "BFLOAT16" :
|
|
dt == opencv_onnx::TensorProto_DataType_BOOL ? "BOOL" :
|
|
dt == opencv_onnx::TensorProto_DataType_COMPLEX64 ? "COMPLEX64" :
|
|
dt == opencv_onnx::TensorProto_DataType_COMPLEX128 ? "COMPLEX128" : nullptr;
|
|
if (!str)
|
|
return format("<unknown_type #%d>", (int)dt);
|
|
return std::string(str);
|
|
}
|
|
|
|
// Read a TensorProto into a Mat, preserving the exact ONNX rank
// (including 0-D scalar tensors, which plain Mat would report as 2-D).
// base_path is the directory used to resolve external tensor data; it is now
// taken by const reference instead of by value to avoid a std::string copy
// on every tensor.
static Mat getMatFromTensor2(const opencv_onnx::TensorProto& tensor_proto, const std::string& base_path="")
{
    Mat m = getMatFromTensor(tensor_proto, false, base_path);
    // Force the Mat's dimensionality to match the ONNX tensor's declared rank.
    m.size.dims = m.dims = (int)tensor_proto.dims_size();
    return m;
}
|
|
|
|
// Importer for ONNX models into the new (graph-based) OpenCV DNN engine.
// A fresh instance is expected per model; parseFile()/parseBuffer() fill
// 'model_proto' and then delegate to parseModel().
class ONNXImporter2
{
public:
    ONNXImporter2();

    // Parse a model from a .onnx file on disk; external tensor data is
    // resolved relative to the file's directory.
    Net parseFile(const char *onnxFile);
    // Parse a model from an in-memory serialized protobuf buffer.
    Net parseBuffer(const void* buffer, size_t sizeBuffer);

protected:
    // Keeps FP denormals-are-zero mode consistent for the importer's lifetime.
    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
    // The whole parsed ONNX model (graph, initializers, opset info, ...).
    opencv_onnx::ModelProto model_proto;

    // Top-level driver: reads opsets, parses the main graph, finalizes 'net'.
    Net parseModel();
    // Parse one (sub)graph; mainGraph selects INPUT/OUTPUT vs TEMP arg kinds.
    Ptr<Graph> parseGraph(opencv_onnx::GraphProto* graph_proto, bool mainGraph);
    // Dispatch a single node to the matching parse* handler.
    void parseNode(const opencv_onnx::NodeProto& node_proto);
    // Fill ArgData (type/shape) from a ValueInfoProto; false on failure.
    bool parseValueInfo(const opencv_onnx::ValueInfoProto& valueInfoProto, ArgData& data);
    // Read a TensorProto into a Mat (rank-preserving).
    Mat parseTensor(const opencv_onnx::TensorProto& tensorProto);
    // Record an unsupported op so parseModel() can report them all at once.
    void rememberMissingOp(const std::string& opname);

    // Convert a node's ONNX attributes into LayerParams.
    LayerParams getLayerParams(const opencv_onnx::NodeProto& node_proto);

    void addLayer(LayerParams& layerParams,
                  const opencv_onnx::NodeProto& node_proto,
                  int max_inputs = std::numeric_limits<int>::max());
    void setParamsDtype(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);

    // Mark the model as not loadable by this parser (caller falls back to
    // the legacy importer); does not throw.
    void raiseError() {
        have_errors = true;
    }

    Net net;                    // the network being built
    Net::Impl* netimpl;         // cached net.getImpl(); non-owning
    std::string onnxFilename;   // empty for in-memory models
    std::string onnxBasePath;   // directory for external tensor data
    Ptr<Graph> curr_graph;                    // graph currently being parsed
    opencv_onnx::GraphProto* curr_graph_proto;// its proto; non-owning
    std::vector<Ptr<Layer> > curr_prog;       // layers of the current graph
    std::vector<Arg> node_inputs, node_outputs; // scratch for parseNode

    std::string framework_name;        // producer name from the model
    std::set<std::string> missing_ops; // unsupported op types encountered

    // Used when ONNX does not contain node names.
    // In this case each node is assigned a name 'onnx_node!<current global_node_idx value>'
    int global_node_idx;
    bool have_errors; // set by raiseError(); checked in parseModel()

    typedef void (ONNXImporter2::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    typedef std::map<std::string, ONNXImporterNodeParser> DispatchMap;       // op type -> handler
    typedef std::map<std::string, DispatchMap> DomainDispatchMap;            // domain -> dispatch map

    DomainDispatchMap domain_dispatch_map;
    // Node's operator domain ('ai.onnx' when absent/empty).
    std::string getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto);
    const DispatchMap& getDispatchMap(const opencv_onnx::NodeProto& node_proto);
    void buildDispatchMap_ONNX_AI(int opset_version);
    void buildDispatchMap_COM_MICROSOFT(int opset_version);

    // Domain: 'ai.onnx' (default)
    // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md
    void parseAbs (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseArgMinMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseAveragePool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseBatchNormalization (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCast (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCast2 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCastLike (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseClip (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseIf (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConstant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConstantOfShape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConvTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCumSum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseDepthSpaceOps (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseEinsum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseElementWise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseElu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseExpand (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseFlatten (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGather (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGatherElements (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGemm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGlobalPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGRU (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseImageScaler (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLayerNorm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLRN (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLSTM (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMaxPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMaxUnpool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseNeg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parsePad (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parsePRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseRange (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseReduce (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseNonZero (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTrilu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseIsNaN (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseIsInf (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseDet (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGridSample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseResize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseUnique (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseResize2 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseReshape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseScatter (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseShape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSlice (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTanh (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTile (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseUnsqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseNonMaxSuprression (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTopK2 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseBitShift (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseBitwise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseBitwiseNot (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);

    // Domain: com.microsoft
    // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
    void parseAttention (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseDequantizeLinear (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQuantizeLinear (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCustomLayer (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    //void parseQAvgPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    //void parseQConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    //void parseQConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    //void parseQEltwise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    //void parseQGemm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    //void parseQLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    //void parseQMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    //void parseQSigmoid (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    //void parseQSoftmax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);

    int onnx_opset; // OperatorSetIdProto for 'onnx' domain
    std::map<std::string, int> onnx_opset_map; // map from OperatorSetIdProto
    // Read opset_import entries and build the dispatch maps accordingly.
    void parseOperatorSet();

    const std::string str_domain_ai_onnx = "ai.onnx";

    bool useLegacyNames;
    bool getParamUseLegacyNames()
    {
        // Legacy naming is currently forced on; the configurable variant is kept
        // for reference.
        //bool param = utils::getConfigurationParameterBool("OPENCV_DNN_ONNX_USE_LEGACY_NAMES", false);
        //return param;
        return true;
    }
    // Node name, or a generated 'onnx_node!<idx>' when the model omits names.
    std::string extractNodeName(const opencv_onnx::NodeProto& node_proto);
};
|
|
|
|
// Construct an importer bound to a fresh Net.
// All POD members are now initialized here: previously curr_graph_proto,
// global_node_idx and have_errors were left indeterminate until parseModel()
// happened to set them, which is fragile if any accessor runs earlier.
ONNXImporter2::ONNXImporter2() :
    curr_graph_proto(nullptr),
    global_node_idx(0),
    have_errors(false),
    onnx_opset(0),
    useLegacyNames(getParamUseLegacyNames())
{
    // Cache the implementation pointer; 'net' outlives this importer's use of it.
    netimpl = net.getImpl();
}
|
|
|
|
// Load and parse an ONNX model from a file on disk.
// Throws on a missing/unreadable file or a malformed protobuf; otherwise
// delegates the semantic work to parseModel().
Net ONNXImporter2::parseFile(const char *onnxFilename_)
{
    CV_Assert(onnxFilename_);
    onnxFilename = onnxFilename_;

    // External tensor data (if the model uses it) is resolved relative to
    // the model file's directory.
    onnxBasePath = utils::fs::getParent(onnxFilename_);

    CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFilename);

    std::fstream model_stream(onnxFilename, std::ios::in | std::ios::binary);
    if (!model_stream)
        CV_Error(Error::StsBadArg, format("Can't read ONNX file: %s", onnxFilename_));

    if (!model_proto.ParseFromIstream(&model_stream))
        CV_Error(Error::StsUnsupportedFormat, format("Failed to parse ONNX model: %s", onnxFilename_));

    return parseModel();
}
|
|
|
|
// Parse an ONNX model from an in-memory serialized buffer.
// The buffer is wrapped in a zero-copy streambuf and fed to protobuf.
// Note: in-memory models cannot reference external tensor data files.
Net ONNXImporter2::parseBuffer(const void* buffer, size_t sizeBuffer)
{
    onnxFilename = std::string();
    // Also reset the base path (parseFile() sets it): a stale directory from a
    // previous parse must not be used to resolve external data for this model.
    onnxBasePath = std::string();
    CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");

    // Minimal read-only streambuf over the caller's bytes — no copy is made,
    // so 'buffer' must stay alive for the duration of this call.
    struct _Buf: public std::streambuf
    {
        _Buf(const void* buffer, size_t sizeBuffer)
        {
            char* p = (char*)buffer; // setg() needs non-const; data is never written
            setg(p, p, p + sizeBuffer);
        }
    };

    _Buf buf(buffer, sizeBuffer);
    std::istream input(&buf);

    if (!model_proto.ParseFromIstream(&input))
        CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array.");

    return parseModel();
}
|
|
|
|
|
|
// Rename a LayerParams entry from oldKey to newKey, preserving its value.
// A no-op when oldKey is absent.
inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey)
{
    if (!layerParams.has(oldKey))
        return;
    layerParams.set(newKey, layerParams.get(oldKey));
    layerParams.erase(oldKey);
}
|
|
|
|
/*static void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
|
|
std::vector<Mat>& outputs)
|
|
{
|
|
Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);
|
|
CV_Assert((bool)layer);
|
|
|
|
std::vector<MatShape> inpShapes(inputs.size());
|
|
std::vector<MatType> inpTypes(inputs.size());
|
|
for (size_t i = 0; i < inputs.size(); ++i)
|
|
{
|
|
inpShapes[i] = shape(inputs[i]);
|
|
inpTypes[i] = inputs[i].type();
|
|
}
|
|
|
|
std::vector<MatShape> outShapes, internalShapes;
|
|
std::vector<MatType> outTypes, internalTypes;
|
|
layer->getMemoryShapes(inpShapes, 0, outShapes, internalShapes);
|
|
layer->getTypes(inpTypes, outShapes.size(), internalShapes.size(), outTypes, internalTypes);
|
|
|
|
std::vector<Mat> internals(internalShapes.size());
|
|
outputs.resize(outShapes.size());
|
|
for (size_t i = 0; i < outShapes.size(); ++i)
|
|
outputs[i].create(outShapes[i], outTypes[i]);
|
|
for (size_t i = 0; i < internalShapes.size(); ++i)
|
|
internals[i].create(internalShapes[i], internalTypes[i]);
|
|
|
|
layer->finalize(inputs, outputs);
|
|
layer->forward(inputs, outputs, internals);
|
|
}*/
|
|
|
|
/*std::map<std::string, Mat> ONNXImporter2::getGraphTensors(
|
|
const opencv_onnx::GraphProto& graph_proto)
|
|
{
|
|
std::map<std::string, Mat> layers_weights;
|
|
|
|
for (int i = 0; i < graph_proto.initializer_size(); i++)
|
|
{
|
|
const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i);
|
|
dumpTensorProto(i, tensor_proto, "initializer");
|
|
Mat mat = getMatFromTensor2(tensor_proto);
|
|
releaseONNXTensor(const_cast<opencv_onnx::TensorProto&>(tensor_proto)); // drop already loaded data
|
|
|
|
if (DNN_DIAGNOSTICS_RUN && mat.empty())
|
|
continue;
|
|
|
|
layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
|
|
constBlobsExtraInfo.insert(std::make_pair(tensor_proto.name(), TensorInfo(tensor_proto.dims_size())));
|
|
}
|
|
return layers_weights;
|
|
}*/
|
|
|
|
// Convert a repeated int64 protobuf field into an int32 DictValue array
// (convertInt64ToInt32 range-checks the narrowing).
static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
    std::vector<int32_t> dst(src.size());
    convertInt64ToInt32(src, dst, src.size());
    // Use data() instead of &dst[0]: indexing element 0 of an empty vector
    // is undefined behavior when the attribute list is empty.
    return DictValue::arrayInt(dst.data(), src.size());
}
|
|
|
|
// Convert a repeated string protobuf field into a DictValue string array.
static DictValue parseStr(const ::google::protobuf::RepeatedPtrField< ::std::string>& src) {
    const int count = static_cast<int>(src.size());
    return DictValue::arrayString(src.begin(), count);
}
|
|
|
|
// Translate a node's ONNX attributes into OpenCV LayerParams.
// A handful of well-known attribute names get bespoke handling (renaming and
// reshaping to OpenCV conventions); everything else is stored generically by
// its protobuf value kind. Unsupported kinds raise StsNotImplemented, except
// in diagnostics mode where the attribute is skipped with an error log.
LayerParams ONNXImporter2::getLayerParams(const opencv_onnx::NodeProto& node_proto)
{
    LayerParams lp;
    for(int i = 0; i < node_proto.attribute_size(); i++)
    {
        opencv_onnx::AttributeProto attribute_proto = node_proto.attribute(i);
        std::string attribute_name = attribute_proto.name();

        try
        {
            // --- attributes with dedicated OpenCV names/layouts ---
            if(attribute_name == "kernel_shape")
            {
                // 1D/2D/3D kernels only.
                CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
                lp.set("kernel_size", parse(attribute_proto.ints()));
            }
            else if(attribute_name == "strides")
            {
                CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
                lp.set("stride", parse(attribute_proto.ints()));
            }
            else if(attribute_name == "pads")
            {
                if (node_proto.op_type() == "Pad")
                {
                    // Padding layer.
                    // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
                    // We need to shuffle it to begin0, end0, begin1, end1, ...
                    CV_Assert(attribute_proto.ints_size() % 2 == 0);
                    const int dims = attribute_proto.ints_size() / 2;
                    std::vector<int32_t> paddings;
                    paddings.reserve(attribute_proto.ints_size());
                    for (int i = 0; i < dims; ++i)
                    {
                        paddings.push_back(attribute_proto.ints(i));
                        paddings.push_back(attribute_proto.ints(dims + i));
                    }
                    lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
                }
                else
                {
                    // Convolution or pooling.
                    CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6);
                    lp.set("pad", parse(attribute_proto.ints()));
                }
            }
            else if(attribute_name == "auto_pad")
            {
                // SAME_UPPER/SAME_LOWER are both mapped to OpenCV's "SAME";
                // NOTSET falls through and sets nothing (explicit pads used).
                if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") {
                    lp.set("pad_mode", "SAME");
                }
                else if (attribute_proto.s() == "VALID") {
                    lp.set("pad_mode", "VALID");
                }
            }
            else if(attribute_name == "dilations")
            {
                CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
                lp.set("dilation", parse(attribute_proto.ints()));
            }
            else if(attribute_name == "activations" && node_proto.op_type() == "LSTM")
            {
                lp.set(attribute_name, parseStr(attribute_proto.strings()));
            }
            // --- generic handling by protobuf value kind ---
            else if (attribute_proto.has_i())
            {
                // Single int64: stored as int32, with an explicit range check.
                ::google::protobuf::int64 src = attribute_proto.i();
                if (src < std::numeric_limits<int32_t>::min() || src > std::numeric_limits<int32_t>::max())
                    CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range");
                else
                    lp.set(attribute_name, saturate_cast<int32_t>(src));
            }
            else if (attribute_proto.has_f())
            {
                lp.set(attribute_name, attribute_proto.f());
            }
            else if (attribute_proto.has_s())
            {
                lp.set(attribute_name, attribute_proto.s());
            }
            else if (attribute_proto.floats_size() > 0)
            {
                lp.set(attribute_name, DictValue::arrayReal(
                    attribute_proto.floats().data(), attribute_proto.floats_size()));
            }
            else if (attribute_proto.ints_size() > 0)
            {
                lp.set(attribute_name, parse(attribute_proto.ints()));
            }
            else if (attribute_proto.has_t())
            {
                // Tensor attribute: stored as a blob plus its original ONNX rank.
                opencv_onnx::TensorProto tensor = attribute_proto.t();
                Mat blob = getMatFromTensor2(tensor);
                lp.blobs.push_back(blob);
                lp.set("original_dims_of_mat", tensor.dims_size());
            }
            else if (attribute_proto.has_g())
            {
                // Graph attributes (e.g. If/Loop bodies) are handled elsewhere;
                // silently skipped here.
                // CV_Error(Error::StsNotImplemented, format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str()));
                continue;
            }
            else if (attribute_proto.graphs_size() > 0)
            {
                CV_Error(Error::StsNotImplemented,
                        format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported",
                               attribute_name.c_str(), attribute_proto.graphs_size())
                );
            }
            else if (attribute_proto.strings_size() > 0)
            {
                // Log each string before raising, to aid diagnostics.
                std::string msg = format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported",
                                         attribute_name.c_str(), attribute_proto.strings_size());
                CV_LOG_ERROR(NULL, msg);
                for (int i = 0; i < attribute_proto.strings_size(); i++)
                {
                    CV_LOG_ERROR(NULL, "    Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'");
                }
                CV_Error(Error::StsNotImplemented, msg);
            }
            else if (attribute_proto.tensors_size() > 0)
            {
                CV_Error(Error::StsNotImplemented,
                        format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported",
                               attribute_name.c_str(), attribute_proto.tensors_size())
                );
            }
            else
            {
                CV_Error(Error::StsNotImplemented, format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str()));
            }
        }
        catch (const cv::Exception& e)
        {
            CV_UNUSED(e);
            // In diagnostics mode keep going so all problems are reported at once.
            if (DNN_DIAGNOSTICS_RUN)
            {
                CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem with processing attributes for node " << node_proto.name() << " Attribute " << attribute_name.c_str()
                );
                continue;
            }
            throw;
        }
    }
    return lp;
}
|
|
|
|
void ONNXImporter2::parseOperatorSet()
|
|
{
|
|
int ir_version = model_proto.has_ir_version() ? static_cast<int>(model_proto.ir_version()) : -1;
|
|
if (ir_version < 3)
|
|
return;
|
|
|
|
int opset_size = model_proto.opset_import_size();
|
|
if (opset_size <= 0)
|
|
{
|
|
CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information")
|
|
return;
|
|
}
|
|
|
|
for (int i = 0; i < opset_size; ++i)
|
|
{
|
|
const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i);
|
|
const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string();
|
|
int version = opset_entry.has_version() ? opset_entry.version() : -1;
|
|
if (domain.empty() || domain == str_domain_ai_onnx)
|
|
{
|
|
// ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md
|
|
onnx_opset = std::max(onnx_opset, version);
|
|
onnx_opset_map[str_domain_ai_onnx] = onnx_opset;
|
|
}
|
|
else
|
|
{
|
|
CV_LOG_DEBUG(NULL, "DNN/ONNX: using non-standard ONNX opset[" << i << "]: domain='" << domain << "' version=" << version);
|
|
onnx_opset_map[domain] = onnx_opset;
|
|
}
|
|
}
|
|
|
|
CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset);
|
|
|
|
buildDispatchMap_ONNX_AI(onnx_opset);
|
|
for (const auto& pair : onnx_opset_map)
|
|
{
|
|
if (pair.first == str_domain_ai_onnx)
|
|
{
|
|
continue; // done above
|
|
}
|
|
else if (pair.first == "com.microsoft")
|
|
{
|
|
buildDispatchMap_COM_MICROSOFT(pair.second);
|
|
}
|
|
else
|
|
{
|
|
CV_LOG_INFO(NULL, "DNN/ONNX: unknown domain='" << pair.first << "' version=" << pair.second << ". No dispatch map, you may need to register 'custom' layers.");
|
|
}
|
|
}
|
|
}
|
|
|
|
/*static bool ifInt8Output(const String& layerType)
|
|
{
|
|
// Contains all node types whose output should be int8 when it get int8 input.
|
|
// ai.onnx opset 15
|
|
static std::vector<String> input8output8List = {
|
|
"QuantizeLinear",
|
|
"QLinearAdd",
|
|
"QLinearMul",
|
|
"QLinearAveragePool",
|
|
"QLinearGlobalAveragePool",
|
|
"QLinearLeakyRelu",
|
|
"QLinearSigmoid",
|
|
"QLinearConcat",
|
|
"QGemm",
|
|
"QLinearSoftmax",
|
|
"QLinearConv",
|
|
"QLinearMatMul",
|
|
"MaxPool",
|
|
"ReduceMax",
|
|
"ReduceMin",
|
|
"Split",
|
|
"Clip",
|
|
"Abs",
|
|
"Transpose",
|
|
"Squeeze",
|
|
"Flatten",
|
|
"Unsqueeze",
|
|
"Expand",
|
|
"Reshape",
|
|
"Pad",
|
|
"Gather",
|
|
"Concat",
|
|
"Resize",
|
|
"SpaceToDepth",
|
|
"DepthToSpace",
|
|
"Pow",
|
|
"Add",
|
|
"Sub",
|
|
"Mul",
|
|
"Div"
|
|
};
|
|
auto layerIt = std::find(input8output8List.begin(), input8output8List.end(), layerType);
|
|
return layerIt != input8output8List.end();
|
|
}*/
|
|
|
|
// Top-level driver once model_proto is populated (by parseFile/parseBuffer).
// Parses opsets and the main graph, finalizes the Net, and — if any node could
// not be handled — returns an empty Net so the caller can fall back to the
// legacy ONNX parser.
Net ONNXImporter2::parseModel()
{
    // Reset per-parse state so the importer object could be reused.
    global_node_idx = 0;
    have_errors = false;
    CV_Assert(model_proto.has_graph());
    opencv_onnx::GraphProto* graph_proto = model_proto.mutable_graph();

    // Producer info is only used for the log message below.
    std::string framework_version;
    if (model_proto.has_producer_name())
        framework_name = model_proto.producer_name();
    if (model_proto.has_producer_version())
        framework_version = model_proto.producer_version();

    CV_LOG_INFO(NULL, "DNN/ONNX: loading ONNX"
                << (model_proto.has_ir_version() ? format(" v%d", (int)model_proto.ir_version()) : cv::String())
                << " model produced by '" << framework_name << "'"
                << (framework_version.empty() ? cv::String() : format(":%s", framework_version.c_str()))
                << ". Number of nodes = " << graph_proto->node_size()
                << ", initializers = " << graph_proto->initializer_size()
                << ", inputs = " << graph_proto->input_size()
                << ", outputs = " << graph_proto->output_size()
                );

    parseOperatorSet();
    Ptr<Graph> mainGraph = parseGraph(graph_proto, true);
    netimpl->mainGraph = mainGraph;
    netimpl->modelFormat = DNN_MODEL_ONNX;
    netimpl->originalLayout = DATA_LAYOUT_NCHW;
    netimpl->onnx_opset = onnx_opset;

    // Any raiseError() during parsing means this parser gives up: report what
    // was missing and return an empty Net (the caller retries with the legacy
    // importer).
    if (have_errors) {
        std::stringstream sstrm;
        sstrm << "DNN/ONNX: the model ";
        if (!onnxFilename.empty())
            sstrm << "'" << onnxFilename << "' ";
        sstrm << "cannot be loaded with the new parser. Trying the older parser. ";
        if (!missing_ops.empty()) {
            sstrm << " Unsupported operations:\n";
            auto it = missing_ops.begin();
            size_t i, nmissing = missing_ops.size();
            for (i = 0; i < nmissing; i++, ++it) {
                sstrm << "\t" << *it << (i+1 < nmissing ? ",\n" : "\n");
            }
        }
        CV_LOG_WARNING(NULL, sstrm.str());
        return Net();
    }
    netimpl->prepareForInference();
    // ************ uncomment for debugging **********
    //net.dumpToStream(std::cout);
    return net;
}
|
|
|
|
// Fill ArgData's type and shape from a graph input/output ValueInfoProto.
// Returns false (and flags the model as erroneous) when a dimension is
// neither a concrete value nor a named parameter. Throws for datatypes that
// have no OpenCV equivalent.
bool ONNXImporter2::parseValueInfo(const opencv_onnx::ValueInfoProto& valueInfoProto, ArgData& data)
{
    CV_Assert(valueInfoProto.has_name());
    CV_Assert(valueInfoProto.has_type());
    const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
    // Only tensor-typed values are supported (no sequences/maps/optionals).
    CV_Assert(typeProto.has_tensor_type());
    const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
    CV_Assert(tensor.has_shape());
    const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
    auto elem_type = tensor.elem_type();

    data.type = dataType2cv(elem_type);
    if (data.type < 0) {
        CV_Error(Error::StsNotImplemented, format("unsupported datatype '%s'", dataType2str(elem_type).c_str()));
    }

    int dim_size = tensorShape.dim_size();
    CV_CheckGE(dim_size, 0, "");
    MatShape shape(dim_size);
    for (int j = 0; j < dim_size; ++j)
    {
        const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
        int64_t val_j;
        if (dimension.has_dim_value()) {
            // Concrete dimension.
            val_j = dimension.dim_value();
        } else if (dimension.has_dim_param()) {
            // Symbolic dimension: resolve (or register) the named parameter.
            const std::string& param_j = dimension.dim_param();
            val_j = net.findDim(param_j, true);
        } else {
            // Neither value nor parameter — cannot describe this shape.
            raiseError();
            return false;
        }
        // NOTE(review): negative/overflowing dims are currently accepted and
        // truncated to int — the range check is intentionally disabled:
        //CV_Assert(0 <= val_j && val_j <= INT_MAX);
        shape[j] = (int)val_j;
    }
    data.shape = shape;
    return true;
}
|
|
|
|
// Read a TensorProto into a rank-preserving Mat; external tensor data (if
// any) is resolved relative to the model file's directory.
Mat ONNXImporter2::parseTensor(const opencv_onnx::TensorProto& tensor_proto)
{
    Mat tensor_mat = getMatFromTensor2(tensor_proto, onnxBasePath);
    return tensor_mat;
}
|
|
|
|
// Parse one ONNX (sub)graph into a DNN Graph.
// The current-graph context (curr_graph / curr_graph_proto / curr_prog) is
// saved and restored around the body so this function can recurse for
// subgraphs (If/Loop bodies). Returns an empty Ptr on failure.
Ptr<Graph> ONNXImporter2::parseGraph(opencv_onnx::GraphProto* graph_proto, bool mainGraph_)
{
    CV_LOG_DEBUG(NULL, "DNN/ONNX: parsing graph '" << graph_proto->name() << "' of " << graph_proto->node_size() << " nodes");
    // Fuse known node patterns in-place before node-by-node translation.
    simplifySubgraphs(*graph_proto);
    int n_nodes = graph_proto->node_size();
    CV_LOG_DEBUG(NULL, "DNN/ONNX: simplified the graph to " << n_nodes << " nodes");

    // Save the enclosing graph's context (restored before returning).
    opencv_onnx::GraphProto* saved_graph_proto = curr_graph_proto;
    Ptr<Graph> saved_graph = curr_graph;
    std::vector<Ptr<Layer> > saved_prog;

    curr_graph_proto = graph_proto;
    std::vector<Arg> inputs, outputs;

    // parse constant tensors (initializers) into const args
    int n_consts = graph_proto->initializer_size();
    for (int i = 0; i < n_consts; i++) {
        const opencv_onnx::TensorProto& const_i = graph_proto->initializer(i);
        Mat t = parseTensor(const_i);
        netimpl->newConstArg(const_i.name(), t);
    }

    // parse graph inputs; initializers that also appear as inputs are skipped
    // (they are already registered as const args above)
    int n_inputs = graph_proto->input_size();
    for (int i = 0; i < n_inputs; i++) {
        const opencv_onnx::ValueInfoProto& input_i = graph_proto->input(i);
        if (net.haveArg(input_i.name()))
            continue;
        // Only the main graph's inputs become network inputs.
        Arg arg = netimpl->newArg(input_i.name(), mainGraph_ ? DNN_ARG_INPUT : DNN_ARG_TEMP);
        if (!parseValueInfo(input_i, netimpl->args.at(arg.idx))) {
            raiseError();
            return Ptr<Graph>();
        }
        inputs.push_back(arg);
    }

    // parse graph outputs
    int n_outputs = graph_proto->output_size();
    for (int i = 0; i < n_outputs; i++) {
        const opencv_onnx::ValueInfoProto& output_i = graph_proto->output(i);
        Arg arg = netimpl->newArg(output_i.name(), mainGraph_ ? DNN_ARG_OUTPUT : DNN_ARG_TEMP);
        if (!parseValueInfo(output_i, netimpl->args.at(arg.idx))) {
            raiseError();
            return Ptr<Graph>();
        }
        outputs.push_back(arg);
    }

    curr_graph = netimpl->newGraph(graph_proto->name(), inputs, mainGraph_);
    curr_graph->setOutputs(outputs);

    // Start this graph's program with an empty layer list; the enclosing
    // graph's program is parked in saved_prog.
    std::swap(saved_prog, curr_prog);

    // note: 'prog' is currently unused; parseNode appends to curr_prog.
    std::vector<Ptr<Layer> > prog;
    for (int i = 0; i < n_nodes && !have_errors; i++) {
        parseNode(graph_proto->node(i));
    }

    curr_graph->setProg(curr_prog);
    curr_prog = saved_prog;

    // Restore the enclosing graph's context and hand back the new graph.
    Ptr<Graph> just_constructed = curr_graph;
    curr_graph_proto = saved_graph_proto;
    curr_graph = saved_graph;

    return just_constructed;
}
|
|
|
|
// Returns the operator-set domain of the node. A missing or empty domain
// field means the standard "ai.onnx" domain.
std::string ONNXImporter2::getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto)
{
    if (node_proto.has_domain())
    {
        const std::string& domain = node_proto.domain();
        if (!domain.empty())
            return domain;
    }
    return str_domain_ai_onnx;
}
|
|
|
|
// Looks up the handler table for the node's operator-set domain.
// Returns a reference to a static empty map when the domain is unknown,
// so callers can simply test for emptiness.
const ONNXImporter2::DispatchMap& ONNXImporter2::getDispatchMap(const opencv_onnx::NodeProto& node_proto)
{
    static DispatchMap empty_map;
    const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
    auto found = domain_dispatch_map.find(layer_type_domain);
    return found == domain_dispatch_map.end() ? empty_map : found->second;
}
|
|
|
|
// Produces a unique layer name for the node. Named nodes keep their name
// (optionally prefixed), anonymous nodes get a sequential synthetic name.
std::string ONNXImporter2::extractNodeName(const opencv_onnx::NodeProto& node_proto)
{
    // We need to rework DNN outputs API, this is a workaround for #21698
    const bool has_name = node_proto.has_name() && !node_proto.name().empty();
    if (!has_name)
        return format("onnx_node!%d", global_node_idx++);
    return useLegacyNames ? node_proto.name()
                          : format("onnx_node!%s", node_proto.name().c_str());
}
|
|
|
|
// Records an operator name that could not be imported and flags the
// whole import as failed via raiseError().
void ONNXImporter2::rememberMissingOp(const std::string& opname)
{
    missing_ops.insert(opname);
    raiseError();
}
|
|
|
|
// Translates a single ONNX node into layer(s) of the current graph.
// Input/output Args are resolved into the node_inputs/node_outputs members,
// which the per-op parse handlers read instead of the proto. Errors are
// recorded via raiseError()/rememberMissingOp() rather than propagated, so
// the importer can keep scanning and report all missing ops at once.
void ONNXImporter2::parseNode(const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.output_size() >= 1);
    std::string node_name = extractNodeName(node_proto);
    const std::string& layer_type = node_proto.op_type();
    std::string layer_type_domain = getLayerTypeDomain(node_proto);
    const auto& dispatch = getDispatchMap(node_proto);

    /*CV_LOG_INFO(NULL, "DNN/ONNX: processing node '" << node_name << "' ("
                << layer_type << ") with " << node_proto.input_size() << " inputs and "
                << node_proto.output_size() << " outputs from domain '"
                << layer_type_domain << "'");*/

    // No handler table for this domain at all -> unsupported.
    if (dispatch.empty())
    {
        CV_LOG_ERROR(NULL, "DNN/ONNX: missing dispatch map for domain='" << layer_type_domain << "'");
        rememberMissingOp(layer_type);
        return;
    }

    node_inputs.clear();
    node_outputs.clear();

    // Resolve input Args; an unknown input name is an error, but parsing
    // continues so further problems can still be collected.
    int n_inputs = node_proto.input_size();
    for (int i = 0; i < n_inputs; i++) {
        const std::string& arg_name = node_proto.input(i);
        if (!net.haveArg(arg_name)) {
            CV_LOG_ERROR(NULL, "DNN/ONNX: unknown input '" << arg_name << "' of node '" << node_name << "'");
            raiseError();
        }
        Arg arg = net.getArg(arg_name);
        /*ArgData adata = net.argData(arg);
        printf("%s (%s), arg '%s'/'%s': adata.kind = %s, type=%s\n", node_name.c_str(), layer_type.c_str(),
               arg_name.c_str(), adata.name.c_str(),
               argKindToString(adata.kind).c_str(), typeToString(adata.type).c_str());*/
        node_inputs.push_back(arg);
    }

    // Resolve (or create) output Args.
    int n_outputs = node_proto.output_size();
    for (int i = 0; i < n_outputs; i++) {
        const std::string& arg_name = node_proto.output(i);
        Arg arg = net.getArg(arg_name);
        node_outputs.push_back(arg);
    }

    LayerParams layerParams;
    try
    {
        layerParams = getLayerParams(node_proto);

        layerParams.name = node_name;
        layerParams.type = layer_type;

        DispatchMap::const_iterator iter = dispatch.find(layer_type);
        if (iter != dispatch.end())
        {
            // Skip actual construction once an error has been raised;
            // we only keep scanning for more missing ops.
            if (!have_errors)
                CALL_MEMBER_FN(*this, iter->second)(layerParams, node_proto);
        } else if (!have_errors) {
            //try customly parsing the layer without explicit dispatch map
            parseCustomLayer(layerParams, node_proto);
        } else {
            rememberMissingOp(layer_type);
        }
    }
    catch (const cv::Exception& e)
    {
        // Convert handler exceptions into an importer error and log the
        // node's full context to ease debugging.
        raiseError();
        CV_LOG_INFO(NULL, "DNN/ONNX: error '" << e.what() << "' occurred when processing node '" << node_name
                    << "' (" << layer_type << ") with "
                    << node_proto.input_size() << " inputs and "
                    << node_proto.output_size() << " outputs from domain '"
                    << layer_type_domain << "'");
        for (int i = 0; i < n_inputs; i++)
        {
            CV_LOG_INFO(NULL, "    Input[" << i << "] = '" << node_proto.input(i) << "'");
        }
        for (int i = 0; i < n_outputs; i++)
        {
            CV_LOG_INFO(NULL, "    Output[" << i << "] = '" << node_proto.output(i) << "'");
        }
    }
}
|
|
|
|
// Creates a layer instance from layerParams, wires it to the current node's
// inputs/outputs and appends it to the program being built (curr_prog).
// max_inputs limits how many of the node's inputs are connected; handlers
// pass a smaller count when trailing inputs were folded into layer blobs.
void ONNXImporter2::addLayer(LayerParams& layerParams,
                             const opencv_onnx::NodeProto& node_proto,
                             int max_inputs)
{
    Ptr<Layer> layer = LayerFactory::createLayerInstance(layerParams.type, layerParams);
    if (!layer) {
        // No factory entry for this layer type -> record as missing op.
        rememberMissingOp(layerParams.type);
        return;
    }
    size_t actual_inputs = std::min((size_t)max_inputs, node_inputs.size());
    layer->inputs = node_inputs;
    layer->inputs.resize(actual_inputs);
    layer->outputs = node_outputs;
    layer->netimpl = netimpl;
    CV_Assert(netimpl->dump_indent == 3);
    curr_prog.push_back(layer);
}
|
|
|
|
// ONNX Neg: y = -x, expressed via the Power layer with scale = -1.
void ONNXImporter2::parseNeg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.set("scale", -1);
    layerParams.type = "Power";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Fallback for operators without an explicit dispatch entry: attempt the
// generic single-layer import path.
void ONNXImporter2::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    parseSimpleLayers(layerParams, node_proto);
}
|
|
|
|
// ArgMax/ArgMin both map onto the generic "Arg" layer; only the "op"
// parameter differs.
void ONNXImporter2::parseArgMinMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    const bool isMax = node_proto.op_type() == "ArgMax";
    layerParams.type = "Arg";
    layerParams.set("op", isMax ? "max" : "min");
    addLayer(layerParams, node_proto);
}
|
|
|
|
static void setCeilMode(LayerParams& layerParams)
|
|
{
|
|
// auto_pad attribute is deprecated and uses ceil
|
|
if (layerParams.has("pad_mode"))
|
|
{
|
|
layerParams.set("ceil_mode", true);
|
|
}
|
|
else if (!layerParams.has("ceil_mode"))
|
|
{
|
|
layerParams.set("ceil_mode", false);
|
|
}
|
|
}
|
|
|
|
// MaxUnpool: converts the 2D kernel/pad/stride attributes into the
// pool_k_*/pool_pad_*/pool_stride_* parameters of the MaxUnpool layer.
void ONNXImporter2::parseMaxUnpool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "MaxUnpool";

    // Only 2D unpooling is handled here.
    DictValue kernel_shape = layerParams.get("kernel_size");
    CV_Assert(kernel_shape.size() == 2);
    layerParams.set("pool_k_w", kernel_shape.get<int>(0));
    layerParams.set("pool_k_h", kernel_shape.get<int>(1));

    // Pads default to 0 when the attribute is absent.
    int pool_pad_w = 0, pool_pad_h = 0;
    if (layerParams.has("pad"))
    {
        DictValue pads = layerParams.get("pad");
        CV_CheckEQ(pads.size(), 2, "");
        pool_pad_w = pads.get<int>(0);
        pool_pad_h = pads.get<int>(1);
    }
    layerParams.set("pool_pad_w", pool_pad_w);
    layerParams.set("pool_pad_h", pool_pad_h);


    // Strides default to 1 when the attribute is absent.
    int pool_stride_w = 1, pool_stride_h = 1;
    if (layerParams.has("stride"))
    {
        DictValue strides = layerParams.get("stride");
        CV_CheckEQ(strides.size(), 2, "");
        pool_stride_w = strides.get<int>(0);
        pool_stride_h = strides.get<int>(1);
    }
    layerParams.set("pool_stride_w", pool_stride_w);
    layerParams.set("pool_stride_h", pool_stride_h);

    addLayer(layerParams, node_proto);
}
|
|
|
|
// MaxPool -> Pooling layer in MAX mode (int8 variant when depth is CV_8S).
void ONNXImporter2::parseMaxPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int depth = layerParams.get<int>("depth", CV_32F);
    layerParams.type = (depth == CV_8S) ? "PoolingInt8" : "Pooling";
    layerParams.set("pool", "MAX");
    setCeilMode(layerParams);
    addLayer(layerParams, node_proto);
}
|
|
|
|
// AveragePool -> Pooling layer in AVE mode. PyTorch-exported models count
// padded pixels in the average, hence the framework-dependent flag.
void ONNXImporter2::parseAveragePool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Pooling";
    layerParams.set("pool", "AVE");
    setCeilMode(layerParams);
    layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
    addLayer(layerParams, node_proto);
}
|
|
|
|
// GlobalMaxPool/GlobalAveragePool -> Pooling layer with global_pooling set.
// Throws StsNotImplemented for any other op type routed here.
void ONNXImporter2::parseGlobalPool(LayerParams &layerParams, const opencv_onnx::NodeProto &node_proto_)
{
    // Access is read-only, so a const reference suffices; the previous code
    // deep-copied the whole NodeProto and built an unused output_name string.
    const opencv_onnx::NodeProto& node_proto = node_proto_;
    const std::string& layer_type = node_proto.op_type();

    CV_Assert(node_proto.input_size() == 1);
    layerParams.type = "Pooling";
    String pool;
    if (layer_type == "GlobalMaxPool")
        pool = "MAX";
    else if (layer_type == "GlobalAveragePool")
        pool = "AVE";
    else
        CV_Error(Error::StsNotImplemented, "Unsupported Pooling type of " + layer_type + " operation.");

    // Global pooling must not carry explicit reduction axes.
    CV_Assert(!layerParams.has("axes"));
    layerParams.set("global_pooling", true);
    layerParams.set("pool", pool);
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Maps each supported ONNX Reduce* operator onto the corresponding mode of
// the "Reduce2" layer; unknown reduce ops raise StsNotImplemented.
void ONNXImporter2::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    // Parallel tables: ONNX op name -> Reduce2 mode name.
    static const char* const op_names[] = {
        "ReduceMax", "ReduceMean", "ReduceMin", "ReduceProd", "ReduceSum",
        "ReduceL1", "ReduceL2", "ReduceLogSum", "ReduceLogSumExp", "ReduceSumSquare"
    };
    static const char* const mode_names[] = {
        "MAX", "MEAN", "MIN", "PROD", "SUM",
        "L1", "L2", "LOG_SUM", "LOG_SUM_EXP", "SUM_SQUARE"
    };
    const int ntypes = (int)(sizeof(op_names)/sizeof(op_names[0]));

    const auto& op_type = node_proto.op_type();
    int idx = 0;
    while (idx < ntypes && op_type != op_names[idx])
        idx++;
    if (idx >= ntypes)
        CV_Error(Error::StsNotImplemented, "DNN/ONNX: " + op_type + " is not supported.");

    layerParams.type = "Reduce2";
    layerParams.set("reduce", String(mode_names[idx]));
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Slice -> "Slice2". One input means attribute-driven slicing; 3-5 inputs
// carry starts/ends and the optional axes/steps tensors.
void ONNXImporter2::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int ninputs = node_proto.input_size();
    CV_Assert(ninputs == 1 || (3 <= ninputs && ninputs <= 5));
    layerParams.type = "Slice2";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Split -> "Split2". The optional second input holds the split sizes.
void ONNXImporter2::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_CheckGE(node_proto.input_size(), 1, "");
    CV_CheckLE(node_proto.input_size(), 2, "");
    layerParams.type = "Split2";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Constant node: no runtime layer is created; the tensor becomes a
// DNN_ARG_CONST Arg attached directly to the node's single output.
void ONNXImporter2::parseConstant(LayerParams& layerParams, const opencv_onnx::NodeProto&)
{
    CV_Assert(node_inputs.empty());
    CV_Assert(node_outputs.size() == 1);
    // getLayerParams() stores the constant's value as the only blob.
    CV_Assert(layerParams.blobs.size() == 1);
    Mat m = layerParams.blobs[0];
    Arg out = node_outputs[0];
    ArgData& data = netimpl->args.at(out.idx);
    data.kind = DNN_ARG_CONST;
    data.type = m.type();
    data.shape = m.shape();
    netimpl->__tensors__.at(out.idx) = m;
}
|
|
|
|
// BUG: https://github.com/opencv/opencv/issues/26308
|
|
// LSTM -> "LSTM2". Direction and the presence of the optional Y_h/Y_c
// outputs are translated into layer parameters; an 8th input indicates
// peephole weights.
void ONNXImporter2::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
{
    // Read-only access below, so bind a const reference instead of
    // deep-copying the whole NodeProto as the previous code did.
    const opencv_onnx::NodeProto& lstm_proto = node_proto_;
    layerParams.type = "LSTM2";

    layerParams.set("is_onnx", true);
    layerParams.set("reverse", layerParams.get<String>("direction", "") == "reverse");
    layerParams.set("bidirectional", layerParams.get<String>("direction", "") == "bidirectional");

    // ONNX LSTM outputs are [Y, Y_h, Y_c]; each is optional and may be
    // present but empty. (The unused y/yh/yc name locals were removed.)
    bool need_yc = lstm_proto.output_size() > 2 && !lstm_proto.output(2).empty();
    bool need_yh = lstm_proto.output_size() > 1 && !lstm_proto.output(1).empty();

    layerParams.set("produce_cell_output", need_yc);
    layerParams.set("produce_output_yh", need_yh);

    // The 8th input (P) carries peephole weights.
    if (lstm_proto.input_size() == 8)
        layerParams.set("use_peephole", true);

    addLayer(layerParams, lstm_proto);
}
|
|
|
|
// BUG: https://github.com/opencv/opencv/issues/26309
|
|
// GRU is not implemented in the new engine yet (see issue #26309 above);
// record it as a missing op so the import fails with a clear diagnostic.
void ONNXImporter2::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
{
    rememberMissingOp(node_proto_.op_type());
}
|
|
|
|
// ImageScaler (deprecated ONNX op): y = x * scale + bias per channel.
// With a bias it becomes a Scale layer (blob 0 = per-channel scale filled
// with the scalar 'scale', blob 1 = per-channel bias); without bias a
// Power layer with just the scale suffices.
void ONNXImporter2::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    const float scale = layerParams.has("scale") ? layerParams.get<float>("scale") : 1.0f;
    layerParams.erase("scale");

    if (layerParams.has("bias"))
    {
        layerParams.type = "Scale";
        layerParams.blobs.push_back(
                Mat(Size(1,  layerParams.get("bias").size()), CV_32FC1, scale));

        layerParams.set("bias_term", true);
        // Copy the bias values out of the attribute into a blob.
        Mat bias(1, layerParams.get("bias").size(), CV_32FC1);
        for (int j = 0; j < bias.total(); j++) {
            bias.at<float>(0, j) = layerParams.get("bias").getRealValue(j);
        }
        layerParams.blobs.push_back(bias);
        layerParams.erase("bias");
    }
    else {
        layerParams.set("scale", scale);
        layerParams.type = "Power";
    }
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Clip. In opset >= 11 the min/max bounds arrive as optional inputs; only
// constant bounds are folded into the "min"/"max" layer parameters here.
// Non-constant bounds remain ordinary layer inputs.
void ONNXImporter2::parseClip(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Clip";
    float min_value = -FLT_MAX, max_value = FLT_MAX;
    int input_size = node_proto.input_size();
    CV_Check(input_size, 1 <= input_size && input_size <= 3, "");

    // Optional "min" input (may be the empty string placeholder).
    if (input_size >= 2 && !node_proto.input(1).empty())
    {
        if (net.isConstArg(node_inputs[1]))
        {
            Mat m = net.argTensor(node_inputs[1]);
            m.convertTo(m, CV_32F);
            CV_Assert(m.total() == 1);
            min_value = m.at<float>(0);
            layerParams.set("min", min_value);
        }
    }

    // Optional "max" input.
    if (input_size == 3 && !node_proto.input(2).empty())
    {
        if (net.isConstArg(node_inputs[2]))
        {
            Mat m = net.argTensor(node_inputs[2]);
            m.convertTo(m, CV_32F);
            CV_Assert(m.total() == 1);
            max_value = m.at<float>(0);
            layerParams.set("max", max_value);
        }
    }

    addLayer(layerParams, node_proto);
}
|
|
|
|
// LeakyRelu -> ReLU layer with a negative slope (alpha, default 0.01).
void ONNXImporter2::parseLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "ReLU";
    layerParams.set("negative_slope", layerParams.get<float>("alpha", 0.01));
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Relu -> ReLU layer.
void ONNXImporter2::parseRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "ReLU";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Elu -> ELU layer.
void ONNXImporter2::parseElu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "ELU";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Tanh -> TanH layer.
void ONNXImporter2::parseTanh(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "TanH";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Abs -> AbsVal layer.
void ONNXImporter2::parseAbs(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "AbsVal";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// PRelu -> PReLU layer. The slope input must be a constant; it is stored
// as a blob and only the data input stays connected (max_inputs = 1).
void ONNXImporter2::parsePRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "PReLU";
    CV_Assert(node_inputs.size() == 2);
    CV_Assert(net.isConstArg(node_inputs[1]));
    layerParams.blobs.push_back(net.argTensor(node_inputs[1]));
    addLayer(layerParams, node_proto, 1);
}
|
|
|
|
// LRN: only the attribute name differs ("size" -> "local_size").
void ONNXImporter2::parseLRN(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    replaceLayerParam(layerParams, "size", "local_size");
    addLayer(layerParams, node_proto);
}
|
|
|
|
// InstanceNormalization: expects exactly X, scale and bias inputs.
void ONNXImporter2::parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) {
    int num_inputs = node_proto.input_size();
    CV_CheckEQ(num_inputs, 3, "DNN/ONNXImporter2 - InstanceNorm: three inputs are required");
    addLayer(layerParams, node_proto);
}
|
|
|
|
// BatchNormalization with 5 inputs: X, scale, bias, mean, var.
// mean/var must be constants and are stored as the first two blobs;
// constant scale/bias are appended as well and flagged via
// has_weight/has_bias. Only the data input stays connected (max_inputs=1).
void ONNXImporter2::parseBatchNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    if (node_proto.input_size() != 5)
        CV_Error(Error::StsNotImplemented,
                 "Expected input, scale, bias, mean and var");

    layerParams.type = "BatchNorm";
    replaceLayerParam(layerParams, "epsilon", "eps");
    replaceLayerParam(layerParams, "spatial", "use_global_stats");

    CV_Assert(net.isConstArg(node_inputs[3]));
    CV_Assert(net.isConstArg(node_inputs[4]));

    Mat meanData = net.argTensor(node_inputs[3]);
    Mat stdData = net.argTensor(node_inputs[4]);

    layerParams.blobs.push_back(meanData);
    layerParams.blobs.push_back(stdData);

    if (!node_proto.input(1).empty()) {
        layerParams.set("has_weight", true);
        CV_Assert(net.isConstArg(node_inputs[1]));
        layerParams.blobs.push_back(net.argTensor(node_inputs[1])); // weightData
    } else {
        layerParams.set("has_weight", false);
    }

    if (!node_proto.input(2).empty()) {
        layerParams.set("has_bias", true);
        // Bug fix: this assert previously checked node_inputs[1] (the
        // weights) instead of node_inputs[2] (the bias) — a copy-paste
        // error that let a non-constant bias slip past the check.
        CV_Assert(net.isConstArg(node_inputs[2]));
        layerParams.blobs.push_back(net.argTensor(node_inputs[2])); // biasData
    } else {
        layerParams.set("has_bias", false);
    }
    addLayer(layerParams, node_proto, 1);
}
|
|
|
|
// Gemm. When B (and the optional bias C) are constants they are folded
// into layer blobs, leaving only the data input connected.
void ONNXImporter2::parseGemm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Gemm";
    int n_inputs = node_proto.input_size();
    CV_Assert(2 <= n_inputs && n_inputs <= 3);

    if (net.isConstArg(node_inputs[1]) && (n_inputs == 2 || net.isConstArg(node_inputs[2]))) {
        Mat B = net.argTensor(node_inputs[1]);
        layerParams.blobs.push_back(B);
        if (n_inputs > 2) {
            Mat bias = net.argTensor(node_inputs[2]);
            layerParams.blobs.push_back(bias);
        }
        n_inputs = 1;   // only the data input remains wired
    }

    addLayer(layerParams, node_proto, n_inputs);
}
|
|
|
|
// MatMul. Mirrors parseGemm: constant B (and optional bias) are folded
// into blobs, leaving only the data input connected.
void ONNXImporter2::parseMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) {
    int n_inputs = node_proto.input_size();
    CV_Assert(2 <= n_inputs && n_inputs <= 3);

    if (net.isConstArg(node_inputs[1]) && (n_inputs == 2 || net.isConstArg(node_inputs[2]))) {
        Mat B = net.argTensor(node_inputs[1]);
        layerParams.blobs.push_back(B);
        if (n_inputs > 2) {
            Mat bias = net.argTensor(node_inputs[2]);
            layerParams.blobs.push_back(bias);
        }
        n_inputs = 1;   // only the data input remains wired
    }
    addLayer(layerParams, node_proto, n_inputs);
}
|
|
|
|
// Conv -> Convolution. Constant weights (and optional bias) are folded
// into layer blobs, leaving only the data input connected.
void ONNXImporter2::parseConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int n_inputs = node_proto.input_size();
    CV_Assert(2 <= n_inputs && n_inputs <= 3);
    layerParams.type = "Convolution";

    if (net.isConstArg(node_inputs[1]) && (n_inputs == 2 || net.isConstArg(node_inputs[2]))) {
        Mat weights = net.argTensor(node_inputs[1]);
        layerParams.blobs.push_back(weights);
        if (n_inputs > 2) {
            Mat bias = net.argTensor(node_inputs[2]);
            layerParams.blobs.push_back(bias);
        }
        n_inputs = 1;   // only the data input remains wired
    }
    addLayer(layerParams, node_proto, n_inputs);
}
|
|
|
|
// ConvTranspose -> Deconvolution. Constant weights/bias are folded into
// blobs; the output_shape or output_padding attributes are converted into
// the "adj" (output adjustment) parameter of the layer.
void ONNXImporter2::parseConvTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int n_inputs = node_proto.input_size();
    CV_Assert(2 <= n_inputs && n_inputs <= 3);
    layerParams.type = "Deconvolution";

    // A third input means a bias is present.
    layerParams.set("bias_term", node_proto.input_size() == 3);

    if (net.isConstArg(node_inputs[1]) && (n_inputs == 2 || net.isConstArg(node_inputs[2]))) {
        Mat weights = net.argTensor(node_inputs[1]);
        layerParams.blobs.push_back(weights);
        if (n_inputs > 2) {
            Mat bias = net.argTensor(node_inputs[2]);
            layerParams.blobs.push_back(bias);
        }
        n_inputs = 1;   // only the data input remains wired
    }

    if (!layerParams.has("kernel_size"))
        CV_Error(Error::StsNotImplemented,
                 "Required attribute 'kernel_size' is not present.");

    if (layerParams.has("output_shape"))
    {
        const DictValue& outShape = layerParams.get("output_shape");
        DictValue strides = layerParams.get("stride");
        DictValue kernel = layerParams.get("kernel_size");

        String padMode;
        std::vector<int> adjust_pads;
        if (layerParams.has("pad_mode"))
        {
            padMode = toUpperCase(layerParams.get<String>("pad_mode"));
            if (padMode != "SAME" && padMode != "VALID")
                CV_Error(Error::StsError, "Unsupported padding mode " + padMode);

            // Derive the per-axis output adjustment so the produced size
            // matches the requested output_shape (spatial dims start at 2).
            for (int i = 0; i < strides.size(); i++)
            {
                int sz = outShape.get<int>(2 + i);
                int stride = strides.get<int>(i);
                adjust_pads.push_back(padMode == "SAME"? (sz - 1) % stride :
                                                         (sz - kernel.get<int>(i)) % stride);
            }
            layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], (int)adjust_pads.size()));
        }
    }
    else if (layerParams.has("output_padding"))
    {
        replaceLayerParam(layerParams, "output_padding", "adj");
    }
    addLayer(layerParams, node_proto, n_inputs);
}
|
|
|
|
// Transpose: passed through as-is (the "perm" attribute, if any, is
// already in layerParams).
void ONNXImporter2::parseTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() == 1);
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Squeeze: axes may come as an attribute or as an optional second input.
void ONNXImporter2::parseSqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Squeeze";
    CV_Assert(node_proto.input_size() <= 2);
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Flatten: the "onnx" flag selects ONNX flattening semantics in the layer.
void ONNXImporter2::parseFlatten(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() == 1);
    layerParams.set("onnx", true);
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Unsqueeze: axes are either an attribute (1 input) or a second input.
void ONNXImporter2::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert((node_proto.input_size() == 1 && layerParams.has("axes")) ||
              node_proto.input_size() == 2);
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Expand -> "Expand2".
void ONNXImporter2::parseExpand(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Expand2";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Reshape -> "Reshape2". The target shape comes either as a second input
// or (legacy form) as a "shape" attribute — exactly one of the two.
void ONNXImporter2::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    bool have_shape_attr = layerParams.has("shape");
    CV_Assert((node_proto.input_size() == 2 && !have_shape_attr) ||
              (node_proto.input_size() == 1 && have_shape_attr));
    layerParams.type = "Reshape2";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Pad -> "Pad2".
void ONNXImporter2::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Pad2";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Shape: passed through unchanged (layer type stays "Shape").
void ONNXImporter2::parseShape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() == 1);
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Cast -> "Cast2".
void ONNXImporter2::parseCast2(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Cast2";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// CastLike -> "Cast2"; the second input supplies the target type.
void ONNXImporter2::parseCastLike(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_CheckEQ(node_proto.input_size(), 2, "CastLike requires two inputs");
    layerParams.type = "Cast2";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ConstantOfShape: passed through to the layer of the same name.
void ONNXImporter2::parseConstantOfShape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "ConstantOfShape";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Gather -> "Gather2" (inputs: data, indices).
void ONNXImporter2::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Gather2";
    CV_CheckEQ(node_proto.input_size(), 2, "");
    addLayer(layerParams, node_proto);
}
|
|
|
|
// GatherElements (inputs: data, indices); layer type stays unchanged.
void ONNXImporter2::parseGatherElements(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_CheckEQ(node_proto.input_size(), 2, "GatherElements: two inputs are required");
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Concat -> "Concat2".
void ONNXImporter2::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_CheckEQ(node_proto.output_size(), 1, "");
    layerParams.type = "Concat2";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// If: adds the "If" layer first, then recursively parses the two branch
// subgraphs and attaches them to that layer.
void ONNXImporter2::parseIf(LayerParams& layerParams,
                            const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() >= 1);
    layerParams.type = "If";

    addLayer(layerParams, node_proto);

    // Slot 0 = then_branch, slot 1 = else_branch.
    std::vector<Ptr<Graph> > thenelse(2);
    for (int i = 0; i < node_proto.attribute_size(); ++i)
    {
        const auto& attr = node_proto.attribute(i);
        if (attr.name() == "then_branch" || attr.name() == "else_branch") {
            opencv_onnx::GraphProto branch = attr.g();
            // parseGraph saves/restores the current graph context itself.
            Ptr<Graph> graph = parseGraph(&branch, false);
            thenelse[(int)(attr.name() == "else_branch")] = graph;
        }
    }

    // Both branches are mandatory.
    CV_Assert_N(!thenelse[0].empty(), !thenelse[1].empty());

    // Attach the branches to the If layer just appended by addLayer().
    Ptr<Layer>& ifLayer = curr_prog.back();
    *ifLayer->subgraphs() = thenelse;
}
|
|
|
|
// https://github.com/onnx/onnx/blob/master/docs/Operators.md#Resize
|
|
// Resize -> "Resize2". Handles the several opset layouts of the inputs
// and folds constant scales/sizes/ROI into layer parameters; non-constant
// ones stay as runtime inputs.
void ONNXImporter2::parseResize2(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int ninputs = node_proto.input_size();
    layerParams.type = "Resize2";
    String interp_mode = layerParams.get<String>("coordinate_transformation_mode", "half_pixel");

    bool halfPixel = interp_mode == "tf_half_pixel_for_nn" || interp_mode == "half_pixel" || interp_mode == "pytorch_half_pixel";

    layerParams.set("align_corners", interp_mode == "align_corners");
    layerParams.set("half_pixel_centers", halfPixel);
    if (layerParams.get<String>("mode") == "linear")
    {
        layerParams.set("mode", halfPixel ? "opencv_linear" : "bilinear");
    }

    if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
        layerParams.set("mode", "opencv_linear");

    // opset-10: input = [X, scales]
    // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes]
    // opset-13: may have empty input, [X, "", "", sizes] or [x, "", scales]
    int scalesInputId = ninputs == 2 ? 1 : 2;
    Arg scalesArg = node_inputs[scalesInputId];
    Mat scales;
    if(scalesArg.idx > 0 && netimpl->isConstArg(scalesArg))
        scales = netimpl->argTensor(scalesArg);

    if (interp_mode == "tf_crop_and_resize")
    {
        CV_Assert(ninputs >= 3);
        Arg roiArg = node_inputs[1];
        bool hasSizes = (ninputs >= 4);
        Arg sizesArg = hasSizes ? node_inputs[3] : Arg();

        bool staticRoi = netimpl->isConstArg(roiArg);
        bool staticSizes = hasSizes && netimpl->isConstArg(sizesArg);

        // Fold constant ROI (and sizes) into params; otherwise the layer
        // reads the ROI at run time (dynamic_roi).
        if (staticRoi && (!hasSizes || staticSizes))
        {
            Mat roiMat = netimpl->argTensor(roiArg), roiF;
            CV_CheckEQ(roiMat.total(), (size_t)4,
                       "ONNX/Resize: ROI must have 4 values [y1,x1,y2,x2]");
            roiMat.convertTo(roiF, CV_32F);
            layerParams.set("y1", roiF.at<float>(0));
            layerParams.set("x1", roiF.at<float>(1));
            layerParams.set("y2", roiF.at<float>(2));
            layerParams.set("x2", roiF.at<float>(3));

            if (hasSizes && staticSizes)
            {
                Mat szMat = netimpl->argTensor(sizesArg), sz;
                CV_CheckEQ(szMat.total(), (size_t)4,
                           "ONNX/Resize: sizes must have 4 values [N,C,H,W]");
                szMat.convertTo(sz, CV_32S);
                layerParams.set("height", sz.at<int>(2));
                layerParams.set("width", sz.at<int>(3));
            }
            layerParams.set("dynamic_roi", false);
        }
        else layerParams.set("dynamic_roi", true);
    }

    // Constant 4-element scales: fold H/W zoom factors and drop the extra
    // inputs (only X stays connected).
    if (scales.total() == 4)
    {
        CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected");
        CV_CheckEQ(scales.type(), CV_32F, "Scales should have 32F type");
        layerParams.set("zoom_factor_y", scales.at<float>(2));
        layerParams.set("zoom_factor_x", scales.at<float>(3));
        ninputs = 1;
    }
    else if (ninputs >= 4) // opset-11 [x, roi, scales, sizes] or opset-13: input = [X, "", "", sizes]
    {
        // Constant explicit output sizes: fold width/height instead.
        Arg sizesArg = node_inputs[3];
        if (netimpl->isConstArg(sizesArg))
        {
            Mat shapes_ = netimpl->argTensor(sizesArg), shapes;
            CV_CheckEQ(shapes_.total(), (size_t)4, "HCHW layout is expected");
            shapes_.convertTo(shapes, CV_32S);
            layerParams.set("width", shapes.at<int>(3));
            layerParams.set("height", shapes.at<int>(2));
            ninputs = 1;
        }
    }

    // Forward the remaining Resize attributes verbatim.
    for (int i_attr = 0; i_attr < node_proto.attribute_size(); ++i_attr)
    {
        const auto& a = node_proto.attribute(i_attr);
        if (a.name() == "nearest_mode" && a.has_s())
            layerParams.set("nearest_mode", String(a.s()));
        else if (a.name() == "exclude_outside" && a.has_i())
            layerParams.set("exclude_outside", static_cast<int>(a.i()) != 0);
        else if (a.name() == "cubic_coeff_a" && a.has_f())
            layerParams.set("cubic_coeff_a", static_cast<float>(a.f()));
        else if (a.name() == "extrapolation_value" && a.has_f())
            layerParams.set("extrapolation_value", static_cast<float>(a.f()));
    }

    replaceLayerParam(layerParams, "mode", "interpolation");
    addLayer(layerParams, node_proto, ninputs);
}
|
|
|
|
// Unique: passed through to the layer of the same name.
void ONNXImporter2::parseUnique(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Unique";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Size: passed through to the layer of the same name.
void ONNXImporter2::parseSize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Size";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX BitShift: https://onnx.ai/onnx/operators/onnx__BitShift.html
// The required "direction" attribute must be "LEFT" or "RIGHT"; it is
// re-encoded as an integer for the layer (0 = left, 1 = right).
void ONNXImporter2::parseBitShift(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "BitShift";
    String dir = layerParams.get<String>("direction", "");

    // This single assert also rejects an empty/absent attribute; the old
    // code additionally checked !dir.empty() and lowercase spellings,
    // both of which were unreachable after this assert.
    CV_Assert(dir == "LEFT" || dir == "RIGHT");
    layerParams.set("direction", dir == "LEFT" ? 0 : 1);

    addLayer(layerParams, node_proto);
}
|
|
|
|
// BitwiseAnd/BitwiseOr/BitwiseXor map onto NaryEltwise with the matching
// elementwise operation.
void ONNXImporter2::parseBitwise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    const std::string& op_type = node_proto.op_type();
    layerParams.type = "NaryEltwise";

    String op;
    if (op_type == "BitwiseAnd")
        op = "bitwise_and";
    else if (op_type == "BitwiseOr")
        op = "bitwise_or";
    else if (op_type == "BitwiseXor")
        op = "bitwise_xor";
    else
        CV_Error(Error::StsNotImplemented, String("Unsupported bitwise op: ") + op_type);

    layerParams.set("operation", op);
    addLayer(layerParams, node_proto);
}
|
|
|
|
// BitwiseNot -> "Not" layer.
void ONNXImporter2::parseBitwiseNot(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Not";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX Trilu (upper/lower triangular part). The optional second input (k, the
// diagonal offset) is forwarded as-is, so all node inputs are passed through.
void ONNXImporter2::parseTrilu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Trilu";
    addLayer(layerParams, node_proto, node_proto.input_size());
}
|
|
|
|
// ONNX IsNaN maps onto the dnn "IsNaN" layer.
void ONNXImporter2::parseIsNaN(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "IsNaN";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX IsInf maps onto the dnn "IsInf" layer; detect_negative/detect_positive
// attributes (if present) are forwarded unchanged via layerParams.
void ONNXImporter2::parseIsInf(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "IsInf";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX Det (matrix determinant) maps onto the dnn "Det" layer.
void ONNXImporter2::parseDet(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Det";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX GridSample maps onto the dnn "GridSample" layer; mode/padding_mode/
// align_corners attributes are forwarded unchanged via layerParams.
void ONNXImporter2::parseGridSample(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "GridSample";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX NonMaxSuppression. NOTE: "NonMaxSuprression" (sic) is kept as-is in
// both the method name and the layer type string — the misspelled name must
// match the identifier registered in the dnn layer factory.
void ONNXImporter2::parseNonMaxSuprression(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "NonMaxSuprression";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Legacy ONNX Upsample (deprecated since opset 10 in favor of Resize), also
// reached for nodes produced by Resize-subgraph fusion. Everything is mapped
// onto the dnn "Resize" layer. Scale factors are taken, in priority order,
// from the "scales" attribute (PyTorch export), from "height_scale"/
// "width_scale" (Caffe2 export), or from a constant second input.
void ONNXImporter2::parseUpsample(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int n_inputs = node_proto.input_size();
    //fused from Resize Subgraph
    if (layerParams.has("coordinate_transformation_mode"))
    {
        String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
        // tf_crop_and_resize has no equivalent in the Resize layer
        CV_Assert(interp_mode != "tf_crop_and_resize");

        bool halfPixel = interp_mode == "tf_half_pixel_for_nn" || interp_mode == "half_pixel" || interp_mode == "pytorch_half_pixel";

        layerParams.set("align_corners", interp_mode == "align_corners");
        layerParams.set("half_pixel_centers", halfPixel);
        if (layerParams.get<String>("mode") == "linear")
        {
            // "opencv_linear" matches half-pixel sampling; "bilinear" is the legacy convention
            layerParams.set("mode", halfPixel ? "opencv_linear" : "bilinear");
        }
    }
    // PyTorch-exported linear upsampling follows OpenCV's resize convention
    if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
        layerParams.set("mode", "opencv_linear");

    layerParams.type = "Resize";
    if (layerParams.has("scales"))
    {
        // Pytorch layer
        DictValue scales = layerParams.get("scales");
        // 4 scales expected: N, C, H, W — only the spatial ones are used
        CV_Assert(scales.size() == 4);
        layerParams.set("zoom_factor_y", scales.getIntValue(2));
        layerParams.set("zoom_factor_x", scales.getIntValue(3));
    }
    else if (layerParams.has("height_scale") && layerParams.has("width_scale"))
    {
        // Caffe2 layer
        replaceLayerParam(layerParams, "height_scale", "zoom_factor_y");
        replaceLayerParam(layerParams, "width_scale", "zoom_factor_x");
    }
    else
    {
        CV_Assert(n_inputs >= 2);
        // scales as input
        if (net.isConstArg(node_inputs[1])) {
            Mat scales;
            net.argTensor(node_inputs[1]).convertTo(scales, CV_32F);
            CV_Assert(scales.total() == 4);
            // NCHW layout assumed: indices 2/3 hold the spatial (H, W) factors
            layerParams.set("zoom_factor_y", scales.at<float>(2));
            layerParams.set("zoom_factor_x", scales.at<float>(3));
            // scales folded into attributes; drop the constant input
            n_inputs = 1;
        }
    }
    replaceLayerParam(layerParams, "mode", "interpolation");
    addLayer(layerParams, node_proto, n_inputs);
}
|
|
|
|
// ONNX NonZero maps onto the dnn "NonZero" layer.
void ONNXImporter2::parseNonZero(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "NonZero";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Softmax / LogSoftmax. The default axis changed between ONNX opsets:
// opset <= 11 defaults to 1, opset >= 13 defaults to the last axis (-1).
void ONNXImporter2::parseSoftMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    const bool oldOpset = onnx_opset != 0 && onnx_opset <= 11;
    const int axis = layerParams.get<int>("axis", oldOpset ? 1 : -1);
    layerParams.set<int>("axis", axis);
    layerParams.type = "Softmax";
    layerParams.set("log_softmax", node_proto.op_type() == "LogSoftmax");
    addLayer(layerParams, node_proto);
}
|
|
|
|
// DetectionOutput: requires exactly three inputs (location, confidence, prior
// boxes); the layer type from layerParams is kept as-is.
void ONNXImporter2::parseDetectionOutput(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_CheckEQ(node_proto.input_size(), 3, "");
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX CumSum: inputs are (data, axis). When the axis input is a constant
// tensor it is folded into the "axis" attribute and dropped from the inputs.
void ONNXImporter2::parseCumSum(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() == 2);
    layerParams.type = "CumSum";
    int ninputs = 2;
    if (net.isConstArg(node_inputs[1]))
    {
        Mat axisMat;
        net.argTensor(node_inputs[1]).convertTo(axisMat, CV_32S);
        CV_Assert(axisMat.total() == 1);
        layerParams.set("axis", axisMat.at<int>(0));
        ninputs = 1;
    }
    addLayer(layerParams, node_proto, ninputs);
}
|
|
|
|
// "Equal" "Greater" "Less" "Pow" "Add" "Sub" "Mul" "Div" "Sum" "Min" "Max" "GreaterOrEqual" "LessOrEqual" "And" "Or" "Xor"
|
|
void ONNXImporter2::parseElementWise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
|
|
{
|
|
opencv_onnx::NodeProto node_proto = node_proto_;
|
|
String op_type = toLowerCase(node_proto.op_type());
|
|
|
|
layerParams.type = "NaryEltwise";
|
|
layerParams.set("operation", toLowerCase(node_proto.op_type()));
|
|
if (node_proto.op_type() == "Mod") {
|
|
if (layerParams.get<int>("fmod", 0)) {
|
|
layerParams.set("operation", "fmod");
|
|
};
|
|
}
|
|
// add element-wise layer
|
|
addLayer(layerParams, node_proto);
|
|
}
|
|
|
|
// DepthToSpace / SpaceToDepth: "blocksize" is a required attribute per the
// ONNX spec; the layer type from layerParams is kept as-is.
void ONNXImporter2::parseDepthSpaceOps(LayerParams &layerParams, const opencv_onnx::NodeProto& node_proto) {
    CV_CheckTrue(layerParams.has("blocksize"), "blocksize is required but not found");
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Currently we only support range with all constant inputs
// ONNX Range: inputs are (start, limit, delta); mapped onto the "Range" layer.
void ONNXImporter2::parseRange(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_Assert(node_proto.input_size() == 3); // 0 - start, 1 - limit, 2 - delta
    layerParams.type = "Range";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// Scatter / ScatterElements / ScatterND: all take (data, indices, updates).
// ScatterND gets its own dedicated layer; everything else uses "Scatter".
void ONNXImporter2::parseScatter(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    CV_CheckEQ(node_proto.input_size(), 3, "Scatter: three inputs are required.");
    layerParams.type = node_proto.op_type() == "ScatterND" ? "ScatterND" : "Scatter";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX Tile, mapped onto the "Tile2" layer.
// for Tile>1, only the case of 'repeats' being constant is supported.
// 'repeats' is treated as a parameter instead of an input to determine shape in pre-run.
void ONNXImporter2::parseTile(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "Tile2";
    addLayer(layerParams, node_proto);
}
|
|
|
|
// LayerNormalization: inputs are (data, scale[, bias]). When scale (and bias,
// if present) are constant, they are folded into layer blobs and only the
// data input is kept.
void ONNXImporter2::parseLayerNorm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    int ninputs = node_proto.input_size();
    CV_Assert(2 <= ninputs && ninputs <= 3);
    // short-circuit: bias constness is only checked when scale is constant
    bool foldable = net.isConstArg(node_inputs[1]) && (ninputs == 2 || net.isConstArg(node_inputs[2]));
    if (foldable) {
        layerParams.blobs.push_back(net.argTensor(node_inputs[1]));
        if (ninputs > 2)
            layerParams.blobs.push_back(net.argTensor(node_inputs[2]));
        ninputs = 1;
    }
    addLayer(layerParams, node_proto, ninputs);
}
|
|
|
|
// Generic fallback for ops that need no attribute translation: the layer
// type and attributes in layerParams are forwarded unchanged.
void ONNXImporter2::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX Einsum: the equation string is mandatory; reject nodes without one
// before forwarding the layer.
void ONNXImporter2::parseEinsum(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    const std::string equation = layerParams.get<std::string>("equation");
    CV_CheckFalse(equation.empty(), "Equation is empty");
    addLayer(layerParams, node_proto);
}
|
|
|
|
// ONNX TopK, mapped onto "TopK2". When the K input is a constant tensor it is
// folded into the "k" attribute; otherwise K stays a runtime input.
void ONNXImporter2::parseTopK2(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    layerParams.type = "TopK2";
    bool constK = node_proto.input_size() >= 2 && net.isConstArg(node_inputs[1]);
    if (!constK)
    {
        // Dynamic K: forward all inputs unchanged
        addLayer(layerParams, node_proto);
        return;
    }
    Mat kMat = net.argTensor(node_inputs[1]);
    CV_Assert(kMat.type() == CV_32S || kMat.type() == CV_64S);
    int k = kMat.type() == CV_32S ? getScalarFromMat<int>(kMat) : (int)getScalarFromMat<int64_t>(kMat);
    layerParams.set("k", k);
    addLayer(layerParams, node_proto, 1);
}
|
|
|
|
// DequantizeLinear: no attribute translation needed; the layer type and
// attributes in layerParams are forwarded unchanged.
void ONNXImporter2::parseDequantizeLinear(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    addLayer(layerParams, node_proto);
}
|
|
|
|
// QuantizeLinear: ONNX stores the requested output type as a
// TensorProto_DataType enum; translate it to the corresponding CV depth
// before adding the layer.
void ONNXImporter2::parseQuantizeLinear(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
    const int onnxType = layerParams.get<int>("output_dtype", -1);
    if (onnxType >= 0)
        layerParams.set<int>("output_dtype", dataType2cv((opencv_onnx::TensorProto_DataType)onnxType));
    addLayer(layerParams, node_proto);
}
|
|
|
|
// BUG: https://github.com/opencv/opencv/issues/26310
|
|
/*void ONNXImporter2::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
|
|
{
|
|
opencv_onnx::NodeProto node_proto = node_proto_;
|
|
int ninputs = node_proto.input_size();
|
|
CV_Assert(ninputs == 8 || ninputs == 9);
|
|
|
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
|
int inp_zp = (int)getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
|
|
|
if (layerParams.has("pad"))
|
|
{
|
|
bool asymmetricPadding = false;
|
|
DictValue pads = layerParams.get("pad");
|
|
const int dims = pads.size() / 2;
|
|
|
|
for (int i = 0; i < dims; ++i)
|
|
{
|
|
if (pads.get<int>(i) != pads.get<int>(i + dims))
|
|
{
|
|
asymmetricPadding = true;
|
|
break;
|
|
}
|
|
}
|
|
if (asymmetricPadding && pads.size() == 4)
|
|
{
|
|
layerParams.erase("pad");
|
|
std::vector<int> paddings(4, 0);
|
|
for (int i = 0; i < dims; ++i)
|
|
{
|
|
paddings.push_back(pads.get<int>(i));
|
|
paddings.push_back(pads.get<int>(dims + i));
|
|
}
|
|
LayerParams padLp;
|
|
padLp.name = layerParams.name + "/pad";
|
|
padLp.type = "PaddingInt8";
|
|
padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
|
|
padLp.set("depth", CV_8S);
|
|
padLp.set<double>("value", (double)inp_zp);
|
|
|
|
opencv_onnx::NodeProto proto;
|
|
proto.add_input(node_proto.input(0));
|
|
proto.add_output(padLp.name);
|
|
|
|
addLayer(padLp, proto);
|
|
node_proto.set_input(0, padLp.name);
|
|
}
|
|
}
|
|
|
|
Mat weights = getBlob(node_proto, 3);
|
|
int outCn = weights.size[0];
|
|
Mat w_scale = getBlob(node_proto, 4);
|
|
CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
|
|
bool per_channel = w_scale.total() == outCn;
|
|
Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
|
|
|
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
|
|
int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));
|
|
|
|
Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);
|
|
|
|
Mat weights_2d = weights.reshape(1, outCn);
|
|
Mat biasFused(1, outCn, CV_32S);
|
|
Mat outputMultiplier(1, outCn, CV_32F);
|
|
for (int i = 0; i < outCn; i++)
|
|
{
|
|
biasFused.at<int>(i) = bias.at<int>(i) - inp_zp*(cv::sum(weights_2d.row(i))[0]);
|
|
outputMultiplier.at<float>(i) = (inp_sc * wt_sc.at<float>(i)) / out_sc;
|
|
}
|
|
|
|
layerParams.type = "ConvolutionInt8";
|
|
layerParams.set("num_output", outCn);
|
|
layerParams.set("input_zeropoint", inp_zp);
|
|
layerParams.set("input_scale",inp_sc);
|
|
layerParams.set("zeropoints", out_zp);
|
|
layerParams.set("scales", out_sc);
|
|
layerParams.set("per_channel", per_channel);
|
|
layerParams.blobs.push_back(weights);
|
|
layerParams.blobs.push_back(biasFused);
|
|
layerParams.blobs.push_back(outputMultiplier);
|
|
addLayer(layerParams, node_proto);
|
|
}
|
|
|
|
void ONNXImporter2::parseQMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
|
|
{
|
|
int ninputs = node_proto.input_size();
|
|
CV_Assert(ninputs == 8);
|
|
|
|
if (constBlobs.find(node_proto.input(3)) == constBlobs.end())
|
|
CV_Error(Error::StsNotImplemented, "Variable weights is not supported");
|
|
|
|
int firstInpDims = outShapes[node_proto.input(0)].size();
|
|
|
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
|
|
|
Mat weights = getBlob(node_proto, 3).t();
|
|
int outCn = weights.size[0];
|
|
int secondInpDims = weights.dims;
|
|
|
|
Mat w_scale = getBlob(node_proto, 4);
|
|
CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
|
|
bool per_channel = w_scale.total() == outCn ? true : false;
|
|
Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
|
|
|
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
|
|
int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));
|
|
|
|
Mat bias(1, outCn, CV_32S);
|
|
Mat outputMultiplier(1, outCn, CV_32F);
|
|
for (int i = 0; i < outCn; i++)
|
|
{
|
|
bias.at<int>(i) = -inp_zp*(cv::sum(weights.row(i))[0]);
|
|
outputMultiplier.at<float>(i) = (inp_sc * wt_sc.at<float>(i)) / out_sc;
|
|
}
|
|
|
|
layerParams.type = "InnerProductInt8";
|
|
layerParams.set("num_output", outCn);
|
|
layerParams.set("axis", firstInpDims - secondInpDims + 1);
|
|
layerParams.set("input_scale", inp_sc);
|
|
layerParams.set("input_zeropoint", inp_zp);
|
|
layerParams.set("zeropoints", out_zp);
|
|
layerParams.set("scales", out_sc);
|
|
layerParams.set("per_channel", per_channel);
|
|
|
|
layerParams.blobs.push_back(weights);
|
|
layerParams.blobs.push_back(bias);
|
|
layerParams.blobs.push_back(outputMultiplier);
|
|
addLayer(layerParams, node_proto);
|
|
}
|
|
|
|
// A * B + C = Y, we require that the dimension of A is [m, k], and the dimension of B is [n, k].
|
|
// And the dim of output Y is [m, n]
|
|
void ONNXImporter2::parseQGemm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
|
|
{
|
|
int ninputs = node_proto.input_size();
|
|
CV_Assert(ninputs == 8 || ninputs == 9);
|
|
|
|
layerParams.type = "InnerProductInt8";
|
|
|
|
if (constBlobs.find(node_proto.input(3)) == constBlobs.end())
|
|
CV_Error(Error::StsNotImplemented, "Variable weights is not supported");
|
|
|
|
Mat weights = getBlob(node_proto, 3);
|
|
|
|
if (!layerParams.get<int>("transB", 0))
|
|
{
|
|
transpose(weights, weights);
|
|
}
|
|
|
|
CV_Assert(layerParams.get<float>("alpha", 1) == 1.0f);
|
|
CV_Assert(layerParams.get<int>("transA", 0) == 0);
|
|
|
|
int firstInpDims = outShapes[node_proto.input(0)].size();
|
|
|
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
|
|
|
int outCn = weights.size[0];
|
|
int secondInpDims = weights.dims;
|
|
|
|
Mat w_scale = getBlob(node_proto, 4);
|
|
CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
|
|
bool per_channel = w_scale.total() == outCn;
|
|
Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
|
|
|
|
Mat w_zp = getBlob(node_proto, 5);
|
|
int8_t* ptrZp = w_zp.ptr<int8_t>(0);
|
|
|
|
for (int i = 0; i < w_zp.total(); i++)
|
|
{
|
|
if (ptrZp[i] != (int8_t)0)
|
|
CV_Error(Error::StsUnsupportedFormat, "The zero-point non-zero case of W is not supported!");
|
|
}
|
|
|
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 7));
|
|
int8_t out_zp = ninputs == 9 ? getScalarFromMat<int8_t>(getBlob(node_proto, 8)) : 0;
|
|
|
|
Mat bias;
|
|
if (constBlobs.find(node_proto.input(6)) != constBlobs.end())
|
|
bias = getBlob(node_proto, 6);
|
|
if (bias.empty())
|
|
bias = Mat::zeros(1, outCn, CV_32S);
|
|
|
|
Mat biasFused(1, outCn, CV_32S);
|
|
Mat outputMultiplier(1, outCn, CV_32F);
|
|
for (int i = 0; i < outCn; i++)
|
|
{
|
|
biasFused.at<int>(i) = bias.at<int>(i) - inp_zp*(cv::sum(weights.row(i))[0]);
|
|
outputMultiplier.at<float>(i) = (inp_sc * wt_sc.at<float>(i)) / out_sc;
|
|
}
|
|
|
|
layerParams.type = "InnerProductInt8";
|
|
layerParams.set("num_output", outCn);
|
|
layerParams.set("axis", firstInpDims - secondInpDims + 1);
|
|
layerParams.set("input_scale", inp_sc);
|
|
layerParams.set("input_zeropoint", inp_zp);
|
|
layerParams.set("scales", out_sc);
|
|
layerParams.set("zeropoints", out_zp);
|
|
layerParams.set("per_channel", per_channel);
|
|
|
|
layerParams.blobs.push_back(weights);
|
|
layerParams.blobs.push_back(biasFused);
|
|
layerParams.blobs.push_back(outputMultiplier);
|
|
addLayer(layerParams, node_proto);
|
|
}
|
|
|
|
void ONNXImporter2::parseQEltwise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
|
|
{
|
|
opencv_onnx::NodeProto node_proto = node_proto_;
|
|
CV_Assert(node_proto.input_size() == 7 || node_proto.input_size() == 8);
|
|
std::string op = (node_proto.op_type() == "QLinearAdd") ? "sum" : "prod";
|
|
int constId = -1;
|
|
for (int i = 0; i < 4; i += 3)
|
|
{
|
|
if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
|
|
constId = i;
|
|
}
|
|
|
|
float inp_0_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
|
int8_t inp_0_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
|
|
|
float inp_1_sc = getScalarFromMat<float>(getBlob(node_proto, 4));
|
|
int8_t inp_1_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 5));
|
|
|
|
// Set 2nd input as the const input
|
|
if (constId == 0)
|
|
{
|
|
cv::swap(inp_0_sc, inp_1_sc);
|
|
cv::swap(inp_0_zp, inp_1_zp);
|
|
}
|
|
|
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
|
|
|
|
int8_t out_zp = 0;
|
|
if (node_proto.input_size() == 8)
|
|
out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));
|
|
|
|
std::vector<float> inp_scales = {inp_0_sc, inp_1_sc};
|
|
std::vector<int8_t> inp_zps = {inp_0_zp, inp_1_zp};
|
|
|
|
std::vector<float> coeffs;
|
|
float offset;
|
|
if (op == "sum")
|
|
{
|
|
coeffs = {inp_scales[0]/out_sc, inp_scales[1]/out_sc};
|
|
offset = out_zp - coeffs[0]*inp_zps[0] - coeffs[1]*inp_zps[1];
|
|
}
|
|
else
|
|
{
|
|
coeffs = {inp_scales[0]/out_sc, inp_scales[1]};
|
|
offset = out_zp;
|
|
}
|
|
|
|
if (constId != -1)
|
|
{
|
|
Mat blob = getBlob(node_proto, constId);
|
|
if (blob.total() == 1)
|
|
{
|
|
float val = inp_scales[1] * (blob.at<int8_t>(0) - inp_zps[1]);
|
|
float scale = inp_scales[0] / out_sc;
|
|
if (op == "prod")
|
|
scale *= val;
|
|
|
|
float shift = out_zp - scale*inp_zps[0];
|
|
if (op == "sum")
|
|
shift += (val/out_sc);
|
|
|
|
LayerParams rescaleParams;
|
|
rescaleParams.name = layerParams.name;
|
|
rescaleParams.type = "Requantize";
|
|
rescaleParams.set("depth", CV_8S);
|
|
rescaleParams.set("scale", scale);
|
|
rescaleParams.set("shift", shift);
|
|
rescaleParams.set("isEltwise", true);
|
|
addLayer(rescaleParams, node_proto);
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
MatShape inpShape = outShapes[node_proto.input(3 - constId)];
|
|
if (blob.dims == 2)
|
|
blob = blob.t();
|
|
|
|
if (shape(blob) == inpShape)
|
|
{
|
|
LayerParams constParams;
|
|
constParams.name = layerParams.name + "/const";
|
|
constParams.type = "ConstInt8";
|
|
constParams.set("depth", CV_8S);
|
|
constParams.set("scales", inp_1_sc);
|
|
constParams.set("zeropoints", inp_1_zp);
|
|
constParams.blobs.push_back(blob);
|
|
|
|
int id = net.addLayer(constParams.name, constParams.type, CV_8S, constParams);
|
|
layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0, CV_8S)));
|
|
outShapes[constParams.name] = shape(blob);
|
|
node_proto.set_input(constId, constParams.name);
|
|
|
|
layerParams.type = "EltwiseInt8";
|
|
layerParams.set("operation", op);
|
|
layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
|
|
layerParams.set("offset", offset);
|
|
}
|
|
else
|
|
{
|
|
layerParams.type = "ScaleInt8";
|
|
layerParams.set("bias_term", op == "sum");
|
|
int axis = 1;
|
|
for (int i = 0; i < graph_proto->initializer_size(); i++)
|
|
{
|
|
opencv_onnx::TensorProto tensor_proto = graph_proto->initializer(i);
|
|
if (tensor_proto.name() == node_proto.input(constId))
|
|
{
|
|
axis = inpShape.size() - tensor_proto.dims_size();
|
|
break;
|
|
}
|
|
}
|
|
layerParams.set("axis", axis);
|
|
blob = blob.reshape(1, 1);
|
|
Mat blob_dequantized;
|
|
blob.convertTo(blob_dequantized, CV_32F, inp_scales[1], -(inp_scales[1] * inp_zps[1]));
|
|
layerParams.blobs.push_back(blob_dequantized);
|
|
}
|
|
}
|
|
}
|
|
else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(3)])
|
|
{
|
|
layerParams.type = "EltwiseInt8";
|
|
layerParams.set("operation", op);
|
|
layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
|
|
layerParams.set("offset", offset);
|
|
}
|
|
else
|
|
{
|
|
layerParams.type = "ScaleInt8";
|
|
layerParams.set("bias_term", op == "sum");
|
|
}
|
|
|
|
layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
|
|
layerParams.set("input_zeropoints", DictValue::arrayInt(inp_zps.data(), inp_zps.size()));
|
|
layerParams.set("scales", out_sc);
|
|
layerParams.set("zeropoints", out_zp);
|
|
|
|
addLayer(layerParams, node_proto);
|
|
}
|
|
|
|
void ONNXImporter2::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
|
|
{
|
|
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
|
|
|
|
float slope = layerParams.get<float>("alpha");
|
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
|
|
int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));
|
|
|
|
Mat lookUpTable(1, 256, CV_8S);
|
|
int8_t* table = lookUpTable.ptr<int8_t>();
|
|
for (int i = -128; i < 128; i++)
|
|
{
|
|
float x = inp_sc*(i - inp_zp);
|
|
float y = x >= 0.f ? x : slope*x;
|
|
int quantized = out_zp + cvRound(y/out_sc);
|
|
table[i+128] = saturate_cast<int8_t>(quantized);
|
|
}
|
|
|
|
layerParams.type = "ReLUInt8";
|
|
layerParams.set("input_scale", inp_sc);
|
|
layerParams.set("input_zeropoint", inp_zp);
|
|
layerParams.set("scales", out_sc);
|
|
layerParams.set("zeropoints", out_zp);
|
|
layerParams.set("slope", slope);
|
|
layerParams.blobs.push_back(lookUpTable);
|
|
addLayer(layerParams, node_proto);
|
|
}
|
|
|
|
void ONNXImporter2::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
|
|
{
|
|
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
|
|
|
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
|
|
int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));
|
|
|
|
Mat lookUpTable(1, 256, CV_8S);
|
|
int8_t* table = lookUpTable.ptr<int8_t>();
|
|
for (int i = -128; i < 128; i++)
|
|
{
|
|
float x = inp_sc*(i - inp_zp);
|
|
float y = 1.f/(1.f + std::exp(-x));
|
|
int quantized = out_zp + cvRound(y/out_sc);
|
|
table[i+128] = saturate_cast<int8_t>(quantized);
|
|
}
|
|
|
|
layerParams.type = "SigmoidInt8";
|
|
layerParams.set("input_scale", inp_sc);
|
|
layerParams.set("input_zeropoint", inp_zp);
|
|
layerParams.set("scales", out_sc);
|
|
layerParams.set("zeropoints", out_zp);
|
|
layerParams.blobs.push_back(lookUpTable);
|
|
addLayer(layerParams, node_proto);
|
|
}
|
|
|
|
void ONNXImporter2::parseQAvgPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
|
|
{
|
|
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
|
|
|
|
float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
|
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
|
float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
|
|
int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));
|
|
|
|
layerParams.type = "PoolingInt8";
|
|
layerParams.set("pool", "ave");
|
|
layerParams.set("global_pooling", node_proto.op_type() == "QLinearGlobalAveragePool");
|
|
layerParams.set("multiplier", inp_sc/out_sc);
|
|
layerParams.set("input_scale", inp_sc);
|
|
layerParams.set("input_zeropoint", inp_zp);
|
|
layerParams.set("scales", out_sc);
|
|
layerParams.set("zeropoints", out_zp);
|
|
addLayer(layerParams, node_proto);
|
|
}
|
|
|
|
void ONNXImporter2::parseQConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
|
|
{
|
|
opencv_onnx::NodeProto node_proto = node_proto_;
|
|
layerParams.type = "ConcatInt8";
|
|
int num_inputs = node_proto.input_size();
|
|
|
|
float out_scale = getScalarFromMat<float>(getBlob(node_proto, 0));
|
|
int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 1));
|
|
|
|
for (int i = 2; i < num_inputs; i += 3)
|
|
{
|
|
float inp_scale = getScalarFromMat<float>(getBlob(node_proto, i + 1));
|
|
int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, i + 2));
|
|
|
|
if (inp_scale != out_scale || inp_zp != out_zp)
|
|
{
|
|
float scale = inp_scale/out_scale;
|
|
float shift = out_zp - scale*inp_zp;
|
|
|
|
if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
|
|
{
|
|
Mat blob = getBlob(node_proto, i);
|
|
Mat blob_rescaled;
|
|
blob.convertTo(blob_rescaled, CV_8S, scale, shift);
|
|
constBlobs[node_proto.input(i)] = blob_rescaled;
|
|
}
|
|
else
|
|
{
|
|
LayerParams rescaleParams;
|
|
rescaleParams.name = node_proto.input(i) + "/rescale";
|
|
rescaleParams.type = "Requantize";
|
|
rescaleParams.set("depth", CV_8S);
|
|
rescaleParams.set("scale", scale);
|
|
rescaleParams.set("shift", shift);
|
|
rescaleParams.set("isEltwise", false);
|
|
|
|
opencv_onnx::NodeProto proto;
|
|
proto.add_input(node_proto.input(i));
|
|
proto.add_output(rescaleParams.name);
|
|
addLayer(rescaleParams, proto);
|
|
node_proto.set_input(i, rescaleParams.name);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool hasVariableInps = false;
|
|
for (int i = 2; i < num_inputs; i += 3)
|
|
{
|
|
if (layer_id.find(node_proto.input(i)) != layer_id.end())
|
|
{
|
|
hasVariableInps = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!hasVariableInps)
|
|
{
|
|
std::vector<Mat> inputs, concatenated;
|
|
MatShape inputShape;
|
|
for (size_t i = 2; i < num_inputs; i += 3)
|
|
{
|
|
Mat blob = getBlob(node_proto, i);
|
|
if (blob.size.dims() > inputShape.size())
|
|
{
|
|
inputShape = shape(blob);
|
|
}
|
|
inputs.push_back(blob);
|
|
}
|
|
|
|
int axis = layerParams.get<int>("axis", 1);
|
|
for (size_t i = 0; i < inputs.size(); ++i)
|
|
{
|
|
MatShape targetShape = inputShape;
|
|
targetShape[axis] = shape(inputs[i])[axis];
|
|
CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
|
|
inputs[i] = inputs[i].reshape(0, targetShape);
|
|
}
|
|
runLayer(layerParams, inputs, concatenated);
|
|
CV_Assert(concatenated.size() == 1);
|
|
addConstant(layerParams.name, concatenated[0]);
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
for (int i = 2; i < num_inputs; i += 3)
|
|
{
|
|
if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
|
|
{
|
|
LayerParams constParams;
|
|
constParams.name = node_proto.input(i);
|
|
constParams.type = "ConstInt8";
|
|
constParams.blobs.push_back(getBlob(node_proto, i));
|
|
constParams.set("depth", CV_8S);
|
|
|
|
opencv_onnx::NodeProto proto;
|
|
proto.add_output(constParams.name);
|
|
addLayer(constParams, proto);
|
|
}
|
|
}
|
|
}
|
|
layerParams.set("scales", out_scale);
|
|
layerParams.set("zeropoints", out_zp);
|
|
addLayer(layerParams, node_proto);
|
|
}
|
|
|
|
void ONNXImporter2::parseQSoftmax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
|
|
{
|
|
CV_CheckEQ(node_proto.input_size(), 5, "DNN/ONNX: QLinearSoftmax requires 5 inputs, X, X_scale, X_zero_point, Y_scale, Y_zero_point");
|
|
|
|
int opset = layerParams.get<int>("opset");
|
|
if (opset < 13) {
|
|
layerParams.set("coerced_2d", true);
|
|
}
|
|
|
|
float x_scale = getScalarFromMat<float>(getBlob(node_proto, 1));
|
|
int8_t x_zero_point = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
|
|
float y_scale = getScalarFromMat<float>(getBlob(node_proto, 3));
|
|
int8_t y_zero_point = getScalarFromMat<int8_t>(getBlob(node_proto, 4));
|
|
|
|
layerParams.type = "SoftmaxInt8";
|
|
// layerParams also has "axis" and "opset" attrs
|
|
layerParams.set("input_scale", x_scale);
|
|
layerParams.set("input_zeropoint", x_zero_point);
|
|
layerParams.set("scales", y_scale);
|
|
layerParams.set("zeropoints", y_zero_point);
|
|
addLayer(layerParams, node_proto);
|
|
}*/
|
|
|
|
// com.microsoft Attention operator. Requires the num_heads and
// qkv_hidden_sizes attributes. If every input after the first is constant,
// the constants are folded into layer blobs and only the data input is kept;
// otherwise all inputs are forwarded at runtime.
void ONNXImporter2::parseAttention(LayerParams& params, const opencv_onnx::NodeProto& node_proto) {
    int i, n_inputs = node_proto.input_size();
    CV_CheckTrue(params.has("num_heads"), "ONNXImporter2/parseAttention: num_heads is required but missing");
    CV_CheckTrue(params.has("qkv_hidden_sizes"), "ONNXImporter2/parseAttention: qkv_hidden_sizes is required but missing");

    auto param_qkv_hidden_sizes = params.get("qkv_hidden_sizes");
    CV_CheckEQ(param_qkv_hidden_sizes.size(), 3, "ONNXImporter2/parseAttention: qkv_hidden_sizes is must and only have three elements");

    // find the first non-constant input (if any)
    for (i = 1; i < n_inputs; i++) {
        if (!net.isConstArg(node_inputs[i]))
            break;
    }

    // i == n_inputs means all weight inputs are constant: fold them into blobs
    if (i == n_inputs) {
        for (i = 1; i < n_inputs; i++) {
            Mat blob = net.argTensor(node_inputs[i]);
            params.blobs.push_back(blob);
        }
        n_inputs = 1;
    }

    addLayer(params, node_proto, n_inputs);
}
|
|
|
|
// Domain: ai.onnx (default)
|
|
// URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md
|
|
void ONNXImporter2::buildDispatchMap_ONNX_AI(int opset_version)
|
|
{
|
|
CV_UNUSED(opset_version);
|
|
DispatchMap dispatch;
|
|
|
|
dispatch["ArgMax"] = dispatch["ArgMin"] = &ONNXImporter2::parseArgMinMax;
|
|
dispatch["MaxUnpool"] = &ONNXImporter2::parseMaxUnpool;
|
|
dispatch["MaxPool"] = &ONNXImporter2::parseMaxPool;
|
|
dispatch["AveragePool"] = &ONNXImporter2::parseAveragePool;
|
|
dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = &ONNXImporter2::parseGlobalPool;
|
|
dispatch["ReduceMax"] = dispatch["ReduceMin"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] =
|
|
dispatch["ReduceSumSquare"] = dispatch["ReduceProd"] = dispatch["ReduceL1"] =
|
|
dispatch["ReduceL2"] = dispatch["ReduceLogSum"] = dispatch["ReduceLogSumExp"] = &ONNXImporter2::parseReduce;
|
|
dispatch["Slice"] = &ONNXImporter2::parseSlice;
|
|
dispatch["Split"] = &ONNXImporter2::parseSplit;
|
|
dispatch["Neg"] = &ONNXImporter2::parseNeg;
|
|
dispatch["Constant"] = &ONNXImporter2::parseConstant;
|
|
dispatch["LSTM"] = &ONNXImporter2::parseLSTM;
|
|
dispatch["GRU"] = &ONNXImporter2::parseGRU;
|
|
dispatch["ImageScaler"] = &ONNXImporter2::parseImageScaler;
|
|
dispatch["Clip"] = &ONNXImporter2::parseClip;
|
|
dispatch["LeakyRelu"] = &ONNXImporter2::parseLeakyRelu;
|
|
dispatch["Relu"] = &ONNXImporter2::parseRelu;
|
|
dispatch["Elu"] = &ONNXImporter2::parseElu;
|
|
dispatch["Tanh"] = &ONNXImporter2::parseTanh;
|
|
dispatch["Abs"] = &ONNXImporter2::parseAbs;
|
|
dispatch["PRelu"] = &ONNXImporter2::parsePRelu;
|
|
dispatch["NonZero"] = &ONNXImporter2::parseNonZero;
|
|
dispatch["LRN"] = &ONNXImporter2::parseLRN;
|
|
dispatch["InstanceNormalization"] = &ONNXImporter2::parseInstanceNormalization;
|
|
dispatch["BatchNormalization"] = &ONNXImporter2::parseBatchNormalization;
|
|
dispatch["Gemm"] = &ONNXImporter2::parseGemm;
|
|
dispatch["MatMul"] = &ONNXImporter2::parseMatMul;
|
|
dispatch["Conv"] = &ONNXImporter2::parseConv;
|
|
dispatch["ConvTranspose"] = &ONNXImporter2::parseConvTranspose;
|
|
dispatch["Transpose"] = &ONNXImporter2::parseTranspose;
|
|
dispatch["Squeeze"] = &ONNXImporter2::parseSqueeze;
|
|
dispatch["Flatten"] = &ONNXImporter2::parseFlatten;
|
|
dispatch["Unsqueeze"] = &ONNXImporter2::parseUnsqueeze;
|
|
dispatch["Expand"] = &ONNXImporter2::parseExpand;
|
|
dispatch["Reshape"] = &ONNXImporter2::parseReshape;
|
|
dispatch["Pad"] = &ONNXImporter2::parsePad;
|
|
dispatch["Shape"] = &ONNXImporter2::parseShape;
|
|
dispatch["Cast"] = &ONNXImporter2::parseCast2;
|
|
dispatch["CastLike"] = &ONNXImporter2::parseCastLike;
|
|
dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter2::parseConstantOfShape;
|
|
dispatch["Gather"] = &ONNXImporter2::parseGather;
|
|
dispatch["GatherElements"] = &ONNXImporter2::parseGatherElements;
|
|
dispatch["Concat"] = &ONNXImporter2::parseConcat;
|
|
dispatch["If"] = &ONNXImporter2::parseIf;
|
|
dispatch["Resize"] = &ONNXImporter2::parseResize2;
|
|
dispatch["Size"] = &ONNXImporter2::parseSize;
|
|
dispatch["Unique"] = &ONNXImporter2::parseUnique;
|
|
dispatch["Trilu"] = &ONNXImporter2::parseTrilu;
|
|
dispatch["IsNaN"] = &ONNXImporter2::parseIsNaN;
|
|
dispatch["IsInf"] = &ONNXImporter2::parseIsInf;
|
|
dispatch["Det"] = &ONNXImporter2::parseDet;
|
|
dispatch["GridSample"] = &ONNXImporter2::parseGridSample;
|
|
dispatch["Upsample"] = &ONNXImporter2::parseUpsample;
|
|
dispatch["BitShift"] = &ONNXImporter2::parseBitShift;
|
|
dispatch["BitwiseAnd"] = &ONNXImporter2::parseBitwise;
|
|
dispatch["BitwiseOr"] = &ONNXImporter2::parseBitwise;
|
|
dispatch["BitwiseXor"] = &ONNXImporter2::parseBitwise;
|
|
dispatch["BitwiseNot"] = &ONNXImporter2::parseBitwiseNot;
|
|
dispatch["NonMaxSuprression"] = &ONNXImporter2::parseNonMaxSuprression;
|
|
dispatch["SoftMax"] = dispatch["Softmax"] = dispatch["LogSoftmax"] = &ONNXImporter2::parseSoftMax;
|
|
dispatch["DetectionOutput"] = &ONNXImporter2::parseDetectionOutput;
|
|
dispatch["CumSum"] = &ONNXImporter2::parseCumSum;
|
|
dispatch["SpaceToDepth"] = dispatch["DepthToSpace"] = &ONNXImporter2::parseDepthSpaceOps;
|
|
dispatch["ScatterElements"] = dispatch["Scatter"] = dispatch["ScatterND"] = &ONNXImporter2::parseScatter;
|
|
dispatch["Tile"] = &ONNXImporter2::parseTile;
|
|
dispatch["LayerNormalization"] = &ONNXImporter2::parseLayerNorm;
|
|
dispatch["GroupNormalization"] = &ONNXImporter2::parseInstanceNormalization;
|
|
|
|
dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = dispatch["Pow"] = dispatch["Add"] =
|
|
dispatch["Sub"] = dispatch["Mul"] = dispatch["Div"] = dispatch["GreaterOrEqual"] =
|
|
dispatch["LessOrEqual"] = dispatch["Mod"] = dispatch["And"] = dispatch["Or"] = dispatch["Xor"] = &ONNXImporter2::parseElementWise;
|
|
|
|
dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = dispatch["Mean"] = &ONNXImporter2::parseElementWise;
|
|
dispatch["Where"] = &ONNXImporter2::parseElementWise;
|
|
dispatch["Range"] = &ONNXImporter2::parseRange;
|
|
dispatch["Einsum"] = &ONNXImporter2::parseEinsum;
|
|
dispatch["TopK"] = &ONNXImporter2::parseTopK2;
|
|
|
|
std::vector<std::string> simpleLayers {
|
|
"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
|
|
"Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",
|
|
"Identity", "Log", "Not", "Round", "Reciprocal", "Selu", "Sign", "Sigmoid", "Sin", "Sinh",
|
|
"Softplus", "Softsign", "Shrink", "Sqrt", "Tan", "ThresholdedRelu", "Gelu",
|
|
"GeluApproximation"
|
|
};
|
|
for (const auto& name : simpleLayers)
|
|
{
|
|
dispatch[name] = &ONNXImporter2::parseSimpleLayers;
|
|
}
|
|
|
|
// BUG: https://github.com/opencv/opencv/issues/26310
|
|
// ai.onnx: opset 10+
|
|
dispatch["DequantizeLinear"] = &ONNXImporter2::parseDequantizeLinear;
|
|
dispatch["QuantizeLinear"] = &ONNXImporter2::parseQuantizeLinear;
|
|
//dispatch["QLinearConv"] = &ONNXImporter2::parseQConv;
|
|
//dispatch["QLinearMatMul"] = &ONNXImporter2::parseQMatMul;
|
|
|
|
// com.microsft: This operator is added for compatibility via onnx graph simplifier.
|
|
// Opset domain cannot be modified from onnx_graph_simplifier.cpp so this
|
|
// operator cannot be parsed if only added in buildDispatchMap_COM_MICROSOFT
|
|
dispatch["Attention"] = &ONNXImporter2::parseAttention;
|
|
|
|
domain_dispatch_map[str_domain_ai_onnx] = dispatch;
|
|
}
|
|
|
|
// Domain: com.microsoft
|
|
// URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
|
|
void ONNXImporter2::buildDispatchMap_COM_MICROSOFT(int opset_version)
|
|
{
|
|
CV_UNUSED(opset_version);
|
|
DispatchMap dispatch;
|
|
|
|
// BUG: https://github.com/opencv/opencv/issues/26310
|
|
//dispatch["QLinearAdd"] = dispatch["QLinearMul"] = &ONNXImporter2::parseQEltwise;
|
|
//dispatch["QLinearAveragePool"] = dispatch["QLinearGlobalAveragePool"] = &ONNXImporter2::parseQAvgPool;
|
|
//dispatch["QLinearLeakyRelu"] = &ONNXImporter2::parseQLeakyRelu;
|
|
//dispatch["QLinearSigmoid"] = &ONNXImporter2::parseQSigmoid;
|
|
//dispatch["QLinearConcat"] = &ONNXImporter2::parseQConcat;
|
|
//dispatch["QGemm"] = &ONNXImporter2::parseQGemm;
|
|
//dispatch["QLinearSoftmax"] = &ONNXImporter2::parseQSoftmax;
|
|
dispatch["Attention"] = &ONNXImporter2::parseAttention;
|
|
|
|
domain_dispatch_map["com.microsoft"] = dispatch;
|
|
}
|
|
|
|
|
|
// Reads an ONNX model from a file with the new engine importer. On a
// successful parse the source file name is recorded on the resulting Net.
Net readNetFromONNX2(const String& onnxFile)
{
    ONNXImporter2 onnxImporter;
    Net net = onnxImporter.parseFile(onnxFile.c_str());
    // A non-empty main graph indicates the model was parsed successfully;
    // remember where it came from for later diagnostics.
    bool parsedOk = static_cast<bool>(net.getMainGraph());
    if (parsedOk)
        net.getImpl()->modelFileName = onnxFile;
    return net;
}
|
|
|
|
// Reads an ONNX model from an in-memory buffer with the new engine importer.
// No file name is associated with models parsed from memory.
Net readNetFromONNX2(const char* buffer, size_t size)
{
    ONNXImporter2 onnxImporter;
    Net result = onnxImporter.parseBuffer(buffer, size);
    return result;
}
|
|
|
|
// Reads an ONNX model from a byte vector with the new engine importer.
Net readNetFromONNX2(const std::vector<uchar>& buffer)
{
    ONNXImporter2 onnxImporter;
    Net result = onnxImporter.parseBuffer(buffer.data(), buffer.size());
    return result;
}
|
|
|
|
#else // HAVE_PROTOBUF
|
|
|
|
#define DNN_PROTOBUF_UNSUPPORTED() CV_Error(Error::StsError, "DNN/ONNX: Build OpenCV with Protobuf to import ONNX models")
|
|
|
|
// Stub used when OpenCV is built without Protobuf: always throws.
Net readNetFromONNX2(const String&)
{
    DNN_PROTOBUF_UNSUPPORTED();
}
|
|
|
|
// Stub used when OpenCV is built without Protobuf: always throws.
Net readNetFromONNX2(const char*, size_t)
{
    DNN_PROTOBUF_UNSUPPORTED();
}
|
|
|
|
// Stub used when OpenCV is built without Protobuf: always throws.
Net readNetFromONNX2(const std::vector<uchar>&)
{
    DNN_PROTOBUF_UNSUPPORTED();
}
|
|
|
|
#endif // HAVE_PROTOBUF
|
|
|
|
CV__DNN_INLINE_NS_END
|
|
}} // namespace
|