mirror of
https://github.com/zebrajr/opencv.git
synced 2025-12-06 12:19:50 +01:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
09799402f9
|
|
@ -101,7 +101,7 @@ TEST_F(fisheyeTest, projectPoints)
|
|||
EXPECT_MAT_NEAR(distorted0, distorted2, 1e-10);
|
||||
}
|
||||
|
||||
TEST_F(fisheyeTest, DISABLED_undistortImage)
|
||||
TEST_F(fisheyeTest, undistortImage)
|
||||
{
|
||||
cv::Matx33d theK = this->K;
|
||||
cv::Mat theD = cv::Mat(this->D);
|
||||
|
|
|
|||
|
|
@ -229,6 +229,10 @@ namespace cv {
|
|||
{
|
||||
activation_param.type = "Swish";
|
||||
}
|
||||
else if (type == "mish")
|
||||
{
|
||||
activation_param.type = "Mish";
|
||||
}
|
||||
else if (type == "logistic")
|
||||
{
|
||||
activation_param.type = "Sigmoid";
|
||||
|
|
@ -436,7 +440,7 @@ namespace cv {
|
|||
fused_layer_names.push_back(last_layer);
|
||||
}
|
||||
|
||||
void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold)
|
||||
void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold, float scale_x_y)
|
||||
{
|
||||
cv::dnn::LayerParams region_param;
|
||||
region_param.name = "Region-name";
|
||||
|
|
@ -449,6 +453,7 @@ namespace cv {
|
|||
region_param.set<bool>("logistic", true);
|
||||
region_param.set<float>("thresh", thresh);
|
||||
region_param.set<float>("nms_threshold", nms_threshold);
|
||||
region_param.set<float>("scale_x_y", scale_x_y);
|
||||
|
||||
std::vector<float> usedAnchors(numAnchors * 2);
|
||||
for (int i = 0; i < numAnchors; ++i)
|
||||
|
|
@ -786,6 +791,7 @@ namespace cv {
|
|||
int num_of_anchors = getParam<int>(layer_params, "num", -1);
|
||||
float thresh = getParam<float>(layer_params, "thresh", 0.2);
|
||||
float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.4);
|
||||
float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0);
|
||||
|
||||
std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
|
||||
CV_Assert(!anchors_values.empty());
|
||||
|
|
@ -798,7 +804,7 @@ namespace cv {
|
|||
CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
|
||||
|
||||
setParams.setPermute(false);
|
||||
setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold);
|
||||
setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y);
|
||||
}
|
||||
else {
|
||||
CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
|
||||
|
|
@ -813,6 +819,10 @@ namespace cv {
|
|||
{
|
||||
setParams.setActivation("swish");
|
||||
}
|
||||
else if (activation == "mish")
|
||||
{
|
||||
setParams.setActivation("mish");
|
||||
}
|
||||
else if (activation == "logistic")
|
||||
{
|
||||
setParams.setActivation("logistic");
|
||||
|
|
@ -935,8 +945,8 @@ namespace cv {
|
|||
}
|
||||
|
||||
std::string activation = getParam<std::string>(layer_params, "activation", "linear");
|
||||
if(activation == "leaky" || activation == "swish" || activation == "logistic")
|
||||
++cv_layers_counter; // For ReLU, Swish, Sigmoid
|
||||
if(activation == "leaky" || activation == "swish" || activation == "mish" || activation == "logistic")
|
||||
++cv_layers_counter; // For ReLU, Swish, Mish, Sigmoid
|
||||
|
||||
if(!darknet_layers_counter)
|
||||
tensor_shape.resize(1);
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ class RegionLayerImpl CV_FINAL : public RegionLayer
|
|||
{
|
||||
public:
|
||||
int coords, classes, anchors, classfix;
|
||||
float thresh, nmsThreshold;
|
||||
float thresh, nmsThreshold, scale_x_y;
|
||||
bool useSoftmax, useLogistic;
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat blob_umat;
|
||||
|
|
@ -88,6 +88,7 @@ public:
|
|||
useSoftmax = params.get<bool>("softmax", false);
|
||||
useLogistic = params.get<bool>("logistic", false);
|
||||
nmsThreshold = params.get<float>("nms_threshold", 0.4);
|
||||
scale_x_y = params.get<float>("scale_x_y", 1.0); // Yolov4
|
||||
|
||||
CV_Assert(nmsThreshold >= 0.);
|
||||
CV_Assert(coords == 4);
|
||||
|
|
@ -302,8 +303,10 @@ public:
|
|||
if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0;
|
||||
int box_index = index_sample_offset + index * cell_size;
|
||||
|
||||
dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
|
||||
dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
|
||||
float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
|
||||
float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
|
||||
dstData[box_index + 0] = (x + x_tmp) / cols;
|
||||
dstData[box_index + 1] = (y + y_tmp) / rows;
|
||||
dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
|
||||
dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
|
||||
|
||||
|
|
@ -471,6 +474,8 @@ public:
|
|||
auto shape_3d = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{boxes_shape.size()}, boxes_shape.data());
|
||||
|
||||
ngraph::Shape box_broad_shape{1, (size_t)anchors, (size_t)h, (size_t)w};
|
||||
auto scale_x_y_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &scale_x_y);
|
||||
auto shift_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{0.5});
|
||||
|
||||
std::shared_ptr<ngraph::Node> box_x;
|
||||
{
|
||||
|
|
@ -478,6 +483,9 @@ public:
|
|||
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, cols});
|
||||
box_x = std::make_shared<ngraph::op::v1::StridedSlice>(input2d, lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
|
||||
box_x = std::make_shared<ngraph::op::Sigmoid>(box_x);
|
||||
box_x = std::make_shared<ngraph::op::v1::Subtract>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_x = std::make_shared<ngraph::op::v1::Multiply>(box_x, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_x = std::make_shared<ngraph::op::v1::Add>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_x = std::make_shared<ngraph::op::v1::Reshape>(box_x, shape_3d, true);
|
||||
|
||||
std::vector<float> x_indices(w * h * anchors);
|
||||
|
|
@ -504,6 +512,9 @@ public:
|
|||
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{2, cols});
|
||||
box_y = std::make_shared<ngraph::op::v1::StridedSlice>(input2d, lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
|
||||
box_y = std::make_shared<ngraph::op::Sigmoid>(box_y);
|
||||
box_y = std::make_shared<ngraph::op::v1::Subtract>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_y = std::make_shared<ngraph::op::v1::Multiply>(box_y, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_y = std::make_shared<ngraph::op::v1::Add>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_y = std::make_shared<ngraph::op::v1::Reshape>(box_y, shape_3d, true);
|
||||
|
||||
std::vector<float> y_indices(h * anchors);
|
||||
|
|
|
|||
|
|
@ -172,18 +172,19 @@ public:
|
|||
CV_Assert(inputs.size() == 1);
|
||||
const MatSize& inpShape = inputs[0].size;
|
||||
|
||||
finalSliceRanges = sliceRanges;
|
||||
if (sliceRanges.empty())
|
||||
{
|
||||
// Divide input blob on equal parts by axis.
|
||||
int outAxisSize = inpShape[axis] / outputs.size();
|
||||
sliceRanges.resize(outputs.size(),
|
||||
std::vector<Range>(axis + 1, Range::all()));
|
||||
finalSliceRanges.resize(outputs.size(),
|
||||
std::vector<Range>(axis + 1, Range::all()));
|
||||
int prevSlice = 0;
|
||||
for (int i = 0; i < outputs.size(); ++i)
|
||||
{
|
||||
sliceRanges[i][axis].start = prevSlice;
|
||||
sliceRanges[i][axis].end = sliceRanges[i][axis].start + outAxisSize;
|
||||
prevSlice = sliceRanges[i][axis].end;
|
||||
finalSliceRanges[i][axis].start = prevSlice;
|
||||
finalSliceRanges[i][axis].end = finalSliceRanges[i][axis].start + outAxisSize;
|
||||
prevSlice = finalSliceRanges[i][axis].end;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
@ -191,16 +192,16 @@ public:
|
|||
|
||||
for (int i = 0; i < outputs.size(); ++i)
|
||||
{
|
||||
CV_Assert(sliceRanges[i].size() <= inpShape.dims());
|
||||
CV_Assert(finalSliceRanges[i].size() <= inpShape.dims());
|
||||
// Fill the rest of ranges.
|
||||
for (int j = sliceRanges[i].size(); j < inpShape.dims(); ++j)
|
||||
for (int j = finalSliceRanges[i].size(); j < inpShape.dims(); ++j)
|
||||
{
|
||||
sliceRanges[i].push_back(Range::all());
|
||||
finalSliceRanges[i].push_back(Range::all());
|
||||
}
|
||||
// Clamp.
|
||||
for (int j = 0; j < sliceRanges[i].size(); ++j)
|
||||
for (int j = 0; j < finalSliceRanges[i].size(); ++j)
|
||||
{
|
||||
sliceRanges[i][j] = clamp(sliceRanges[i][j], inpShape[j]);
|
||||
finalSliceRanges[i][j] = clamp(finalSliceRanges[i][j], inpShape[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -241,8 +242,8 @@ public:
|
|||
kernel.set(idx++, (int)(rows * cols));
|
||||
kernel.set(idx++, (int)inpMat.size[3]);
|
||||
kernel.set(idx++, (int)cols);
|
||||
kernel.set(idx++, (int)sliceRanges[i][2].start);
|
||||
kernel.set(idx++, (int)sliceRanges[i][3].start);
|
||||
kernel.set(idx++, (int)finalSliceRanges[i][2].start);
|
||||
kernel.set(idx++, (int)finalSliceRanges[i][3].start);
|
||||
kernel.set(idx++, ocl::KernelArg::PtrWriteOnly(outputs[i]));
|
||||
bool ret = kernel.run(1, global, local, false);
|
||||
if (!ret)
|
||||
|
|
@ -266,10 +267,10 @@ public:
|
|||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
const Mat& inpMat = inputs[0];
|
||||
CV_Assert(outputs.size() == sliceRanges.size());
|
||||
CV_Assert(outputs.size() == finalSliceRanges.size());
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
{
|
||||
inpMat(sliceRanges[i]).copyTo(outputs[i]);
|
||||
inpMat(finalSliceRanges[i]).copyTo(outputs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -278,11 +279,11 @@ public:
|
|||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert_N(sliceRanges.size() == 1, inputs.size() <= 2);
|
||||
CV_Assert_N(finalSliceRanges.size() == 1, inputs.size() <= 2);
|
||||
|
||||
std::vector<size_t> axes, offsets, dims;
|
||||
int from, to, step;
|
||||
int numDims = sliceRanges[0].size();
|
||||
int numDims = finalSliceRanges[0].size();
|
||||
if (preferableTarget == DNN_TARGET_MYRIAD)
|
||||
{
|
||||
from = axis;
|
||||
|
|
@ -298,8 +299,8 @@ public:
|
|||
for (int i = from; i != to; i += step)
|
||||
{
|
||||
axes.push_back(i);
|
||||
offsets.push_back(sliceRanges[0][i].start);
|
||||
dims.push_back(sliceRanges[0][i].size());
|
||||
offsets.push_back(finalSliceRanges[0][i].start);
|
||||
dims.push_back(finalSliceRanges[0][i].size());
|
||||
}
|
||||
|
||||
InferenceEngine::Builder::Layer ieLayer(name);
|
||||
|
|
@ -315,7 +316,7 @@ public:
|
|||
{
|
||||
std::vector<size_t> outShape(numDims);
|
||||
for (int i = 0; i < numDims; ++i)
|
||||
outShape[i] = sliceRanges[0][i].size();
|
||||
outShape[i] = finalSliceRanges[0][i].size();
|
||||
|
||||
ieLayer.getInputPorts()[1].setParameter("type", "weights");
|
||||
|
||||
|
|
@ -338,13 +339,13 @@ public:
|
|||
{
|
||||
CV_Assert_N(nodes.size() <= 2);
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
CV_Assert(sliceRanges[0].size() == ieInpNode->get_shape().size());
|
||||
CV_Assert(finalSliceRanges[0].size() == ieInpNode->get_shape().size());
|
||||
|
||||
std::vector<int64_t> offsets, dims;
|
||||
for (int i = 0; i < sliceRanges[0].size(); ++i)
|
||||
for (int i = 0; i < finalSliceRanges[0].size(); ++i)
|
||||
{
|
||||
offsets.push_back(sliceRanges[0][i].start);
|
||||
dims.push_back(sliceRanges[0][i].end);
|
||||
offsets.push_back(finalSliceRanges[0][i].start);
|
||||
dims.push_back(finalSliceRanges[0][i].end);
|
||||
}
|
||||
|
||||
auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
|
|
@ -384,6 +385,10 @@ public:
|
|||
}
|
||||
#endif
|
||||
|
||||
|
||||
protected:
|
||||
// The actual non-negative ranges derived from @p sliceRanges; they depend on the input size.
|
||||
std::vector<std::vector<Range> > finalSliceRanges;
|
||||
};
|
||||
|
||||
class CropLayerImpl CV_FINAL : public SliceLayerImpl
|
||||
|
|
@ -447,18 +452,18 @@ public:
|
|||
offset_final[i] = offset[i - start_axis];
|
||||
}
|
||||
|
||||
sliceRanges.resize(1);
|
||||
sliceRanges[0].resize(dims);
|
||||
finalSliceRanges.resize(1);
|
||||
finalSliceRanges[0].resize(dims);
|
||||
for (int i = 0; i < start_axis; i++)
|
||||
{
|
||||
sliceRanges[0][i] = Range(0, inpBlob.size[i]);
|
||||
finalSliceRanges[0][i] = Range(0, inpBlob.size[i]);
|
||||
}
|
||||
for (int i = start_axis; i < dims; i++)
|
||||
{
|
||||
if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size[i] > inpBlob.size[i])
|
||||
CV_Error(Error::StsBadArg, "invalid crop parameters or blob sizes");
|
||||
|
||||
sliceRanges[0][i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size[i]);
|
||||
finalSliceRanges[0][i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -549,6 +549,11 @@ TEST_P(Test_Darknet_layers, upsample)
|
|||
testDarknetLayer("upsample");
|
||||
}
|
||||
|
||||
// Exercises the Darknet "mish" test case through the shared testDarknetLayer helper.
// NOTE(review): the meaning of the second argument (true) is defined by
// testDarknetLayer, which is not visible in this hunk — confirm before changing it.
TEST_P(Test_Darknet_layers, mish)
|
||||
{
|
||||
testDarknetLayer("mish", true);
|
||||
}
|
||||
|
||||
TEST_P(Test_Darknet_layers, avgpool_softmax)
|
||||
{
|
||||
testDarknetLayer("avgpool_softmax");
|
||||
|
|
|
|||
|
|
@ -1791,4 +1791,38 @@ TEST_P(Layer_Test_Resize, change_input)
|
|||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Resize, dnnBackendsAndTargets());
|
||||
|
||||
typedef testing::TestWithParam<tuple<Backend, Target> > Layer_Test_Slice;
|
||||
// Builds a single-layer network containing a Slice layer whose "begin" is 0 and
// whose "end" is -1 on all four axes, then runs it twice with inputs of
// different spatial size. After each forward pass the output is compared
// against the input blob with normAssert.
TEST_P(Layer_Test_Slice, variable_input_shape)
{
    const int backendId = get<0>(GetParam());
    const int targetId = get<1>(GetParam());

    int sliceBegin[] = {0, 0, 0, 0};
    int sliceEnd[] = {-1, -1, -1, -1};

    Net net;
    LayerParams sliceParams;
    sliceParams.type = "Slice";
    sliceParams.name = "testLayer";
    sliceParams.set("begin", DictValue::arrayInt<int*>(&sliceBegin[0], 4));
    sliceParams.set("end", DictValue::arrayInt<int*>(&sliceEnd[0], 4));
    net.addLayerToPrev(sliceParams.name, sliceParams.type, sliceParams);

    // Two passes over the same net: 4x5 first, then 5x6.
    for (int pass = 0; pass < 2; ++pass)
    {
        Mat image(4 + pass, 5 + pass, CV_8UC1);
        randu(image, 0, 255);
        const Mat blob = blobFromImage(image);

        net.setInput(blob);
        net.setPreferableBackend(backendId);
        net.setPreferableTarget(targetId);
        const Mat result = net.forward();

        normAssert(result, blob);
    }
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Slice, dnnBackendsAndTargets());
|
||||
|
||||
}} // namespace
|
||||
|
|
|
|||
|
|
@ -559,7 +559,7 @@ public:
|
|||
|
||||
TEST_P(Test_ONNX_nets, Alexnet)
|
||||
{
|
||||
#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL)
|
||||
#if defined(OPENCV_32BIT_CONFIGURATION) && (defined(HAVE_OPENCL) || defined(_WIN32))
|
||||
applyTestTag(CV_TEST_TAG_MEMORY_2GB);
|
||||
#else
|
||||
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
|
||||
|
|
@ -623,7 +623,7 @@ TEST_P(Test_ONNX_nets, Googlenet)
|
|||
|
||||
TEST_P(Test_ONNX_nets, CaffeNet)
|
||||
{
|
||||
#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL)
|
||||
#if defined(OPENCV_32BIT_CONFIGURATION) && (defined(HAVE_OPENCL) || defined(_WIN32))
|
||||
applyTestTag(CV_TEST_TAG_MEMORY_2GB);
|
||||
#else
|
||||
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
|
||||
|
|
@ -639,7 +639,7 @@ TEST_P(Test_ONNX_nets, CaffeNet)
|
|||
|
||||
TEST_P(Test_ONNX_nets, RCNN_ILSVRC13)
|
||||
{
|
||||
#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL)
|
||||
#if defined(OPENCV_32BIT_CONFIGURATION) && (defined(HAVE_OPENCL) || defined(_WIN32))
|
||||
applyTestTag(CV_TEST_TAG_MEMORY_2GB);
|
||||
#else
|
||||
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
|
||||
|
|
|
|||
|
|
@ -55,19 +55,19 @@
|
|||
|
||||
namespace
|
||||
{
|
||||
static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
|
||||
static void calcScharrDeriv(const cv::Mat& src, cv::Mat& dst)
|
||||
{
|
||||
using namespace cv;
|
||||
using cv::detail::deriv_type;
|
||||
int rows = src.rows, cols = src.cols, cn = src.channels(), depth = src.depth();
|
||||
CV_Assert(depth == CV_8U);
|
||||
dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
|
||||
parallel_for_(Range(0, rows), cv::detail::SharrDerivInvoker(src, dst), cv::getNumThreads());
|
||||
parallel_for_(Range(0, rows), cv::detail::ScharrDerivInvoker(src, dst), cv::getNumThreads());
|
||||
}
|
||||
|
||||
}//namespace
|
||||
|
||||
void cv::detail::SharrDerivInvoker::operator()(const Range& range) const
|
||||
void cv::detail::ScharrDerivInvoker::operator()(const Range& range) const
|
||||
{
|
||||
using cv::detail::deriv_type;
|
||||
int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn;
|
||||
|
|
@ -801,7 +801,7 @@ int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Si
|
|||
deriv.create(sz.height + winSize.height*2, sz.width + winSize.width*2, derivType);
|
||||
|
||||
Mat derivI = deriv(Rect(winSize.width, winSize.height, sz.width, sz.height));
|
||||
calcSharrDeriv(thisLevel, derivI);
|
||||
calcScharrDeriv(thisLevel, derivI);
|
||||
|
||||
if(derivBorder != BORDER_TRANSPARENT)
|
||||
copyMakeBorder(derivI, deriv, winSize.height, winSize.height, winSize.width, winSize.width, derivBorder|BORDER_ISOLATED);
|
||||
|
|
@ -1382,7 +1382,7 @@ void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg,
|
|||
Mat _derivI( imgSize.height + winSize.height*2,
|
||||
imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.ptr() );
|
||||
derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
|
||||
calcSharrDeriv(prevPyr[level * lvlStep1], derivI);
|
||||
calcScharrDeriv(prevPyr[level * lvlStep1], derivI);
|
||||
copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED);
|
||||
}
|
||||
else
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@ namespace detail
|
|||
|
||||
typedef short deriv_type;
|
||||
|
||||
struct SharrDerivInvoker : ParallelLoopBody
|
||||
struct ScharrDerivInvoker : ParallelLoopBody
|
||||
{
|
||||
SharrDerivInvoker(const Mat& _src, const Mat& _dst)
|
||||
ScharrDerivInvoker(const Mat& _src, const Mat& _dst)
|
||||
: src(_src), dst(_dst)
|
||||
{ }
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,20 @@
|
|||
/*
|
||||
Text detection model: https://github.com/argman/EAST
|
||||
Download link: https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1
|
||||
|
||||
Text recognition model taken from here: https://github.com/meijieru/crnn.pytorch
|
||||
How to convert from pth (PyTorch checkpoint) to onnx:
|
||||
Using classes from here: https://github.com/meijieru/crnn.pytorch/blob/master/models/crnn.py
|
||||
|
||||
import torch
|
||||
from models.crnn import CRNN
|
||||
|
||||
model = CRNN(32, 1, 37, 256)
|
||||
model.load_state_dict(torch.load('crnn.pth'))
|
||||
dummy_input = torch.randn(1, 1, 32, 100)
|
||||
torch.onnx.export(model, dummy_input, "crnn.onnx", verbose=True)
|
||||
*/
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
|
@ -8,21 +25,26 @@ using namespace cv::dnn;
|
|||
const char* keys =
|
||||
"{ help h | | Print help message. }"
|
||||
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
|
||||
"{ model m | | Path to a binary .pb file contains trained network.}"
|
||||
"{ model m | | Path to a binary .pb file contains trained detector network.}"
|
||||
"{ ocr | | Path to a binary .pb or .onnx file contains trained recognition network.}"
|
||||
"{ width | 320 | Preprocess input image by resizing to a specific width. It should be multiple by 32. }"
|
||||
"{ height | 320 | Preprocess input image by resizing to a specific height. It should be multiple by 32. }"
|
||||
"{ thr | 0.5 | Confidence threshold. }"
|
||||
"{ nms | 0.4 | Non-maximum suppression threshold. }";
|
||||
|
||||
void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
|
||||
std::vector<RotatedRect>& detections, std::vector<float>& confidences);
|
||||
void decodeBoundingBoxes(const Mat& scores, const Mat& geometry, float scoreThresh,
|
||||
std::vector<RotatedRect>& detections, std::vector<float>& confidences);
|
||||
|
||||
void fourPointsTransform(const Mat& frame, Point2f vertices[4], Mat& result);
|
||||
|
||||
void decodeText(const Mat& scores, std::string& text);
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
// Parse command line arguments.
|
||||
CommandLineParser parser(argc, argv, keys);
|
||||
parser.about("Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of "
|
||||
"EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)");
|
||||
"EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)");
|
||||
if (argc == 1 || parser.has("help"))
|
||||
{
|
||||
parser.printMessage();
|
||||
|
|
@ -33,7 +55,8 @@ int main(int argc, char** argv)
|
|||
float nmsThreshold = parser.get<float>("nms");
|
||||
int inpWidth = parser.get<int>("width");
|
||||
int inpHeight = parser.get<int>("height");
|
||||
String model = parser.get<String>("model");
|
||||
String modelDecoder = parser.get<String>("model");
|
||||
String modelRecognition = parser.get<String>("ocr");
|
||||
|
||||
if (!parser.check())
|
||||
{
|
||||
|
|
@ -41,17 +64,19 @@ int main(int argc, char** argv)
|
|||
return 1;
|
||||
}
|
||||
|
||||
CV_Assert(!model.empty());
|
||||
CV_Assert(!modelDecoder.empty());
|
||||
|
||||
// Load network.
|
||||
Net net = readNet(model);
|
||||
// Load networks.
|
||||
Net detector = readNet(modelDecoder);
|
||||
Net recognizer;
|
||||
|
||||
if (!modelRecognition.empty())
|
||||
recognizer = readNet(modelRecognition);
|
||||
|
||||
// Open a video file or an image file or a camera stream.
|
||||
VideoCapture cap;
|
||||
if (parser.has("input"))
|
||||
cap.open(parser.get<String>("input"));
|
||||
else
|
||||
cap.open(0);
|
||||
bool openSuccess = parser.has("input") ? cap.open(parser.get<String>("input")) : cap.open(0);
|
||||
CV_Assert(openSuccess);
|
||||
|
||||
static const std::string kWinName = "EAST: An Efficient and Accurate Scene Text Detector";
|
||||
namedWindow(kWinName, WINDOW_NORMAL);
|
||||
|
|
@ -62,6 +87,7 @@ int main(int argc, char** argv)
|
|||
outNames[1] = "feature_fusion/concat_3";
|
||||
|
||||
Mat frame, blob;
|
||||
TickMeter tickMeter;
|
||||
while (waitKey(1) < 0)
|
||||
{
|
||||
cap >> frame;
|
||||
|
|
@ -72,8 +98,10 @@ int main(int argc, char** argv)
|
|||
}
|
||||
|
||||
blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), Scalar(123.68, 116.78, 103.94), true, false);
|
||||
net.setInput(blob);
|
||||
net.forward(outs, outNames);
|
||||
detector.setInput(blob);
|
||||
tickMeter.start();
|
||||
detector.forward(outs, outNames);
|
||||
tickMeter.stop();
|
||||
|
||||
Mat scores = outs[0];
|
||||
Mat geometry = outs[1];
|
||||
|
|
@ -81,43 +109,64 @@ int main(int argc, char** argv)
|
|||
// Decode predicted bounding boxes.
|
||||
std::vector<RotatedRect> boxes;
|
||||
std::vector<float> confidences;
|
||||
decode(scores, geometry, confThreshold, boxes, confidences);
|
||||
decodeBoundingBoxes(scores, geometry, confThreshold, boxes, confidences);
|
||||
|
||||
// Apply non-maximum suppression procedure.
|
||||
std::vector<int> indices;
|
||||
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
|
||||
|
||||
// Render detections.
|
||||
Point2f ratio((float)frame.cols / inpWidth, (float)frame.rows / inpHeight);
|
||||
|
||||
// Render text.
|
||||
for (size_t i = 0; i < indices.size(); ++i)
|
||||
{
|
||||
RotatedRect& box = boxes[indices[i]];
|
||||
|
||||
Point2f vertices[4];
|
||||
box.points(vertices);
|
||||
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
vertices[j].x *= ratio.x;
|
||||
vertices[j].y *= ratio.y;
|
||||
}
|
||||
|
||||
if (!modelRecognition.empty())
|
||||
{
|
||||
Mat cropped;
|
||||
fourPointsTransform(frame, vertices, cropped);
|
||||
|
||||
cvtColor(cropped, cropped, cv::COLOR_BGR2GRAY);
|
||||
|
||||
Mat blobCrop = blobFromImage(cropped, 1.0/127.5, Size(), Scalar::all(127.5));
|
||||
recognizer.setInput(blobCrop);
|
||||
|
||||
tickMeter.start();
|
||||
Mat result = recognizer.forward();
|
||||
tickMeter.stop();
|
||||
|
||||
std::string wordRecognized = "";
|
||||
decodeText(result, wordRecognized);
|
||||
putText(frame, wordRecognized, vertices[1], FONT_HERSHEY_SIMPLEX, 1.5, Scalar(0, 0, 255));
|
||||
}
|
||||
|
||||
for (int j = 0; j < 4; ++j)
|
||||
line(frame, vertices[j], vertices[(j + 1) % 4], Scalar(0, 255, 0), 1);
|
||||
}
|
||||
|
||||
// Put efficiency information.
|
||||
std::vector<double> layersTimes;
|
||||
double freq = getTickFrequency() / 1000;
|
||||
double t = net.getPerfProfile(layersTimes) / freq;
|
||||
std::string label = format("Inference time: %.2f ms", t);
|
||||
std::string label = format("Inference time: %.2f ms", tickMeter.getTimeMilli());
|
||||
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
|
||||
imshow(kWinName, frame);
|
||||
|
||||
tickMeter.reset();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
|
||||
std::vector<RotatedRect>& detections, std::vector<float>& confidences)
|
||||
void decodeBoundingBoxes(const Mat& scores, const Mat& geometry, float scoreThresh,
|
||||
std::vector<RotatedRect>& detections, std::vector<float>& confidences)
|
||||
{
|
||||
detections.clear();
|
||||
CV_Assert(scores.dims == 4); CV_Assert(geometry.dims == 4); CV_Assert(scores.size[0] == 1);
|
||||
|
|
@ -159,3 +208,51 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void fourPointsTransform(const Mat& frame, Point2f vertices[4], Mat& result)
|
||||
{
|
||||
const Size outputSize = Size(100, 32);
|
||||
|
||||
Point2f targetVertices[4] = {Point(0, outputSize.height - 1),
|
||||
Point(0, 0), Point(outputSize.width - 1, 0),
|
||||
Point(outputSize.width - 1, outputSize.height - 1),
|
||||
};
|
||||
Mat rotationMatrix = getPerspectiveTransform(vertices, targetVertices);
|
||||
|
||||
warpPerspective(frame, result, rotationMatrix, outputSize);
|
||||
}
|
||||
|
||||
void decodeText(const Mat& scores, std::string& text)
|
||||
{
|
||||
static const std::string alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
|
||||
Mat scoresMat = scores.reshape(1, scores.size[0]);
|
||||
|
||||
std::vector<char> elements;
|
||||
elements.reserve(scores.size[0]);
|
||||
|
||||
for (int rowIndex = 0; rowIndex < scoresMat.rows; ++rowIndex)
|
||||
{
|
||||
Point p;
|
||||
minMaxLoc(scoresMat.row(rowIndex), 0, 0, 0, &p);
|
||||
if (p.x > 0 && static_cast<size_t>(p.x) <= alphabet.size())
|
||||
{
|
||||
elements.push_back(alphabet[p.x - 1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
elements.push_back('-');
|
||||
}
|
||||
}
|
||||
|
||||
if (elements.size() > 0 && elements[0] != '-')
|
||||
text += elements[0];
|
||||
|
||||
for (size_t elementIndex = 1; elementIndex < elements.size(); ++elementIndex)
|
||||
{
|
||||
if (elementIndex > 0 && elements[elementIndex] != '-' &&
|
||||
elements[elementIndex - 1] != elements[elementIndex])
|
||||
{
|
||||
text += elements[elementIndex];
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user