diff --git a/3rdparty/carotene/CMakeLists.txt b/3rdparty/carotene/CMakeLists.txt index 528fcf62e1..4319815708 100644 --- a/3rdparty/carotene/CMakeLists.txt +++ b/3rdparty/carotene/CMakeLists.txt @@ -40,4 +40,5 @@ if(WITH_NEON) target_compile_definitions(carotene_objs PRIVATE "-DWITH_NEON") endif() -add_library(carotene STATIC EXCLUDE_FROM_ALL "$") +# we add dummy file to fix XCode build +add_library(carotene STATIC EXCLUDE_FROM_ALL "$" "${CAROTENE_SOURCE_DIR}/dummy.cpp") diff --git a/3rdparty/carotene/hal/CMakeLists.txt b/3rdparty/carotene/hal/CMakeLists.txt index 8ca7a7de32..592771c676 100644 --- a/3rdparty/carotene/hal/CMakeLists.txt +++ b/3rdparty/carotene/hal/CMakeLists.txt @@ -82,7 +82,8 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS ${carotene_defs}) # set_source_files_properties(impl.cpp $ COMPILE_FLAGS "--param ipcp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000") endif() -add_library(tegra_hal STATIC $) +# we add dummy file to fix XCode build +add_library(tegra_hal STATIC $ "dummy.cpp") set_target_properties(tegra_hal PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${3P_LIBRARY_OUTPUT_PATH}) set(OPENCV_SRC_DIR "${CMAKE_SOURCE_DIR}") if(NOT BUILD_SHARED_LIBS) diff --git a/3rdparty/carotene/hal/dummy.cpp b/3rdparty/carotene/hal/dummy.cpp new file mode 100644 index 0000000000..7f10ff3e8c --- /dev/null +++ b/3rdparty/carotene/hal/dummy.cpp @@ -0,0 +1,2 @@ +// This file is needed for compilation on some platforms e.g. with XCode generator +// Related issue: https://gitlab.kitware.com/cmake/cmake/-/issues/17457 diff --git a/3rdparty/carotene/src/dummy.cpp b/3rdparty/carotene/src/dummy.cpp new file mode 100644 index 0000000000..7f10ff3e8c --- /dev/null +++ b/3rdparty/carotene/src/dummy.cpp @@ -0,0 +1,2 @@ +// This file is needed for compilation on some platforms e.g. with XCode generator +// Related issue: https://gitlab.kitware.com/cmake/cmake/-/issues/17457 diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index 4d4fa24ccd..9b74c0e36a 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -88,7 +88,12 @@ if(CUDA_FOUND) message(STATUS "CUDA detected: " ${CUDA_VERSION}) - set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere") + OCV_OPTION(CUDA_ENABLE_DEPRECATED_GENERATION "Enable deprecated generations in the list" OFF) + set(_generations "Maxwell" "Pascal" "Volta" "Turing" "Ampere") + if(CUDA_ENABLE_DEPRECATED_GENERATION) + set(_generations "Fermi" "${_generations}") + set(_generations "Kepler" "${_generations}") + endif() set(_arch_fermi "2.0") set(_arch_kepler "3.0;3.5;3.7") set(_arch_maxwell "5.0;5.2") @@ -209,10 +214,6 @@ if(CUDA_FOUND) endif() endmacro() - macro(ocv_wipeout_deprecated _arch_bin_list) - string(REPLACE "2.1" "2.1(2.0)" ${_arch_bin_list} "${${_arch_bin_list}}") - endmacro() - set(__cuda_arch_ptx "") if(CUDA_GENERATION STREQUAL "Fermi") set(__cuda_arch_bin ${_arch_fermi}) @@ -275,7 +276,6 @@ if(CUDA_FOUND) ) endif() endif() - ocv_wipeout_deprecated(__cuda_arch_bin) set(CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") set(CUDA_ARCH_PTX ${__cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") @@ -283,10 +283,14 @@ if(CUDA_FOUND) string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}") string(REGEX REPLACE "\\." 
"" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}") - # Check if user specified 1.0 compute capability: we don't support it - if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES " 1.0") - message(SEND_ERROR "CUDA: 1.0 compute capability is not supported - exclude it from ARCH/PTX list are re-run CMake") - endif() + # Check if user specified 1.0/2.1 compute capability: we don't support it + macro(ocv_wipeout_deprecated_cc target_cc) + if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES " ${target_cc}") + message(SEND_ERROR "CUDA: ${target_cc} compute capability is not supported - exclude it from ARCH/PTX list and re-run CMake") + endif() + endmacro() + ocv_wipeout_deprecated_cc("1.0") + ocv_wipeout_deprecated_cc("2.1") # NVCC flags to be set set(NVCC_FLAGS_EXTRA "") diff --git a/doc/js_tutorials/js_assets/opencv_logo.jpg b/doc/js_tutorials/js_assets/opencv_logo.jpg index a2854e1e9e..c2bf3a1748 100644 Binary files a/doc/js_tutorials/js_assets/opencv_logo.jpg and b/doc/js_tutorials/js_assets/opencv_logo.jpg differ diff --git a/doc/opencv-logo-small.png b/doc/opencv-logo-small.png index 763ceb2b32..b7e76d27ba 100644 Binary files a/doc/opencv-logo-small.png and b/doc/opencv-logo-small.png differ diff --git a/doc/opencv-logo-white.png b/doc/opencv-logo-white.png index 3c7098459e..a683e3569f 100644 Binary files a/doc/opencv-logo-white.png and b/doc/opencv-logo-white.png differ diff --git a/doc/opencv-logo.png b/doc/opencv-logo.png index 76cc29f6dc..3ed6a233bb 100644 Binary files a/doc/opencv-logo.png and b/doc/opencv-logo.png differ diff --git a/doc/opencv-logo2.png b/doc/opencv-logo2.png index bc71a2ae50..6658e07b31 100644 Binary files a/doc/opencv-logo2.png and b/doc/opencv-logo2.png differ diff --git a/doc/opencv.bib b/doc/opencv.bib index bdfbc8cf1e..975630a18d 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -584,6 +584,16 @@ pages = {1033--1040}, publisher = {IEEE} } +@article{YM11, + author = {Yu, Guoshen and Morel, Jean-Michel}, + title = {ASIFT: An Algorithm for Fully Affine Invariant Comparison}, + year = {2011}, + pages = {11--38}, + journal = {Image Processing On Line}, + volume = {1}, + doi = {10.5201/ipol.2011.my-asift}, + url = {http://www.ipol.im/pub/algo/my_affine_sift/} +} @inproceedings{LCS11, author = {Leutenegger, Stefan and Chli, Margarita and Siegwart, Roland Yves}, title = {BRISK: Binary robust invariant scalable keypoints}, diff --git a/doc/opencv.ico b/doc/opencv.ico index 38f033f3b0..c4d2cfd471 100644 Binary files a/doc/opencv.ico and b/doc/opencv.ico differ diff --git a/doc/py_tutorials/py_setup/images/opencv_logo.jpg b/doc/py_tutorials/py_setup/images/opencv_logo.jpg index a2854e1e9e..c2bf3a1748 100644 Binary files a/doc/py_tutorials/py_setup/images/opencv_logo.jpg and b/doc/py_tutorials/py_setup/images/opencv_logo.jpg differ diff --git a/modules/calib3d/src/calibration_handeye.cpp b/modules/calib3d/src/calibration_handeye.cpp index 18561c77fe..37d4e89d78 100644 --- a/modules/calib3d/src/calibration_handeye.cpp +++ b/modules/calib3d/src/calibration_handeye.cpp @@ -712,7 +712,10 @@ void calibrateHandEye(InputArrayOfArrays R_gripper2base, InputArrayOfArrays t_gr { Mat m = Mat::eye(4, 4, CV_64FC1); Mat R = m(Rect(0, 0, 3, 3)); - R_gripper2base_[i].convertTo(R, CV_64F); + if(R_gripper2base_[i].size() == Size(3, 3)) + R_gripper2base_[i].convertTo(R, CV_64F); + else + Rodrigues(R_gripper2base_[i], R); Mat t = m(Rect(3, 0, 1, 3)); t_gripper2base_[i].convertTo(t, CV_64F); @@ -727,7 +730,10 @@ void calibrateHandEye(InputArrayOfArrays R_gripper2base, InputArrayOfArrays t_gr { Mat m = 
Mat::eye(4, 4, CV_64FC1); Mat R = m(Rect(0, 0, 3, 3)); - R_target2cam_[i].convertTo(R, CV_64F); + if(R_target2cam_[i].size() == Size(3, 3)) + R_target2cam_[i].convertTo(R, CV_64F); + else + Rodrigues(R_target2cam_[i], R); Mat t = m(Rect(3, 0, 1, 3)); t_target2cam_[i].convertTo(t, CV_64F); diff --git a/modules/calib3d/test/test_calibration_hand_eye.cpp b/modules/calib3d/test/test_calibration_hand_eye.cpp index d2cef969b3..848dcf07c2 100644 --- a/modules/calib3d/test/test_calibration_hand_eye.cpp +++ b/modules/calib3d/test/test_calibration_hand_eye.cpp @@ -317,7 +317,10 @@ void CV_CalibrateHandEyeTest::simulateData(RNG& rng, int nPoses, t_gripper2base_noise.at(2,0) += rng.gaussian(0.001); } - R_target2cam.push_back(T_target2cam(Rect(0, 0, 3, 3))); + // test rvec represenation + Mat rvec_target2cam; + cv::Rodrigues(T_target2cam(Rect(0, 0, 3, 3)), rvec_target2cam); + R_target2cam.push_back(rvec_target2cam); t_target2cam.push_back(T_target2cam(Rect(3, 0, 1, 3))); } } diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index ff9fa36232..adbe3727a4 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -1614,7 +1614,9 @@ elements. CV_EXPORTS_W bool checkRange(InputArray a, bool quiet = true, CV_OUT Point* pos = 0, double minVal = -DBL_MAX, double maxVal = DBL_MAX); -/** @brief converts NaN's to the given number +/** @brief converts NaNs to the given number +@param a input/output matrix (CV_32F type). +@param val value to convert the NaNs */ CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val = 0); diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 32f59d1a3e..5d8fbc8b84 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -600,6 +600,14 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams& params); }; + /** + * @brief Detection output layer. + * + * The layer size is: @f$ (1 \times 1 \times N \times 7) @f$ + * where N is [keep_top_k] parameter multiplied by batch size. Each row is: + * [image_id, label, confidence, xmin, ymin, xmax, ymax] + * where image_id is the index of image input in the batch. 
+ */ class CV_EXPORTS DetectionOutputLayer : public Layer { public: diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index f6504b96c7..bc0b413588 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -221,6 +221,10 @@ namespace cv { { cv::dnn::LayerParams activation_param; if (type == "relu") + { + activation_param.type = "ReLU"; + } + else if (type == "leaky") { activation_param.set("negative_slope", 0.1f); activation_param.type = "ReLU"; @@ -862,24 +866,8 @@ namespace cv { } std::string activation = getParam(layer_params, "activation", "linear"); - if (activation == "leaky") - { - setParams.setActivation("relu"); - } - else if (activation == "swish") - { - setParams.setActivation("swish"); - } - else if (activation == "mish") - { - setParams.setActivation("mish"); - } - else if (activation == "logistic") - { - setParams.setActivation("logistic"); - } - else if (activation != "linear") - CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation); + if (activation != "linear") + setParams.setActivation(activation); net->out_channels_vec[layers_counter] = tensor_shape[0]; } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 2823ee1115..ae7501ea9f 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -114,18 +114,19 @@ public: inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); - CV_Assert(inputs.size() > 0); + CV_Assert((inputs.size() > outputs.size() && blobs.empty()) || + (!inputs.empty() && (blobs.size() == 1 || blobs.size() == 2))); + MatSize weightShape = blobs.empty() ? inputs[1].size : blobs[0].size; - CV_Assert(blobs.size() == 1 || blobs.size() == 2); CV_Assert(inputs[0].dims == outputs[0].dims); - CV_Assert(blobs[0].dims == kernel_size.size() + 2); + CV_Assert(weightShape.dims() == kernel_size.size() + 2); for (int i = 0; i < kernel_size.size(); i++) { - CV_Assert(blobs[0].size[i + 2] == kernel_size[i]); + CV_Assert(weightShape[i + 2] == kernel_size[i]); } const Mat &input = inputs[0]; CV_Assert((input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S)); - for (size_t i = 0; i < inputs.size(); i++) + for (size_t i = 0; i < outputs.size(); i++) { CV_Assert(inputs[i].type() == input.type()); CV_Assert((inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]); @@ -270,6 +271,7 @@ public: MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE { + CV_Assert(!blobs.empty()); int dims = inpShape.size(); int inpD = dims == 5 ? 
inpShape[2] : 1; int inpH = inpShape[dims - 2]; @@ -296,6 +298,8 @@ public: { if (kernel_size.size() == 3) return preferableTarget == DNN_TARGET_CPU; + if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableTarget != DNN_TARGET_MYRIAD) && blobs.empty()) + return false; return (preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height); } else @@ -305,7 +309,7 @@ public: return (preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV); else if (kernel_size.size() == 2) return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_HALIDE || + (backendId == DNN_BACKEND_HALIDE && !blobs.empty()) || (backendId == DNN_BACKEND_VKCOM && haveVulkan()); else return false; @@ -317,16 +321,16 @@ public: std::vector &outputs, std::vector &internals) const CV_OVERRIDE { - CV_Assert(blobs.size() != 0); - CV_Assert(!hasBias() || blobs[1].total() == (size_t)blobs[0].size[0]); - CV_Assert(inputs.size() == (size_t)1); + CV_Assert(!blobs.empty() || inputs.size() > 1); + const int* weightShape = blobs.empty() ? &inputs[1][0] : blobs[0].size.p; + CV_Assert(!hasBias() || blobs[1].total() == (size_t)weightShape[0]); internals.clear(); CV_Assert(inputs.size() != 0); std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); - int outCn = blobs[0].size[0]; + int outCn = weightShape[0]; std::vector outShape; outShape.push_back(inputs[0][0]); outShape.push_back(outCn); @@ -342,10 +346,10 @@ public: getConvPoolOutParams(inpShape, kernel_size, strides, padMode, dilations, outShape); } - int ngroups = inpCn / blobs[0].size[1]; - if (ngroups == 0 || ngroups * blobs[0].size[1] != inpCn) + int ngroups = inpCn / weightShape[1]; + if (ngroups == 0 || ngroups * weightShape[1] != inpCn) CV_Error(Error::StsError, format("Number of input channels should " - "be multiple of %d but got %d", blobs[0].size[1], inpCn)); + "be multiple of %d but got %d", weightShape[1], inpCn)); CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0); outputs.resize(1, outShape); @@ -357,15 +361,15 @@ public: { BaseConvolutionLayerImpl::finalize(inputs_arr, outputs_arr); - CV_Assert(!blobs.empty()); - const int outCn = blobs[0].size[0]; + std::vector inputs; + inputs_arr.getMatVector(inputs); // prepare weightsMat where each row is aligned and has enough zero padding on the right to // use vectorized (i.e. with intrinsics) loops without tail processing - Mat wm = blobs[0].reshape(1, outCn); + Mat wm = blobs.empty() ? inputs[1].reshape(1, numOutput) : blobs[0].reshape(1, numOutput); if( wm.step1() % VEC_ALIGN != 0 ) { int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); - Mat wm_buffer = Mat(outCn, newcols, wm.type()); + Mat wm_buffer = Mat(numOutput, newcols, wm.type()); Mat wm_padding = wm_buffer.colRange(wm.cols, newcols); wm_padding.setTo(Scalar::all(0.)); Mat wm_aligned = wm_buffer.colRange(0, wm.cols); @@ -373,18 +377,18 @@ public: wm = wm_aligned; } weightsMat = wm; - weightsMultipliers.assign(outCn, 1.0); + weightsMultipliers.assign(numOutput, 1.0); - Mat biasMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat(); - biasvec.resize(outCn+2); + Mat biasMat = hasBias() ? 
blobs[1].reshape(1, numOutput) : Mat(); + biasvec.resize(numOutput+2); if( biasMat.empty() ) { - for(int i = 0; i < outCn; i++ ) + for(int i = 0; i < numOutput; i++ ) biasvec[i] = 0.f; } else { - for(int i = 0; i < outCn; i++ ) + for(int i = 0; i < numOutput; i++ ) biasvec[i] = biasMat.at(i); } #ifdef HAVE_OPENCL @@ -394,7 +398,7 @@ public: bool setActivation(const Ptr& layer) CV_OVERRIDE { - if (!activ.empty() && !layer.empty()) + if ((!activ.empty() && !layer.empty()) || blobs.empty()) return false; activ = layer; @@ -743,37 +747,48 @@ public: virtual Ptr initNgraph(const std::vector > &inputs, const std::vector >& nodes) CV_OVERRIDE { - CV_Assert_N(inputs.size() == 1, nodes.size() == 1); + CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1); auto& ieInpNode = nodes[0].dynamicCast()->node; std::vector dims = ieInpNode->get_shape(); CV_Assert(dims.size() == 4 || dims.size() == 5); + std::shared_ptr ieWeights = nodes.size() > 1 ? nodes[1].dynamicCast()->node : nullptr; const int inpCn = dims[1]; - const int outCn = blobs[0].size[0]; - const int inpGroupCn = blobs[0].size[1]; + const int inpGroupCn = nodes.size() > 1 ? ieWeights->get_shape()[1] : blobs[0].size[1]; const int group = inpCn / inpGroupCn; - std::vector kernel_shape = getShape(blobs[0]); + std::vector kernel_shape; if (group != 1) { - kernel_shape[0] /= group; - kernel_shape.insert(kernel_shape.begin(), group); + kernel_shape.push_back(group); } + kernel_shape.push_back(numOutput / group); + kernel_shape.push_back(inpCn / group); + std::copy(kernel_size.begin(), kernel_size.end(), back_inserter(kernel_shape)); - auto ieWeights = std::make_shared(ngraph::element::f32, kernel_shape, blobs[0].data); - if (fusedWeights) + if (nodes.size() == 1) { - if (weightsMat.isContinuous()) + ieWeights = std::make_shared(ngraph::element::f32, kernel_shape, blobs[0].data); + if (fusedWeights) { - ieWeights = std::make_shared(ngraph::element::f32, kernel_shape, weightsMat.data); - } - else - { - Mat newWeights; - Mat cvWeights = weightsMat.colRange(0, blobs[0].total() / outCn); - cvWeights.copyTo(newWeights); - ieWeights = std::make_shared(ngraph::element::f32, kernel_shape, newWeights.data); + if (weightsMat.isContinuous()) + { + ieWeights = std::make_shared(ngraph::element::f32, kernel_shape, weightsMat.data); + } + else + { + Mat newWeights; + Mat cvWeights = weightsMat.colRange(0, blobs[0].total() / numOutput); + cvWeights.copyTo(newWeights); + ieWeights = std::make_shared(ngraph::element::f32, kernel_shape, newWeights.data); + } } } + else + { + auto shape = std::make_shared(ngraph::element::i64, + ngraph::Shape{kernel_shape.size()}, kernel_shape.data()); + ieWeights = std::make_shared(ieWeights, shape, true); + } ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT; if (!padMode.empty()) @@ -798,11 +813,21 @@ public: pad_type); } - if (hasBias() || fusedBias) + if (hasBias() || fusedBias || nodes.size() == 3) { std::vector shape(conv_node->get_shape().size(), 1); - shape[1] = outCn; - auto bias = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), biasvec.data()); + shape[1] = conv_node->get_shape()[1]; + std::shared_ptr bias; + if (nodes.size() == 3) + { + auto bias_shape = std::make_shared(ngraph::element::i64, + ngraph::Shape{shape.size()}, shape.data()); + bias = std::make_shared(nodes[2].dynamicCast()->node, bias_shape, true); + } + else + { + bias = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), biasvec.data()); + } auto conv_bias = std::make_shared(conv_node, bias, ngraph::op::AutoBroadcastType::NUMPY); 
return Ptr(new InfEngineNgraphNode(conv_bias)); } @@ -1516,6 +1541,26 @@ public: for (int i = 0; i < inputs.size(); ++i) CV_Assert(inputs[i].u != outputs[0].u); + if (blobs.empty()) + { + size_t n = inputs.size() - 1; + umat_blobs.resize(n); + for (size_t i = 0; i < n; i++) + { + if (use_half) + { + Mat matFP32; + convertFp16(inputs[i + 1], matFP32); + matFP32.copyTo(umat_blobs[i]); + } + else + { + inputs[i + 1].copyTo(umat_blobs[i]); + } + } + inputs.resize(1); + } + if (umat_blobs.empty()) { size_t n = blobs.size(); @@ -1526,7 +1571,7 @@ public: } } - if (convolutionOp.empty()) + if (convolutionOp.empty() || blobs.empty()) { OCL4DNNConvConfig config; config.in_shape = shape(inputs[0]); @@ -1536,7 +1581,7 @@ public: config.stride = stride; config.dilation = dilation; config.group = inputs[0].size[1] / umat_blobs[0].size[1]; - config.bias_term = (hasBias()) ? true : false; + config.bias_term = umat_blobs.size() == 2; config.use_half = use_half; convolutionOp = Ptr >(new OCL4DNNConvSpatial(config)); @@ -1663,16 +1708,37 @@ public: inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); + int outCn = blobs.empty() ? inputs[1].size[0] : blobs[0].size[0]; + // Need to align non-const blobs + if (blobs.empty()) + { + Mat wm = inputs[1].reshape(1, outCn); + if( wm.step1() % VEC_ALIGN != 0 ) + { + wm.copyTo(weightsMat); + if (inputs.size() > 2) + { + Mat biasMat = inputs[2].reshape(1, outCn); + biasMat.col(0).copyTo(biasvec); + biasvec.resize(outCn + 2); + } + else + { + biasvec.resize(outCn + 2, 0); + } + } + } + /*printf("conv %s: input (%d x %d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n", name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3], kernel.width, kernel.height, pad.width, pad.height, stride.width, stride.height, dilation.width, dilation.height);*/ - CV_Assert_N(inputs.size() == (size_t)1, inputs[0].size[1] % blobs[0].size[1] == 0, + int inpGroupCn = blobs.empty() ? 
inputs[1].size[1] : blobs[0].size[1]; + CV_Assert_N(inputs.size() >= (size_t)1, inputs[0].size[1] % inpGroupCn == 0, outputs.size() == 1, inputs[0].data != outputs[0].data); - int ngroups = inputs[0].size[1]/blobs[0].size[1]; + int ngroups = inputs[0].size[1] / inpGroupCn; CV_Assert(outputs[0].size[1] % ngroups == 0); - int outCn = blobs[0].size[0]; reluslope.clear(); if( activ ) @@ -1810,11 +1876,11 @@ public: virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { - CV_Assert(inputs.size() == outputs.size()); + CV_Assert(inputs.size() == outputs.size() || inputs.size() == outputs.size() + blobs.size()); int64 flops = 0; int karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies()); - for (int i = 0; i < inputs.size(); i++) + for (int i = 0; i < outputs.size(); i++) { flops += total(outputs[i])*(CV_BIG_INT(2)*karea*inputs[i][1] + 1); } diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index b17391f638..b9d30bbd62 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -587,7 +587,7 @@ public: } else { - std::vector data = {(size_t)ieInpNode->get_shape()[0], (size_t)blobs[0].size[1]}; + std::vector data = {(int64_t)ieInpNode->get_shape()[0], (int64_t)blobs[0].size[1]}; auto new_shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, data.data()); auto inp = std::make_shared(ieInpNode, new_shape, true); diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index 943e347ff6..ffc8540a25 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -397,8 +397,9 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; + std::vector order(_order.begin(), _order.end()); auto tr_axes = std::make_shared(ngraph::element::i64, - ngraph::Shape({_order.size()}), _order.data()); + ngraph::Shape({order.size()}), order.data()); auto transpose = std::make_shared(ieInpNode, tr_axes); return Ptr(new InfEngineNgraphNode(transpose)); } diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 0522ff7cfa..6deabb5884 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -167,6 +167,10 @@ public: void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE { +#ifdef HAVE_OPENCL + ocl_exec_cache.clear(); +#endif + std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); @@ -221,26 +225,33 @@ public: } #ifdef HAVE_OPENCL - bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) + struct OpenCLExecInfo { - std::vector inputs; - std::vector outputs; + std::string kernel_name; + std::string build_opts; + size_t local_size[2]; + size_t global_size[2]; - inputs_.getUMatVector(inputs); - outputs_.getUMatVector(outputs); + OpenCLExecInfo() + { + local_size[0] = local_size[1] = 0; + global_size[0] = global_size[1] = 0; + } + }; + std::vector ocl_exec_cache; + + void ocl_prepare(const std::vector& inputs, const std::vector& outputs) + { + CV_TRACE_FUNCTION(); CV_Assert(outputs.size() == finalSliceRanges.size()); + ocl_exec_cache.resize(outputs.size()); const UMat& input = inputs[0]; - if (input.dims > 5) - { - CV_LOG_INFO(NULL, "DNN/OpenCL/Slice: implementation doesn't support dims=" << 
input.dims << ". Fallback to CPU"); - return false; - } + const int dims = input.dims; size_t WSZ = 128; - const int dims = input.dims; const int elemSize = (int)input.elemSize(); String opts0 = cv::format( "-DDIMS=%d -DELEMSIZE=%d", @@ -250,10 +261,11 @@ public: { opts0 += cv::format(" -DSRC_STEP_%d=%d", d, (int)input.step[dims - 1 - d]); } - String kname = cv::format("slice_%d", dims); for (size_t i = 0; i < outputs.size(); i++) { - UMat& output = outputs[i]; + OpenCLExecInfo& ocl = ocl_exec_cache[i]; + + const UMat& output = outputs[i]; const std::vector& range = finalSliceRanges[i]; String opts = opts0; @@ -269,6 +281,8 @@ public: CV_CheckEQ(range[d].size(), (int)output.size[d], ""); } + const size_t param_LIMIT_BLOCK_SIZE_PER_WG = WSZ * 64; + int block_dims = 0; size_t block_size = elemSize; for (int i = dims - 1; i >= 0; --i) @@ -277,12 +291,14 @@ public: break; block_size *= output.size[i]; block_dims++; + if (block_size >= param_LIMIT_BLOCK_SIZE_PER_WG) + break; } const size_t total = output.total() * elemSize; size_t num_blocks = total / block_size; - if ((num_blocks <= 8 && block_size >= WSZ * 4) || (block_size >= WSZ * 64)) + if ((num_blocks <= 8 && block_size >= WSZ * 4) || (block_size >= param_LIMIT_BLOCK_SIZE_PER_WG)) { // use 1D copy mode opts += cv::format(" -DUSE_COPY_1D=1"); @@ -352,23 +368,98 @@ public: opts += cv::format(" -DWSZ=%d", (int)WSZ); - size_t local[] = { WSZ, 1 }; - size_t global[] = { WSZ, num_blocks }; + std::ostringstream kernel_suffix; + kernel_suffix << dims << 'x' << elemSize << "_bsz" << block_size; + kernel_suffix << "__src_"; + for (int d = 0; d < dims; d++) + { + kernel_suffix << input.size[dims - 1 - d] << '_'; + } + kernel_suffix << '_'; + /*for (int d = 0; d < dims; d++) + { + kernel_suffix << input.step[dims - 1 - d] << '_'; + } + kernel_suffix << '_';*/ - ocl::Kernel kernel(kname.c_str(), ocl::dnn::slice_oclsrc, opts); + kernel_suffix << "dst_"; + for (int d = 0; d < dims; d++) + { + kernel_suffix << output.size[dims - 1 - d] << '_'; + } + /*kernel_suffix << '_'; + for (int d = 0; d < dims; d++) + { + kernel_suffix << output.step[dims - 1 - d] << '_'; + }*/ + kernel_suffix << "_slice_"; + for (int d = 0; d < dims; d++) + { + kernel_suffix << range[dims - 1 - d].start << '_'; + } + for (int d = 0; d < dims; d++) + { + kernel_suffix << '_' << range[dims - 1 - d].end; + } + + std::string kernel_suffix_str = kernel_suffix.str(); + opts += cv::format(" -DSLICE_KERNEL_SUFFIX=%s", kernel_suffix_str.c_str()); + + ocl.kernel_name = cv::format("slice_%s", kernel_suffix_str.c_str()); + ocl.build_opts = opts; + ocl.local_size[0] = WSZ; + ocl.local_size[1] = 1; + ocl.global_size[0] = WSZ; + ocl.global_size[1] = num_blocks; + } // for outputs.size() + } // ocl_prepare + + bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) + { + CV_TRACE_FUNCTION(); + + std::vector inputs; + std::vector outputs; + + inputs_.getUMatVector(inputs); + outputs_.getUMatVector(outputs); + + CV_Assert(outputs.size() == finalSliceRanges.size()); + + const UMat& input = inputs[0]; + const int dims = input.dims; + if (dims > 5) + { + CV_LOG_INFO(NULL, "DNN/OpenCL/Slice: implementation doesn't support dims=" << dims << ". 
Fallback to CPU"); + return false; + } + + if (ocl_exec_cache.empty()) + { + ocl_prepare(inputs, outputs); + } + CV_CheckEQ(ocl_exec_cache.size(), outputs.size(), ""); + + for (size_t i = 0; i < outputs.size(); i++) + { + const OpenCLExecInfo& ocl = ocl_exec_cache[i]; + + UMat& output = outputs[i]; + + ocl::Kernel kernel(ocl.kernel_name.c_str(), ocl::dnn::slice_oclsrc, ocl.build_opts); if (kernel.empty()) return false; bool ret = kernel.args( ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output) ) - .run(2, global, local, false); + .run(2, (size_t*)ocl.global_size, (size_t*)ocl.local_size, false); if (!ret) return false; } // for outputs.size() return true; - } + } // forward_ocl #endif void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 2e1c185bbe..794d0721e0 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -1003,10 +1003,13 @@ void ONNXImporter::populateNet(Net dstNet) CV_Assert(node_proto.input_size() >= 2); layerParams.type = "Convolution"; for (int j = 1; j < node_proto.input_size(); j++) { - layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j)); + if (constBlobs.find(node_proto.input(j)) != constBlobs.end()) + { + layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j)); + } } - layerParams.set("num_output", layerParams.blobs[0].size[0]); - layerParams.set("bias_term", node_proto.input_size() == 3); + int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0]; + layerParams.set("num_output", outCn); } else if (layer_type == "ConvTranspose") { diff --git a/modules/dnn/src/opencl/slice.cl b/modules/dnn/src/opencl/slice.cl index d468dbc16a..f32d66a9ca 100644 --- a/modules/dnn/src/opencl/slice.cl +++ b/modules/dnn/src/opencl/slice.cl @@ -48,19 +48,85 @@ global: #define BLOCK_COLS_X4 (BLOCK_COLS / 4) #define BLOCK_COLS_X16 (BLOCK_COLS / 16) -#ifdef USE_COPY_1D - -static inline -__attribute__((always_inline)) -void copy_block_1d( +__attribute__((reqd_work_group_size(WSZ, 1, 1))) +__kernel void +CONCAT(slice_, SLICE_KERNEL_SUFFIX)( __global const uchar* src0, - const uint src_offset, - __global uchar* dst0, - const uint dst_offset + __global uchar* dst0 ) { - __global const uchar* src = src0 + src_offset; - __global uchar* dst = dst0 + dst_offset; + uint block_id = get_global_id(1); + uint dst_offset0 = block_id * BLOCK_SIZE; + uint src_offset0 = 0; + + { // calculate src_offset0 + +#define CALC_SRC_INDEX(dim) \ + { \ + uint plane_sz = CONCAT(DST_STEP_, dim) / BLOCK_SIZE; \ + CONCAT(idx_, dim) = block_id / plane_sz; \ + block_id = block_id - CONCAT(idx_, dim) * plane_sz; \ + } +#define UPDATE_SRC_OFFSET(dim) \ + src_offset0 = mad24((uint)(CONCAT(idx_, dim) + CONCAT(SRC_START_, dim)), (uint)CONCAT(SRC_STEP_, dim), (uint)src_offset0); +/* + if (get_global_id(0) == 0 && get_global_id(1) == 0) \ + printf("(%d, %d): @%d src_offset0=%d idx_dim=%d block_id=%d\n", \ + get_global_id(0), get_global_id(1), \ + dim, src_offset0, CONCAT(idx_, dim), block_id \ + ); +*/ + +#if DIMS > 5 +#error "invalid configuration" +#endif +#if DIMS > 4 + uint idx_4 = 0; +#if BLOCK_DIMS <= 4 + CALC_SRC_INDEX(4) +#endif + UPDATE_SRC_OFFSET(4) +#endif +#if DIMS > 3 + uint idx_3 = 0; +#if BLOCK_DIMS <= 3 + CALC_SRC_INDEX(3) +#endif + UPDATE_SRC_OFFSET(3) +#endif +#if DIMS > 2 + uint idx_2 = 0; +#if BLOCK_DIMS <= 2 + 
CALC_SRC_INDEX(2) +#endif + UPDATE_SRC_OFFSET(2) +#endif +#if DIMS > 1 + uint idx_1 = 0; +#if BLOCK_DIMS <= 1 + CALC_SRC_INDEX(1) +#endif + UPDATE_SRC_OFFSET(1) +#endif +#if DIMS > 0 + uint idx_0 = 0; + UPDATE_SRC_OFFSET(0) +#endif + +/* + if (get_global_id(0) == 0) + printf("(%d, %d): src_offset0=%d dst_offset0=%d\n", + get_global_id(0), get_global_id(1), + src_offset0, dst_offset0 + ); +*/ + + } // calculate src_offset0 + +#ifdef USE_COPY_1D + { // copy_block_1d + __global const uchar* src = src0 + src_offset0; + __global uchar* dst = dst0 + dst_offset0; uint processed = 0; @@ -70,8 +136,9 @@ void copy_block_1d( uint i = get_local_id(0) * 16; // uchar16 while (i < BLOCK_COLS_X16 * 16) { - uint4 idx = (uint4)(i, i + 16 * WSZ, i + 32 * WSZ, i + 48 * WSZ); - idx = select((uint4)i, idx, idx < (BLOCK_COLS_X16 * 16)); + uint4 idx0 = (uint4)i; + uint4 idx = idx0 + (uint4)(0, 16 * WSZ, 32 * WSZ, 48 * WSZ); + idx = select(idx0, idx, idx < (BLOCK_COLS_X16 * 16)); uchar16 a0 = vload16(0, src + idx.s0); uchar16 a1 = vload16(0, src + idx.s1); @@ -97,8 +164,9 @@ void copy_block_1d( uint i = get_local_id(0) * 4 + processed; // uchar4 while (i < BLOCK_COLS_X4 * 4) { - uint4 idx = (uint4)(i, i + 4 * WSZ, i + 8 * WSZ, i + 12 * WSZ); - idx = select((uint4)i, idx, idx < (BLOCK_COLS_X4 * 4)); + uint4 idx0 = (uint4)i; + uint4 idx = idx0 + (uint4)(0, 4 * WSZ, 8 * WSZ, 12 * WSZ); + idx = select(idx0, idx, idx < (BLOCK_COLS_X4 * 4)); uchar4 a0 = vload4(0, src + idx.s0); uchar4 a1 = vload4(0, src + idx.s1); @@ -130,19 +198,11 @@ void copy_block_1d( } } #endif -} + } // copy_block_1d -#else // USE_COPY_1D +#else -static inline -__attribute__((always_inline)) -void copy_block_2d( - __global const uchar* src0, - const uint src_offset0, - __global uchar* dst0, - const uint dst_offset0 -) -{ + { // copy_block_2d __global const uchar* src = src0 + src_offset0; __global uchar* dst = dst0 + dst_offset0; @@ -199,85 +259,6 @@ void copy_block_2d( #endif // BLOCK_COLS_FILL_X4 != BLOCK_COLS i += WSZ * 4; } -} - -#endif // USE_COPY_1D - -__kernel void -CONCAT(slice_, DIMS)( - __global const uchar* src, - __global uchar* dst -) -{ - uint block_id = get_global_id(1); - - uint dst_offset = block_id * BLOCK_SIZE; - - uint src_offset = 0; - -#define CALC_SRC_INDEX(dim) \ - { \ - uint plane_sz = CONCAT(DST_STEP_, dim) / BLOCK_SIZE; \ - CONCAT(idx_, dim) = block_id / plane_sz; \ - block_id = block_id - CONCAT(idx_, dim) * plane_sz; \ - } -#define UPDATE_SRC_OFFSET(dim) \ - src_offset = mad24((uint)(CONCAT(idx_, dim) + CONCAT(SRC_START_, dim)), (uint)CONCAT(SRC_STEP_, dim), (uint)src_offset); -/* - if (get_global_id(0) == 0 && get_global_id(1) == 0) \ - printf("(%d, %d): @%d src_offset=%d idx_dim=%d block_id=%d\n", \ - get_global_id(0), get_global_id(1), \ - dim, src_offset, CONCAT(idx_, dim), block_id \ - ); -*/ - -#if DIMS > 5 -#error "invalid configuration" -#endif -#if DIMS > 4 - uint idx_4 = 0; -#if BLOCK_DIMS <= 4 - CALC_SRC_INDEX(4) -#endif - UPDATE_SRC_OFFSET(4) -#endif -#if DIMS > 3 - uint idx_3 = 0; -#if BLOCK_DIMS <= 3 - CALC_SRC_INDEX(3) -#endif - UPDATE_SRC_OFFSET(3) -#endif -#if DIMS > 2 - uint idx_2 = 0; -#if BLOCK_DIMS <= 2 - CALC_SRC_INDEX(2) -#endif - UPDATE_SRC_OFFSET(2) -#endif -#if DIMS > 1 - uint idx_1 = 0; -#if BLOCK_DIMS <= 1 - CALC_SRC_INDEX(1) -#endif - UPDATE_SRC_OFFSET(1) -#endif -#if DIMS > 0 - uint idx_0 = 0; - UPDATE_SRC_OFFSET(0) -#endif - -/* - if (get_global_id(0) == 0) - printf("(%d, %d): src_offset=%d dst_offset=%d\n", - get_global_id(0), get_global_id(1), - src_offset, dst_offset - ); -*/ - -#ifdef 
USE_COPY_1D - copy_block_1d(src, src_offset, dst, dst_offset); -#else - copy_block_2d(src, src_offset, dst, dst_offset); + } // copy_block_2d #endif } diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index fb9cc0184b..53886e8e29 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -784,6 +784,11 @@ TEST_P(Test_Darknet_layers, connected) testDarknetLayer("connected", true); } +TEST_P(Test_Darknet_layers, relu) +{ + testDarknetLayer("relu"); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_layers, dnnBackendsAndTargets()); }} // namespace diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 8ea5304c14..cb3313def3 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -1133,6 +1133,9 @@ TEST_P(Layer_Test_Convolution_DLDT, Accuracy) const Backend backendId = get<0>(GetParam()); const Target targetId = get<1>(GetParam()); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); @@ -1143,9 +1146,8 @@ TEST_P(Layer_Test_Convolution_DLDT, Accuracy) else FAIL() << "Unknown backendId"; - std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : ""; Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt")); - Net net = readNet(_tf("layer_convolution" + suffix + ".xml"), _tf("layer_convolution" + suffix + ".bin")); + Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin")); Mat inp = blobFromNPY(_tf("blob.npy")); @@ -1165,7 +1167,10 @@ TEST_P(Layer_Test_Convolution_DLDT, Accuracy) std::vector outLayers = net.getUnconnectedOutLayers(); ASSERT_EQ(net.getLayer(outLayers[0])->name, "output"); - ASSERT_EQ(net.getLayer(outLayers[0])->type, "Convolution"); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + ASSERT_EQ(net.getLayer(outLayers[0])->type, "Convolution"); + else + ASSERT_EQ(net.getLayer(outLayers[0])->type, "Add"); } TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8) @@ -1173,6 +1178,9 @@ TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8) const Backend backendId = get<0>(GetParam()); const Target targetId = get<1>(GetParam()); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); @@ -1189,12 +1197,10 @@ TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8) randu(inputs[0], 0, 255); inputs[0].convertTo(inputs[1], CV_32F); - std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 
"_fp16" : ""; - Mat outs[2]; for (int i = 0; i < 2; ++i) { - Net net = readNet(_tf("layer_convolution" + suffix + ".xml"), _tf("layer_convolution" + suffix + ".bin")); + Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin")); net.setPreferableBackend(backendId); net.setPreferableTarget(targetId); net.setInput(inputs[i]); @@ -1210,6 +1216,9 @@ TEST_P(Layer_Test_Convolution_DLDT, multithreading) const Backend backendId = get<0>(GetParam()); const Target targetId = get<1>(GetParam()); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); @@ -1220,9 +1229,8 @@ TEST_P(Layer_Test_Convolution_DLDT, multithreading) else FAIL() << "Unknown backendId"; - std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : ""; - std::string xmlPath = _tf("layer_convolution" + suffix + ".xml"); - std::string binPath = _tf("layer_convolution" + suffix + ".bin"); + std::string xmlPath = _tf("layer_convolution.xml"); + std::string binPath = _tf("layer_convolution.bin"); Net firstNet = readNet(xmlPath, binPath); Net secondNet = readNet(xmlPath, binPath); Mat inp = blobFromNPY(_tf("blob.npy")); @@ -1281,8 +1289,7 @@ TEST_P(Test_DLDT_two_inputs_3dim, as_IR) int secondInpType = get<1>(GetParam()); Target targetId = get<2>(GetParam()); - std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : ""; - Net net = readNet(_tf("net_two_inputs" + suffix + ".xml"), _tf("net_two_inputs.bin")); + Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin")); std::vector inpSize = get<3>(GetParam()); Mat firstInp(3, inpSize.data(), firstInpType); Mat secondInp(3, inpSize.data(), secondInpType); diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index a1480b0e8b..b803f782d0 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -444,12 +444,14 @@ TEST_P(Async, model_optimizer_pipeline_set_and_forward_single) const Backend backendId = get<0>(get<1>(GetParam())); const Target targetId = get<1>(get<1>(GetParam())); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); - const std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 
"_fp16" : ""; - const std::string& model = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin"); - const std::string& proto = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml"); + const std::string& model = findDataFile("dnn/layers/layer_convolution.bin"); + const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml"); if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); @@ -503,12 +505,14 @@ TEST_P(Async, model_optimizer_pipeline_set_and_forward_all) const Backend backendId = get<0>(get<1>(GetParam())); const Target targetId = get<1>(get<1>(GetParam())); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); - const std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : ""; - const std::string& model = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin"); - const std::string& proto = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml"); + const std::string& model = findDataFile("dnn/layers/layer_convolution.bin"); + const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml"); if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); @@ -677,9 +681,11 @@ TEST_P(Test_Model_Optimizer, forward_two_nets) const Backend backendId = get<0>(GetParam()); const Target targetId = get<1>(GetParam()); - const std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : ""; - const std::string& model = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin"); - const std::string& proto = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml"); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + + const std::string& model = findDataFile("dnn/layers/layer_convolution.bin"); + const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml"); if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); @@ -716,12 +722,14 @@ TEST_P(Test_Model_Optimizer, readFromBuffer) const Backend backendId = get<0>(GetParam()); const Target targetId = get<1>(GetParam()); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); - const std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 
"_fp16" : ""; - const std::string& weightsFile = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin"); - const std::string& modelFile = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml"); + const std::string& weightsFile = findDataFile("dnn/layers/layer_convolution.bin"); + const std::string& modelFile = findDataFile("dnn/layers/layer_convolution.xml"); if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); @@ -769,8 +777,11 @@ TEST_P(Test_Model_Optimizer, flexible_inputs) const Backend backendId = get<0>(GetParam()); const Target targetId = get<1>(GetParam()); - const std::string& model = findDataFile("dnn/layers/layer_convolution_fp16.bin"); - const std::string& proto = findDataFile("dnn/layers/layer_convolution_fp16.xml"); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + + const std::string& model = findDataFile("dnn/layers/layer_convolution.bin"); + const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml"); if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 86dfcae080..22a45286d6 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -114,6 +114,62 @@ TEST_P(Test_ONNX_layers, Convolution) testONNXModels("convolution"); } +TEST_P(Test_ONNX_layers, Convolution_variable_weight) +{ + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || + backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + + String basename = "conv_variable_w"; + Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); + ASSERT_FALSE(net.empty()); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + for (int i = 0; i < 2; i++) + { + Mat input = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_0.npy")); + Mat weights = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_1.npy")); + Mat ref = blobFromNPY(_tf("data/output_" + basename + format("_%d", i) + ".npy")); + + net.setInput(input, "0"); + net.setInput(weights, "1"); + + Mat out = net.forward(); + normAssert(ref, out, "", default_l1, default_lInf); + } +} + +TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias) +{ + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || + backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + + String basename = "conv_variable_wb"; + Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); + ASSERT_FALSE(net.empty()); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + for (int i = 0; i < 2; i++) + { + Mat input = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_0.npy")); + Mat weights = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_1.npy")); + Mat bias = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_2.npy")); + Mat ref = blobFromNPY(_tf("data/output_" + basename + format("_%d", i) + ".npy")); + + 
net.setInput(input, "0"); + net.setInput(weights, "1"); + net.setInput(bias, "bias"); + + Mat out = net.forward(); + normAssert(ref, out, "", default_l1, default_lInf); + } +} + TEST_P(Test_ONNX_layers, Gather) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index 08faf42370..2becf11950 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -245,6 +245,31 @@ typedef Feature2D DescriptorExtractor; //! @{ +/** @brief Class for implementing the wrapper which makes detectors and extractors to be affine invariant, +described as ASIFT in @cite YM11 . +*/ +class CV_EXPORTS_W AffineFeature : public Feature2D +{ +public: + /** + @param backend The detector/extractor you want to use as backend. + @param maxTilt The highest power index of tilt factor. 5 is used in the paper as tilt sampling range n. + @param minTilt The lowest power index of tilt factor. 0 is used in the paper. + @param tiltStep Tilt sampling step \f$\delta_t\f$ in Algorithm 1 in the paper. + @param rotateStepBase Rotation sampling step factor b in Algorithm 1 in the paper. + */ + CV_WRAP static Ptr create(const Ptr& backend, + int maxTilt = 5, int minTilt = 0, float tiltStep = 1.4142135623730951f, float rotateStepBase = 72); + + CV_WRAP virtual void setViewParams(const std::vector& tilts, const std::vector& rolls) = 0; + CV_WRAP virtual void getViewParams(std::vector& tilts, std::vector& rolls) const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +typedef AffineFeature AffineFeatureDetector; +typedef AffineFeature AffineDescriptorExtractor; + + /** @brief Class for extracting keypoints and computing descriptors using the Scale Invariant Feature Transform (SIFT) algorithm by D. Lowe @cite Lowe04 . */ diff --git a/modules/features2d/src/affine_feature.cpp b/modules/features2d/src/affine_feature.cpp new file mode 100644 index 0000000000..41518d945d --- /dev/null +++ b/modules/features2d/src/affine_feature.cpp @@ -0,0 +1,358 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// This file is based on code issued with the following license. +/********************************************************************* +* Software License Agreement (BSD License) +* +* Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +* Copyright (C) 2008-2013, Willow Garage Inc., all rights reserved. +* Copyright (C) 2013, Evgeny Toropov, all rights reserved. +* Third party copyrights are property of their respective owners. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* * The name of the copyright holders may not be used to endorse +* or promote products derived from this software without specific +* prior written permission. 
+* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +* POSSIBILITY OF SUCH DAMAGE. +*********************************************************************/ + +/* + Guoshen Yu, Jean-Michel Morel, ASIFT: An Algorithm for Fully Affine + Invariant Comparison, Image Processing On Line, 1 (2011), pp. 11–38. + https://doi.org/10.5201/ipol.2011.my-asift + */ + +#include "precomp.hpp" +#include +namespace cv { + +class AffineFeature_Impl CV_FINAL : public AffineFeature +{ +public: + explicit AffineFeature_Impl(const Ptr& backend, + int maxTilt, int minTilt, float tiltStep, float rotateStepBase); + + int descriptorSize() const CV_OVERRIDE + { + return backend_->descriptorSize(); + } + + int descriptorType() const CV_OVERRIDE + { + return backend_->descriptorType(); + } + + int defaultNorm() const CV_OVERRIDE + { + return backend_->defaultNorm(); + } + + void detectAndCompute(InputArray image, InputArray mask, std::vector& keypoints, + OutputArray descriptors, bool useProvidedKeypoints=false) CV_OVERRIDE; + + void setViewParams(const std::vector& tilts, const std::vector& rolls) CV_OVERRIDE; + void getViewParams(std::vector& tilts, std::vector& rolls) const CV_OVERRIDE; + +protected: + void splitKeypointsByView(const std::vector& keypoints_, + std::vector< std::vector >& keypointsByView) const; + + const Ptr backend_; + int maxTilt_; + int minTilt_; + float tiltStep_; + float rotateStepBase_; + + // Tilt factors. + std::vector tilts_; + // Roll factors. 
+ std::vector rolls_; + +private: + AffineFeature_Impl(const AffineFeature_Impl &); // copy disabled + AffineFeature_Impl& operator=(const AffineFeature_Impl &); // assign disabled +}; + +AffineFeature_Impl::AffineFeature_Impl(const Ptr& backend, + int maxTilt, int minTilt, float tiltStep, float rotateStepBase) + : backend_(backend), maxTilt_(maxTilt), minTilt_(minTilt), tiltStep_(tiltStep), rotateStepBase_(rotateStepBase) +{ + int i = minTilt_; + if( i == 0 ) + { + tilts_.push_back(1); + rolls_.push_back(0); + i++; + } + float tilt = 1; + for( ; i <= maxTilt_; i++ ) + { + tilt *= tiltStep_; + float rotateStep = rotateStepBase_ / tilt; + int rollN = cvFloor(180.0f / rotateStep); + if( rollN * rotateStep == 180.0f ) + rollN--; + for( int j = 0; j <= rollN; j++ ) + { + tilts_.push_back(tilt); + rolls_.push_back(rotateStep * j); + } + } +} + +void AffineFeature_Impl::setViewParams(const std::vector& tilts, + const std::vector& rolls) +{ + CV_Assert(tilts.size() == rolls.size()); + tilts_ = tilts; + rolls_ = rolls; +} + +void AffineFeature_Impl::getViewParams(std::vector& tilts, + std::vector& rolls) const +{ + tilts = tilts_; + rolls = rolls_; +} + +void AffineFeature_Impl::splitKeypointsByView(const std::vector& keypoints_, + std::vector< std::vector >& keypointsByView) const +{ + for( size_t i = 0; i < keypoints_.size(); i++ ) + { + const KeyPoint& kp = keypoints_[i]; + CV_Assert( kp.class_id >= 0 && kp.class_id < (int)tilts_.size() ); + keypointsByView[kp.class_id].push_back(kp); + } +} + +class skewedDetectAndCompute : public ParallelLoopBody +{ +public: + skewedDetectAndCompute( + const std::vector& _tilts, + const std::vector& _rolls, + std::vector< std::vector >& _keypointsCollection, + std::vector& _descriptorCollection, + const Mat& _image, + const Mat& _mask, + const bool _do_keypoints, + const bool _do_descriptors, + const Ptr& _backend) + : tilts(_tilts), + rolls(_rolls), + keypointsCollection(_keypointsCollection), + descriptorCollection(_descriptorCollection), + image(_image), + mask(_mask), + do_keypoints(_do_keypoints), + do_descriptors(_do_descriptors), + backend(_backend) {} + + void operator()( const cv::Range& range ) const CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + + const int begin = range.start; + const int end = range.end; + + for( int a = begin; a < end; a++ ) + { + Mat warpedImage, warpedMask; + Matx23f pose, invPose; + affineSkew(tilts[a], rolls[a], warpedImage, warpedMask, pose); + invertAffineTransform(pose, invPose); + + std::vector wKeypoints; + Mat wDescriptors; + if( !do_keypoints ) + { + const std::vector& keypointsInView = keypointsCollection[a]; + if( keypointsInView.size() == 0 ) // when there are no keypoints in this affine view + continue; + + std::vector pts_, pts; + KeyPoint::convert(keypointsInView, pts_); + transform(pts_, pts, pose); + wKeypoints.resize(keypointsInView.size()); + for( size_t wi = 0; wi < wKeypoints.size(); wi++ ) + { + wKeypoints[wi] = keypointsInView[wi]; + wKeypoints[wi].pt = pts[wi]; + } + } + backend->detectAndCompute(warpedImage, warpedMask, wKeypoints, wDescriptors, !do_keypoints); + if( do_keypoints ) + { + // KeyPointsFilter::runByPixelsMask( wKeypoints, warpedMask ); + if( wKeypoints.size() == 0 ) + { + keypointsCollection[a].clear(); + continue; + } + std::vector pts_, pts; + KeyPoint::convert(wKeypoints, pts_); + transform(pts_, pts, invPose); + + keypointsCollection[a].resize(wKeypoints.size()); + for( size_t wi = 0; wi < wKeypoints.size(); wi++ ) + { + keypointsCollection[a][wi] = wKeypoints[wi]; + 
keypointsCollection[a][wi].pt = pts[wi]; + keypointsCollection[a][wi].class_id = a; + } + } + if( do_descriptors ) + wDescriptors.copyTo(descriptorCollection[a]); + } + } +private: + void affineSkew(float tilt, float phi, + Mat& warpedImage, Mat& warpedMask, Matx23f& pose) const + { + int h = image.size().height; + int w = image.size().width; + Mat rotImage; + + Mat mask0; + if( mask.empty() ) + mask0 = Mat(h, w, CV_8UC1, 255); + else + mask0 = mask; + pose = Matx23f(1,0,0, + 0,1,0); + + if( phi == 0 ) + image.copyTo(rotImage); + else + { + phi = phi * (float)CV_PI / 180; + float s = std::sin(phi); + float c = std::cos(phi); + Matx22f A(c, -s, s, c); + Matx corners(0, 0, (float)w, 0, (float)w,(float)h, 0, (float)h); + Mat tf(corners * A.t()); + Mat tcorners; + tf.convertTo(tcorners, CV_32S); + Rect rect = boundingRect(tcorners); + h = rect.height; w = rect.width; + pose = Matx23f(c, -s, -(float)rect.x, + s, c, -(float)rect.y); + warpAffine(image, rotImage, pose, Size(w, h), INTER_LINEAR, BORDER_REPLICATE); + } + if( tilt == 1 ) + warpedImage = rotImage; + else + { + float s = 0.8f * sqrt(tilt * tilt - 1); + GaussianBlur(rotImage, rotImage, Size(0, 0), s, 0.01); + resize(rotImage, warpedImage, Size(0, 0), 1.0/tilt, 1.0, INTER_NEAREST); + pose(0, 0) /= tilt; + pose(0, 1) /= tilt; + pose(0, 2) /= tilt; + } + if( phi != 0 || tilt != 1 ) + warpAffine(mask0, warpedMask, pose, warpedImage.size(), INTER_NEAREST); + } + + + const std::vector& tilts; + const std::vector& rolls; + std::vector< std::vector >& keypointsCollection; + std::vector& descriptorCollection; + const Mat& image; + const Mat& mask; + const bool do_keypoints; + const bool do_descriptors; + const Ptr& backend; +}; + +void AffineFeature_Impl::detectAndCompute(InputArray _image, InputArray _mask, + std::vector& keypoints, + OutputArray _descriptors, + bool useProvidedKeypoints) +{ + CV_TRACE_FUNCTION(); + + bool do_keypoints = !useProvidedKeypoints; + bool do_descriptors = _descriptors.needed(); + Mat image = _image.getMat(), mask = _mask.getMat(); + Mat descriptors; + + if( (!do_keypoints && !do_descriptors) || _image.empty() ) + return; + + std::vector< std::vector > keypointsCollection(tilts_.size()); + std::vector< Mat > descriptorCollection(tilts_.size()); + + if( do_keypoints ) + keypoints.clear(); + else + splitKeypointsByView(keypoints, keypointsCollection); + + parallel_for_(Range(0, (int)tilts_.size()), skewedDetectAndCompute(tilts_, rolls_, keypointsCollection, descriptorCollection, + image, mask, do_keypoints, do_descriptors, backend_)); + + if( do_keypoints ) + for( size_t i = 0; i < keypointsCollection.size(); i++ ) + { + const std::vector& keys = keypointsCollection[i]; + keypoints.insert(keypoints.end(), keys.begin(), keys.end()); + } + + if( do_descriptors ) + { + _descriptors.create((int)keypoints.size(), backend_->descriptorSize(), backend_->descriptorType()); + descriptors = _descriptors.getMat(); + int iter = 0; + for( size_t i = 0; i < descriptorCollection.size(); i++ ) + { + const Mat& descs = descriptorCollection[i]; + if( descs.empty() ) + continue; + Mat roi(descriptors, Rect(0, iter, descriptors.cols, descs.rows)); + descs.copyTo(roi); + iter += descs.rows; + } + } +} + + +Ptr AffineFeature::create(const Ptr& backend, + int maxTilt, int minTilt, float tiltStep, float rotateStepBase) +{ + CV_Assert(minTilt < maxTilt); + CV_Assert(tiltStep > 0); + CV_Assert(rotateStepBase > 0); + return makePtr(backend, maxTilt, minTilt, tiltStep, rotateStepBase); +} + +String AffineFeature::getDefaultName() const +{ + 
return (Feature2D::getDefaultName() + ".AffineFeature"); +} + +} // namespace diff --git a/modules/features2d/test/test_affine_feature.cpp b/modules/features2d/test/test_affine_feature.cpp new file mode 100644 index 0000000000..f40f21ed8d --- /dev/null +++ b/modules/features2d/test/test_affine_feature.cpp @@ -0,0 +1,185 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "test_precomp.hpp" + +// #define GENERATE_DATA // generate data in debug mode + +namespace opencv_test { namespace { + +#ifndef GENERATE_DATA +static bool isSimilarKeypoints( const KeyPoint& p1, const KeyPoint& p2 ) +{ + const float maxPtDif = 1.f; + const float maxSizeDif = 1.f; + const float maxAngleDif = 2.f; + const float maxResponseDif = 0.1f; + + float dist = (float)cv::norm( p1.pt - p2.pt ); + return (dist < maxPtDif && + fabs(p1.size - p2.size) < maxSizeDif && + abs(p1.angle - p2.angle) < maxAngleDif && + abs(p1.response - p2.response) < maxResponseDif && + (p1.octave & 0xffff) == (p2.octave & 0xffff) // do not care about sublayers and class_id + ); +} +#endif + +TEST(Features2d_AFFINE_FEATURE, regression) +{ + Mat image = imread(cvtest::findDataFile("features2d/tsukuba.png")); + string xml = cvtest::TS::ptr()->get_data_path() + "asift/regression_cpp.xml.gz"; + ASSERT_FALSE(image.empty()); + + Mat gray; + cvtColor(image, gray, COLOR_BGR2GRAY); + + // Default ASIFT generates too large descriptors. This test uses small maxTilt to suppress the size of testdata. + Ptr ext = AffineFeature::create(SIFT::create(), 2, 0, 1.4142135623730951f, 144.0f); + Mat mpt, msize, mangle, mresponse, moctave, mclass_id; +#ifdef GENERATE_DATA + // calculate + vector calcKeypoints; + Mat calcDescriptors; + ext->detectAndCompute(gray, Mat(), calcKeypoints, calcDescriptors, false); + + // create keypoints XML + FileStorage fs(xml, FileStorage::WRITE); + ASSERT_TRUE(fs.isOpened()) << xml; + std::cout << "Creating keypoints XML..." 
<< std::endl; + + mpt = Mat(calcKeypoints.size(), 2, CV_32F); + msize = Mat(calcKeypoints.size(), 1, CV_32F); + mangle = Mat(calcKeypoints.size(), 1, CV_32F); + mresponse = Mat(calcKeypoints.size(), 1, CV_32F); + moctave = Mat(calcKeypoints.size(), 1, CV_32S); + mclass_id = Mat(calcKeypoints.size(), 1, CV_32S); + + for( size_t i = 0; i < calcKeypoints.size(); i++ ) + { + const KeyPoint& key = calcKeypoints[i]; + mpt.at(i, 0) = key.pt.x; + mpt.at(i, 1) = key.pt.y; + msize.at(i, 0) = key.size; + mangle.at(i, 0) = key.angle; + mresponse.at(i, 0) = key.response; + moctave.at(i, 0) = key.octave; + mclass_id.at(i, 0) = key.class_id; + } + + fs << "keypoints_pt" << mpt; + fs << "keypoints_size" << msize; + fs << "keypoints_angle" << mangle; + fs << "keypoints_response" << mresponse; + fs << "keypoints_octave" << moctave; + fs << "keypoints_class_id" << mclass_id; + + // create descriptor XML + fs << "descriptors" << calcDescriptors; + fs.release(); +#else + const float badCountsRatio = 0.01f; + const float badDescriptorDist = 1.0f; + const float maxBadKeypointsRatio = 0.15f; + const float maxBadDescriptorRatio = 0.15f; + + // read keypoints + vector validKeypoints; + Mat validDescriptors; + FileStorage fs(xml, FileStorage::READ); + ASSERT_TRUE(fs.isOpened()) << xml; + + fs["keypoints_pt"] >> mpt; + ASSERT_EQ(mpt.type(), CV_32F); + fs["keypoints_size"] >> msize; + ASSERT_EQ(msize.type(), CV_32F); + fs["keypoints_angle"] >> mangle; + ASSERT_EQ(mangle.type(), CV_32F); + fs["keypoints_response"] >> mresponse; + ASSERT_EQ(mresponse.type(), CV_32F); + fs["keypoints_octave"] >> moctave; + ASSERT_EQ(moctave.type(), CV_32S); + fs["keypoints_class_id"] >> mclass_id; + ASSERT_EQ(mclass_id.type(), CV_32S); + + validKeypoints.resize(mpt.rows); + for( int i = 0; i < (int)validKeypoints.size(); i++ ) + { + validKeypoints[i].pt.x = mpt.at(i, 0); + validKeypoints[i].pt.y = mpt.at(i, 1); + validKeypoints[i].size = msize.at(i, 0); + validKeypoints[i].angle = mangle.at(i, 0); + validKeypoints[i].response = mresponse.at(i, 0); + validKeypoints[i].octave = moctave.at(i, 0); + validKeypoints[i].class_id = mclass_id.at(i, 0); + } + + // read descriptors + fs["descriptors"] >> validDescriptors; + fs.release(); + + // calc and compare keypoints + vector calcKeypoints; + ext->detectAndCompute(gray, Mat(), calcKeypoints, noArray(), false); + + float countRatio = (float)validKeypoints.size() / (float)calcKeypoints.size(); + ASSERT_LT(countRatio, 1 + badCountsRatio) << "Bad keypoints count ratio."; + ASSERT_GT(countRatio, 1 - badCountsRatio) << "Bad keypoints count ratio."; + + int badPointCount = 0, commonPointCount = max((int)validKeypoints.size(), (int)calcKeypoints.size()); + for( size_t v = 0; v < validKeypoints.size(); v++ ) + { + int nearestIdx = -1; + float minDist = std::numeric_limits::max(); + float angleDistOfNearest = std::numeric_limits::max(); + + for( size_t c = 0; c < calcKeypoints.size(); c++ ) + { + if( validKeypoints[v].class_id != calcKeypoints[c].class_id ) + continue; + float curDist = (float)cv::norm( calcKeypoints[c].pt - validKeypoints[v].pt ); + if( curDist < minDist ) + { + minDist = curDist; + nearestIdx = (int)c; + angleDistOfNearest = abs( calcKeypoints[c].angle - validKeypoints[v].angle ); + } + else if( curDist == minDist ) // the keypoints whose positions are same but angles are different + { + float angleDist = abs( calcKeypoints[c].angle - validKeypoints[v].angle ); + if( angleDist < angleDistOfNearest ) + { + nearestIdx = (int)c; + angleDistOfNearest = angleDist; + } + } + } + if( 
nearestIdx == -1 || !isSimilarKeypoints( validKeypoints[v], calcKeypoints[nearestIdx] ) ) + badPointCount++; + } + float badKeypointsRatio = (float)badPointCount / (float)commonPointCount; + std::cout << "badKeypointsRatio: " << badKeypointsRatio << std::endl; + ASSERT_LT( badKeypointsRatio , maxBadKeypointsRatio ) << "Bad accuracy!"; + + // Calc and compare descriptors. This uses validKeypoints for extraction. + Mat calcDescriptors; + ext->detectAndCompute(gray, Mat(), validKeypoints, calcDescriptors, true); + + int dim = validDescriptors.cols; + int badDescriptorCount = 0; + L1 distance; + + for( int i = 0; i < (int)validKeypoints.size(); i++ ) + { + float dist = distance( validDescriptors.ptr(i), calcDescriptors.ptr(i), dim ); + if( dist > badDescriptorDist ) + badDescriptorCount++; + } + float badDescriptorRatio = (float)badDescriptorCount / (float)validKeypoints.size(); + std::cout << "badDescriptorRatio: " << badDescriptorRatio << std::endl; + ASSERT_LT( badDescriptorRatio, maxBadDescriptorRatio ) << "Too many descriptors mismatched."; +#endif +} + +}} // namespace diff --git a/modules/flann/include/opencv2/flann.hpp b/modules/flann/include/opencv2/flann.hpp index 293990752b..9013ae4308 100644 --- a/modules/flann/include/opencv2/flann.hpp +++ b/modules/flann/include/opencv2/flann.hpp @@ -191,8 +191,28 @@ public: KDTreeIndexParams( int trees = 4 ); }; @endcode + - **HierarchicalClusteringIndexParams** When passing an object of this type the index constructed + will be a hierarchical tree of clusters, dividing each set of points into n clusters whose centers + are picked among the points without further refinement of their position. + This algorithm fits both floating, integer and binary vectors. : + @code + struct HierarchicalClusteringIndexParams : public IndexParams + { + HierarchicalClusteringIndexParams( + int branching = 32, + flann_centers_init_t centers_init = CENTERS_RANDOM, + int trees = 4, + int leaf_size = 100); + + }; + @endcode - **KMeansIndexParams** When passing an object of this type the index constructed will be a - hierarchical k-means tree. : + hierarchical k-means tree (one tree by default), dividing each set of points into n clusters + whose barycenters are refined iteratively. + Note that this algorithm has been extended to the support of binary vectors as an alternative + to LSH when knn search speed is the criterium. It will also outperform LSH when processing + directly (i.e. without the use of MCA/PCA) datasets whose points share mostly the same values + for most of the dimensions. It is recommended to set more than one tree with binary data. : @code struct KMeansIndexParams : public IndexParams { @@ -201,6 +221,13 @@ public: int iterations = 11, flann_centers_init_t centers_init = CENTERS_RANDOM, float cb_index = 0.2 ); + + KMeansIndexParams( + int branching, + int iterations, + flann_centers_init_t centers_init, + float cb_index, + int trees ); }; @endcode - **CompositeIndexParams** When using a parameters object of this type the index created @@ -219,7 +246,8 @@ public: - **LshIndexParams** When using a parameters object of this type the index created uses multi-probe LSH (by Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity Search by Qin Lv, William Josephson, Zhe Wang, Moses Charikar, Kai Li., Proceedings of the 33rd - International Conference on Very Large Data Bases (VLDB). Vienna, Austria. September 2007) : + International Conference on Very Large Data Bases (VLDB). Vienna, Austria. September 2007). 
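A short usage sketch for the index types documented above: a hierarchical k-means index built with an explicit tree count, and a multi-probe LSH index for binary descriptors. The five-argument KMeansIndexParams constructor is the overload documented in this hunk; the concrete parameter values and matrix names below are placeholders, not recommendations from the patch.

// Usage sketch only; descriptor matrices are assumed to come from a detector
// such as SIFT (CV_32F) or ORB/BRISK (CV_8U).
#include <opencv2/flann.hpp>

void buildFlannIndexes(const cv::Mat& floatDescriptors, const cv::Mat& binaryDescriptors)
{
    // Hierarchical k-means tree; the trailing argument is the tree count
    // provided by the extended constructor documented above.
    cv::flann::Index kmeansIndex(
        floatDescriptors,
        cv::flann::KMeansIndexParams(32, 11, cvflann::FLANN_CENTERS_RANDOM, 0.2f, 4));

    // Multi-probe LSH, intended for binary descriptors.
    cv::flann::Index lshIndex(
        binaryDescriptors,
        cv::flann::LshIndexParams(6, 12, 1),
        cvflann::FLANN_DIST_HAMMING);

    // HierarchicalClusteringIndexParams(branching, centers_init, trees, leaf_size)
    // can be passed the same way for float, integer or binary data.

    cv::Mat indices, dists;
    kmeansIndex.knnSearch(floatDescriptors.row(0), indices, dists, 5,
                          cv::flann::SearchParams(32));
}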
+ This algorithm is designed for binary vectors. : @code struct LshIndexParams : public IndexParams { diff --git a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h index 9d01644aad..b7a650ff00 100644 --- a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h +++ b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h @@ -404,34 +404,16 @@ public: */ virtual ~HierarchicalClusteringIndex() { - free_elements(); - if (root!=NULL) { delete[] root; } if (indices!=NULL) { + free_indices(); delete[] indices; } } - - /** - * Release the inner elements of indices[] - */ - void free_elements() - { - if (indices!=NULL) { - for(int i=0; i::max)(); @@ -367,6 +382,13 @@ public: } cb_index_ = 0.4f; + root_ = new KMeansNodePtr[trees_]; + indices_ = new int*[trees_]; + + for (int i=0; i(); - std::memset(root_, 0, sizeof(KMeansNode)); + for (int i=0; i(); + std::memset(root_[i], 0, sizeof(KMeansNode)); - if(is_kdtree_distance::val || is_vector_space_distance::val) - { - computeNodeStatistics(root_, indices_, (unsigned int)size_); - computeClustering(root_, indices_, (int)size_, branching_,0); - } - else - { - computeBitfieldNodeStatistics(root_, indices_, (unsigned int)size_); - computeBitfieldClustering(root_, indices_, (int)size_, branching_,0); + if(is_kdtree_distance::val || is_vector_space_distance::val) { + computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_); + computeClustering(root_[i], indices_[i], (int)size_, branching_,0); + } + else { + computeBitfieldNodeStatistics(root_[i], indices_[i], (unsigned int)size_); + computeBitfieldClustering(root_[i], indices_[i], (int)size_, branching_,0); + } } } @@ -456,35 +481,43 @@ public: save_value(stream, iterations_); save_value(stream, memoryCounter_); save_value(stream, cb_index_); - save_value(stream, *indices_, (int)size_); - - save_tree(stream, root_); + save_value(stream, trees_); + for (int i=0; i& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE { - int maxChecks = get_param(searchParams,"checks",32); + const int maxChecks = get_param(searchParams,"checks",32); if (maxChecks==FLANN_CHECKS_UNLIMITED) { - findExactNN(root_, result, vec); + findExactNN(root_[0], result, vec); } else { // Priority queue storing intermediate branches in the best-bin-first search Heap* heap = new Heap((int)size_); int checks = 0; - findNN(root_, result, vec, checks, maxChecks, heap); + for (int i=0; i= maxChecks) && result.full()) + break; + } BranchSt branch; while (heap->popMin(branch) && (checkspivot), (int)veclen_); if (node->childs==NULL) { - int indices_offset = (int)(node->indices - indices_); + int indices_offset = (int)(node->indices - indices_[num]); save_value(stream, indices_offset); } else { for(int i=0; ichilds[i]); + save_tree(stream, node->childs[i], num); } } } - void load_tree(FILE* stream, KMeansNodePtr& node) + void load_tree(FILE* stream, KMeansNodePtr& node, int num) { node = pool_.allocate(); load_value(stream, *node); @@ -636,12 +672,12 @@ private: if (node->childs==NULL) { int indices_offset; load_value(stream, indices_offset); - node->indices = indices_ + indices_offset; + node->indices = indices_[num] + indices_offset; } else { node->childs = pool_.allocate(branching_); for(int i=0; ichilds[i]); + load_tree(stream, node->childs[i], num); } } } @@ -660,6 +696,32 @@ private: } } + void free_centers() + { + if (root_ != NULL) { + for(int i=0; ichilds==NULL) { - if (checks>=maxChecks) { - if 
(result.full()) return; + if ((checks>=maxChecks) && result.full()) { + return; } checks += node->size; for (int i=0; isize; ++i) { @@ -1397,6 +1497,9 @@ private: /** The branching factor used in the hierarchical k-means clustering */ int branching_; + /** Number of kmeans trees (default is one) */ + int trees_; + /** Maximum number of iterations to use when performing k-means clustering */ int iterations_; @@ -1432,12 +1535,12 @@ private: /** * The root node in the tree. */ - KMeansNodePtr root_; + KMeansNodePtr* root_; /** * Array of indices to vectors in the dataset. */ - int* indices_; + int** indices_; /** * The distance diff --git a/modules/imgcodecs/src/grfmt_jpeg2000.cpp b/modules/imgcodecs/src/grfmt_jpeg2000.cpp index f4bb09718d..0f80d89c8d 100644 --- a/modules/imgcodecs/src/grfmt_jpeg2000.cpp +++ b/modules/imgcodecs/src/grfmt_jpeg2000.cpp @@ -378,7 +378,7 @@ bool Jpeg2KDecoder::readComponent8u( uchar *data, void *_buffer, for( y = 0; y < yend - ystart; ) { - jas_seqent_t* pix_row = &jas_matrix_get( buffer, y / ystep, 0 ); + jas_seqent_t* pix_row = jas_matrix_getref( buffer, y / ystep, 0 ); uchar* dst = data + (y - yoffset) * step - xoffset; if( xstep == 1 ) @@ -444,7 +444,7 @@ bool Jpeg2KDecoder::readComponent16u( unsigned short *data, void *_buffer, for( y = 0; y < yend - ystart; ) { - jas_seqent_t* pix_row = &jas_matrix_get( buffer, y / ystep, 0 ); + jas_seqent_t* pix_row = jas_matrix_getref( buffer, y / ystep, 0 ); ushort* dst = data + (y - yoffset) * step - xoffset; if( xstep == 1 ) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 6c22ea4f8f..4ac0d9db50 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -2310,7 +2310,7 @@ CV_EXPORTS_W void warpAffine( InputArray src, OutputArray dst, const Scalar& borderValue = Scalar()); /** @example samples/cpp/warpPerspective_demo.cpp -An example program shows using cv::findHomography and cv::warpPerspective for image warping +An example program shows using cv::getPerspectiveTransform and cv::warpPerspective for image warping */ /** @brief Applies a perspective transformation to an image. diff --git a/modules/stitching/include/opencv2/stitching.hpp b/modules/stitching/include/opencv2/stitching.hpp index 016e7d8023..f6e7f70172 100644 --- a/modules/stitching/include/opencv2/stitching.hpp +++ b/modules/stitching/include/opencv2/stitching.hpp @@ -272,7 +272,7 @@ public: @param pano Final pano. @return Status code. 
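Marking composePanorama(images, pano) with CV_WRAP above makes the overload callable from the bindings; the registration-then-compositing flow it enables looks like this in C++ (file names are placeholders), and the Python tests added below exercise the same sequence through cv.Stitcher.

// Two-step stitching sketch: estimate the camera transforms once, then compose.
#include <opencv2/imgcodecs.hpp>
#include <opencv2/stitching.hpp>
#include <vector>

int main()
{
    std::vector<cv::Mat> images = { cv::imread("a1.png"), cv::imread("a2.png") };

    cv::Ptr<cv::Stitcher> stitcher = cv::Stitcher::create(cv::Stitcher::PANORAMA);

    cv::Mat pano;
    if (stitcher->estimateTransform(images) != cv::Stitcher::OK)
        return 1;                               // registration failed
    if (stitcher->composePanorama(images, pano) != cv::Stitcher::OK)
        return 1;                               // compositing failed

    return cv::imwrite("pano.png", pano) ? 0 : 1;
}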
*/ - Status composePanorama(InputArrayOfArrays images, OutputArray pano); + CV_WRAP Status composePanorama(InputArrayOfArrays images, OutputArray pano); /** @overload */ CV_WRAP Status stitch(InputArrayOfArrays images, OutputArray pano); diff --git a/modules/stitching/misc/python/test/test_stitching.py b/modules/stitching/misc/python/test/test_stitching.py index 89c701f018..719f0583f2 100644 --- a/modules/stitching/misc/python/test/test_stitching.py +++ b/modules/stitching/misc/python/test/test_stitching.py @@ -19,6 +19,7 @@ class stitching_test(NewOpenCVTests): self.assertAlmostEqual(pano.shape[0], 685, delta=100, msg="rows: %r" % list(pano.shape)) self.assertAlmostEqual(pano.shape[1], 1025, delta=100, msg="cols: %r" % list(pano.shape)) + class stitching_detail_test(NewOpenCVTests): def test_simple(self): @@ -82,5 +83,37 @@ class stitching_detail_test(NewOpenCVTests): timelapser = cv.detail.Timelapser_createDefault(cv.detail.Timelapser_CROP); self.assertIsNotNone(timelapser) + +class stitching_compose_panorama_test_no_args(NewOpenCVTests): + + def test_simple(self): + + img1 = self.get_sample('stitching/a1.png') + img2 = self.get_sample('stitching/a2.png') + + stitcher = cv.Stitcher.create(cv.Stitcher_PANORAMA) + + stitcher.estimateTransform((img1, img2)) + + result, _ = stitcher.composePanorama() + + assert result == 0 + + +class stitching_compose_panorama_args(NewOpenCVTests): + + def test_simple(self): + + img1 = self.get_sample('stitching/a1.png') + img2 = self.get_sample('stitching/a2.png') + + stitcher = cv.Stitcher.create(cv.Stitcher_PANORAMA) + + stitcher.estimateTransform((img1, img2)) + result, _ = stitcher.composePanorama((img1, img2)) + + assert result == 0 + + if __name__ == '__main__': NewOpenCVTests.bootstrap() diff --git a/samples/cpp/asift.cpp b/samples/cpp/asift.cpp new file mode 100644 index 0000000000..568954058d --- /dev/null +++ b/samples/cpp/asift.cpp @@ -0,0 +1,199 @@ +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace cv; + +static void help(char** argv) +{ + cout + << "This is a sample usage of AffineFeature detector/extractor.\n" + << "And this is a C++ version of samples/python/asift.py\n" + << "Usage: " << argv[0] << "\n" + << " [ --feature= ] # Feature to use.\n" + << " [ --flann ] # use Flann-based matcher instead of bruteforce.\n" + << " [ --maxlines= ] # The maximum number of lines in visualizing the matching result.\n" + << " [ --image1= ]\n" + << " [ --image2= ] # Path to images to compare." 
+ << endl; +} + +static double timer() +{ + return getTickCount() / getTickFrequency(); +} + +int main(int argc, char** argv) +{ + vector fileName; + cv::CommandLineParser parser(argc, argv, + "{help h ||}" + "{feature|brisk|}" + "{flann||}" + "{maxlines|50|}" + "{image1|aero1.jpg|}{image2|aero3.jpg|}"); + if (parser.has("help")) + { + help(argv); + return 0; + } + string feature = parser.get("feature"); + bool useFlann = parser.has("flann"); + int maxlines = parser.get("maxlines"); + fileName.push_back(samples::findFile(parser.get("image1"))); + fileName.push_back(samples::findFile(parser.get("image2"))); + if (!parser.check()) + { + parser.printErrors(); + cout << "See --help (or missing '=' between argument name and value?)" << endl; + return 1; + } + + Mat img1 = imread(fileName[0], IMREAD_GRAYSCALE); + Mat img2 = imread(fileName[1], IMREAD_GRAYSCALE); + if (img1.empty()) + { + cerr << "Image " << fileName[0] << " is empty or cannot be found" << endl; + return 1; + } + if (img2.empty()) + { + cerr << "Image " << fileName[1] << " is empty or cannot be found" << endl; + return 1; + } + + Ptr backend; + Ptr matcher; + + if (feature == "sift") + { + backend = SIFT::create(); + if (useFlann) + matcher = DescriptorMatcher::create("FlannBased"); + else + matcher = DescriptorMatcher::create("BruteForce"); + } + else if (feature == "orb") + { + backend = ORB::create(); + if (useFlann) + matcher = makePtr(makePtr(6, 12, 1)); + else + matcher = DescriptorMatcher::create("BruteForce-Hamming"); + } + else if (feature == "brisk") + { + backend = BRISK::create(); + if (useFlann) + matcher = makePtr(makePtr(6, 12, 1)); + else + matcher = DescriptorMatcher::create("BruteForce-Hamming"); + } + else + { + cerr << feature << " is not supported. See --help" << endl; + return 1; + } + + cout << "extracting with " << feature << "..." << endl; + Ptr ext = AffineFeature::create(backend); + vector kp1, kp2; + Mat desc1, desc2; + + ext->detectAndCompute(img1, Mat(), kp1, desc1); + ext->detectAndCompute(img2, Mat(), kp2, desc2); + cout << "img1 - " << kp1.size() << " features, " + << "img2 - " << kp2.size() << " features" + << endl; + + cout << "matching with " << (useFlann ? "flann" : "bruteforce") << "..." << endl; + double start = timer(); + // match and draw + vector< vector > rawMatches; + vector p1, p2; + vector distances; + matcher->knnMatch(desc1, desc2, rawMatches, 2); + // filter_matches + for (size_t i = 0; i < rawMatches.size(); i++) + { + const vector& m = rawMatches[i]; + if (m.size() == 2 && m[0].distance < m[1].distance * 0.75) + { + p1.push_back(kp1[m[0].queryIdx].pt); + p2.push_back(kp2[m[0].trainIdx].pt); + distances.push_back(m[0].distance); + } + } + vector status; + vector< pair > pointPairs; + Mat H = findHomography(p1, p2, status, RANSAC); + int inliers = 0; + for (size_t i = 0; i < status.size(); i++) + { + if (status[i]) + { + pointPairs.push_back(make_pair(p1[i], p2[i])); + distances[inliers] = distances[i]; + // CV_Assert(inliers <= (int)i); + inliers++; + } + } + distances.resize(inliers); + + cout << "execution time: " << fixed << setprecision(2) << (timer()-start)*1000 << " ms" << endl; + cout << inliers << " / " << status.size() << " inliers/matched" << endl; + + cout << "visualizing..." 
<< endl; + vector indices(inliers); + cv::sortIdx(distances, indices, SORT_EVERY_ROW+SORT_ASCENDING); + + // explore_match + int h1 = img1.size().height; + int w1 = img1.size().width; + int h2 = img2.size().height; + int w2 = img2.size().width; + Mat vis = Mat::zeros(max(h1, h2), w1+w2, CV_8U); + img1.copyTo(Mat(vis, Rect(0, 0, w1, h1))); + img2.copyTo(Mat(vis, Rect(w1, 0, w2, h2))); + cvtColor(vis, vis, COLOR_GRAY2BGR); + + vector corners(4); + corners[0] = Point2f(0, 0); + corners[1] = Point2f((float)w1, 0); + corners[2] = Point2f((float)w1, (float)h1); + corners[3] = Point2f(0, (float)h1); + vector icorners; + perspectiveTransform(corners, corners, H); + transform(corners, corners, Matx23f(1,0,(float)w1,0,1,0)); + Mat(corners).convertTo(icorners, CV_32S); + polylines(vis, icorners, true, Scalar(255,255,255)); + + for (int i = 0; i < min(inliers, maxlines); i++) + { + int idx = indices[i]; + const Point2f& pi1 = pointPairs[idx].first; + const Point2f& pi2 = pointPairs[idx].second; + circle(vis, pi1, 2, Scalar(0,255,0), -1); + circle(vis, pi2 + Point2f((float)w1,0), 2, Scalar(0,255,0), -1); + line(vis, pi1, pi2 + Point2f((float)w1,0), Scalar(0,255,0)); + } + if (inliers > maxlines) + cout << "only " << maxlines << " inliers are visualized" << endl; + imshow("affine find_obj", vis); + + // Mat vis2 = Mat::zeros(max(h1, h2), w1+w2, CV_8U); + // Mat warp1; + // warpPerspective(img1, warp1, H, Size(w1, h1)); + // warp1.copyTo(Mat(vis2, Rect(0, 0, w1, h1))); + // img2.copyTo(Mat(vis2, Rect(w1, 0, w2, h2))); + // imshow("warped", vis2); + + waitKey(); + cout << "done" << endl; + return 0; +} diff --git a/samples/cpp/warpPerspective_demo.cpp b/samples/cpp/warpPerspective_demo.cpp index 4a9069f5d2..947abd4359 100644 --- a/samples/cpp/warpPerspective_demo.cpp +++ b/samples/cpp/warpPerspective_demo.cpp @@ -8,7 +8,6 @@ #include "opencv2/imgproc.hpp" #include "opencv2/imgcodecs.hpp" #include "opencv2/highgui.hpp" -#include "opencv2/calib3d.hpp" #include using namespace std; @@ -36,6 +35,7 @@ Mat warping(Mat image, Size warped_image_size, vector< Point2f> srcPoints, vecto String windowTitle = "Perspective Transformation Demo"; String labels[4] = { "TL","TR","BR","BL" }; vector< Point2f> roi_corners; +vector< Point2f> midpoints(4); vector< Point2f> dst_corners(4); int roiIndex = 0; bool dragging; @@ -99,21 +99,26 @@ int main(int argc, char** argv) imshow( windowTitle, image ); + midpoints[0] = (roi_corners[0] + roi_corners[1]) / 2; + midpoints[1] = (roi_corners[1] + roi_corners[2]) / 2; + midpoints[2] = (roi_corners[2] + roi_corners[3]) / 2; + midpoints[3] = (roi_corners[3] + roi_corners[0]) / 2; + dst_corners[0].x = 0; dst_corners[0].y = 0; - dst_corners[1].x = (float)std::max(norm(roi_corners[0] - roi_corners[1]), norm(roi_corners[2] - roi_corners[3])); + dst_corners[1].x = (float)norm(midpoints[1] - midpoints[3]); dst_corners[1].y = 0; - dst_corners[2].x = (float)std::max(norm(roi_corners[0] - roi_corners[1]), norm(roi_corners[2] - roi_corners[3])); - dst_corners[2].y = (float)std::max(norm(roi_corners[1] - roi_corners[2]), norm(roi_corners[3] - roi_corners[0])); + dst_corners[2].x = dst_corners[1].x; + dst_corners[2].y = (float)norm(midpoints[0] - midpoints[2]); dst_corners[3].x = 0; - dst_corners[3].y = (float)std::max(norm(roi_corners[1] - roi_corners[2]), norm(roi_corners[3] - roi_corners[0])); + dst_corners[3].y = dst_corners[2].y; Size warped_image_size = Size(cvRound(dst_corners[2].x), cvRound(dst_corners[2].y)); - Mat H = findHomography(roi_corners, dst_corners); //get homography + Mat M = 
getPerspectiveTransform(roi_corners, dst_corners); Mat warped_image; - warpPerspective(original_image, warped_image, H, warped_image_size); // do perspective transformation + warpPerspective(original_image, warped_image, M, warped_image_size); // do perspective transformation imshow("Warped Image", warped_image); } diff --git a/samples/data/opencv-logo-white.png b/samples/data/opencv-logo-white.png index 3c7098459e..a683e3569f 100644 Binary files a/samples/data/opencv-logo-white.png and b/samples/data/opencv-logo-white.png differ diff --git a/samples/data/opencv-logo.png b/samples/data/opencv-logo.png index bc71a2ae50..995bdc0fa6 100644 Binary files a/samples/data/opencv-logo.png and b/samples/data/opencv-logo.png differ diff --git a/samples/winrt/ImageManipulations/assets/StoreLogo.png b/samples/winrt/ImageManipulations/assets/StoreLogo.png index af64bf00ad..0fb23ff2cf 100644 Binary files a/samples/winrt/ImageManipulations/assets/StoreLogo.png and b/samples/winrt/ImageManipulations/assets/StoreLogo.png differ diff --git a/samples/winrt/ImageManipulations/assets/opencv-logo-150.png b/samples/winrt/ImageManipulations/assets/opencv-logo-150.png index ea685d651a..8f447ad30c 100644 Binary files a/samples/winrt/ImageManipulations/assets/opencv-logo-150.png and b/samples/winrt/ImageManipulations/assets/opencv-logo-150.png differ diff --git a/samples/winrt/ImageManipulations/assets/opencv-logo-30.png b/samples/winrt/ImageManipulations/assets/opencv-logo-30.png index efaf5468a1..449be5858e 100644 Binary files a/samples/winrt/ImageManipulations/assets/opencv-logo-30.png and b/samples/winrt/ImageManipulations/assets/opencv-logo-30.png differ diff --git a/samples/winrt/ImageManipulations/assets/windows-sdk.scale-100.png b/samples/winrt/ImageManipulations/assets/windows-sdk.scale-100.png index af64bf00ad..0fb23ff2cf 100644 Binary files a/samples/winrt/ImageManipulations/assets/windows-sdk.scale-100.png and b/samples/winrt/ImageManipulations/assets/windows-sdk.scale-100.png differ diff --git a/samples/winrt/JavaScript/images/logo.scale-100.png b/samples/winrt/JavaScript/images/logo.scale-100.png index ea685d651a..8f447ad30c 100644 Binary files a/samples/winrt/JavaScript/images/logo.scale-100.png and b/samples/winrt/JavaScript/images/logo.scale-100.png differ diff --git a/samples/winrt/JavaScript/images/smalllogo.scale-100.png b/samples/winrt/JavaScript/images/smalllogo.scale-100.png index efaf5468a1..f2d1877348 100644 Binary files a/samples/winrt/JavaScript/images/smalllogo.scale-100.png and b/samples/winrt/JavaScript/images/smalllogo.scale-100.png differ diff --git a/samples/winrt/JavaScript/images/windows-sdk.png b/samples/winrt/JavaScript/images/windows-sdk.png index af64bf00ad..0fb23ff2cf 100644 Binary files a/samples/winrt/JavaScript/images/windows-sdk.png and b/samples/winrt/JavaScript/images/windows-sdk.png differ diff --git a/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/Logo.png b/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/Logo.png index ea685d651a..8f447ad30c 100644 Binary files a/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/Logo.png and b/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/Logo.png differ diff --git a/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/SmallLogo.png b/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/SmallLogo.png index efaf5468a1..449be5858e 100644 Binary files a/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/SmallLogo.png and 
b/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/SmallLogo.png differ diff --git a/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/StoreLogo.png b/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/StoreLogo.png index af64bf00ad..0fb23ff2cf 100644 Binary files a/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/StoreLogo.png and b/samples/winrt/OcvImageProcessing/OcvImageProcessing/Assets/StoreLogo.png differ
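The warpPerspective_demo change above replaces findHomography with getPerspectiveTransform: with exactly four point correspondences the perspective map is determined exactly, so a least-squares/RANSAC estimator adds nothing. A minimal sketch of the swapped-in call (corner coordinates are made up for illustration):

// getPerspectiveTransform() computes the exact 3x3 mapping from four
// correspondences; findHomography() is only needed when there are more points
// and possible outliers.
#include <opencv2/imgproc.hpp>
#include <vector>

cv::Mat rectifyQuad(const cv::Mat& src)
{
    std::vector<cv::Point2f> roiCorners = {
        {78, 92}, {420, 60}, {450, 380}, {60, 400}};   // TL, TR, BR, BL in the source image
    std::vector<cv::Point2f> dstCorners = {
        {0, 0}, {400, 0}, {400, 320}, {0, 320}};       // target rectangle

    cv::Mat M = cv::getPerspectiveTransform(roiCorners, dstCorners);
    cv::Mat warped;
    cv::warpPerspective(src, warped, M, cv::Size(400, 320));
    return warped;
}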