From 86561fef906344f17137ec7a45220b4528579456 Mon Sep 17 00:00:00 2001 From: Allan Rodriguez Date: Thu, 4 Apr 2019 16:11:42 -0700 Subject: [PATCH 01/22] Added C++ version of digits.py. --- samples/cpp/digits.cpp | 375 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 375 insertions(+) create mode 100644 samples/cpp/digits.cpp diff --git a/samples/cpp/digits.cpp b/samples/cpp/digits.cpp new file mode 100644 index 0000000000..e3ef7fc59f --- /dev/null +++ b/samples/cpp/digits.cpp @@ -0,0 +1,375 @@ +#include "opencv2/core.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/imgcodecs.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/ml.hpp" + +#include +#include +#include + +using namespace cv; +using namespace std; + +const int SZ = 20; // size of each digit is SZ x SZ +const int CLASS_N = 10; +const char* DIGITS_FN = "digits.png"; + +static void help() +{ + cout << + "\n" + "SVM and KNearest digit recognition.\n" + "\n" + "Sample loads a dataset of handwritten digits from 'digits.png'.\n" + "Then it trains a SVM and KNearest classifiers on it and evaluates\n" + "their accuracy.\n" + "\n" + "Following preprocessing is applied to the dataset:\n" + " - Moment-based image deskew (see deskew())\n" + " - Digit images are split into 4 10x10 cells and 16-bin\n" + " histogram of oriented gradients is computed for each\n" + " cell\n" + " - Transform histograms to space with Hellinger metric (see [1] (RootSIFT))\n" + "\n" + "\n" + "[1] R. Arandjelovic, A. Zisserman\n" + " \"Three things everyone should know to improve object retrieval\"\n" + " http://www.robots.ox.ac.uk/~vgg/publications/2012/Arandjelovic12/arandjelovic12.pdf\n" + "\n" + "Usage:\n" + " ./digits\n" << endl; +} + +static void split2d(const Mat& image, const Size cell_size, vector& cells) +{ + int height = image.rows; + int width = image.cols; + + int sx = cell_size.width; + int sy = cell_size.height; + + cells.clear(); + + for (int i = 0; i < height; i += sy) + { + for (int j = 0; j < width; j += sx) + { + cells.push_back(image(Rect(j, i, sx, sy))); + } + } +} + +static void load_digits(const char* fn, vector& digits, vector& labels) +{ + digits.clear(); + labels.clear(); + + String filename = samples::findFile(fn); + + cout << "Loading " << filename << " ..." 
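    // digits.png is cut into SZ x SZ (20 x 20) tiles by split2d() below. The
    // labelling loop that follows relies on each of the CLASS_N classes occupying
    // an equal-sized, contiguous block of tiles in that image, so label i is
    // simply assigned to the i-th block of digits.size() / CLASS_N cells.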
<< endl; + + Mat digits_img = imread(filename, IMREAD_GRAYSCALE); + split2d(digits_img, Size(SZ, SZ), digits); + + for (int i = 0; i < CLASS_N; i++) + { + for (size_t j = 0; j < digits.size() / CLASS_N; j++) + { + labels.push_back(i); + } + } +} + +static void deskew(const Mat& img, Mat& deskewed_img) +{ + Moments m = moments(img); + + if (abs(m.mu02) < 0.01) + { + deskewed_img = img.clone(); + return; + } + + float skew = (float)(m.mu11 / m.mu02); + float M_vals[2][3] = {{1, skew, -0.5f * SZ * skew}, {0, 1, 0}}; + Mat M(Size(3, 2), CV_32F); + + for (int i = 0; i < M.rows; i++) + { + for (int j = 0; j < M.cols; j++) + { + M.at(i, j) = M_vals[i][j]; + } + } + + warpAffine(img, deskewed_img, M, Size(SZ, SZ), WARP_INVERSE_MAP | INTER_LINEAR); +} + +static void mosaic(const int width, const vector& images, Mat& grid) +{ + int mat_width = SZ * width; + int mat_height = SZ * (int)ceil((double)images.size() / width); + + if (!images.empty()) + { + grid = Mat(Size(mat_width, mat_height), images[0].type()); + + for (size_t i = 0; i < images.size(); i++) + { + Mat location_on_grid = grid(Rect(SZ * ((int)i % width), SZ * ((int)i / width), SZ, SZ)); + images[i].copyTo(location_on_grid); + } + } +} + +static void evaluate_model(const vector& predictions, const vector& digits, const vector& labels, Mat& mos) +{ + double err = 0; + + for (size_t i = 0; i < predictions.size(); i++) + { + if ((int)predictions[i] != labels[i]) + { + err++; + } + } + + err /= predictions.size(); + + cout << format("error: %.2f %%", err * 100) << endl; + + int confusion[10][10] = {}; + + for (size_t i = 0; i < labels.size(); i++) + { + confusion[labels[i]][(int)predictions[i]]++; + } + + cout << "confusion matrix:" << endl; + for (int i = 0; i < 10; i++) + { + for (int j = 0; j < 10; j++) + { + cout << format("%2d ", confusion[i][j]); + } + cout << endl; + } + + cout << endl; + + vector vis; + + for (size_t i = 0; i < digits.size(); i++) + { + Mat img; + cvtColor(digits[i], img, COLOR_GRAY2BGR); + + if ((int)predictions[i] != labels[i]) + { + for (int j = 0; j < img.rows; j++) + { + for (int k = 0; k < img.cols; k++) + { + img.at(j, k)[0] = 0; + img.at(j, k)[1] = 0; + } + } + } + + vis.push_back(img); + } + + mosaic(25, vis, mos); +} + +static void bincount(const Mat& x, const Mat& weights, const int min_length, vector& bins) +{ + double max_x_val = 0; + minMaxLoc(x, NULL, &max_x_val); + + bins = vector(max((int)max_x_val, min_length)); + + for (int i = 0; i < x.rows; i++) + { + for (int j = 0; j < x.cols; j++) + { + bins[x.at(i, j)] += weights.at(i, j); + } + } +} + +static void preprocess_hog(const vector& digits, Mat& hog) +{ + int bin_n = 16; + int half_cell = SZ / 2; + double eps = 1e-7; + + hog = Mat(Size(4 * bin_n, (int)digits.size()), CV_32F); + + for (size_t img_index = 0; img_index < digits.size(); img_index++) + { + Mat gx; + Sobel(digits[img_index], gx, CV_32F, 1, 0); + + Mat gy; + Sobel(digits[img_index], gy, CV_32F, 0, 1); + + Mat mag; + Mat ang; + cartToPolar(gx, gy, mag, ang); + + Mat bin(ang.size(), CV_32S); + + for (int i = 0; i < ang.rows; i++) + { + for (int j = 0; j < ang.cols; j++) + { + bin.at(i, j) = (int)(bin_n * ang.at(i, j) / (2 * CV_PI)); + } + } + + Mat bin_cells[] = { + bin(Rect(0, 0, half_cell, half_cell)), + bin(Rect(half_cell, 0, half_cell, half_cell)), + bin(Rect(0, half_cell, half_cell, half_cell)), + bin(Rect(half_cell, half_cell, half_cell, half_cell)) + }; + Mat mag_cells[] = { + mag(Rect(0, 0, half_cell, half_cell)), + mag(Rect(half_cell, 0, half_cell, half_cell)), + mag(Rect(0, 
half_cell, half_cell, half_cell)), + mag(Rect(half_cell, half_cell, half_cell, half_cell)) + }; + + vector hist; + hist.reserve(4 * bin_n); + + for (int i = 0; i < 4; i++) + { + vector partial_hist; + bincount(bin_cells[i], mag_cells[i], bin_n, partial_hist); + hist.insert(hist.end(), partial_hist.begin(), partial_hist.end()); + } + + // transform to Hellinger kernel + double sum = 0; + + for (size_t i = 0; i < hist.size(); i++) + { + sum += hist[i]; + } + + for (size_t i = 0; i < hist.size(); i++) + { + hist[i] /= sum + eps; + hist[i] = sqrt(hist[i]); + } + + double hist_norm = norm(hist); + + for (size_t i = 0; i < hist.size(); i++) + { + hog.at((int)img_index, (int)i) = (float)(hist[i] / (hist_norm + eps)); + } + } +} + +static void shuffle(vector& digits, vector& labels) +{ + vector shuffled_indexes(digits.size()); + + for (size_t i = 0; i < digits.size(); i++) + { + shuffled_indexes[i] = (int)i; + } + + randShuffle(shuffled_indexes); + + vector shuffled_digits(digits.size()); + vector shuffled_labels(labels.size()); + + for (size_t i = 0; i < shuffled_indexes.size(); i++) + { + shuffled_digits[shuffled_indexes[i]] = digits[i]; + shuffled_labels[shuffled_indexes[i]] = labels[i]; + } + + digits = shuffled_digits; + labels = shuffled_labels; +} + +int main() +{ + help(); + + vector digits; + vector labels; + + load_digits(DIGITS_FN, digits, labels); + + cout << "preprocessing..." << endl; + + // shuffle digits + shuffle(digits, labels); + + vector digits2; + + for (size_t i = 0; i < digits.size(); i++) + { + Mat deskewed_digit; + deskew(digits[i], deskewed_digit); + digits2.push_back(deskewed_digit); + } + + Mat samples; + + preprocess_hog(digits2, samples); + + int train_n = (int)(0.9 * samples.rows); + Mat test_set; + + vector digits_test(digits2.begin() + train_n, digits2.end()); + mosaic(25, digits_test, test_set); + imshow("test set", test_set); + + Mat samples_train = samples(Rect(0, 0, samples.cols, train_n)); + Mat samples_test = samples(Rect(0, train_n, samples.cols, samples.rows - train_n)); + vector labels_train(labels.begin(), labels.begin() + train_n); + vector labels_test(labels.begin() + train_n, labels.end()); + + Ptr k_nearest; + Ptr svm; + vector predictions; + Mat vis; + + cout << "training KNearest..." << endl; + k_nearest = ml::KNearest::create(); + k_nearest->train(samples_train, ml::ROW_SAMPLE, labels_train); + + // predict digits with KNearest + k_nearest->findNearest(samples_test, 4, predictions); + evaluate_model(predictions, digits_test, labels_test, vis); + imshow("KNearest test", vis); + k_nearest.release(); + + cout << "training SVM..." << endl; + svm = ml::SVM::create(); + svm->setGamma(5.383); + svm->setC(2.67); + svm->setKernel(ml::SVM::RBF); + svm->setType(ml::SVM::C_SVC); + svm->train(samples_train, ml::ROW_SAMPLE, labels_train); + + // predict digits with SVM + svm->predict(samples_test, predictions); + evaluate_model(predictions, digits_test, labels_test, vis); + imshow("SVM test", vis); + cout << "Saving SVM as \"digits_svm.yml\"..." 
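    // The trained SVM is written to disk below via save(); it can be reloaded later
    // without retraining, e.g. (roughly) with
    //   Ptr<ml::SVM> svm2 = Algorithm::load<ml::SVM>("digits_svm.yml");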
<< endl; + svm->save("digits_svm.yml"); + svm.release(); + + waitKey(); + + return 0; +} From 00861b668a548c4546e56fa3becd0c0a00fcd0de Mon Sep 17 00:00:00 2001 From: vdgussem Date: Tue, 16 Apr 2019 01:38:59 +0200 Subject: [PATCH 02/22] Changed the way Images are converted into rgba Mats so that JavaCamera2View also shows correct colors when the Image contains I420 or YV12 data --- .../org/opencv/android/JavaCamera2View.java | 105 +++++++++++------- 1 file changed, 66 insertions(+), 39 deletions(-) diff --git a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java index 045917edab..cdb03a7108 100644 --- a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java +++ b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java @@ -181,17 +181,7 @@ public class JavaCamera2View extends CameraBridgeViewBase { assert (planes.length == 3); assert (image.getFormat() == mPreviewFormat); - // see also https://developer.android.com/reference/android/graphics/ImageFormat.html#YUV_420_888 - // Y plane (0) non-interleaved => stride == 1; U/V plane interleaved => stride == 2 - assert (planes[0].getPixelStride() == 1); - assert (planes[1].getPixelStride() == 2); - assert (planes[2].getPixelStride() == 2); - - ByteBuffer y_plane = planes[0].getBuffer(); - ByteBuffer uv_plane = planes[1].getBuffer(); - Mat y_mat = new Mat(h, w, CvType.CV_8UC1, y_plane); - Mat uv_mat = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane); - JavaCamera2Frame tempFrame = new JavaCamera2Frame(y_mat, uv_mat, w, h); + JavaCamera2Frame tempFrame = new JavaCamera2Frame(image); deliverAndDrawFrame(tempFrame); tempFrame.release(); image.close(); @@ -334,50 +324,87 @@ public class JavaCamera2View extends CameraBridgeViewBase { private class JavaCamera2Frame implements CvCameraViewFrame { @Override public Mat gray() { - return mYuvFrameData.submat(0, mHeight, 0, mWidth); + Image.Plane[] planes = mImage.getPlanes(); + int w = mImage.getWidth(); + int h = mImage.getHeight(); + ByteBuffer y_plane = planes[0].getBuffer(); + mGray = new Mat(h, w, CvType.CV_8UC1, y_plane); + return mGray; } @Override public Mat rgba() { - if (mPreviewFormat == ImageFormat.NV21) - Imgproc.cvtColor(mYuvFrameData, mRgba, Imgproc.COLOR_YUV2RGBA_NV21, 4); - else if (mPreviewFormat == ImageFormat.YV12) - Imgproc.cvtColor(mYuvFrameData, mRgba, Imgproc.COLOR_YUV2RGB_I420, 4); // COLOR_YUV2RGBA_YV12 produces inverted colors - else if (mPreviewFormat == ImageFormat.YUV_420_888) { - assert (mUVFrameData != null); - Imgproc.cvtColorTwoPlane(mYuvFrameData, mUVFrameData, mRgba, Imgproc.COLOR_YUV2RGBA_NV21); - } else - throw new IllegalArgumentException("Preview Format can be NV21 or YV12"); + Image.Plane[] planes = mImage.getPlanes(); + int w = mImage.getWidth(); + int h = mImage.getHeight(); + int chromaPixelStride = planes[1].getPixelStride(); - return mRgba; + + if (chromaPixelStride == 2) { // Chroma channels are interleaved + ByteBuffer y_plane = planes[0].getBuffer(); + ByteBuffer uv_plane = planes[1].getBuffer(); + Mat y_mat = new Mat(h, w, CvType.CV_8UC1, y_plane); + Mat uv_mat = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane); + Imgproc.cvtColorTwoPlane(y_mat, uv_mat, mRgba, Imgproc.COLOR_YUV2RGBA_NV21); + return mRgba; + } else { // Chroma channels are not interleaved + byte[] yuv_bytes = new byte[w*(h+h/2)]; + ByteBuffer y_plane = planes[0].getBuffer(); + ByteBuffer u_plane = planes[1].getBuffer(); + ByteBuffer v_plane = 
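                // Planar (I420/YV12-style) chroma: the code below copies the Y, U and V
                // planes into one contiguous buffer (full-resolution Y, then the two
                // quarter-size chroma planes), skipping any chroma row-stride padding,
                // so a single COLOR_YUV2RGBA_I420 conversion can be applied to it.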
planes[2].getBuffer(); + + y_plane.get(yuv_bytes, 0, w*h); + + int chromaRowStride = planes[1].getRowStride(); + int chromaRowPadding = chromaRowStride - w/2; + + int offset = w*h; + if (chromaRowPadding == 0){ + // When the row stride of the chroma channels equals their width, we can copy + // the entire channels in one go + u_plane.get(yuv_bytes, offset, w*h/4); + offset += w*h/4; + v_plane.get(yuv_bytes, offset, w*h/4); + } else { + // When not equal, we need to copy the channels row by row + for (int i = 0; i < h/2; i++){ + u_plane.get(yuv_bytes, offset, w/2); + offset += w/2; + if (i < h/2-1){ + u_plane.position(u_plane.position() + chromaRowPadding); + } + } + for (int i = 0; i < h/2; i++){ + v_plane.get(yuv_bytes, offset, w/2); + offset += w/2; + if (i < h/2-1){ + v_plane.position(v_plane.position() + chromaRowPadding); + } + } + } + + Mat yuv_mat = new Mat(h+h/2, w, CvType.CV_8UC1); + yuv_mat.put(0, 0, yuv_bytes); + Imgproc.cvtColor(yuv_mat, mRgba, Imgproc.COLOR_YUV2RGBA_I420, 4); + return mRgba; + } } - public JavaCamera2Frame(Mat Yuv420sp, int width, int height) { - super(); - mWidth = width; - mHeight = height; - mYuvFrameData = Yuv420sp; - mUVFrameData = null; - mRgba = new Mat(); - } - public JavaCamera2Frame(Mat Y, Mat UV, int width, int height) { + public JavaCamera2Frame(Image image) { super(); - mWidth = width; - mHeight = height; - mYuvFrameData = Y; - mUVFrameData = UV; + mImage = image; mRgba = new Mat(); + mGray = new Mat(); } public void release() { mRgba.release(); + mGray.release(); } - private Mat mYuvFrameData; - private Mat mUVFrameData; + private Image mImage; private Mat mRgba; - private int mWidth; - private int mHeight; + private Mat mGray; }; } From c1981f28ad98e933d5a89ca1ae36ff7008175d08 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 18 Apr 2019 18:50:52 +0000 Subject: [PATCH 03/22] build: +OPENCV_ENABLE_MEMORY_SANITIZER flag --- CMakeLists.txt | 1 + cmake/OpenCVCompilerOptions.cmake | 3 +++ modules/core/include/opencv2/core/utility.hpp | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 759de6ece6..d177730006 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -470,6 +470,7 @@ OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers" OCV_OPTION(ENABLE_SOLUTION_FOLDERS "Solution folder in Visual Studio or in other IDEs" (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) ) OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add flags: -g -pg)" OFF IF CV_GCC ) OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC ) +OCV_OPTION(OPENCV_ENABLE_MEMORY_SANITIZER "Better support for memory/address sanitizers" OFF) OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC ) OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) ) OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) ) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index ee67599053..fc29d279c4 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -381,4 +381,7 @@ macro(ocv_add_modules_compiler_options) string(REPLACE "/W3" "/W4" ${flags} "${${flags}}") endforeach() endif() + if(OPENCV_ENABLE_MEMORY_SANITIZER) + add_definitions(-DOPENCV_ENABLE_MEMORY_SANITIZER=1) + endif() endmacro() diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp 
index 7a7158f5f9..cbec10b41b 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -116,7 +116,11 @@ CV_EXPORTS void setUseCollection(bool flag); // set implementation collection st } \endcode */ +#ifdef OPENCV_ENABLE_MEMORY_SANITIZER +template class AutoBuffer +#else template class AutoBuffer +#endif { public: typedef _Tp value_type; From 7d2190a5d4e0b16a4f2fa9dbf3cb154e15db42c6 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 13 Apr 2019 15:46:38 +0000 Subject: [PATCH 04/22] ts: +ENV{OPENCV_TEST_REQUIRE_DATA} similar to '--test_require_data' parameter --- modules/ts/src/ts.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index 810825bca6..3c3f0740d7 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -40,6 +40,8 @@ //M*/ #include "precomp.hpp" +#include + #include "opencv2/core/core_c.h" #include @@ -741,7 +743,7 @@ void checkIppStatus() } } -static bool checkTestData = false; +static bool checkTestData = cv::utils::getConfigurationParameterBool("OPENCV_TEST_REQUIRE_DATA", false); bool skipUnstableTests = false; bool runBigDataTests = false; int testThreads = 0; @@ -828,16 +830,16 @@ void testTearDown() void parseCustomOptions(int argc, char **argv) { - const char * const command_line_keys = + const string command_line_keys = string( "{ ipp test_ipp_check |false |check whether IPP works without failures }" "{ test_seed |809564 |seed for random numbers generator }" "{ test_threads |-1 |the number of worker threads, if parallel execution is enabled}" "{ skip_unstable |false |skip unstable tests }" "{ test_bigdata |false |run BigData tests (>=2Gb) }" - "{ test_require_data |false |fail on missing non-required test data instead of skip}" + "{ test_require_data |") + (checkTestData ? "true" : "false") + string("|fail on missing non-required test data instead of skip (env:OPENCV_TEST_REQUIRE_DATA)}" CV_TEST_TAGS_PARAMS "{ h help |false |print help info }" - ; + ); cv::CommandLineParser parser(argc, argv, command_line_keys); if (parser.get("help")) @@ -860,7 +862,8 @@ void parseCustomOptions(int argc, char **argv) skipUnstableTests = parser.get("skip_unstable"); runBigDataTests = parser.get("test_bigdata"); - checkTestData = parser.get("test_require_data"); + if (parser.has("test_require_data")) + checkTestData = parser.get("test_require_data"); activateTestTags(parser); } From ec39f0dc9eb51c4c85750088f8b0ae48d9932f60 Mon Sep 17 00:00:00 2001 From: Sheel-Patel <43507241+Sheel-Patel@users.noreply.github.com> Date: Tue, 23 Apr 2019 15:45:27 -0700 Subject: [PATCH 05/22] Merge pull request #14122 from Sheel-Patel:patch-1 Grammatical errors for help() in detect_mser.cpp (#14122) * Grammatical errors for help() statement Corrected spelling of "synthetic" and added grammatical clarification for keys to press to change view or use mouse. 
* Adjustment of superfluous spaces
---
 samples/cpp/detect_mser.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/samples/cpp/detect_mser.cpp b/samples/cpp/detect_mser.cpp
index 874978944c..5ab15fe98d 100644
--- a/samples/cpp/detect_mser.cpp
+++ b/samples/cpp/detect_mser.cpp
@@ -36,11 +36,11 @@ using namespace cv;

 static void help()
 {
-    cout << "\n This program demonstrates how to use MSER to detect extremal regions \n"
-            "Usage: \n"
-            " ./detect_mser \n"
+    cout << "\nThis program demonstrates how to use MSER to detect extremal regions\n"
+            "Usage:\n"
+            " ./detect_mser \n"
             "Press esc key when image window is active to change descriptor parameter\n"
-            "Press 2, 8, 4, 6, +,- or 5 keys in openGL windows to change view or use mouse\n";
+            "Press 2, 8, 4, 6, +, -, or 5 keys in openGL windows to change view or use mouse\n";
 }

 struct MSERParams

From 2f7fc1a598e87eb5fb013f57655d664416605130 Mon Sep 17 00:00:00 2001
From: Thang Tran
Date: Tue, 23 Apr 2019 16:53:17 +0200
Subject: [PATCH 06/22] calib3d: add find4QuadCornerSubpix java wrapper

Fixes: https://github.com/opencv/opencv/issues/14169
---
 modules/calib3d/include/opencv2/calib3d.hpp     | 2 +-
 modules/calib3d/misc/java/test/Calib3dTest.java | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp
index 3808526631..53b23d6035 100644
--- a/modules/calib3d/include/opencv2/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@@ -849,7 +849,7 @@ CV_EXPORTS_W bool findChessboardCorners( InputArray image, Size patternSize, Out
                                          int flags = CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE );

 //! finds subpixel-accurate positions of the chessboard corners
-CV_EXPORTS bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size );
+CV_EXPORTS_W bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size );

 /** @brief Renders the detected chessboard corners.
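// (The hunk above only swaps CV_EXPORTS for CV_EXPORTS_W; that macro marks the
// declaration for OpenCV's binding generators, which is what makes
// Calib3d.find4QuadCornerSubpix available to the Java test added below.)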
diff --git a/modules/calib3d/misc/java/test/Calib3dTest.java b/modules/calib3d/misc/java/test/Calib3dTest.java index 67193d9586..99153dfb21 100644 --- a/modules/calib3d/misc/java/test/Calib3dTest.java +++ b/modules/calib3d/misc/java/test/Calib3dTest.java @@ -188,6 +188,15 @@ public class Calib3dTest extends OpenCVTestCase { assertTrue(!corners.empty()); } + public void testFind4QuadCornerSubpix() { + Size patternSize = new Size(9, 6); + MatOfPoint2f corners = new MatOfPoint2f(); + Size region_size = new Size(5, 5); + Calib3d.findChessboardCorners(grayChess, patternSize, corners); + Calib3d.find4QuadCornerSubpix(grayChess, corners, region_size); + assertTrue(!corners.empty()); + } + public void testFindCirclesGridMatSizeMat() { int size = 300; Mat img = new Mat(size, size, CvType.CV_8U); From 520e3514896823a758ae1c6a305566bd997660c8 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 24 Apr 2019 12:08:49 +0300 Subject: [PATCH 07/22] Fix batching in DetectionOutput layer --- .../dnn/src/layers/detection_output_layer.cpp | 7 +-- modules/dnn/test/test_caffe_importer.cpp | 54 +++++++++++-------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index e095e72dfd..5c413df00b 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -206,8 +206,9 @@ public: std::vector &outputs, std::vector &internals) const CV_OVERRIDE { + const int num = inputs[0][0]; CV_Assert(inputs.size() >= 3); - CV_Assert(inputs[0][0] == inputs[1][0]); + CV_Assert(num == inputs[1][0]); int numPriors = inputs[2][2] / 4; CV_Assert((numPriors * _numLocClasses * 4) == total(inputs[0], 1)); @@ -216,10 +217,10 @@ public: // num() and channels() are 1. // Since the number of bboxes to be kept is unknown before nms, we manually - // set it to maximal number of detections, [keep_top_k] parameter. + // set it to maximal number of detections, [keep_top_k] parameter multiplied by batch size. // Each row is a 7 dimension std::vector, which stores // [image_id, label, confidence, xmin, ymin, xmax, ymax] - outputs.resize(1, shape(1, 1, _keepTopK, 7)); + outputs.resize(1, shape(1, 1, _keepTopK * num, 7)); return false; } diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index b73aa43bad..dc981233f1 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -207,60 +207,72 @@ TEST(Reproducibility_SSD, Accuracy) normAssertDetections(ref, out); } -typedef testing::TestWithParam Reproducibility_MobileNet_SSD; +typedef testing::TestWithParam > Reproducibility_MobileNet_SSD; TEST_P(Reproducibility_MobileNet_SSD, Accuracy) { const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false); const string model = findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false); Net net = readNetFromCaffe(proto, model); - int targetId = GetParam(); - const float l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 1.5e-4 : 1e-5; - const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 
4e-4 : 1e-4; + int backendId = get<0>(GetParam()); + int targetId = get<1>(GetParam()); - net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableBackend(backendId); net.setPreferableTarget(targetId); Mat sample = imread(_tf("street.png")); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); net.setInput(inp); - Mat out = net.forward(); + Mat out = net.forward().clone(); - const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-4 : 1e-5; - const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16) ? 5e-3 : 1e-4; + const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-2 : 1e-5; + const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 6.3e-2 : 1e-4; Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy")); - normAssertDetections(ref, out, "", 0.0, scores_diff, boxes_iou_diff); + normAssertDetections(ref, out, "", FLT_MIN, scores_diff, boxes_iou_diff); // Check that detections aren't preserved. inp.setTo(0.0f); net.setInput(inp); - out = net.forward(); - out = out.reshape(1, out.total() / 7); + Mat zerosOut = net.forward(); + zerosOut = zerosOut.reshape(1, zerosOut.total() / 7); - const int numDetections = out.rows; + const int numDetections = zerosOut.rows; ASSERT_NE(numDetections, 0); for (int i = 0; i < numDetections; ++i) { - float confidence = out.ptr(i)[2]; + float confidence = zerosOut.ptr(i)[2]; ASSERT_EQ(confidence, 0); } + // There is something wrong with Reshape layer in Myriad plugin and + // regression with DLIE/OCL_FP16 target. + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + { + if ((targetId == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2) || + targetId == DNN_TARGET_OPENCL_FP16) + return; + } + // Check batching mode. - ref = ref.reshape(1, numDetections); inp = blobFromImages(std::vector(2, sample), 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); net.setInput(inp); Mat outBatch = net.forward(); // Output blob has a shape 1x1x2Nx7 where N is a number of detection for // a single sample in batch. The first numbers of detection vectors are batch id. - outBatch = outBatch.reshape(1, outBatch.total() / 7); - EXPECT_EQ(outBatch.rows, 2 * numDetections); - normAssert(outBatch.rowRange(0, numDetections), ref, "", l1, lInf); - normAssert(outBatch.rowRange(numDetections, 2 * numDetections).colRange(1, 7), ref.colRange(1, 7), - "", l1, lInf); + // For Inference Engine backend there is -1 delimiter which points the end of detections. 
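    // The loop below checks the first two blocks of numRealDetections rows in the
    // batched output: column 0 must hold the sample's batch index, and columns 1-6
    // (label, confidence, box) must match the single-image detections.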
+ const int numRealDetections = ref.size[2]; + EXPECT_EQ(outBatch.size[2], 2 * numDetections); + out = out.reshape(1, numDetections).rowRange(0, numRealDetections); + outBatch = outBatch.reshape(1, 2 * numDetections); + for (int i = 0; i < 2; ++i) + { + Mat pred = outBatch.rowRange(i * numRealDetections, (i + 1) * numRealDetections); + EXPECT_EQ(countNonZero(pred.col(0) != i), 0); + normAssert(pred.colRange(1, 7), out.colRange(1, 7)); + } } -INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_MobileNet_SSD, - Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16)); +INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_MobileNet_SSD, dnnBackendsAndTargets()); typedef testing::TestWithParam Reproducibility_ResNet50; TEST_P(Reproducibility_ResNet50, Accuracy) From 45ced8e02290bac83363df46d1a6adfab969441a Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 24 Apr 2019 14:18:14 +0300 Subject: [PATCH 08/22] Fix ONNX deconvolution --- modules/dnn/src/onnx/onnx_importer.cpp | 8 ++++++++ modules/dnn/test/test_onnx_importer.cpp | 1 + 2 files changed, 9 insertions(+) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index f20f56547b..bd585bac84 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -622,6 +622,14 @@ void ONNXImporter::populateNet(Net dstNet) layerParams.set("adj_h", (outH - kernelH) % strideY); } } + else if (layerParams.has("output_padding")) + { + const DictValue& adj_pad = layerParams.get("output_padding"); + if (adj_pad.size() != 2) + CV_Error(Error::StsNotImplemented, "Deconvolution3D layer is not supported"); + layerParams.set("adj_w", adj_pad.get(1)); + layerParams.set("adj_h", adj_pad.get(0)); + } } else if (layer_type == "Transpose") { diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 3500688696..992a9f31cf 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -100,6 +100,7 @@ TEST_P(Test_ONNX_layers, Deconvolution) testONNXModels("two_deconvolution"); testONNXModels("deconvolution_group"); testONNXModels("deconvolution_output_shape"); + testONNXModels("deconv_adjpad_2d"); } TEST_P(Test_ONNX_layers, Dropout) From 18d10d6b86d5e8e5591de16b484fa678711cbfa8 Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Mon, 22 Apr 2019 11:19:08 +0300 Subject: [PATCH 09/22] Fixed v_reduce_sad intrinsics implementation and added tests --- .../core/include/opencv2/core/hal/intrin_avx.hpp | 8 ++++++-- .../core/include/opencv2/core/hal/intrin_sse.hpp | 7 ++++--- modules/core/test/test_intrin_utils.hpp | 16 ++++++++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 58db71467d..ba16feadea 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -1141,12 +1141,16 @@ inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b, inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b) { - return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(a.val, b.val)); + __m256i half = _mm256_sad_epu8(a.val, b.val); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); } inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b) { __m256i half = 
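    // _mm256_sad_epu8 yields four 64-bit partial sums (two per 128-bit lane); the old
    // code extracted only the lowest one. As in the unsigned overload above, the fix
    // adds the two lanes and then the remaining two 64-bit halves before extracting
    // the final 32-bit result.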
_mm256_set1_epi8(0x7f); - return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half))); + half = _mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half)); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); } inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b) { diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index a5adad04c5..36499c0117 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -1486,13 +1486,14 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min) inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) { - return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(a.val, b.val)); + __m128i half = _mm_sad_epu8(a.val, b.val); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); } inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) { __m128i half = _mm_set1_epi8(0x7f); - return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(_mm_add_epi8(a.val, half), - _mm_add_epi8(b.val, half))); + half = _mm_sad_epu8(_mm_add_epi8(a.val, half), _mm_add_epi8(b.val, half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); } inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) { diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index b28929c582..3b85d68dea 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -770,6 +770,15 @@ template struct TheTest return *this; } + TheTest & test_reduce_sad() + { + Data dataA, dataB(R::nlanes/2); + R a = dataA; + R b = dataB; + EXPECT_EQ((unsigned)(R::nlanes*R::nlanes/4), v_reduce_sad(a, b)); + return *this; + } + TheTest & test_mask() { typedef typename V_RegTraits::int_reg int_reg; @@ -1320,6 +1329,7 @@ void test_hal_intrin_uint8() .test_logic() .test_min_max() .test_absdiff() + .test_reduce_sad() .test_mask() .test_popcount() .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() @@ -1358,6 +1368,7 @@ void test_hal_intrin_int8() .test_absdiff() .test_absdiffs() .test_abs() + .test_reduce_sad() .test_mask() .test_popcount() .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() @@ -1387,6 +1398,7 @@ void test_hal_intrin_uint16() .test_min_max() .test_absdiff() .test_reduce() + .test_reduce_sad() .test_mask() .test_popcount() .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() @@ -1418,6 +1430,7 @@ void test_hal_intrin_int16() .test_absdiffs() .test_abs() .test_reduce() + .test_reduce_sad() .test_mask() .test_popcount() .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() @@ -1446,6 +1459,7 @@ void test_hal_intrin_uint32() .test_min_max() .test_absdiff() .test_reduce() + .test_reduce_sad() .test_mask() .test_popcount() .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() @@ -1473,6 +1487,7 @@ void test_hal_intrin_int32() .test_min_max() .test_absdiff() .test_reduce() + .test_reduce_sad() .test_mask() .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() .test_unpack() @@ -1528,6 +1543,7 @@ void test_hal_intrin_float32() .test_min_max() .test_float_absdiff() .test_reduce() + .test_reduce_sad() .test_mask() .test_unpack() 
.test_float_math() From a164a131f999c5089b3a769bcd9296d5d53a945e Mon Sep 17 00:00:00 2001 From: Adam Rankin Date: Wed, 24 Apr 2019 14:36:29 -0400 Subject: [PATCH 10/22] BUG: Qt5 components not correctly detected if only Qt5_DIR is provided, when this is supported by Qt CMake system. Fixed by updating Qt5 find_package call to use modern CMake syntax --- cmake/OpenCVFindLibsGUI.cmake | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake index efabb3ce15..367ceb01b0 100644 --- a/cmake/OpenCVFindLibsGUI.cmake +++ b/cmake/OpenCVFindLibsGUI.cmake @@ -15,15 +15,11 @@ endif() ocv_clear_vars(HAVE_QT HAVE_QT5) if(WITH_QT) if(NOT WITH_QT EQUAL 4) - find_package(Qt5Core) - find_package(Qt5Gui) - find_package(Qt5Widgets) - find_package(Qt5Test) - find_package(Qt5Concurrent) - if(Qt5Core_FOUND AND Qt5Gui_FOUND AND Qt5Widgets_FOUND AND Qt5Test_FOUND AND Qt5Concurrent_FOUND) + find_package(Qt5 COMPONENTS Core Gui Widgets Test Concurrent REQUIRED NO_MODULE) + if(Qt5_FOUND) set(HAVE_QT5 ON) set(HAVE_QT ON) - find_package(Qt5OpenGL) + find_package(Qt5 COMPONENTS OpenGL QUIET) if(Qt5OpenGL_FOUND) set(QT_QTOPENGL_FOUND ON) endif() From 4f6be11c0e91882012c0f4231aa77640bfb42f4f Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 19 Apr 2019 14:54:08 +0300 Subject: [PATCH 11/22] Check if Inference Engine networks are fully supported by backend --- modules/dnn/src/layers/lrn_layer.cpp | 2 +- modules/dnn/test/test_backends.cpp | 22 +++++++++++++++++++-- modules/dnn/test/test_caffe_importer.cpp | 2 ++ modules/dnn/test/test_common.hpp | 25 ++++++++++++++++++++++++ modules/dnn/test/test_onnx_importer.cpp | 18 +++++++++++------ modules/dnn/test/test_tf_importer.cpp | 6 ++++++ 6 files changed, 66 insertions(+), 9 deletions(-) diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 21daa9f65f..7c85fd36cc 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -91,7 +91,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { if (backendId == DNN_BACKEND_INFERENCE_ENGINE) - return bias == 1; + return bias == (int)bias; return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; } diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index e5c0f7a732..015d4e6c7c 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -45,7 +45,7 @@ public: netDefault.setInput(inp); Mat outDefault = netDefault.forward(outputLayer).clone(); - Net net = readNet(weights, proto); + net = readNet(weights, proto); net.setInput(inp); net.setPreferableBackend(backend); net.setPreferableTarget(target); @@ -94,6 +94,8 @@ public: else normAssert(ref, out, msg, l1, lInf); } + + Net net; }; TEST_P(DNNTestNetwork, AlexNet) @@ -103,6 +105,7 @@ TEST_P(DNNTestNetwork, AlexNet) Size(227, 227), "prob", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_alexnet.yml" : "dnn/halide_scheduler_alexnet.yml"); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, ResNet_50) @@ -112,6 +115,7 @@ TEST_P(DNNTestNetwork, ResNet_50) Size(224, 224), "prob", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_resnet_50.yml" : "dnn/halide_scheduler_resnet_50.yml"); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, SqueezeNet_v1_1) @@ -120,6 +124,7 @@ TEST_P(DNNTestNetwork, SqueezeNet_v1_1) Size(227, 227), "prob", target == DNN_TARGET_OPENCL ? 
"dnn/halide_scheduler_opencl_squeezenet_v1_1.yml" : "dnn/halide_scheduler_squeezenet_v1_1.yml"); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, GoogLeNet) @@ -127,6 +132,7 @@ TEST_P(DNNTestNetwork, GoogLeNet) applyTestTag(target == DNN_TARGET_CPU ? "" : CV_TEST_TAG_MEMORY_512MB); processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt", Size(224, 224), "prob"); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, Inception_5h) @@ -142,6 +148,7 @@ TEST_P(DNNTestNetwork, Inception_5h) target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_inception_5h.yml" : "dnn/halide_scheduler_inception_5h.yml", l1, lInf); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, ENet) @@ -168,6 +175,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.252 : 0.0; processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", inp, "detection_out", "", diffScores, diffSquares, detectionConfThresh); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height) @@ -185,7 +193,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height) float diffSquares = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : 0.0; processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", inp, "detection_out", "", diffScores, diffSquares); - + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow) @@ -200,6 +208,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow) float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.216 : 0.2; processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt", inp, "detection_out", "", l1, lInf, detectionConfThresh); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow_Different_Width_Height) @@ -217,6 +226,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow_Different_Width_Height) float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.06 : 0.0; processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt", inp, "detection_out", "", l1, lInf); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow) @@ -230,6 +240,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow) float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0; processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt", inp, "detection_out", "", l1, lInf, 0.25); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, SSD_VGG16) @@ -244,6 +255,7 @@ TEST_P(DNNTestNetwork, SSD_VGG16) Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false); processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", "dnn/ssd_vgg16.prototxt", inp, "detection_out", "", scoreThreshold, lInf); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, OpenPose_pose_coco) @@ -262,6 +274,7 @@ TEST_P(DNNTestNetwork, OpenPose_pose_coco) const float lInf = (target == DNN_TARGET_MYRIAD) ? 0.072 : 0.0; processNet("dnn/openpose_pose_coco.caffemodel", "dnn/openpose_pose_coco.prototxt", Size(46, 46), "", "", l1, lInf); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, OpenPose_pose_mpi) @@ -280,6 +293,7 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi) const float lInf = (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 
0.16 : 0.0; processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi.prototxt", Size(46, 46), "", "", l1, lInf); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages) @@ -296,6 +310,7 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages) // See https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/pose/poseParameters.cpp processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi_faster_4_stages.prototxt", Size(46, 46)); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, OpenFace) @@ -324,6 +339,7 @@ TEST_P(DNNTestNetwork, opencv_face_detector) Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false); processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt", inp, "detection_out"); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) @@ -342,6 +358,7 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0731 : 0.0; processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", inp, "detection_out", "", l1, lInf); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, DenseNet_121) @@ -360,6 +377,7 @@ TEST_P(DNNTestNetwork, DenseNet_121) l1 = 0.1; lInf = 0.6; } processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf); + expectNoFallbacksFromIE(net); } TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16) diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index b73aa43bad..767b391323 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -405,6 +405,7 @@ TEST_P(Test_Caffe_nets, Colorization) l1 = 0.6; lInf = 15; } normAssert(out, ref, "", l1, lInf); + expectNoFallbacksFromIE(net); } TEST_P(Test_Caffe_nets, DenseNet_121) @@ -436,6 +437,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121) l1 = 0.11; lInf = 0.5; } normAssert(out, ref, "", l1, lInf); + expectNoFallbacksFromIE(net); } TEST(Test_Caffe, multiple_inputs) diff --git a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp index be8bd85d75..0904a4fe0b 100644 --- a/modules/dnn/test/test_common.hpp +++ b/modules/dnn/test/test_common.hpp @@ -110,6 +110,31 @@ public: } } + void expectNoFallbacks(Net& net) + { + // Check if all the layers are supported with current backend and target. + // Some layers might be fused so their timings equal to zero. 
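        // getPerfProfile() reports one timing entry per layer for the last forward
        // pass; a zero entry marks a layer that was fused away and never ran on its
        // own, so it is skipped. Every remaining layer must report support for the
        // requested backend and have the requested preferable target, otherwise it
        // fell back to another implementation and the test fails.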
+ std::vector timings; + net.getPerfProfile(timings); + std::vector names = net.getLayerNames(); + CV_Assert(names.size() == timings.size()); + + for (int i = 0; i < names.size(); ++i) + { + Ptr l = net.getLayer(net.getLayerId(names[i])); + bool fused = !timings[i]; + if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused) + CV_Error(Error::StsNotImplemented, "Layer [" + l->name + "] of type [" + + l->type + "] is expected to has backend implementation"); + } + } + + void expectNoFallbacksFromIE(Net& net) + { + if (backend == DNN_BACKEND_INFERENCE_ENGINE) + expectNoFallbacks(net); + } + protected: void checkBackend(Mat* inp = 0, Mat* ref = 0) { diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 992a9f31cf..af0c2fb06a 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -28,7 +28,8 @@ public: }; void testONNXModels(const String& basename, const Extension ext = npy, - const double l1 = 0, const float lInf = 0, const bool useSoftmax = false) + const double l1 = 0, const float lInf = 0, const bool useSoftmax = false, + bool checkNoFallbacks = true) { String onnxmodel = _tf("models/" + basename + ".onnx"); Mat inp, ref; @@ -67,6 +68,8 @@ public: ref = netSoftmax.forward(); } normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf); + if (checkNoFallbacks) + expectNoFallbacksFromIE(net); } }; @@ -96,11 +99,11 @@ TEST_P(Test_ONNX_layers, Two_convolution) TEST_P(Test_ONNX_layers, Deconvolution) { - testONNXModels("deconvolution"); - testONNXModels("two_deconvolution"); - testONNXModels("deconvolution_group"); - testONNXModels("deconvolution_output_shape"); - testONNXModels("deconv_adjpad_2d"); + testONNXModels("deconvolution", npy, 0, 0, false, false); + testONNXModels("two_deconvolution", npy, 0, 0, false, false); + testONNXModels("deconvolution_group", npy, 0, 0, false, false); + testONNXModels("deconvolution_output_shape", npy, 0, 0, false, false); + testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false); } TEST_P(Test_ONNX_layers, Dropout) @@ -199,6 +202,7 @@ TEST_P(Test_ONNX_layers, MultyInputs) Mat out = net.forward(); normAssert(ref, out, "", default_l1, default_lInf); + expectNoFallbacksFromIE(net); } TEST_P(Test_ONNX_layers, DynamicReshape) @@ -236,6 +240,7 @@ TEST_P(Test_ONNX_nets, Alexnet) Mat out = net.forward(); normAssert(out, ref, "", default_l1, default_lInf); + expectNoFallbacksFromIE(net); } TEST_P(Test_ONNX_nets, Squeezenet) @@ -268,6 +273,7 @@ TEST_P(Test_ONNX_nets, Googlenet) Mat out = net.forward(); normAssert(ref, out, "", default_l1, default_lInf); + expectNoFallbacksFromIE(net); } TEST_P(Test_ONNX_nets, CaffeNet) diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 8b750bbb44..e3a5d51c1c 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -335,6 +335,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_SSD) double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0043 : default_l1; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.037 : default_lInf; normAssertDetections(ref, out, "", 0.2, scoreDiff, iouDiff); + expectNoFallbacksFromIE(net); } TEST_P(Test_TensorFlow_nets, Inception_v2_SSD) @@ -372,6 +373,7 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD) double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.0097 : default_l1; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : default_lInf; normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff); + expectNoFallbacksFromIE(net); } TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD) @@ -402,6 +404,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD) float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7e-3 : 1.5e-5; float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.012 : 1e-3; normAssertDetections(ref, out, "", 0.3, scoreDiff, iouDiff); + expectNoFallbacksFromIE(net); } TEST_P(Test_TensorFlow_nets, Faster_RCNN) @@ -460,6 +463,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN) double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.048 : 1.1e-5; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.058 : default_lInf; normAssertDetections(ref, out, "", 0.45, scoreDiff, iouDiff); + expectNoFallbacksFromIE(net); } TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8) @@ -489,6 +493,7 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8) double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 3.4e-3; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.024 : 1e-2; normAssertDetections(ref, out, "", 0.9, scoreDiff, iouDiff); + expectNoFallbacksFromIE(net); } // inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png') @@ -553,6 +558,7 @@ TEST_P(Test_TensorFlow_nets, EAST_text_detection) } normAssert(scores, blobFromNPY(refScoresPath), "scores", l1_scores, lInf_scores); normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", l1_geometry, lInf_geometry); + expectNoFallbacksFromIE(net); } INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, dnnBackendsAndTargets()); From 90c34c5c17ccb9f4b7cd5973cda58da67ca9262b Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 26 Apr 2019 15:13:16 +0300 Subject: [PATCH 12/22] cmake: fix pkg-config handling backporting commit c9f3f4d1d32ec065eb6431c7fbe78466e08a2fbe --- CMakeLists.txt | 2 +- cmake/OpenCVUtils.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 759de6ece6..4048378626 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -719,7 +719,7 @@ ocv_cmake_hook(POST_COMPILER_OPTIONS) # ---------------------------------------------------------------------------- if(UNIX) if(NOT APPLE_FRAMEWORK OR OPENCV_ENABLE_PKG_CONFIG) - if(CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{PKG_CONFIG_LIBDIR} + if(CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{PKG_CONFIG_LIBDIR} AND NOT DEFINED ENV{PKG_CONFIG_SYSROOT_DIR} AND NOT OPENCV_ENABLE_PKG_CONFIG ) if(NOT PkgConfig_FOUND) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index c530a4c983..f8ff242948 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -743,7 +743,7 @@ macro(ocv_check_modules define) endif() unset(${define}_${__modname}_FOUND) endforeach() - if(COMMAND pkg_check_modules) + if(PKG_CONFIG_FOUND OR PkgConfig_FOUND) pkg_check_modules(${define} ${ARGN}) endif() if(${define}_FOUND) From afb157df676a536125abcc58cf743cc6e9cb6c88 Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Sat, 27 Apr 2019 02:01:24 +0200 Subject: [PATCH 13/22] core:vsx fix sum of v_reduce_sad --- modules/core/include/opencv2/core/hal/intrin_vsx.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp index c5ceb11324..390977b55e 100644 --- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp @@ -763,7 +763,7 @@ inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) { vec_ushort8 ad = vec_absd(a.val, b.val); - VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)), vec_int4_c(vec_unpacklu(ad))); + VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)) + vec_int4_c(vec_unpacklu(ad)), vec_int4_z); return (unsigned)vec_extract(sum, 3); } inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) From 61b928d9bdc3a5b2a4e9f7a00d66cd141c4cdbe2 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Mon, 29 Apr 2019 08:12:13 +0300 Subject: [PATCH 14/22] Add an assertion for Async request status --- modules/dnn/misc/python/test/test_dnn.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 5cafbc531d..58f68856ea 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -209,8 +209,10 @@ class dnn_test(NewOpenCVTests): outs.insert(0, netAsync.forwardAsync()) for i in reversed(range(numInputs)): - if outs[i].wait_for(timeout) == 1: + ret = outs[i].wait_for(timeout) + if ret == 1: self.fail("Timeout") + self.assertEqual(ret, 0) # is ready normAssert(self, refs[i], outs[i].get(), 'Index: %d' % i, 1e-10) From 643d9df42db6eacc417c9b47711fd3ce41456908 Mon Sep 17 00:00:00 2001 From: ostarling Date: Mon, 29 Apr 2019 15:12:04 +0100 Subject: [PATCH 15/22] Merge pull request #14411 from ostarling:3.4_fix_for_14242 * Fix for Homogenous precision #14242: - moved scale computation to an inline function - use std::numeric_limits::epsilon() instead of != 0.0 * Fix for Homogenous precision #14242: - fixed warnings for type conversion * Fix for Homogenous precision #14242: - use float epsilon() for truncation of doubles --- modules/calib3d/src/fundam.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/modules/calib3d/src/fundam.cpp b/modules/calib3d/src/fundam.cpp index e0693497ad..0c6f114e38 100644 --- a/modules/calib3d/src/fundam.cpp +++ b/modules/calib3d/src/fundam.cpp @@ -909,6 +909,14 @@ void cv::computeCorrespondEpilines( InputArray _points, int whichImage, } } +static inline double scaleFor(double x){ + return (std::fabs(x) > std::numeric_limits::epsilon()) ? 1./x : 1.; +} +static inline float scaleFor(float x){ + return (std::fabs(x) > std::numeric_limits::epsilon()) ? 1.f/x : 1.f; +} + + void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst ) { CV_INSTRUMENT_REGION(); @@ -967,7 +975,7 @@ void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst ) Point2f* dptr = dst.ptr(); for( i = 0; i < npoints; i++ ) { - float scale = sptr[i].z != 0.f ? 1.f/sptr[i].z : 1.f; + float scale = scaleFor(sptr[i].z); dptr[i] = Point2f(sptr[i].x*scale, sptr[i].y*scale); } } @@ -977,7 +985,7 @@ void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst ) Point3f* dptr = dst.ptr(); for( i = 0; i < npoints; i++ ) { - float scale = sptr[i][3] != 0.f ? 
1.f/sptr[i][3] : 1.f; + float scale = scaleFor(sptr[i][3]); dptr[i] = Point3f(sptr[i][0]*scale, sptr[i][1]*scale, sptr[i][2]*scale); } } @@ -990,7 +998,7 @@ void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst ) Point2d* dptr = dst.ptr(); for( i = 0; i < npoints; i++ ) { - double scale = sptr[i].z != 0. ? 1./sptr[i].z : 1.; + double scale = scaleFor(sptr[i].z); dptr[i] = Point2d(sptr[i].x*scale, sptr[i].y*scale); } } @@ -1000,7 +1008,7 @@ void cv::convertPointsFromHomogeneous( InputArray _src, OutputArray _dst ) Point3d* dptr = dst.ptr(); for( i = 0; i < npoints; i++ ) { - double scale = sptr[i][3] != 0.f ? 1./sptr[i][3] : 1.; + double scale = scaleFor(sptr[i][3]); dptr[i] = Point3d(sptr[i][0]*scale, sptr[i][1]*scale, sptr[i][2]*scale); } } From 683acd9ae23f4221978aabfc2b0ad4f433bb3244 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 29 Apr 2019 18:14:58 +0300 Subject: [PATCH 16/22] Support very old V4L --- modules/videoio/src/cap_v4l.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp index c79d9044a9..f89ec92896 100644 --- a/modules/videoio/src/cap_v4l.cpp +++ b/modules/videoio/src/cap_v4l.cpp @@ -251,6 +251,13 @@ make & enjoy! #define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH (V4L2_CID_MPEG_BASE+365) #endif +#ifndef V4L2_CID_ROTATE +#define V4L2_CID_ROTATE (V4L2_CID_BASE+34) +#endif +#ifndef V4L2_CID_IRIS_ABSOLUTE +#define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17) +#endif + /* Defaults - If your board can do better, set it here. Set for the most common type inputs. */ #define DEFAULT_V4L_WIDTH 640 #define DEFAULT_V4L_HEIGHT 480 From 38c0278be3413094f3e01928a280e74c3ef1365a Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 29 Apr 2019 18:24:54 +0000 Subject: [PATCH 17/22] ts: silence zero memory usage messages --- modules/ts/src/ts.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index 3c3f0740d7..1963988a34 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -790,7 +790,10 @@ void testTearDown() { size_t peak = malloc_peak(); memory_usage = peak - memory_usage_base; - CV_LOG_INFO(NULL, "Memory_usage (malloc): " << memory_usage << " (base=" << memory_usage_base << ")"); + if (peak > 0) + { + CV_LOG_INFO(NULL, "Memory_usage (malloc): " << memory_usage << " (base=" << memory_usage_base << ")"); + } } { // core/src/alloc.cpp: #define OPENCV_ALLOC_ENABLE_STATISTICS @@ -799,7 +802,10 @@ void testTearDown() cv::utils::AllocatorStatisticsInterface& ocv_stats = cv::getAllocatorStatistics(); ocv_peak = ocv_stats.getPeakUsage(); ocv_memory_usage = ocv_peak - memory_usage_base_opencv; - CV_LOG_INFO(NULL, "Memory_usage (OpenCV): " << ocv_memory_usage << " (base=" << memory_usage_base_opencv << " current=" << ocv_stats.getCurrentUsage() << ")"); + if (ocv_peak) + { + CV_LOG_INFO(NULL, "Memory_usage (OpenCV): " << ocv_memory_usage << " (base=" << memory_usage_base_opencv << " current=" << ocv_stats.getCurrentUsage() << ")"); + } if (memory_usage == 0) // external profiler has higher priority (and accuracy) memory_usage = ocv_memory_usage; } @@ -809,7 +815,10 @@ void testTearDown() cv::utils::AllocatorStatisticsInterface& ocl_stats = cv::ocl::getOpenCLAllocatorStatistics(); ocl_peak = ocl_stats.getPeakUsage(); ocl_memory_usage = ocl_peak - memory_usage_base_opencl; - CV_LOG_INFO(NULL, "Memory_usage (OpenCL): " << ocl_memory_usage << " (base=" << memory_usage_base_opencl << " 
current=" << ocl_stats.getCurrentUsage() << ")"); + if (ocl_memory_usage > 0) + { + CV_LOG_INFO(NULL, "Memory_usage (OpenCL): " << ocl_memory_usage << " (base=" << memory_usage_base_opencl << " current=" << ocl_stats.getCurrentUsage() << ")"); + } ::testing::Test::RecordProperty("ocl_memory_usage", cv::format("%llu", (unsigned long long)ocl_memory_usage)); } From cd4926778e45a7a76612f8e191983d087d10eec1 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 29 Apr 2019 19:05:44 +0000 Subject: [PATCH 18/22] ts: fix perf calibation properly handle "check" mode with: "--perf_min_samples=1 --perf_force_samples=1" --- modules/ts/src/ts_perf.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index e7d8998d2f..7771b25d09 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -25,7 +25,7 @@ using namespace cvtest; using namespace perf; int64 TestBase::timeLimitDefault = 0; -unsigned int TestBase::iterationsLimitDefault = (unsigned int)(-1); +unsigned int TestBase::iterationsLimitDefault = UINT_MAX; int64 TestBase::_timeadjustment = 0; // Item [0] will be considered the default implementation. @@ -1158,7 +1158,7 @@ void TestBase::Init(const std::vector & availableImpls, } timeLimitDefault = param_time_limit == 0.0 ? 1 : (int64)(param_time_limit * cv::getTickFrequency()); - iterationsLimitDefault = param_force_samples == 0 ? (unsigned)(-1) : param_force_samples; + iterationsLimitDefault = param_force_samples == 0 ? UINT_MAX : param_force_samples; _timeadjustment = _calibrate(); } @@ -1197,9 +1197,13 @@ enum PERF_STRATEGY TestBase::getCurrentModulePerformanceStrategy() int64 TestBase::_calibrate() { CV_TRACE_FUNCTION(); + if (iterationsLimitDefault <= 1) + return 0; + class _helper : public ::perf::TestBase { - public: + public: + _helper() { testStrategy = PERF_STRATEGY_BASE; } performance_metrics& getMetrics() { return calcMetrics(); } virtual void TestBody() {} virtual void PerfTestBody() @@ -1210,13 +1214,17 @@ int64 TestBase::_calibrate() cv::Mat b(2048, 2048, CV_32S, cv::Scalar(2)); declare.time(30); double s = 0; - for(declare.iterations(20); next() && startTimer(); stopTimer()) + declare.iterations(20); + minIters = nIters = 20; + for(; next() && startTimer(); stopTimer()) s+=a.dot(b); declare.time(s); //self calibration SetUp(); - for(declare.iterations(1000); next() && startTimer(); stopTimer()){} + declare.iterations(1000); + minIters = nIters = 1000; + for(int iters = 0; next() && startTimer(); iters++, stopTimer()) { /*std::cout << iters << nIters << std::endl;*/ } } }; From 1c180f4c7f20c2f4dfb168c33d76c5ee1b6159c0 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 29 Apr 2019 21:11:27 +0000 Subject: [PATCH 19/22] imgproc: fix RemoveOverlaps() with empty input vector --- modules/imgproc/src/hough.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index eb4acdad49..5862319738 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -1355,6 +1355,8 @@ static void GetCircleCenters(const std::vector ¢ers, std::vector template static void RemoveOverlaps(std::vector& circles, float minDist) { + if (circles.size() <= 1u) + return; float minDist2 = minDist * minDist; size_t endIdx = 1; for (size_t i = 1; i < circles.size(); ++i) From 9408c3e6405c2a921923e12fa9c58382df5dc910 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Mon, 29 Apr 2019 18:55:09 +0300 Subject: [PATCH 
20/22] Refactored TensorFlow subgraphs fusion --- .../src/tensorflow/tf_graph_simplifier.cpp | 73 ++++++++++++------- modules/dnn/src/tensorflow/tf_importer.cpp | 41 ++++++----- modules/dnn/test/test_tf_importer.cpp | 7 ++ 3 files changed, 78 insertions(+), 43 deletions(-) diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp index 7f1001888a..086f0ae637 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp @@ -79,9 +79,9 @@ public: } } - static const tensorflow::NodeDef& getInputNode(const tensorflow::GraphDef& net, - const tensorflow::NodeDef& node, - int inpId) + static int getInputNodeId(const tensorflow::GraphDef& net, + const tensorflow::NodeDef& node, + int inpId) { CV_Assert(inpId < node.input_size()); std::string name = node.input(inpId); @@ -92,7 +92,7 @@ public: for (int i = 0; i < numNodes; ++i) { if (net.node(i).name() == name) - return net.node(i); + return i; } CV_Error(Error::StsParseError, "Input node with name " + name + " not found"); } @@ -104,36 +104,46 @@ public: matchedNodesIds.clear(); matchedNodesIds.reserve(nodesToFuse.size()); - int numNodes = net.node_size(); - for (int i = 0; i < nodesToFuse.size(); ++i) + std::queue nodesToMatch; + std::queue targetNodes; + nodesToMatch.push(nodeId); + targetNodes.push(nodesToFuse.back()); + while (!nodesToMatch.empty()) { - while (nodeId < numNodes && net.node(nodeId).op() == "Const") - { - nodeId += 1; - } - if (nodeId > numNodes - 1) + int nodeToMatch = nodesToMatch.front(); + int targetNodeId = targetNodes.front(); + nodesToMatch.pop(); + targetNodes.pop(); + + if (std::find(matchedNodesIds.begin(), matchedNodesIds.end(), nodeToMatch) != + matchedNodesIds.end()) + continue; + + const tensorflow::NodeDef& node = net.node(nodeToMatch); + if (node.op() != nodes[targetNodeId]) return false; - const tensorflow::NodeDef& node = net.node(nodeId); - - if (node.op() != nodes[nodesToFuse[i]]) - return false; - - std::vector& inputNodes = inputs[nodesToFuse[i]]; + std::vector& inputNodes = inputs[targetNodeId]; if (inputNodes.size() != node.input_size()) return false; + for (int j = 0; j < inputNodes.size(); ++j) { if (nodes[inputNodes[j]].empty()) // Unknown input node type. 
continue; - const tensorflow::NodeDef& inpNode = getInputNode(net, node, j); - if (inpNode.op() != nodes[inputNodes[j]]) + nodeId = getInputNodeId(net, node, j); + const tensorflow::NodeDef& inpNode = net.node(nodeId); + if (inpNode.op() != "Const") + { + nodesToMatch.push(nodeId); + targetNodes.push(inputNodes[j]); + } + else if (nodes[inputNodes[j]] != "Const") return false; } - - matchedNodesIds.push_back(nodeId); - nodeId += 1; + matchedNodesIds.push_back(nodeToMatch); } + std::sort(matchedNodesIds.begin(), matchedNodesIds.end()); return true; } @@ -181,7 +191,7 @@ public: std::vector inputNodes(inputsNames.size()); for (int i = 0; i < inputsNames.size(); ++i) { - inputNodes[i] = (tensorflow::NodeDef*)&getInputNode(net, *node, i); + inputNodes[i] = net.mutable_node(getInputNodeId(net, *node, i)); } finalize(net, node, inputNodes); } @@ -354,7 +364,7 @@ public: { if (!Subgraph::match(net, nodeId, matchedNodesIds)) return false; - Mat maxValue = getTensorContent(net.node(nodeId + 1).attr().at("value").tensor()); + Mat maxValue = getTensorContent(net.node(matchedNodesIds.front() + 1).attr().at("value").tensor()); return maxValue.type() == CV_32FC1 && maxValue.total() == 1 && maxValue.at(0) == 6; } }; @@ -384,6 +394,17 @@ public: setFusedNode("Reshape", ids); } + virtual bool match(const tensorflow::GraphDef& net, int nodeId, std::vector& matchedNodesIds) CV_OVERRIDE + { + const tensorflow::NodeDef& node = net.node(nodeId); + if (node.input_size() == 0) + return false; + + inpName = node.input(0); + return Subgraph::match(net, nodeId, matchedNodesIds); + } + + virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode, std::vector& inputNodes) CV_OVERRIDE { @@ -395,6 +416,7 @@ public: } tensorflow::TensorProto* shapeTensor = inputNodes[1]->mutable_attr()->at("value").mutable_tensor(); fusedNode->mutable_input()->DeleteSubrange(2, numOutDims - 1); + fusedNode->set_input(0, inpName); shapeTensor->clear_int_val(); for (int i = 0; i < shape.size(); ++i) @@ -405,6 +427,7 @@ public: private: int numOutDims; + std::string inpName; }; class L2NormalizeSubgraph : public Subgraph @@ -685,9 +708,9 @@ void simplifySubgraphs(tensorflow::GraphDef& net) subgraphs.push_back(Ptr(new DeconvolutionSameKerasSubgraph())); subgraphs.push_back(Ptr(new ResizeBilinearSubgraph())); subgraphs.push_back(Ptr(new UpsamplingKerasSubgraph())); - subgraphs.push_back(Ptr(new ReshapeAsShapeSubgraph())); subgraphs.push_back(Ptr(new SoftMaxSlimSubgraph())); subgraphs.push_back(Ptr(new SoftMaxSlimV2Subgraph())); + subgraphs.push_back(Ptr(new ReshapeAsShapeSubgraph())); int numNodes = net.node_size(); std::vector matchedNodesIds; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index ef0b196f44..0ff155f927 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1079,25 +1079,28 @@ void TFImporter::populateNet(Net dstNet) { Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1)); - if (newShape.total() != 4 && inpLayout == DATA_LAYOUT_NHWC) + if (inpLayout == DATA_LAYOUT_NHWC) { - LayerParams permLP; - int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. 
- permLP.set("order", DictValue::arrayInt(order, 4)); + if (newShape.total() == 4) + { + // NHWC->NCHW + std::swap(*newShape.ptr(0, 2), *newShape.ptr(0, 3)); + std::swap(*newShape.ptr(0, 1), *newShape.ptr(0, 2)); + } + if (newShape.total() != 4 || newShape.at(1) == 1) + { + LayerParams permLP; + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. + permLP.set("order", DictValue::arrayInt(order, 4)); - std::string permName = name + "/nchw"; - CV_Assert(layer_id.find(permName) == layer_id.end()); - int permId = dstNet.addLayer(permName, "Permute", permLP); - layer_id[permName] = permId; - connect(layer_id, dstNet, inpId, permId, 0); - inpId = Pin(permName); - inpLayout = DATA_LAYOUT_NCHW; - } - else if (newShape.total() == 4 && inpLayout == DATA_LAYOUT_NHWC) - { - // NHWC->NCHW - std::swap(*newShape.ptr(0, 2), *newShape.ptr(0, 3)); - std::swap(*newShape.ptr(0, 1), *newShape.ptr(0, 2)); + std::string permName = name + "/nchw"; + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, inpId, permId, 0); + inpId = Pin(permName); + inpLayout = DATA_LAYOUT_NCHW; + } } layerParams.set("dim", DictValue::arrayInt(newShape.ptr(), newShape.total())); @@ -1335,7 +1338,9 @@ void TFImporter::populateNet(Net dstNet) // num_split // 1st blob is dims tensor int axis = getConstBlob(layer, value_id, 0).int_val().Get(0); - layerParams.set("axis", toNCHW(axis)); + if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + axis = toNCHW(axis); + layerParams.set("axis", axis); int id = dstNet.addLayer(name, "Slice", layerParams); layer_id[name] = id; diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 8b750bbb44..497300814b 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -654,6 +654,13 @@ TEST_P(Test_TensorFlow_layers, relu6) runTensorFlowNet("keras_relu6", /*hasText*/ true); } +TEST_P(Test_TensorFlow_layers, subpixel) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE) + throw SkipTestException(""); + runTensorFlowNet("subpixel"); +} + TEST_P(Test_TensorFlow_layers, keras_mobilenet_head) { runTensorFlowNet("keras_mobilenet_head"); From 138a80611c088a18d4e20b594ffce0134767d84c Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 29 Apr 2019 19:03:10 +0300 Subject: [PATCH 21/22] build: avoid MSVC ICE --- modules/dnn/src/op_inf_engine.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index e5b025f945..e74813fdd6 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -901,7 +901,7 @@ void InfEngineBackendNet::forward(const std::vector >& outBl InferenceEngine::IInferRequest::Ptr infRequestPtr = reqWrapper->req; infRequestPtr->SetUserData(reqWrapper.get(), 0); - infRequestPtr->SetCompletionCallback({ + infRequestPtr->SetCompletionCallback( [](InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode status) { InfEngineReqWrapper* wrapper; @@ -927,7 +927,7 @@ void InfEngineBackendNet::forward(const std::vector >& outBl } wrapper->isReady = true; } - }); + ); } if (isAsync) { From 77fa59c3daff1112afe6ced4ebf45182ccad9c14 Mon Sep 17 00:00:00 2001 From: Lubov Batanina Date: Tue, 30 Apr 2019 17:08:17 +0300 Subject: [PATCH 22/22] Merge pull request #14301 from l-bat:conv3d Support Convolution3D layer on IE backend (#14301) * Add Convolution3D layer * Disable 
CXX11 * Fixed tests * Add Pooling3D layer * Merge Conv2d with Conv3d and Pool2d with Pool3d layers * Split pads * Add Deconvolution layer * Refactoring * Deduplication * Refactoring * Add utils for Convolution and Pooling layers --- .../dnn/include/opencv2/dnn/all_layers.hpp | 12 +- modules/dnn/src/dnn.cpp | 1 + modules/dnn/src/layers/convolution_layer.cpp | 200 ++++++++++-------- modules/dnn/src/layers/layers_common.cpp | 142 +++++++------ modules/dnn/src/layers/layers_common.hpp | 24 +-- modules/dnn/src/layers/pooling_layer.cpp | 107 ++++++---- modules/dnn/src/onnx/onnx_importer.cpp | 67 +++--- modules/dnn/src/tensorflow/tf_importer.cpp | 106 +++++++--- modules/dnn/test/test_onnx_importer.cpp | 21 ++ modules/dnn/test/test_tf_importer.cpp | 21 ++ 10 files changed, 412 insertions(+), 289 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index c6fe6d05bc..72064843b9 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -210,7 +210,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN class CV_EXPORTS BaseConvolutionLayer : public Layer { public: - Size kernel, stride, pad, dilation, adjustPad; + CV_DEPRECATED_EXTERNAL Size kernel, stride, pad, dilation, adjustPad; + std::vector adjust_pads; + std::vector kernel_size, strides, dilations; + std::vector pads_begin, pads_end; String padMode; int numOutput; }; @@ -243,9 +246,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN { public: int type; - Size kernel, stride; - int pad_l, pad_t, pad_r, pad_b; - CV_DEPRECATED_EXTERNAL Size pad; + std::vector kernel_size, strides; + std::vector pads_begin, pads_end; + CV_DEPRECATED_EXTERNAL Size kernel, stride, pad; + CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b; bool globalPooling; bool computeMaxIdx; String padMode; diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 2cc3f9a640..9bffdbe74a 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2263,6 +2263,7 @@ struct Net::Impl if (isAsync) CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode"); + CV_Assert(layer->supportBackend(DNN_BACKEND_OPENCV)); if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) { std::vector umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers); diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 7aaa8bc989..3b298e616d 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -66,31 +66,34 @@ public: BaseConvolutionLayerImpl(const LayerParams ¶ms) { setParamsFrom(params); - int pad_t = 0, pad_l = 0, pad_r = 0, pad_b = 0; - getConvolutionKernelParams(params, kernel.height, kernel.width, pad_t, - pad_l, pad_b, pad_r, stride.height, stride.width, dilation.height, - dilation.width, padMode); - - if (pad_t != pad_b || pad_l != pad_r) - CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); - - pad.width = pad_l; - pad.height = pad_t; + getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode); numOutput = params.get("num_output"); int ngroups = params.get("group", 1); - - adjustPad.height = params.get("adj_h", 0); - adjustPad.width = params.get("adj_w", 0); - CV_Assert(numOutput % ngroups == 0); - CV_Assert(adjustPad.width < stride.width && - adjustPad.height < stride.height); + if (kernel_size.size() == 2) { + 
kernel = Size(kernel_size[1], kernel_size[0]); + stride = Size(strides[1], strides[0]); + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != pads_end[i]) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); + } + pad = Size(pads_begin[1], pads_begin[0]); + dilation = Size(dilations[1], dilations[0]); + + adjust_pads.push_back(params.get("adj_h", 0)); + adjust_pads.push_back(params.get("adj_w", 0)); + + adjustPad.height = adjust_pads[0]; + adjustPad.width = adjust_pads[1]; + CV_Assert(adjustPad.width < stride.width && + adjustPad.height < stride.height); + } newWeightAndBias = false; } - void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE + virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE { std::vector inputs, outputs; inputs_arr.getMatVector(inputs); @@ -98,31 +101,38 @@ public: CV_Assert(inputs.size() > 0); - CV_Assert(blobs.size() >= 1 && blobs.size() <= 2); - CV_Assert(blobs[0].dims == 4 && blobs[0].size[3] == kernel.width && blobs[0].size[2] == kernel.height); + CV_Assert(blobs.size() == 1 || blobs.size() == 2); + CV_Assert(inputs[0].dims == outputs[0].dims); + CV_Assert(blobs[0].dims == kernel_size.size() + 2); + for (int i = 0; i < kernel_size.size(); i++) { + CV_Assert(blobs[0].size[i + 2] == kernel_size[i]); + } const Mat &input = inputs[0]; - CV_Assert(input.dims == 4 && (input.type() == CV_32F || input.type() == CV_64F || input.type() == CV_16S)); + CV_Assert((input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S)); for (size_t i = 0; i < inputs.size(); i++) { CV_Assert(inputs[i].type() == input.type()); - CV_Assert(inputs[i].dims == 4 && inputs[i].size[1] == input.size[1]); - CV_Assert(inputs[i].size[2] == input.size[2] && inputs[i].size[3] == input.size[3]); + CV_Assert((inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]); + for (int j = 0; j < inputs[i].dims; j++) { + CV_Assert(inputs[i].size[j] == input.size[j]); + } } - Size outSize = Size(outputs[0].size[3], outputs[0].size[2]); - - int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width; - - getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize, - kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r); - - - if (pad_t != pad_b || pad_l != pad_r) - CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); - - pad.width = pad_l; - pad.height = pad_t; + std::vector inpShape; + std::vector outShape; + for (int i = 2; i < inputs[0].dims; i++) { + inpShape.push_back(inputs[0].size[i]); + outShape.push_back(outputs[0].size[i]); + } + getConvPoolPaddings(inpShape, outShape, kernel_size, strides, padMode, dilations, pads_begin, pads_end); + if (pads_begin.size() == 2) { + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != pads_end[i]) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); + } + pad = Size(pads_begin[1], pads_begin[0]); + } } bool hasBias() const @@ -134,8 +144,8 @@ public: bool is1x1() const { return (kernel.height == 1 && kernel.width == 1) && - (stride.height == 1 && stride.width == 1) && - (dilation.height == 1 && dilation.width == 1); + (stride.height == 1 && stride.width == 1) && + (dilation.height == 1 && dilation.width == 1); } virtual bool tryFuse(Ptr& top) CV_OVERRIDE @@ -237,12 +247,14 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE) { + 
if (kernel_size.size() == 3) + return preferableTarget == DNN_TARGET_CPU; return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R4) || (preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height); } else #endif - return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + return (kernel_size.size() == 2) && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE); } bool getMemoryShapes(const std::vector &inputs, @@ -256,21 +268,23 @@ public: internals.clear(); - int inpCn = inputs[0][1]; - int inpH = inputs[0][2]; - int inpW = inputs[0][3]; + CV_Assert(inputs.size() != 0); + std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); int outCn = blobs[0].size[0]; - Size out; + std::vector outShape; + outShape.push_back(inputs[0][0]); + outShape.push_back(outCn); + int inpCn = inputs[0][1]; if (padMode.empty()) { - out.height = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1; - out.width = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1; + for (int i = 0; i < inpShape.size(); i++) + outShape.push_back((inpShape[i] + pads_begin[i] + pads_end[i] - dilations[i] * (kernel_size[i] - 1) - 1) / strides[i] + 1); } else { - getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, dilation, out); + getConvPoolOutParams(inpShape, kernel_size, strides, padMode, dilations, outShape); } int ngroups = inpCn / blobs[0].size[1]; @@ -279,8 +293,7 @@ public: "be multiple of %d but got %d", blobs[0].size[1], inpCn)); CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0); - int dims[] = {inputs[0][0], outCn, out.height, out.width}; - outputs.resize(inputs.size(), shape(dims, 4)); + outputs.resize(1, outShape); return false; } @@ -451,25 +464,28 @@ public: { #ifdef HAVE_INF_ENGINE InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); - CV_Assert(input->dims.size() == 4); + CV_Assert(input->dims.size() == 4 || input->dims.size() == 5); - const int inpCn = input->dims[2]; // NOTE: input->dims are reversed (whcn) + const int inpCn = input->dims[input->dims.size() - 2]; // NOTE: input->dims are reversed (WHIO or WHDIO) const int outCn = blobs[0].size[0]; const int inpGroupCn = blobs[0].size[1]; const int group = inpCn / inpGroupCn; - auto ieWeights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW); + InferenceEngine::Layout layout = (input->dims.size() == 4) ? 
InferenceEngine::Layout::OIHW : + InferenceEngine::Layout::NCDHW; + + auto ieWeights = wrapToInfEngineBlob(blobs[0], layout); if (newWeightAndBias) { if (weightsMat.isContinuous()) { Mat fusedWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size); - ieWeights = wrapToInfEngineBlob(fusedWeights, InferenceEngine::Layout::OIHW); + ieWeights = wrapToInfEngineBlob(fusedWeights, layout); } else { ieWeights = InferenceEngine::make_shared_blob( - InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW, + InferenceEngine::Precision::FP32, layout, ieWeights->dims()); ieWeights->allocate(); @@ -488,11 +504,11 @@ public: #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5) InferenceEngine::Builder::ConvolutionLayer ieLayer(name); - ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width}); - ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width}); - ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width}); - ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width}); - ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width}); + ieLayer.setKernel(kernel_size); + ieLayer.setStrides(strides); + ieLayer.setDilation(dilations); + ieLayer.setPaddingsBegin(pads_begin); + ieLayer.setPaddingsEnd(pads_end); ieLayer.setGroup((size_t)group); ieLayer.setOutDepth((size_t)outCn); @@ -1085,6 +1101,10 @@ public: CV_Assert_N(inputs.size() == (size_t)1, inputs[0].size[1] % blobs[0].size[1] == 0, outputs.size() == 1, inputs[0].data != outputs[0].data); + if (inputs[0].dims == 5) { + CV_Error(Error::StsNotImplemented, "Convolution3D layer is not supported on OCV backend"); + } + int ngroups = inputs[0].size[1]/blobs[0].size[1]; CV_Assert(outputs[0].size[1] % ngroups == 0); int outCn = blobs[0].size[0]; @@ -1157,6 +1177,9 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE) { + if (kernel_size.size() == 3) + CV_Error(Error::StsNotImplemented, "Unsupported deconvolution3D layer"); + if (INF_ENGINE_RELEASE >= 2018050000 && (adjustPad.height || adjustPad.width)) return false; @@ -1172,7 +1195,7 @@ public: } else #endif // HAVE_INF_ENGINE - return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + return kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE); } bool getMemoryShapes(const std::vector &inputs, @@ -1183,39 +1206,36 @@ public: CV_Assert(!hasBias() || blobs[1].total() == (size_t)numOutput); CV_Assert(inputs.size() != 0); - int inpCn = inputs[0][1]; - int inpH = inputs[0][2]; - int inpW = inputs[0][3]; - - int outH = -1, outW = -1; + int outCn = numOutput; + std::vector outShape; + outShape.push_back(inputs[0][0]); // batch + outShape.push_back(outCn); if (padMode.empty()) { - outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height; - outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width; + for (int i = 0; i < kernel_size.size(); i++) + outShape.push_back(strides[i] * (inputs[0][2 + i] - 1) + kernel_size[i] - pads_begin[i] - pads_end[i] + adjust_pads[i]); } else if (padMode == "VALID") { - outH = stride.height * (inpH - 1) + kernel.height + adjustPad.height; - outW = stride.width * (inpW - 1) + kernel.width + adjustPad.width; + for (int i = 0; i < kernel_size.size(); i++) + outShape.push_back(strides[i] * (inputs[0][2 + i] - 1) + kernel_size[i] + adjust_pads[i]); } else if (padMode == "SAME") { - outH = stride.height * (inpH - 1) + 1 + adjustPad.height; - outW = stride.width * (inpW - 1) + 1 + adjustPad.width; + 
for (int i = 0; i < kernel_size.size(); i++) + outShape.push_back(strides[i] * (inputs[0][2 + i] - 1) + 1 + adjust_pads[i]); } else CV_Error(Error::StsError, "Unsupported padding mode " + padMode); - int outCn = numOutput; - CV_Assert(outCn % blobs[0].size[1] == 0); int ngroups = outCn / blobs[0].size[1]; + int inpCn = inputs[0][1]; CV_Assert(inpCn % ngroups == 0 && outCn % ngroups == 0); CV_Assert(blobs[0].size[0] == inpCn); - int dims[] = {inputs[0][0], outCn, outH, outW}; - outputs.resize(inputs.size(), shape(dims, 4)); + outputs.resize(1, outShape); if (!is1x1()) internals.push_back(computeColRowShape(inputs[0], outputs[0])); @@ -1231,16 +1251,20 @@ public: inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); - int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width; - getConvPoolPaddings(Size(outputs[0].size[3], outputs[0].size[2]), - Size(inputs[0].size[3], inputs[0].size[2]), - kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r); - - if (pad_t != pad_b || pad_l != pad_r) - CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); - - pad.width = pad_l; - pad.height = pad_t; + std::vector inpShape; + std::vector outShape; + for (int i = 2; i < inputs[0].dims; i++) { + inpShape.push_back(inputs[0].size[i]); + outShape.push_back(outputs[0].size[i]); + } + getConvPoolPaddings(outShape, inpShape, kernel_size, strides, padMode, dilations, pads_begin, pads_end); + if (pads_begin.size() == 2) { + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != pads_end[i]) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in deconvolution layer"); + } + pad = Size(pads_begin[1], pads_begin[0]); + } weightsMultipliers.assign(numOutput, 1.0); if (weightsMat.empty()) @@ -1760,11 +1784,11 @@ public: InferenceEngine::Builder::DeconvolutionLayer ieLayer(name); - ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width}); - ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width}); - ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width}); - ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width}); - ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width}); + ieLayer.setKernel(kernel_size); + ieLayer.setStrides(strides); + ieLayer.setDilation(dilations); + ieLayer.setPaddingsBegin(pads_begin); + ieLayer.setPaddingsEnd(pads_end); ieLayer.setGroup((size_t)group); ieLayer.setOutDepth((size_t)numOutput); diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index 2dbb12109d..627f79c784 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -57,20 +57,19 @@ std::string makeName(const std::string& str1, const std::string& str2) } bool getParameter(const LayerParams ¶ms, const std::string& nameBase, const std::string& nameAll, - int ¶meterH, int ¶meterW, bool hasDefault = false, const int& defaultValue = 0) + std::vector& parameter, bool hasDefault = false, const std::vector& defaultValue = std::vector(2, 0)) { std::string nameH = makeName(nameBase, std::string("_h")); std::string nameW = makeName(nameBase, std::string("_w")); std::string nameAll_ = nameAll; - if(nameAll_ == "") - { + if (nameAll_ == "") nameAll_ = nameBase; - } if (params.has(nameH) && params.has(nameW)) { - parameterH = params.get(nameH); - parameterW = params.get(nameW); + CV_Assert(params.get(nameH) >= 0 && params.get(nameW) >= 0); + parameter.push_back(params.get(nameH)); + 
parameter.push_back(params.get(nameW)); return true; } else @@ -78,26 +77,19 @@ bool getParameter(const LayerParams ¶ms, const std::string& nameBase, const if (params.has(nameAll_)) { DictValue param = params.get(nameAll_); - parameterH = param.get(0); - if (param.size() == 1) - { - parameterW = parameterH; - } - else if (param.size() == 2) - { - parameterW = param.get(1); - } - else - { - return false; + for (int i = 0; i < param.size(); i++) { + CV_Assert(param.get(i) >= 0); + parameter.push_back(param.get(i)); } + if (parameter.size() == 1) + parameter.resize(2, parameter[0]); return true; } else { - if(hasDefault) + if (hasDefault) { - parameterH = parameterW = defaultValue; + parameter = defaultValue; return true; } else @@ -108,30 +100,38 @@ bool getParameter(const LayerParams ¶ms, const std::string& nameBase, const } } -void getKernelSize(const LayerParams ¶ms, int &kernelH, int &kernelW) +void getKernelSize(const LayerParams ¶ms, std::vector& kernel) { - if(!util::getParameter(params, "kernel", "kernel_size", kernelH, kernelW)) - { + if (!util::getParameter(params, "kernel", "kernel_size", kernel)) CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified"); - } - CV_Assert(kernelH > 0 && kernelW > 0); + for (int i = 0; i < kernel.size(); i++) + CV_Assert(kernel[i] > 0); } -void getStrideAndPadding(const LayerParams ¶ms, int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode) +void getStrideAndPadding(const LayerParams ¶ms, std::vector& pads_begin, std::vector& pads_end, + std::vector& strides, cv::String& padMode, size_t kernel_size = 2) { if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) { - padT = params.get("pad_t"); - padL = params.get("pad_l"); - padB = params.get("pad_b"); - padR = params.get("pad_r"); + CV_Assert(params.get("pad_t") >= 0 && params.get("pad_l") >= 0 && + params.get("pad_b") >= 0 && params.get("pad_r") >= 0); + pads_begin.push_back(params.get("pad_t")); + pads_begin.push_back(params.get("pad_l")); + pads_end.push_back(params.get("pad_b")); + pads_end.push_back(params.get("pad_r")); } else { - util::getParameter(params, "pad", "pad", padT, padL, true, 0); - padB = padT; - padR = padL; + util::getParameter(params, "pad", "pad", pads_begin, true, std::vector(kernel_size, 0)); + if (pads_begin.size() < 4) + pads_end = pads_begin; + else + { + pads_end = std::vector(pads_begin.begin() + pads_begin.size() / 2, pads_begin.end()); + pads_begin.resize(pads_begin.size() / 2); + } + CV_Assert(pads_begin.size() == pads_end.size()); } - util::getParameter(params, "stride", "stride", strideH, strideW, true, 1); + util::getParameter(params, "stride", "stride", strides, true, std::vector(kernel_size, 1)); padMode = ""; if (params.has("pad_mode")) @@ -139,15 +139,16 @@ void getStrideAndPadding(const LayerParams ¶ms, int &padT, int &padL, int &p padMode = params.get("pad_mode"); } - CV_Assert(padT >= 0 && padL >= 0 && padB >= 0 && padR >= 0 && strideH > 0 && strideW > 0); + for (int i = 0; i < strides.size(); i++) + CV_Assert(strides[i] > 0); } } - -void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, bool &globalPooling, - int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String &padMode) +void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kernel, bool &globalPooling, + std::vector& pads_begin, std::vector& pads_end, + std::vector& strides, cv::String &padMode) { - util::getStrideAndPadding(params, padT, 
padL, padB, padR, strideH, strideW, padMode); + util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode); globalPooling = params.has("global_pooling") && params.get("global_pooling"); @@ -158,25 +159,30 @@ void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernel { CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified"); } - if(padT != 0 || padL != 0 || padB != 0 || padR != 0 || strideH != 1 || strideW != 1) - { - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0, and stride_h and stride_w must be = 1"); + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != 0 || pads_end[i] != 0) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0"); + } + for (int i = 0; i < strides.size(); i++) { + if (strides[i] != 1) + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1"); } } else { - util::getKernelSize(params, kernelH, kernelW); + util::getKernelSize(params, kernel); } } -void getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR, - int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String &padMode) +void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& pads_begin, + std::vector& pads_end, std::vector& strides, std::vector& dilations, cv::String &padMode) { - util::getKernelSize(params, kernelH, kernelW); - util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode); - util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1); + util::getKernelSize(params, kernel); + util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size()); + util::getParameter(params, "dilation", "dilation", dilations, true, std::vector(kernel.size(), 1)); - CV_Assert(dilationH > 0 && dilationW > 0); + for (int i = 0; i < dilations.size(); i++) + CV_Assert(dilations[i] > 0); } // From TensorFlow code: @@ -188,19 +194,19 @@ void getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &ke // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top // and Pc - Pc/2 on the bottom. When Pr or Pc is odd, this means // we pad more on the right and bottom than on the top and left. 
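// Illustrative sketch with assumed example values (input = 10, kernel = 3,
// stride = 2, dilation = 1), tracing the padding arithmetic described in the
// comment above:
//   VALID: out = (10 - 3 + 2) / 2 = 4, and no padding is added.
//   SAME:  out = ceil(10 / 2) = 5,
//          total padding = max(0, (5 - 1) * 2 + 3 - 10) = 1,
//          which TensorFlow splits as 0 before and 1 after, i.e. the odd
//          pixel goes to the right/bottom as noted above.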
-void getConvPoolOutParams(const Size& inp, const Size &kernel, - const Size &stride, const String &padMode, - const Size &dilation, Size& out) +void getConvPoolOutParams(const std::vector& inp, const std::vector& kernel, + const std::vector& stride, const String &padMode, + const std::vector& dilation, std::vector& out) { if (padMode == "VALID") { - out.height = (inp.height - (dilation.height * (kernel.height - 1) + 1) + stride.height) / stride.height; - out.width = (inp.width - (dilation.width * (kernel.width - 1) + 1) + stride.width) / stride.width; + for (int i = 0; i < inp.size(); i++) + out.push_back((inp[i] - dilation[i] * (kernel[i] - 1) - 1 + stride[i]) / stride[i]); } else if (padMode == "SAME") { - out.height = (inp.height - 1 + stride.height) / stride.height; - out.width = (inp.width - 1 + stride.width) / stride.width; + for (int i = 0; i < inp.size(); i++) + out.push_back((inp[i] - 1 + stride[i]) / stride[i]); } else { @@ -208,22 +214,26 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel, } } -void getConvPoolPaddings(const Size& inp, const Size& out, - const Size &kernel, const Size &stride, - const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR) +void getConvPoolPaddings(const std::vector& inp, const std::vector& out, + const std::vector& kernel, const std::vector& strides, + const String &padMode, const std::vector& dilation, + std::vector& pads_begin, std::vector& pads_end) { if (padMode == "VALID") { - padT = padL = padB = padR = 0; + pads_begin.assign(kernel.size(), 0); + pads_end.assign(kernel.size(), 0); } else if (padMode == "SAME") { - int Ph = std::max(0, (out.height - 1) * stride.height + (dilation.height * (kernel.height - 1) + 1) - inp.height); - int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width); - // For odd values of total padding, add more padding at the 'right' - // side of the given dimension. 
- padT= padB = Ph / 2; - padL = padR = Pw / 2; + CV_Assert_N(kernel.size() == dilation.size(), kernel.size() == strides.size(), + kernel.size() == inp.size(), kernel.size() == out.size()); + pads_begin.resize(kernel.size()); + pads_end.resize(kernel.size()); + for (int i = 0; i < pads_begin.size(); i++) { + int pad = ((out[i] - 1) * strides[i] + dilation[i] * (kernel[i] - 1) + 1 - inp[i]) / 2; + pads_begin[i] = pads_end[i] = std::max(0, pad); + } } } diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp index 7fce183d6e..fd1e430a54 100644 --- a/modules/dnn/src/layers/layers_common.hpp +++ b/modules/dnn/src/layers/layers_common.hpp @@ -59,22 +59,20 @@ namespace cv { namespace dnn { +void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& pads_begin, + std::vector& pads_end, std::vector& strides, std::vector& dilations, cv::String &padMode); -void getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR, - int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String& padMode); +void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kernel, bool &globalPooling, + std::vector& pads_begin, std::vector& pads_end, std::vector& strides, cv::String &padMode); -void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, bool &globalPooling, - int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode); - -void getConvPoolOutParams(const Size& inp, const Size &kernel, - const Size &stride, const String &padMode, - const Size &dilation, Size& out); - - -void getConvPoolPaddings(const Size& inp, const Size& out, - const Size &kernel, const Size &stride, - const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR); +void getConvPoolOutParams(const std::vector& inp, const std::vector& kernel, + const std::vector& stride, const String &padMode, + const std::vector& dilation, std::vector& out); + void getConvPoolPaddings(const std::vector& inp, const std::vector& out, + const std::vector& kernel, const std::vector& strides, + const String &padMode, const std::vector& dilation, + std::vector& pads_begin, std::vector& pads_end); } } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 28945c7440..78946b4b63 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -72,6 +72,7 @@ public: computeMaxIdx = true; globalPooling = false; stride = Size(1, 1); + pad_t = pad_l = pad_b = pad_r = 0; if (params.has("pool") || params.has("kernel_size") || params.has("kernel_w") || params.has("kernel_h")) @@ -86,11 +87,17 @@ public: else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); - getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, - pad_t, pad_l, pad_b, pad_r, stride.height, stride.width, padMode); + getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode); + if (kernel_size.size() == 2) { + kernel = Size(kernel_size[1], kernel_size[0]); + stride = Size(strides[1], strides[0]); + pad = Size(pads_begin[1], pads_begin[0]); - pad.width = pad_l; - pad.height = pad_t; + pad_t = pads_begin[0]; + pad_l = pads_begin[1]; + pad_b = pads_end[0]; + pad_r = pads_end[1]; + } } else if (params.has("pooled_w") || params.has("pooled_h")) { @@ -125,17 +132,24 @@ public: CV_Assert(!inputs.empty()); - cv::Size 
inp(inputs[0].size[3], inputs[0].size[2]), - out(outputs[0].size[3], outputs[0].size[2]); - - if(globalPooling) - { - kernel = inp; + std::vector inp; + std::vector out; + for (int i = 2; i < inputs[0].dims; i++) { + inp.push_back(inputs[0].size[i]); + out.push_back(outputs[0].size[i]); + } + if (globalPooling) { + kernel = Size(inp[1], inp[0]); + kernel_size = std::vector(inp.begin(), inp.end()); } - getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad_t, pad_l, pad_b, pad_r); - pad.width = pad_l; - pad.height = pad_t; + getConvPoolPaddings(inp, out, kernel_size, strides, padMode, std::vector(kernel_size.size(), 1), pads_begin, pads_end); + if (pads_begin.size() == 2) { + pad_t = pads_begin[0]; + pad_l = pads_begin[1]; + pad_b = pads_end[0]; + pad_r = pads_end[1]; + } #ifdef HAVE_OPENCL poolOp.release(); @@ -148,6 +162,8 @@ public: if (backendId == DNN_BACKEND_INFERENCE_ENGINE) { #ifdef HAVE_INF_ENGINE + if (kernel_size.size() == 3) + return preferableTarget == DNN_TARGET_CPU; if (preferableTarget == DNN_TARGET_MYRIAD) { if (type == MAX && (pad_l == 1 && pad_t == 1) && stride == Size(2, 2) ) { return !isMyriadX(); @@ -161,9 +177,9 @@ public: #endif } else - return backendId == DNN_BACKEND_OPENCV || + return (kernel_size.empty() || kernel_size.size() == 2) && (backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_HALIDE && haveHalide() && - (type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r))); + (type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r)))); } #ifdef HAVE_OPENCL @@ -269,10 +285,12 @@ public: if (type == MAX || type == AVE) { InferenceEngine::Builder::PoolingLayer ieLayer(name); - ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width}); - ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width}); - ieLayer.setPaddingsBegin({(size_t)pad_t, (size_t)pad_l}); - ieLayer.setPaddingsEnd({(size_t)pad_b, (size_t)pad_r}); + + ieLayer.setKernel(kernel_size); + ieLayer.setStrides(strides); + ieLayer.setPaddingsBegin(pads_begin); + ieLayer.setPaddingsEnd(pads_end); + ieLayer.setPoolingType(type == MAX ? InferenceEngine::Builder::PoolingLayer::PoolingType::MAX : InferenceEngine::Builder::PoolingLayer::PoolingType::AVG); @@ -916,59 +934,56 @@ public: std::vector &internals) const CV_OVERRIDE { CV_Assert(inputs.size() != 0); - Size in(inputs[0][3], inputs[0][2]), out; + + std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); + std::vector outShape(inputs[0].begin(), inputs[0].begin() + 2); if (globalPooling) { - out.height = 1; - out.width = 1; + outShape.push_back(1); + outShape.push_back(1); } else if (type == ROI || type == PSROI) { - out.height = pooledSize.height; - out.width = pooledSize.width; + outShape.push_back(pooledSize.height); + outShape.push_back(pooledSize.width); } else if (padMode.empty()) { - float height = (float)(in.height + pad_t + pad_b - kernel.height) / stride.height; - float width = (float)(in.width + pad_l + pad_r - kernel.width) / stride.width; - out.height = 1 + (ceilMode ? ceil(height) : floor(height)); - out.width = 1 + (ceilMode ? ceil(width) : floor(width)); + for (int i = 0; i < kernel_size.size(); i++) { + float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - kernel_size[i]) / strides[i]; + outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst))); + } - if (pad_r || pad_b) - { - // If we have padding, ensure that the last pooling starts strictly - // inside the image (instead of at the padding); otherwise clip the last. 
- if ((out.height - 1) * stride.height >= in.height + pad_b) - --out.height; - if ((out.width - 1) * stride.width >= in.width + pad_r) - --out.width; - CV_Assert((out.height - 1) * stride.height < in.height + pad_b); - CV_Assert((out.width - 1) * stride.width < in.width + pad_r); + // If we have padding, ensure that the last pooling starts strictly + // inside the image (instead of at the padding); otherwise clip the last. + for (int i = 0; i < pads_end.size(); i++) { + if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { + --outShape[2 + i]; + CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); + } } } else { - getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out); + getConvPoolOutParams(inpShape, kernel_size, strides, padMode, std::vector(kernel_size.size(), 1), outShape); } - - int dims[] = {inputs[0][0], inputs[0][1], out.height, out.width}; if (type == ROI) { CV_Assert(inputs.size() == 2); - dims[0] = inputs[1][0]; // Number of proposals; + outShape[0] = inputs[1][0]; // Number of proposals; } else if (type == PSROI) { CV_Assert(inputs.size() == 2); CV_Assert(psRoiOutChannels * pooledSize.width * pooledSize.height == inputs[0][1]); - dims[0] = inputs[1][0]; // Number of proposals; - dims[1] = psRoiOutChannels; + outShape[0] = inputs[1][0]; // Number of proposals; + outShape[1] = psRoiOutChannels; } - int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1); CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX)); - outputs.assign(numOutputs, shape(dims, 4)); + + outputs.assign(numOutputs, outShape); return false; } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 7396e00fdc..e722b4a735 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -184,6 +184,12 @@ std::map ONNXImporter::getGraphTensors( return layers_weights; } +static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) { + std::vector dst(src.size()); + convertInt64ToInt32(src, dst, src.size()); + return DictValue::arrayInt(&dst[0], src.size()); +} + LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto) { LayerParams lp; @@ -194,15 +200,13 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot if(attribute_name == "kernel_shape") { - CV_Assert(attribute_proto.ints_size() == 2); - lp.set("kernel_h", saturate_cast(attribute_proto.ints(0))); - lp.set("kernel_w", saturate_cast(attribute_proto.ints(1))); + CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + lp.set("kernel_size", parse(attribute_proto.ints())); } else if(attribute_name == "strides") { - CV_Assert(attribute_proto.ints_size() == 2); - lp.set("stride_h", saturate_cast(attribute_proto.ints(0))); - lp.set("stride_w", saturate_cast(attribute_proto.ints(1))); + CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + lp.set("stride", parse(attribute_proto.ints())); } else if(attribute_name == "pads") { @@ -225,11 +229,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot else { // Convolution or pooling. 
- CV_Assert(attribute_proto.ints_size() == 4); - lp.set("pad_t", saturate_cast(attribute_proto.ints(0))); - lp.set("pad_l", saturate_cast(attribute_proto.ints(1))); - lp.set("pad_b", saturate_cast(attribute_proto.ints(2))); - lp.set("pad_r", saturate_cast(attribute_proto.ints(3))); + CV_Assert(attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6); + lp.set("pad", parse(attribute_proto.ints())); } } else if(attribute_name == "auto_pad") @@ -243,9 +244,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot } else if(attribute_name == "dilations") { - CV_Assert(attribute_proto.ints_size() == 2); - lp.set("dilation_h", saturate_cast(attribute_proto.ints(0))); - lp.set("dilation_w", saturate_cast(attribute_proto.ints(1))); + CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + lp.set("dilation", parse(attribute_proto.ints())); } else if (attribute_proto.has_i()) { @@ -270,10 +270,7 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot } else if (attribute_proto.ints_size() > 0) { - const ::google::protobuf::RepeatedField< ::google::protobuf::int64> src = attribute_proto.ints(); - std::vector dst(attribute_proto.ints_size()); - convertInt64ToInt32(src, dst, attribute_proto.ints_size()); - lp.set(attribute_proto.name(), DictValue::arrayInt(&dst[0], attribute_proto.ints_size())); + lp.set(attribute_proto.name(), parse(attribute_proto.ints())); } else if (attribute_proto.has_t()) { @@ -305,19 +302,6 @@ Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, return constBlob->second; } - -bool ONNXImporter::isCeilMode(const LayerParams& layerParams) { - if (!layerParams.has("pad_mode")) { - if (layerParams.has("pad_h")) { - return layerParams.get("pad_h") != layerParams.get("pad_b") || - layerParams.get("pad_w") != layerParams.get("pad_r"); - } - else - return false; // all pads == 0 - } - return true; -} - void ONNXImporter::populateNet(Net dstNet) { CV_Assert(model_proto.has_graph()); @@ -384,13 +368,13 @@ void ONNXImporter::populateNet(Net dstNet) { layerParams.type = "Pooling"; layerParams.set("pool", "MAX"); - layerParams.set("ceil_mode", isCeilMode(layerParams)); + layerParams.set("ceil_mode", layerParams.has("pad_mode")); } else if (layer_type == "AveragePool") { layerParams.type = "Pooling"; layerParams.set("pool", "AVE"); - layerParams.set("ceil_mode", isCeilMode(layerParams)); + layerParams.set("ceil_mode", layerParams.has("pad_mode")); layerParams.set("ave_pool_padded_area", framework_name == "pytorch"); } else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool") @@ -600,8 +584,9 @@ void ONNXImporter::populateNet(Net dstNet) if (outShape.size() != 4) CV_Error(Error::StsNotImplemented, "Output shape must have 4 elements."); - const int strideY = layerParams.get("stride_h", 1); - const int strideX = layerParams.get("stride_w", 1); + DictValue stride = layerParams.get("stride"); + const int strideY = stride.getIntValue(0); + const int strideX = stride.getIntValue(1); const int outH = outShape.getIntValue(2); const int outW = outShape.getIntValue(3); @@ -612,15 +597,13 @@ void ONNXImporter::populateNet(Net dstNet) } else if (layerParams.get("pad_mode") == "VALID") { - if (!layerParams.has("kernel_h") || !layerParams.has("kernel_w")) + if (!layerParams.has("kernel_size")) CV_Error(Error::StsNotImplemented, - "Required attributes 'kernel_h' and 'kernel_w' are not present."); + "Required attribute 'kernel_size' is not present."); - int kernelH = 
layerParams.get("kernel_h"); - int kernelW = layerParams.get("kernel_w"); - - layerParams.set("adj_w", (outW - kernelW) % strideX); - layerParams.set("adj_h", (outH - kernelH) % strideY); + DictValue kernel = layerParams.get("kernel_size"); + layerParams.set("adj_h", (outH - kernel.getIntValue(0)) % strideY); + layerParams.set("adj_w", (outW - kernel.getIntValue(1)) % strideX); } } else if (layerParams.has("output_padding")) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index ef0b196f44..a7a681c140 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -51,6 +51,7 @@ enum DataLayout { DATA_LAYOUT_NHWC, DATA_LAYOUT_NCHW, + DATA_LAYOUT_NDHWC, DATA_LAYOUT_UNKNOWN, DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d) }; @@ -258,6 +259,8 @@ static int getDataLayout(const tensorflow::NodeDef& layer) return DATA_LAYOUT_NHWC; else if (format == "NCHW" || format == "channels_first") return DATA_LAYOUT_NCHW; + else if (format == "NDHWC") + return DATA_LAYOUT_NDHWC; else CV_Error(Error::StsParseError, "Unknown data_format value: " + format); } @@ -281,21 +284,34 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer) if (hasLayerAttr(layer, "strides")) { const tensorflow::AttrValue& val = getLayerAttr(layer, "strides"); - int dimX, dimY, dimC; + int dimX, dimY, dimC, dimD; int layout = getDataLayout(layer); if (layout == DATA_LAYOUT_NCHW) { dimC = 1; dimY = 2; dimX = 3; } + else if (layout == DATA_LAYOUT_NDHWC) + { + dimD = 1; dimY = 2; dimX = 3; dimC = 4; + } else { dimY = 1; dimX = 2; dimC = 3; } - if (val.list().i_size() != 4 || + if (!(val.list().i_size() == 4 || val.list().i_size() == 5) || val.list().i(0) != 1 || val.list().i(dimC) != 1) CV_Error(Error::StsError, "Unsupported strides"); - layerParams.set("stride_h", static_cast(val.list().i(dimY))); - layerParams.set("stride_w", static_cast(val.list().i(dimX))); + if (layout == DATA_LAYOUT_NDHWC) { + int strides[] = {static_cast(val.list().i(dimD)), + static_cast(val.list().i(dimY)), + static_cast(val.list().i(dimX))}; + layerParams.set("stride", DictValue::arrayInt(strides, 3)); + } + else + { + layerParams.set("stride_h", static_cast(val.list().i(dimY))); + layerParams.set("stride_w", static_cast(val.list().i(dimX))); + } } } @@ -318,21 +334,35 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer) if (hasLayerAttr(layer, "ksize")) { const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize"); - int dimX, dimY, dimC; + int dimX, dimY, dimC, dimD; int layout = getDataLayout(layer); if (layout == DATA_LAYOUT_NCHW) { dimC = 1; dimY = 2; dimX = 3; } + else if (layout == DATA_LAYOUT_NDHWC) + { + dimD = 1; dimY = 2; dimX = 3; dimC = 4; + } else { dimY = 1; dimX = 2; dimC = 3; } - if (val.list().i_size() != 4 || + if (!(val.list().i_size() == 4 || val.list().i_size() == 5) || val.list().i(0) != 1 || val.list().i(dimC) != 1) CV_Error(Error::StsError, "Unsupported ksize"); - layerParams.set("kernel_h", static_cast(val.list().i(dimY))); - layerParams.set("kernel_w", static_cast(val.list().i(dimX))); + + if (layout == DATA_LAYOUT_NDHWC) { + int kernel[] = {static_cast(val.list().i(dimD)), + static_cast(val.list().i(dimY)), + static_cast(val.list().i(dimX))}; + layerParams.set("kernel_size", DictValue::arrayInt(kernel, 3)); + } + else + { + layerParams.set("kernel_h", static_cast(val.list().i(dimY))); + layerParams.set("kernel_w", static_cast(val.list().i(dimX))); + } } else { 
@@ -456,12 +486,26 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &ds // TODO: other blob types CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT || tensor.dtype() == tensorflow::DT_HALF); - CV_Assert(dims == 4); + CV_Assert(dims == 4 || dims == 5); - // REORDER kernel HWIO to OIHW - swap(shape[0], shape[2]); // IWHO - swap(shape[1], shape[3]); // IOHW - swap(shape[0], shape[1]); // OIHW + int out_c, input_c, depth, height, width; + if (dims == 4) + { + // REORDER kernel HWIO to OIHW + swap(shape[0], shape[2]); // IWHO + swap(shape[1], shape[3]); // IOHW + swap(shape[0], shape[1]); // OIHW + depth = 1; height = shape[2]; width = shape[3]; + } + else + { + // REORDER kernel DHWIO to OIDHW + swap(shape[0], shape[4]); // OHWID + swap(shape[1], shape[3]); // OIWHD + swap(shape[2], shape[4]); // OIDHW + depth = shape[2]; height = shape[3]; width = shape[4]; + } + out_c = shape[0]; input_c = shape[1]; dstBlob.create(shape, CV_32F); @@ -472,17 +516,20 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &ds float *dstData = dstBlob.ptr(); const float *data = reinterpret_cast(tensorContent.data); - int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3]; - int total = out_c*input_c*height*width; - for(int i_oc = 0; i_oc < out_c; i_oc++) { - for(int i_ic = 0; i_ic < input_c; i_ic++) { - for(int i_h = 0; i_h < height; i_h++) { - for(int i_w = 0; i_w < width; i_w++) { - int dst_i = input_c*height*width*i_oc + height*width*i_ic + width*i_h + i_w; - int src_i = out_c*input_c*width*i_h + out_c*input_c*i_w + out_c*i_ic + i_oc; - CV_Assert(dst_i < total); - CV_Assert(src_i < total); - dstData[dst_i] = data[src_i]; + int total = out_c * input_c * depth * height * width; + for (int i_oc = 0; i_oc < out_c; i_oc++) { + for (int i_ic = 0; i_ic < input_c; i_ic++) { + for (int i_d = 0; i_d < depth; i_d++) { + for (int i_h = 0; i_h < height; i_h++) { + for (int i_w = 0; i_w < width; i_w++) { + int dst_i = input_c * depth * height * width * i_oc + + depth * height * width * i_ic + height * width * i_d + width * i_h + i_w; + int src_i = out_c * input_c * width * height * i_d + + out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc; + CV_Assert(dst_i < total); + CV_Assert(src_i < total); + dstData[dst_i] = data[src_i]; + } } } } @@ -745,7 +792,7 @@ void TFImporter::populateNet(Net dstNet) int predictedLayout = predictOutputDataLayout(net, layer, data_layouts); data_layouts[name] = predictedLayout; - if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad") + if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "Conv3D") { // The first node of dilated convolution subgraph. // Extract input node, dilation rate and paddings. 
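// Illustrative trace of the 5-D kernel reorder in the kernelFromTensor hunk
// above: the TensorFlow Conv3D kernel starts in the DHWIO layout, i.e. shape
// indices [D, H, W, I, O], and the three swaps proceed as
//   swap(shape[0], shape[4]);  // [O, H, W, I, D]
//   swap(shape[1], shape[3]);  // [O, I, W, H, D]
//   swap(shape[2], shape[4]);  // [O, I, D, H, W]
// yielding the OIDHW layout expected by OpenCV, mirroring the existing
// HWIO -> OIHW path for 2-D kernels.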
@@ -917,9 +964,9 @@ void TFImporter::populateNet(Net dstNet)
             {
                 layerParams.blobs[0] = sharedWeightsIt->second;
             }
+            Mat weights = layerParams.blobs[0];
+            layerParams.set("kernel_size", DictValue::arrayInt(&weights.size[2], weights.dims - 2));

-            layerParams.set("kernel_h", layerParams.blobs[0].size[2]);
-            layerParams.set("kernel_w", layerParams.blobs[0].size[3]);
             layerParams.set("num_output", layerParams.blobs[0].size[0]);

             setStrides(layerParams, layer);
@@ -1290,7 +1337,7 @@ void TFImporter::populateNet(Net dstNet)
                 connect(layer_id, dstNet, inp, id, ii - from);
             }
         }
-        else if (type == "MaxPool")
+        else if (type == "MaxPool" || type == "MaxPool3D")
         {
             layerParams.set("pool", "max");

@@ -1303,11 +1350,10 @@ void TFImporter::populateNet(Net dstNet)

             connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
         }
-        else if (type == "AvgPool")
+        else if (type == "AvgPool" || type == "AvgPool3D")
         {
             layerParams.set("pool", "ave");
             layerParams.set("ave_pool_padded_area", false);
-            setKSize(layerParams, layer);
             setStrides(layerParams, layer);
             setPadding(layerParams, layer);

diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 992a9f31cf..c0782e96b2 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -81,6 +81,13 @@ TEST_P(Test_ONNX_layers, Convolution)
     testONNXModels("convolution");
 }

+TEST_P(Test_ONNX_layers, Convolution3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    testONNXModels("conv3d");
+    testONNXModels("conv3d_bias");
+}

 TEST_P(Test_ONNX_layers, Two_convolution)
 {
@@ -138,6 +145,20 @@ TEST_P(Test_ONNX_layers, AveragePooling)
     testONNXModels("average_pooling");
 }

+TEST_P(Test_ONNX_layers, MaxPooling3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    testONNXModels("max_pool3d");
+}
+
+TEST_P(Test_ONNX_layers, AvePooling3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    testONNXModels("ave_pool3d");
+}
+
 TEST_P(Test_ONNX_layers, BatchNormalization)
 {
     testONNXModels("batch_norm");
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 8b750bbb44..ff2e14b8b6 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -131,6 +131,13 @@ TEST_P(Test_TensorFlow_layers, conv)
     runTensorFlowNet("conv_pool_nchw");
 }

+TEST_P(Test_TensorFlow_layers, Convolution3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    runTensorFlowNet("conv3d");
+}
+
 TEST_P(Test_TensorFlow_layers, padding)
 {
     runTensorFlowNet("padding_valid");
@@ -212,6 +219,20 @@ TEST_P(Test_TensorFlow_layers, ave_pool_same)
     runTensorFlowNet("ave_pool_same");
 }

+TEST_P(Test_TensorFlow_layers, MaxPooling3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    runTensorFlowNet("max_pool3d");
+}
+
+TEST_P(Test_TensorFlow_layers, AvePooling3D)
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    runTensorFlowNet("ave_pool3d");
+}
+
 TEST_P(Test_TensorFlow_layers, deconvolution)
 {
     runTensorFlowNet("deconvolution");