mirror of
https://github.com/zebrajr/opencv.git
synced 2025-12-06 00:19:46 +01:00
[hal_rvv] Add cv::integral implementation and more types of input for test #27060 This patch introduces an RVV-optimized implementation of `cv::integral()` in hal_rvv, along with performance and accuracy tests for all valid input/output type combinations specified in `modules/imgproc/src/hal_replacement.hpp`: 2a8d4b8e43/modules/imgproc/src/hal_replacement.hpp (L960-L974). The vectorized prefix sum algorithm follows the approach described in [Prefix Sum with SIMD - Algorithmica](https://en.algorithmica.org/hpc/algorithms/prefix/). I intentionally omitted support for the following cases by returning `CV_HAL_ERROR_NOT_IMPLEMENTED`, as they are harder to implement or show limited performance gains: 1. **Tilted Sum**: The data access pattern for tilted sums requires multi-row operations, making effective vectorization difficult. 2. **3-channel images (`cn == 3`)**: The current implementation requires `VLEN/SEW` (i.e. the number of elements in a vector register) to be a multiple of the channel count, which 3-channel formats typically cannot satisfy. - Support for 1-, 2- and 4-channel images is implemented. 3. **Small images (`!(width >> 8 || height >> 8)`)**: The scalar implementation demonstrates better performance for images with limited dimensions.
- This is the same as `3rdparty/ndsrvp/src/integral.cpp`09c71aed14/3rdparty/ndsrvp/src/integral.cpp (L24-L26)Test configuration: - Platform: SpacemiT Muse Pi (K1 @ 1.60 Ghz) - Toolchain: GCC 14.2.0 - `integral_sqsum_full` test is disabled by default, so `--gtest_also_run_disabled_tests` is needed Test results: ```plaintext Geometric mean (ms) Name of Test imgproc-gcc-scalar imgproc-gcc-hal imgproc-gcc-hal vs imgproc-gcc-scalar (x-factor) integral::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_32F) 1.973 1.415 1.39 integral::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_32S) 1.343 1.351 0.99 integral::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_64F) 2.021 2.756 0.73 integral::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_32F) 4.695 2.874 1.63 integral::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_32S) 4.028 2.801 1.44 integral::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_64F) 5.965 4.926 1.21 integral::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_32F) 9.970 4.440 2.25 integral::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_32S) 7.934 4.244 1.87 integral::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_64F) 14.696 8.431 1.74 integral::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_32F) 5.949 4.108 1.45 integral::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_32S) 4.064 4.080 1.00 integral::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_64F) 6.137 7.975 0.77 integral::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_32F) 13.896 8.721 1.59 integral::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_32S) 10.948 8.513 1.29 integral::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_64F) 18.046 15.234 1.18 integral::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_32F) 35.105 13.778 2.55 integral::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_32S) 27.135 13.417 2.02 integral::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_64F) 43.477 25.616 1.70 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_32F) 13.386 9.281 1.44 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_32S) 9.159 
9.194 1.00 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_64F) 13.776 17.836 0.77 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_32F) 31.943 19.435 1.64 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_32S) 24.747 18.946 1.31 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_64F) 35.925 33.943 1.06 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_32F) 66.493 29.692 2.24 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_32S) 54.737 28.250 1.94 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_64F) 91.880 57.495 1.60 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_32F) 4.384 4.016 1.09 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_32S) 3.676 3.960 0.93 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_64F) 5.620 5.224 1.08 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_32F) 9.971 7.696 1.30 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_32S) 8.934 7.632 1.17 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_64F) 9.927 9.759 1.02 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_32F) 21.556 12.288 1.75 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_32S) 21.261 12.089 1.76 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_64F) 23.989 16.278 1.47 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_32F) 15.232 11.752 1.30 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_32S) 12.976 11.721 1.11 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_64F) 16.450 15.627 1.05 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_32F) 25.932 23.243 1.12 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_32S) 24.750 23.019 1.08 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_64F) 28.228 29.605 0.95 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_32F) 61.665 37.477 1.65 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_32S) 
61.536 37.126 1.66 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_64F) 73.989 48.994 1.51 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_32F) 49.640 26.529 1.87 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_32S) 35.869 26.417 1.36 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_64F) 34.378 35.056 0.98 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_32F) 82.138 52.661 1.56 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_32S) 54.644 52.089 1.05 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_64F) 75.073 66.670 1.13 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_32F) 143.283 83.943 1.71 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_32S) 156.851 82.378 1.90 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_64F) 521.594 111.375 4.68 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32F_32F)) 3.529 2.787 1.27 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32F_64F)) 4.396 3.998 1.10 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32S_32F)) 3.229 2.774 1.16 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32S_32S)) 2.945 2.780 1.06 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32S_64F)) 3.857 3.995 0.97 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_64F_64F)) 5.872 5.228 1.12 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16UC1, DEPTH_64F_64F)) 6.075 5.277 1.15 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16SC1, DEPTH_64F_64F)) 5.680 5.296 1.07 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC1, DEPTH_32F_32F)) 3.355 2.896 1.16 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC1, DEPTH_32F_64F)) 4.183 4.000 1.05 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC1, 
DEPTH_64F_64F)) 6.237 5.143 1.21 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (64FC1, DEPTH_64F_64F)) 4.753 4.783 0.99 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32F_32F)) 8.021 5.793 1.38 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32F_64F)) 9.963 7.704 1.29 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32S_32F)) 7.864 5.720 1.37 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32S_32S)) 7.141 5.699 1.25 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32S_64F)) 9.228 7.646 1.21 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_64F_64F)) 9.940 9.759 1.02 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16UC2, DEPTH_64F_64F)) 10.606 9.716 1.09 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16SC2, DEPTH_64F_64F)) 9.933 9.751 1.02 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC2, DEPTH_32F_32F)) 7.986 5.962 1.34 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC2, DEPTH_32F_64F)) 9.243 7.598 1.22 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC2, DEPTH_64F_64F)) 10.573 9.425 1.12 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (64FC2, DEPTH_64F_64F)) 11.029 8.977 1.23 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32F_32F)) 17.236 8.881 1.94 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32F_64F)) 20.905 12.322 1.70 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32S_32F)) 16.011 8.666 1.85 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32S_32S)) 15.932 8.507 1.87 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32S_64F)) 20.713 12.115 1.71 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_64F_64F)) 23.953 16.284 1.47 
integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16UC4, DEPTH_64F_64F)) 25.127 16.341 1.54 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16SC4, DEPTH_64F_64F)) 24.950 16.441 1.52 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC4, DEPTH_32F_32F)) 17.261 8.906 1.94 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC4, DEPTH_32F_64F)) 21.944 12.073 1.82 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC4, DEPTH_64F_64F)) 25.921 15.539 1.67 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (64FC4, DEPTH_64F_64F)) 27.938 14.824 1.88 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32F_32F)) 11.156 8.260 1.35 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32F_64F)) 14.777 11.869 1.24 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32S_32F)) 9.693 8.221 1.18 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32S_32S)) 9.023 8.256 1.09 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32S_64F)) 13.276 11.821 1.12 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_64F_64F)) 15.406 15.618 0.99 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16UC1, DEPTH_64F_64F)) 16.799 15.749 1.07 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16SC1, DEPTH_64F_64F)) 15.054 15.806 0.95 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC1, DEPTH_32F_32F)) 10.055 7.999 1.26 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC1, DEPTH_32F_64F)) 13.506 11.253 1.20 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC1, DEPTH_64F_64F)) 14.952 15.021 1.00 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (64FC1, DEPTH_64F_64F)) 13.761 14.002 0.98 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32F_32F)) 22.677 17.330 1.31 
integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32F_64F)) 26.283 23.237 1.13 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32S_32F)) 20.126 17.118 1.18 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32S_32S)) 19.337 17.041 1.13 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32S_64F)) 24.973 23.004 1.09 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_64F_64F)) 29.959 29.585 1.01 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16UC2, DEPTH_64F_64F)) 33.598 29.599 1.14 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16SC2, DEPTH_64F_64F)) 46.213 29.741 1.55 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC2, DEPTH_32F_32F)) 33.077 17.556 1.88 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC2, DEPTH_32F_64F)) 33.960 22.991 1.48 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC2, DEPTH_64F_64F)) 41.792 28.803 1.45 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (64FC2, DEPTH_64F_64F)) 34.660 28.532 1.21 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32F_32F)) 52.989 27.659 1.92 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32F_64F)) 62.418 37.515 1.66 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32S_32F)) 50.902 27.310 1.86 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32S_32S)) 47.301 27.019 1.75 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32S_64F)) 61.982 37.140 1.67 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_64F_64F)) 79.403 49.041 1.62 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16UC4, DEPTH_64F_64F)) 86.550 49.180 1.76 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16SC4, DEPTH_64F_64F)) 85.715 49.468 
1.73 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC4, DEPTH_32F_32F)) 63.932 28.019 2.28 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC4, DEPTH_32F_64F)) 68.180 36.858 1.85 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC4, DEPTH_64F_64F)) 83.063 46.483 1.79 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (64FC4, DEPTH_64F_64F)) 91.990 44.545 2.07 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32F_32F)) 25.503 18.609 1.37 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32F_64F)) 29.544 26.635 1.11 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32S_32F)) 22.581 18.514 1.22 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32S_32S)) 20.860 18.547 1.12 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32S_64F)) 26.046 26.373 0.99 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_64F_64F)) 34.831 34.997 1.00 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16UC1, DEPTH_64F_64F)) 36.428 35.214 1.03 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16SC1, DEPTH_64F_64F)) 32.435 35.314 0.92 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC1, DEPTH_32F_32F)) 22.548 18.845 1.20 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC1, DEPTH_32F_64F)) 28.589 25.790 1.11 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC1, DEPTH_64F_64F)) 32.625 33.791 0.97 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (64FC1, DEPTH_64F_64F)) 30.158 31.889 0.95 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_32F_32F)) 53.374 38.938 1.37 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_32F_64F)) 73.892 52.747 1.40 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, 
DEPTH_32S_32F)) 47.392 38.572 1.23 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_32S_32S)) 45.638 38.225 1.19 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_32S_64F)) 69.966 52.156 1.34 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_64F_64F)) 68.560 66.963 1.02 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16UC2, DEPTH_64F_64F)) 71.487 65.420 1.09 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16SC2, DEPTH_64F_64F)) 68.127 65.718 1.04 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC2, DEPTH_32F_32F)) 72.967 39.987 1.82 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC2, DEPTH_32F_64F)) 63.933 51.408 1.24 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC2, DEPTH_64F_64F)) 73.334 63.354 1.16 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (64FC2, DEPTH_64F_64F)) 80.983 60.778 1.33 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32F_32F)) 116.981 59.908 1.95 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32F_64F)) 155.085 83.974 1.85 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32S_32F)) 109.567 58.525 1.87 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32S_32S)) 105.457 57.124 1.85 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32S_64F)) 157.325 82.485 1.91 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_64F_64F)) 265.776 111.577 2.38 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16UC4, DEPTH_64F_64F)) 585.218 110.583 5.29 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16SC4, DEPTH_64F_64F)) 585.418 111.302 5.26 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC4, DEPTH_32F_32F)) 126.456 60.415 2.09 
integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC4, DEPTH_32F_64F)) 169.278 81.460 2.08 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC4, DEPTH_64F_64F)) 281.256 104.732 2.69 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (64FC4, DEPTH_64F_64F)) 620.885 99.953 6.21 ``` The vectorized implementation shows progressively better acceleration for larger image sizes and higher channel counts, achieving up to a 6.21× speedup for 64FC4 (1920×1080) inputs with the `DEPTH_64F_64F` configuration. This is my first time proposing a patch for the OpenCV project 🥹; if there's anything that can be improved, please tell me. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
64 lines
2.2 KiB
C++
64 lines
2.2 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
|
|
#ifndef OPENCV_HAL_RVV_HPP_INCLUDED
|
|
#define OPENCV_HAL_RVV_HPP_INCLUDED
|
|
|
|
#include "opencv2/core/base.hpp"
|
|
#include "opencv2/core/hal/interface.h"
|
|
#include "opencv2/imgproc/hal/interface.h"
|
|
|
|
// Auto-detect whether the "071" HAL variant should be compiled in.
// The outer #ifndef lets a value of CV_HAL_RVV_071_ENABLED that was defined
// earlier (e.g. on the compiler command line) take precedence over detection.
// Detection yields 1 only when ALL of the following hold, otherwise 0:
//   - the compiler reports itself as GCC 10.4 (__GNUC__ == 10, __GNUC_MINOR__ == 4),
//   - __THEAD_VERSION__ is defined (presumably a T-Head vendor toolchain - confirm),
//   - __riscv_v is defined and equals 7000 (presumably the RVV 0.7.x draft
//     encoding - confirm against the toolchain documentation).
#ifndef CV_HAL_RVV_071_ENABLED
|
|
# if defined(__GNUC__) && __GNUC__ == 10 && __GNUC_MINOR__ == 4 && defined(__THEAD_VERSION__) && defined(__riscv_v) && __riscv_v == 7000
|
|
# define CV_HAL_RVV_071_ENABLED 1
|
|
# else
|
|
# define CV_HAL_RVV_071_ENABLED 0
|
|
# endif
|
|
#endif
|
|
|
|
#if CV_HAL_RVV_071_ENABLED
|
|
#include "version/hal_rvv_071.hpp"
|
|
#endif
|
|
|
|
#if defined(__riscv_v) && __riscv_v == 1000000
|
|
#include "hal_rvv_1p0/types.hpp"
|
|
#include "hal_rvv_1p0/merge.hpp" // core
|
|
#include "hal_rvv_1p0/mean.hpp" // core
|
|
#include "hal_rvv_1p0/dxt.hpp" // core
|
|
#include "hal_rvv_1p0/norm.hpp" // core
|
|
#include "hal_rvv_1p0/norm_diff.hpp" // core
|
|
#include "hal_rvv_1p0/norm_hamming.hpp" // core
|
|
#include "hal_rvv_1p0/convert_scale.hpp" // core
|
|
#include "hal_rvv_1p0/minmax.hpp" // core
|
|
#include "hal_rvv_1p0/atan.hpp" // core
|
|
#include "hal_rvv_1p0/split.hpp" // core
|
|
#include "hal_rvv_1p0/magnitude.hpp" // core
|
|
#include "hal_rvv_1p0/cart_to_polar.hpp" // core
|
|
#include "hal_rvv_1p0/polar_to_cart.hpp" // core
|
|
#include "hal_rvv_1p0/flip.hpp" // core
|
|
#include "hal_rvv_1p0/lut.hpp" // core
|
|
#include "hal_rvv_1p0/exp.hpp" // core
|
|
#include "hal_rvv_1p0/log.hpp" // core
|
|
#include "hal_rvv_1p0/lu.hpp" // core
|
|
#include "hal_rvv_1p0/cholesky.hpp" // core
|
|
#include "hal_rvv_1p0/qr.hpp" // core
|
|
#include "hal_rvv_1p0/svd.hpp" // core
|
|
#include "hal_rvv_1p0/sqrt.hpp" // core
|
|
#include "hal_rvv_1p0/copy_mask.hpp" // core
|
|
#include "hal_rvv_1p0/div.hpp" // core
|
|
#include "hal_rvv_1p0/dotprod.hpp" // core
|
|
|
|
#include "hal_rvv_1p0/moments.hpp" // imgproc
|
|
#include "hal_rvv_1p0/filter.hpp" // imgproc
|
|
#include "hal_rvv_1p0/pyramids.hpp" // imgproc
|
|
#include "hal_rvv_1p0/color.hpp" // imgproc
|
|
#include "hal_rvv_1p0/warp.hpp" // imgproc
|
|
#include "hal_rvv_1p0/thresh.hpp" // imgproc
|
|
#include "hal_rvv_1p0/histogram.hpp" // imgproc
|
|
#include "hal_rvv_1p0/resize.hpp" // imgproc
|
|
#include "hal_rvv_1p0/integral.hpp" // imgproc
|
|
#endif
|
|
|
|
#endif
|