hal/imgproc: add hal for calcHist and implement in hal:riscv-rvv (#27332)

hal/imgproc: add hal for calcHist and implement in hal/riscv-rvv #27332 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2025-12-06 12:19:50 +01:00 · 2025-05-21 12:07:22 +08:00 · 2025-05-21 12:07:22 +08:00 · 9b08167769
commit 9b08167769
parent 23f8e523a0
4 changed files with 206 additions and 0 deletions
--- a/hal/riscv-rvv/include/imgproc.hpp
+++ b/hal/riscv-rvv/include/imgproc.hpp
@ -215,6 +215,11 @@ int equalize_hist(const uchar* src_data, size_t src_step, uchar* dst_data, size_
 #undef cv_hal_equalize_hist
 #define cv_hal_equalize_hist cv::rvv_hal::imgproc::equalize_hist

+int calc_hist(const uchar* src_data, size_t src_step, int src_type, int src_width, int src_height, float* hist_data, int hist_size, const float** ranges, bool uniform, bool accumulate);
+
+#undef cv_hal_calcHist
+#define cv_hal_calcHist cv::rvv_hal::imgproc::calc_hist
+
 /* ############ resize ############ */

 int resize(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, double inv_scale_x, double inv_scale_y, int interpolation);
--- a/hal/riscv-rvv/src/imgproc/histogram.cpp
+++ b/hal/riscv-rvv/src/imgproc/histogram.cpp
@ -3,8 +3,12 @@
 // of this distribution and at http://opencv.org/license.html.

 // Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
+// Copyright (C) 2025, SpaceMIT Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.

 #include "rvv_hal.hpp"
+#include <cstring>
+#include <vector>

 namespace cv { namespace rvv_hal { namespace imgproc {

@ -101,6 +105,178 @@ int equalize_hist(const uchar* src_data, size_t src_step, uchar* dst_data, size_
    return CV_HAL_ERROR_OK;
 }

+// ############ calc_hist ############
+
+namespace {
+
+constexpr int MAX_VLEN = 1024;
+constexpr int MAX_E8M1 = MAX_VLEN / 8;
+
+inline void cvt_32s32f(const int* ihist, float* fhist, int hist_size) {
+    int vl;
+    for (int i = 0; i < hist_size; i += vl) {
+        vl = __riscv_vsetvl_e32m8(hist_size - i);
+        auto iv = __riscv_vle32_v_i32m8(ihist + i, vl);
+        __riscv_vse32(fhist + i, __riscv_vfcvt_f(iv, vl), vl);
+    }
+}
+
+inline void cvt32s32f_add32f(const int* ihist, float* fhist, int hist_size) {
+    int vl;
+    for (int i = 0; i < hist_size; i += vl) {
+        vl = __riscv_vsetvl_e32m8(hist_size - i);
+        auto iv = __riscv_vle32_v_i32m8(ihist + i, vl);
+        auto fv = __riscv_vle32_v_f32m8(fhist + i, vl);
+        auto s = __riscv_vfadd(__riscv_vfcvt_f(iv, vl), fv, vl);
+        __riscv_vse32(fhist + i, s, vl);
+    }
+}
+
+}
+
+int calc_hist(const uchar* src_data, size_t src_step, int src_type, int src_width, int src_height,
+              float* hist_data, int hist_size, const float** ranges, bool uniform, bool accumulate) {
+    int depth = CV_MAT_DEPTH(src_type), cn = CV_MAT_CN(src_type);
+
+    // [TODO] support non-uniform
+    // In case of CV_8U, it is already fast enough with lut
+    if ((depth != CV_16U && depth != CV_32F) || !uniform) {
+        return CV_HAL_ERROR_NOT_IMPLEMENTED;
+    }
+
+    std::vector<int> buf_ihist(hist_size+1, 0);
+    int* ihist = buf_ihist.data();
+
+    double low = ranges[0][0], high = ranges[0][1];
+    double t = hist_size / (high - low);
+    double a = t, b = -t * low;
+    double v0_lo = low, v0_hi = high;
+
+    int sz = hist_size, d0 = cn, step0 = (int)(src_step / CV_ELEM_SIZE1(src_type));
+    int buf_idx[MAX_E8M1];
+
+    if (depth == CV_16U) {
+        const ushort* p0 = (const ushort*)src_data;
+        if (d0 == 1) {
+            while (src_height--) {
+                int vl;
+                for (int x = 0; x < src_width; x += vl) {
+                    vl = __riscv_vsetvl_e16m2(src_width - x);
+
+                    auto v = __riscv_vfcvt_f(__riscv_vwcvtu_x(__riscv_vwcvtu_x(__riscv_vle16_v_u16m2(p0 + x, vl), vl), vl), vl);
+
+                    auto m0 = __riscv_vmflt(v, v0_lo, vl);
+                    auto m1 = __riscv_vmfge(v, v0_hi, vl);
+                    auto m = __riscv_vmor(m0, m1, vl);
+
+                    auto fidx = __riscv_vfadd(__riscv_vfmul(v, a, vl), b, vl);
+                    auto idx = __riscv_vfncvt_x(__riscv_vfsub(fidx, 0.5f - 1e-6, vl), vl);
+                    idx = __riscv_vmerge(idx, 0, __riscv_vmslt(idx, 0, vl), vl);
+                    idx = __riscv_vmerge(idx, sz-1, __riscv_vmsgt(idx, sz-1, vl), vl);
+                    idx = __riscv_vmerge(idx, -1, m, vl);
+                    __riscv_vse32(buf_idx, idx, vl);
+
+                    for (int i = 0; i < vl; i++) {
+                        int _idx = buf_idx[i] + 1;
+                        ihist[_idx]++;
+                    }
+                }
+                p0 += step0;
+            }
+        } else {
+            while (src_height--) {
+                int vl;
+                for (int x = 0; x < src_width; x += vl) {
+                    vl = __riscv_vsetvl_e16m2(src_width - x);
+
+                    auto v = __riscv_vfcvt_f(__riscv_vwcvtu_x(__riscv_vwcvtu_x(__riscv_vlse16_v_u16m2(p0 + x*d0, sizeof(ushort)*d0, vl), vl), vl), vl);
+
+                    auto m0 = __riscv_vmflt(v, v0_lo, vl);
+                    auto m1 = __riscv_vmfge(v, v0_hi, vl);
+                    auto m = __riscv_vmor(m0, m1, vl);
+
+                    auto fidx = __riscv_vfadd(__riscv_vfmul(v, a, vl), b, vl);
+                    auto idx = __riscv_vfncvt_x(__riscv_vfsub(fidx, 0.5f - 1e-6, vl), vl);
+                    idx = __riscv_vmerge(idx, 0, __riscv_vmslt(idx, 0, vl), vl);
+                    idx = __riscv_vmerge(idx, sz-1, __riscv_vmsgt(idx, sz-1, vl), vl);
+                    idx = __riscv_vmerge(idx, -1, m, vl);
+                    __riscv_vse32(buf_idx, idx, vl);
+
+                    for (int i = 0; i < vl; i++) {
+                        int _idx = buf_idx[i] + 1;
+                        ihist[_idx]++;
+                    }
+                }
+                p0 += step0;
+            }
+        }
+    } else if (depth == CV_32F) {
+        const float* p0 = (const float*)src_data;
+        if (d0 == 1) {
+            while (src_height--) {
+                int vl;
+                for (int x = 0; x < src_width; x += vl) {
+                    vl = __riscv_vsetvl_e32m4(src_width - x);
+
+                    auto v = __riscv_vfwcvt_f(__riscv_vle32_v_f32m4(p0 + x, vl), vl);
+
+                    auto m0 = __riscv_vmflt(v, v0_lo, vl);
+                    auto m1 = __riscv_vmfge(v, v0_hi, vl);
+                    auto m = __riscv_vmor(m0, m1, vl);
+
+                    auto fidx = __riscv_vfadd(__riscv_vfmul(v, a, vl), b, vl);
+                    auto idx = __riscv_vfncvt_x(__riscv_vfsub(fidx, 0.5f - 1e-6, vl), vl);
+                    idx = __riscv_vmerge(idx, 0, __riscv_vmslt(idx, 0, vl), vl);
+                    idx = __riscv_vmerge(idx, sz-1, __riscv_vmsgt(idx, sz-1, vl), vl);
+                    idx = __riscv_vmerge(idx, -1, m, vl);
+                    __riscv_vse32(buf_idx, idx, vl);
+
+                    for (int i = 0; i < vl; i++) {
+                        int _idx = buf_idx[i] + 1;
+                        ihist[_idx]++;
+                    }
+                }
+                p0 += step0;
+            }
+        } else {
+            while (src_height--) {
+                int vl;
+                for (int x = 0; x < src_width; x += vl) {
+                    vl = __riscv_vsetvl_e32m4(src_width - x);
+
+                    auto v = __riscv_vfwcvt_f(__riscv_vlse32_v_f32m4(p0 + x*d0, sizeof(float)*d0, vl), vl);
+
+                    auto m0 = __riscv_vmflt(v, v0_lo, vl);
+                    auto m1 = __riscv_vmfge(v, v0_hi, vl);
+                    auto m = __riscv_vmor(m0, m1, vl);
+
+                    auto fidx = __riscv_vfadd(__riscv_vfmul(v, a, vl), b, vl);
+                    auto idx = __riscv_vfncvt_x(__riscv_vfsub(fidx, 0.5f - 1e-6, vl), vl);
+                    idx = __riscv_vmerge(idx, 0, __riscv_vmslt(idx, 0, vl), vl);
+                    idx = __riscv_vmerge(idx, sz-1, __riscv_vmsgt(idx, sz-1, vl), vl);
+                    idx = __riscv_vmerge(idx, -1, m, vl);
+                    __riscv_vse32(buf_idx, idx, vl);
+
+                    for (int i = 0; i < vl; i++) {
+                        int _idx = buf_idx[i] + 1;
+                        ihist[_idx]++;
+                    }
+                }
+                p0 += step0;
+            }
+        }
+    }
+
+    if (accumulate) {
+        cvt32s32f_add32f(ihist+1, hist_data, hist_size);
+    } else {
+        std::memset(hist_data, 0, sizeof(float)*hist_size);
+        cvt_32s32f(ihist+1, hist_data, hist_size);
+    }
+
+    return CV_HAL_ERROR_OK;
+}
+
 #endif // CV_HAL_RVV_1P0_ENABLED

 }}} // cv::rvv_hal::imgproc
--- a/modules/imgproc/src/hal_replacement.hpp
+++ b/modules/imgproc/src/hal_replacement.hpp
@ -1395,6 +1395,26 @@ inline int hal_ni_polygonMoments(const uchar* src_data, size_t src_size, int src
 #define cv_hal_polygonMoments hal_ni_polygonMoments
 //! @endcond

+/**
+   @brief Calculates a histogram of a set of arrays
+   @param src_data Source imgage data
+   @param src_step Source image step
+   @param src_type Source image type
+   @param src_width Source image width
+   @param src_height Source image height
+   @param hist_data Histogram data
+   @param hist_size Histogram size
+   @param ranges Array of dims arrays of the histogram bin boundaries
+   @param uniform Flag indicating whether the histogram is uniform or not
+   @param accumulate Accumulation flag
+*/
+inline int hal_ni_calcHist(const uchar* src_data, size_t src_step, int src_type, int src_width, int src_height, float* hist_data, int hist_size, const float** ranges, bool uniform, bool accumulate)
+{ return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+
+//! @cond IGNORED
+#define cv_hal_calcHist hal_ni_calcHist
+//! @endcond
+
 //! @}

 #if defined(__clang__)
--- a/modules/imgproc/src/histogram.cpp
+++ b/modules/imgproc/src/histogram.cpp
@ -978,6 +978,11 @@ void cv::calcHist( const Mat* images, int nimages, const int* channels,
            && _mask.empty() && images[0].dims <= 2 && ranges && ranges[0],
        ipp_calchist(images[0], hist, histSize[0], ranges, uniform, accumulate));

+    if (nimages == 1 && dims == 1 && channels && channels[0] == 0 && _mask.empty() && images[0].dims <= 2 && ranges && ranges[0]) {
+        CALL_HAL(calcHist, cv_hal_calcHist, images[0].data, images[0].step, images[0].type(), images[0].cols, images[0].rows,
+                                            hist.ptr<float>(), histSize[0], ranges, uniform, accumulate);
+    }
+
    Mat ihist = hist;
    ihist.flags = (ihist.flags & ~CV_MAT_TYPE_MASK)|CV_32S;