mirror of
https://github.com/zebrajr/opencv.git
synced 2025-12-06 12:19:50 +01:00
Merge pull request #27096 from amane-ame:moments_hal_rvv
Add RISC-V HAL implementation for cv::moments #27096 This patch implements `cv_hal_imageMoments` using native intrinsics, optimizing the performance of `cv::moments` for data types `CV_16U/CV_16S/CV_32F/CV_64F`. Tested on MUSE-PI (Spacemit X60) for both gcc 14.2 and clang 20.0. ``` $ ./opencv_test_imgproc --gtest_filter="*Moments*" $ ./opencv_perf_imgproc --gtest_filter="*Moments*" --perf_min_samples=1000 --perf_force_samples=1000 ```  ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
67ffb230f1
commit
46fbe1895a
1
3rdparty/hal_rvv/hal_rvv.hpp
vendored
1
3rdparty/hal_rvv/hal_rvv.hpp
vendored
|
|
@ -44,6 +44,7 @@
|
|||
#include "hal_rvv_1p0/svd.hpp" // core
|
||||
#include "hal_rvv_1p0/sqrt.hpp" // core
|
||||
|
||||
#include "hal_rvv_1p0/moments.hpp" // imgproc
|
||||
#include "hal_rvv_1p0/filter.hpp" // imgproc
|
||||
#include "hal_rvv_1p0/pyramids.hpp" // imgproc
|
||||
#include "hal_rvv_1p0/color.hpp" // imgproc
|
||||
|
|
|
|||
191
3rdparty/hal_rvv/hal_rvv_1p0/moments.hpp
vendored
Normal file
191
3rdparty/hal_rvv/hal_rvv_1p0/moments.hpp
vendored
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2025, Institute of Software, Chinese Academy of Sciences.
|
||||
|
||||
#ifndef OPENCV_HAL_RVV_MOMENTS_HPP_INCLUDED
|
||||
#define OPENCV_HAL_RVV_MOMENTS_HPP_INCLUDED
|
||||
|
||||
#include <riscv_vector.h>
|
||||
|
||||
namespace cv { namespace cv_hal_rvv {
|
||||
|
||||
namespace imageMoments {
|
||||
#undef cv_hal_imageMoments
|
||||
#define cv_hal_imageMoments cv::cv_hal_rvv::imageMoments::imageMoments
|
||||
|
||||
class MomentsInvoker : public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
template<typename... Args>
|
||||
MomentsInvoker(std::function<int(int, int, Args...)> _func, Args&&... args)
|
||||
{
|
||||
func = std::bind(_func, std::placeholders::_1, std::placeholders::_2, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
virtual void operator()(const Range& range) const override
|
||||
{
|
||||
func(range.start, range.end);
|
||||
}
|
||||
|
||||
private:
|
||||
std::function<int(int, int)> func;
|
||||
};
|
||||
|
||||
template<typename... Args>
|
||||
static inline int invoke(int width, int height, std::function<int(int, int, Args...)> func, Args&&... args)
|
||||
{
|
||||
cv::parallel_for_(Range(1, height), MomentsInvoker(func, std::forward<Args>(args)...), static_cast<double>((width - 1) * height) / (1 << 10));
|
||||
return func(0, 1, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template<typename helper> struct rvv;
|
||||
template<> struct rvv<RVV_U32M2>
|
||||
{
|
||||
static inline vuint8mf2_t vid(size_t a) { return __riscv_vid_v_u8mf2(a); }
|
||||
static inline RVV_U32M2::VecType vcvt(vuint8mf2_t a, size_t b) { return __riscv_vzext_vf4(a, b); }
|
||||
};
|
||||
template<> struct rvv<RVV_U32M4>
|
||||
{
|
||||
static inline vuint8m1_t vid(size_t a) { return __riscv_vid_v_u8m1(a); }
|
||||
static inline RVV_U32M4::VecType vcvt(vuint8m1_t a, size_t b) { return __riscv_vzext_vf4(a, b); }
|
||||
};
|
||||
template<> struct rvv<RVV_I32M2>
|
||||
{
|
||||
static inline vuint8mf2_t vid(size_t a) { return __riscv_vid_v_u8mf2(a); }
|
||||
static inline RVV_I32M2::VecType vcvt(vuint8mf2_t a, size_t b) { return RVV_I32M2::reinterpret(__riscv_vzext_vf4(a, b)); }
|
||||
};
|
||||
template<> struct rvv<RVV_F64M4>
|
||||
{
|
||||
static inline vuint8mf2_t vid(size_t a) { return __riscv_vid_v_u8mf2(a); }
|
||||
static inline RVV_F64M4::VecType vcvt(vuint8mf2_t a, size_t b) { return __riscv_vfcvt_f(__riscv_vzext_vf8(a, b), b); }
|
||||
};
|
||||
|
||||
constexpr int TILE_SIZE = 32;
|
||||
|
||||
template<bool binary, typename T, typename helperT, typename helperWT, typename helperMT>
|
||||
static inline int imageMoments(int start, int end, const uchar* src_data, size_t src_step, int full_width, int full_height, double* m, std::mutex* mt)
|
||||
{
|
||||
double mm[10] = {0};
|
||||
for (int yy = start; yy < end; yy++)
|
||||
{
|
||||
const int y = yy * TILE_SIZE;
|
||||
const int height = std::min(TILE_SIZE, full_height - y);
|
||||
for (int x = 0; x < full_width; x += TILE_SIZE)
|
||||
{
|
||||
const int width = std::min(TILE_SIZE, full_width - x);
|
||||
double mom[10] = {0};
|
||||
|
||||
for (int i = 0; i < height; i++)
|
||||
{
|
||||
auto id = rvv<helperWT>::vid(helperT::setvlmax());
|
||||
auto v0 = helperWT::vmv(0, helperWT::setvlmax());
|
||||
auto v1 = helperWT::vmv(0, helperWT::setvlmax());
|
||||
auto v2 = helperWT::vmv(0, helperWT::setvlmax());
|
||||
auto v3 = helperMT::vmv(0, helperMT::setvlmax());
|
||||
|
||||
int vl;
|
||||
for (int j = 0; j < width; j += vl)
|
||||
{
|
||||
vl = helperT::setvl(width - j);
|
||||
typename helperWT::VecType p;
|
||||
if (binary)
|
||||
{
|
||||
auto src = RVV_SameLen<T, helperT>::vload(reinterpret_cast<const T*>(src_data + (i + y) * src_step) + j + x, vl);
|
||||
p = __riscv_vmerge(helperWT::vmv(0, vl), helperWT::vmv(255, vl), RVV_SameLen<T, helperT>::vmne(src, 0, vl), vl);
|
||||
}
|
||||
else
|
||||
{
|
||||
p = helperWT::cast(helperT::vload(reinterpret_cast<const typename helperT::ElemType*>(src_data + (i + y) * src_step) + j + x, vl), vl);
|
||||
}
|
||||
auto xx = rvv<helperWT>::vcvt(id, vl);
|
||||
auto xp = helperWT::vmul(xx, p, vl);
|
||||
v0 = helperWT::vadd_tu(v0, v0, p, vl);
|
||||
v1 = helperWT::vadd_tu(v1, v1, xp, vl);
|
||||
auto xxp = helperWT::vmul(xx, xp, vl);
|
||||
v2 = helperWT::vadd_tu(v2, v2, xxp, vl);
|
||||
v3 = helperMT::vadd_tu(v3, v3, helperMT::vmul(helperMT::cast(xx, vl), helperMT::cast(xxp, vl), vl), vl);
|
||||
id = __riscv_vadd(id, vl, vl);
|
||||
}
|
||||
|
||||
auto x0 = RVV_BaseType<helperWT>::vmv_x(helperWT::vredsum(v0, RVV_BaseType<helperWT>::vmv_s(0, RVV_BaseType<helperWT>::setvlmax()), helperWT::setvlmax()));
|
||||
auto x1 = RVV_BaseType<helperWT>::vmv_x(helperWT::vredsum(v1, RVV_BaseType<helperWT>::vmv_s(0, RVV_BaseType<helperWT>::setvlmax()), helperWT::setvlmax()));
|
||||
auto x2 = RVV_BaseType<helperWT>::vmv_x(helperWT::vredsum(v2, RVV_BaseType<helperWT>::vmv_s(0, RVV_BaseType<helperWT>::setvlmax()), helperWT::setvlmax()));
|
||||
auto x3 = RVV_BaseType<helperMT>::vmv_x(helperMT::vredsum(v3, RVV_BaseType<helperMT>::vmv_s(0, RVV_BaseType<helperMT>::setvlmax()), helperMT::setvlmax()));
|
||||
typename helperWT::ElemType py = i * x0, sy = i*i;
|
||||
|
||||
mom[9] += static_cast<typename helperMT::ElemType>(py) * sy;
|
||||
mom[8] += static_cast<typename helperMT::ElemType>(x1) * sy;
|
||||
mom[7] += static_cast<typename helperMT::ElemType>(x2) * i;
|
||||
mom[6] += x3;
|
||||
mom[5] += x0 * sy;
|
||||
mom[4] += x1 * i;
|
||||
mom[3] += x2;
|
||||
mom[2] += py;
|
||||
mom[1] += x1;
|
||||
mom[0] += x0;
|
||||
}
|
||||
|
||||
if (binary)
|
||||
{
|
||||
mom[0] /= 255, mom[1] /= 255, mom[2] /= 255, mom[3] /= 255, mom[4] /= 255;
|
||||
mom[5] /= 255, mom[6] /= 255, mom[7] /= 255, mom[8] /= 255, mom[9] /= 255;
|
||||
}
|
||||
double xm = x * mom[0], ym = y * mom[0];
|
||||
mm[0] += mom[0];
|
||||
mm[1] += mom[1] + xm;
|
||||
mm[2] += mom[2] + ym;
|
||||
mm[3] += mom[3] + x * (mom[1] * 2 + xm);
|
||||
mm[4] += mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
mm[5] += mom[5] + y * (mom[2] * 2 + ym);
|
||||
mm[6] += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
mm[7] += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
mm[8] += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
mm[9] += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lk(*mt);
|
||||
for (int i = 0; i < 10; i++)
|
||||
m[i] += mm[i];
|
||||
return CV_HAL_ERROR_OK;
|
||||
}
|
||||
|
||||
// the algorithm is copied from imgproc/src/moments.cpp,
|
||||
// in the function cv::Moments cv::moments
|
||||
inline int imageMoments(const uchar* src_data, size_t src_step, int src_type, int width, int height, bool binary, double m[10])
|
||||
{
|
||||
if (src_type != CV_16UC1 && src_type != CV_16SC1 && src_type != CV_32FC1 && src_type != CV_64FC1)
|
||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
std::fill(m, m + 10, 0);
|
||||
const int cnt = (height + TILE_SIZE - 1) / TILE_SIZE;
|
||||
std::mutex mt;
|
||||
switch (static_cast<int>(binary)*100 + src_type)
|
||||
{
|
||||
case CV_16UC1:
|
||||
return invoke(width, cnt, {imageMoments<false, ushort, RVV_U16M1, RVV_U32M2, RVV_U64M4>}, src_data, src_step, width, height, m, &mt);
|
||||
case CV_16SC1:
|
||||
return invoke(width, cnt, {imageMoments<false, short, RVV_I16M1, RVV_I32M2, RVV_I64M4>}, src_data, src_step, width, height, m, &mt);
|
||||
case CV_32FC1:
|
||||
return invoke(width, cnt, {imageMoments<false, float, RVV_F32M2, RVV_F64M4, RVV_F64M4>}, src_data, src_step, width, height, m, &mt);
|
||||
case CV_64FC1:
|
||||
return invoke(width, cnt, {imageMoments<false, double, RVV_F64M4, RVV_F64M4, RVV_F64M4>}, src_data, src_step, width, height, m, &mt);
|
||||
case 100 + CV_16UC1:
|
||||
return invoke(width, cnt, {imageMoments<true, ushort, RVV_U8M1, RVV_U32M4, RVV_U32M4>}, src_data, src_step, width, height, m, &mt);
|
||||
case 100 + CV_16SC1:
|
||||
return invoke(width, cnt, {imageMoments<true, short, RVV_U8M1, RVV_U32M4, RVV_U32M4>}, src_data, src_step, width, height, m, &mt);
|
||||
case 100 + CV_32FC1:
|
||||
return invoke(width, cnt, {imageMoments<true, float, RVV_U8M1, RVV_U32M4, RVV_U32M4>}, src_data, src_step, width, height, m, &mt);
|
||||
case 100 + CV_64FC1:
|
||||
return invoke(width, cnt, {imageMoments<true, double, RVV_U8M1, RVV_U32M4, RVV_U32M4>}, src_data, src_step, width, height, m, &mt);
|
||||
}
|
||||
|
||||
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
} // cv::cv_hal_rvv::imageMoments
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
39
3rdparty/hal_rvv/hal_rvv_1p0/types.hpp
vendored
39
3rdparty/hal_rvv/hal_rvv_1p0/types.hpp
vendored
|
|
@ -94,7 +94,7 @@ using RVV_F64M8 = struct RVV<double, LMUL_8>;
|
|||
// Only for dst type lmul >= 1
|
||||
template <typename Dst_T, typename RVV_T>
|
||||
using RVV_SameLen =
|
||||
RVV<Dst_T, RVV_LMUL(RVV_T::lmul / sizeof(typename RVV_T::ElemType) * sizeof(Dst_T))>;
|
||||
RVV<Dst_T, RVV_LMUL(RVV_T::lmul * sizeof(Dst_T) / sizeof(typename RVV_T::ElemType))>;
|
||||
|
||||
template <size_t DstSize> struct RVV_ToIntHelper;
|
||||
template <size_t DstSize> struct RVV_ToUintHelper;
|
||||
|
|
@ -117,7 +117,7 @@ using RVV_BaseType = RVV<typename RVV_T::ElemType, LMUL_1>;
|
|||
|
||||
// -------------------------------Supported operations--------------------------------
|
||||
|
||||
#define HAL_RVV_SIZE_RELATED(EEW, TYPE, LMUL, S_OR_F, X_OR_F, IS_U, IS_F) \
|
||||
#define HAL_RVV_SIZE_RELATED(EEW, TYPE, LMUL, S_OR_F, X_OR_F, IS_U, IS_F, IS_O) \
|
||||
static inline size_t setvlmax() { return __riscv_vsetvlmax_e##EEW##LMUL(); } \
|
||||
static inline size_t setvl(size_t vl) { return __riscv_vsetvl_e##EEW##LMUL(vl); } \
|
||||
static inline VecType vload(const ElemType* ptr, size_t vl) { \
|
||||
|
|
@ -153,7 +153,7 @@ static inline VecType vmv_s(ElemType a, size_t vl) {
|
|||
} \
|
||||
HAL_RVV_SIZE_RELATED_CUSTOM(EEW, TYPE, LMUL)
|
||||
|
||||
#define HAL_RVV_SIZE_UNRELATED(S_OR_F, X_OR_F, IS_U, IS_F) \
|
||||
#define HAL_RVV_SIZE_UNRELATED(S_OR_F, X_OR_F, IS_U, IS_F, IS_O) \
|
||||
static inline ElemType vmv_x(VecType vs2) { return __riscv_v##IS_F##mv_##X_OR_F(vs2); } \
|
||||
\
|
||||
static inline BoolType vmlt(VecType vs2, VecType vs1, size_t vl) { \
|
||||
|
|
@ -174,6 +174,12 @@ static inline BoolType vmgt(VecType vs2, ElemType vs1, size_t vl) {
|
|||
static inline BoolType vmge(VecType vs2, VecType vs1, size_t vl) { \
|
||||
return __riscv_vm##S_OR_F##ge##IS_U(vs2, vs1, vl); \
|
||||
} \
|
||||
static inline BoolType vmeq(VecType vs2, ElemType vs1, size_t vl) { \
|
||||
return __riscv_vm##S_OR_F##eq(vs2, vs1, vl); \
|
||||
} \
|
||||
static inline BoolType vmne(VecType vs2, ElemType vs1, size_t vl) { \
|
||||
return __riscv_vm##S_OR_F##ne(vs2, vs1, vl); \
|
||||
} \
|
||||
static inline BoolType vmlt_mu(BoolType vm, BoolType vd, VecType vs2, VecType vs1, size_t vl) { \
|
||||
return __riscv_vm##S_OR_F##lt##IS_U##_mu(vm, vd, vs2, vs1, vl); \
|
||||
} \
|
||||
|
|
@ -187,6 +193,22 @@ static inline BoolType vmge_mu(BoolType vm, BoolType vd, VecType vs2, VecType vs
|
|||
return __riscv_vm##S_OR_F##ge##IS_U##_mu(vm, vd, vs2, vs1, vl); \
|
||||
} \
|
||||
\
|
||||
static inline VecType vadd(VecType vs2, VecType vs1, size_t vl) { \
|
||||
return __riscv_v##IS_F##add(vs2, vs1, vl); \
|
||||
} \
|
||||
static inline VecType vsub(VecType vs2, VecType vs1, size_t vl) { \
|
||||
return __riscv_v##IS_F##sub(vs2, vs1, vl); \
|
||||
} \
|
||||
static inline VecType vadd_tu(VecType vd, VecType vs2, VecType vs1, size_t vl) { \
|
||||
return __riscv_v##IS_F##add_tu(vd, vs2, vs1, vl); \
|
||||
} \
|
||||
static inline VecType vsub_tu(VecType vd, VecType vs2, VecType vs1, size_t vl) { \
|
||||
return __riscv_v##IS_F##sub_tu(vd, vs2, vs1, vl); \
|
||||
} \
|
||||
static inline VecType vmul(VecType vs2, VecType vs1, size_t vl) { \
|
||||
return __riscv_v##IS_F##mul(vs2, vs1, vl); \
|
||||
} \
|
||||
\
|
||||
static inline VecType vmin(VecType vs2, VecType vs1, size_t vl) { \
|
||||
return __riscv_v##IS_F##min##IS_U(vs2, vs1, vl); \
|
||||
} \
|
||||
|
|
@ -211,9 +233,12 @@ static inline BaseType vredmin(VecType vs2, BaseType vs1, size_t vl) {
|
|||
} \
|
||||
static inline BaseType vredmax(VecType vs2, BaseType vs1, size_t vl) { \
|
||||
return __riscv_v##IS_F##redmax##IS_U(vs2, vs1, vl); \
|
||||
} \
|
||||
static inline BaseType vredsum(VecType vs2, BaseType vs1, size_t vl) { \
|
||||
return __riscv_v##IS_F##red##IS_O##sum(vs2, vs1, vl); \
|
||||
}
|
||||
|
||||
#define HAL_RVV_BOOL_TYPE(S_OR_F, X_OR_F, IS_U, IS_F) \
|
||||
#define HAL_RVV_BOOL_TYPE(S_OR_F, X_OR_F, IS_U, IS_F, IS_O) \
|
||||
decltype(__riscv_vm##S_OR_F##eq(std::declval<VecType>(), std::declval<VecType>(), 0))
|
||||
|
||||
#define HAL_RVV_DEFINE_ONE(ELEM_TYPE, VEC_TYPE, LMUL_TYPE, \
|
||||
|
|
@ -259,9 +284,9 @@ static inline BaseType vredmax(VecType vs2, BaseType vs1, size_t vl) {
|
|||
HAL_RVV_DEFINE_ONE(ELEM_TYPE, VEC_TYPE, LMUL_8, \
|
||||
EEW, TYPE, m8, __VA_ARGS__)
|
||||
|
||||
#define HAL_RVV_SIGNED_PARAM s,x, ,
|
||||
#define HAL_RVV_UNSIGNED_PARAM s,x,u,
|
||||
#define HAL_RVV_FLOAT_PARAM f,f, ,f
|
||||
#define HAL_RVV_SIGNED_PARAM s,x, , ,
|
||||
#define HAL_RVV_UNSIGNED_PARAM s,x,u, ,
|
||||
#define HAL_RVV_FLOAT_PARAM f,f, ,f,o
|
||||
|
||||
// -------------------------------Define Unsigned Integer--------------------------------
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user