core: add ARM64 NEON support for cvRound in fast_math.hpp

2025-12-06 00:19:46 +01:00 · 2025-07-23 14:32:30 +05:30 · 2025-07-23 14:32:30 +05:30 · 6efca656b8
commit 6efca656b8
parent 17d94277f0
1 changed file with 8 additions and 0 deletions
--- a/modules/core/include/opencv2/core/fast_math.hpp
+++ b/modules/core/include/opencv2/core/fast_math.hpp
@@ -201,6 +201,10 @@ cvRound( double value )
 {
 #if defined CV_INLINE_ROUND_DBL
    CV_INLINE_ROUND_DBL(value);
+#elif defined _MSC_VER && defined _M_ARM64
+    float64x1_t v = vdup_n_f64(value);
+    int64x1_t r = vcvtn_s64_f64(v);
+    return static_cast<int>(vget_lane_s64(r, 0));
 #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
    __m128d t = _mm_set_sd( value );
    return _mm_cvtsd_si32(t);
@@ -323,6 +327,10 @@ CV_INLINE int cvRound(float value)
 {
 #if defined CV_INLINE_ROUND_FLT
    CV_INLINE_ROUND_FLT(value);
+#elif defined _MSC_VER && defined _M_ARM64
+    float32x2_t v = vdup_n_f32(value);
+    int32x2_t r = vcvtn_s32_f32(v);
+    return vget_lane_s32(r, 0);
 #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
    __m128 t = _mm_set_ss( value );
    return _mm_cvtss_si32(t);