diff --git a/hal/riscv-rvv/src/core/copy_mask.cpp b/hal/riscv-rvv/src/core/copy_mask.cpp index 8bde41759a..12a211af5b 100644 --- a/hal/riscv-rvv/src/core/copy_mask.cpp +++ b/hal/riscv-rvv/src/core/copy_mask.cpp @@ -98,7 +98,7 @@ static int copyToMasked_e64c4(const uchar *src_data, size_t src_step, using CopyToMaskedFunc = int (*)(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int); int copyToMasked(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int type, const uchar *mask_data, size_t mask_step, int mask_type) { - int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + int cn = CV_MAT_CN(type); int mdepth = CV_MAT_DEPTH(mask_type), mcn = CV_MAT_CN(mask_type); if (mcn > 1 || mdepth != CV_8U) { @@ -121,7 +121,7 @@ int copyToMasked(const uchar *src_data, size_t src_step, uchar *dst_data, size_t return CV_HAL_ERROR_NOT_IMPLEMENTED; } - int elem_size1 = CV_ELEM_SIZE1(type); + size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type)); bool src_continuous = (src_step == width * elem_size1 * cn || (src_step != width * elem_size1 * cn && height == 1)); bool dst_continuous = (dst_step == width * elem_size1 * cn || (dst_step != width * elem_size1 * cn && height == 1)); bool mask_continuous = (mask_step == static_cast<size_t>(width)); diff --git a/hal/riscv-rvv/src/core/dotprod.cpp b/hal/riscv-rvv/src/core/dotprod.cpp index 6c896a59e1..11a44697de 100644 --- a/hal/riscv-rvv/src/core/dotprod.cpp +++ b/hal/riscv-rvv/src/core/dotprod.cpp @@ -190,7 +190,7 @@ int dotprod(const uchar *a_data, size_t a_step, const uchar *b_data, size_t b_st return CV_HAL_ERROR_NOT_IMPLEMENTED; } - int elem_size1 = CV_ELEM_SIZE1(type); + size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type)); bool a_continuous = (a_step == width * elem_size1 * cn); bool b_continuous = (b_step == width * elem_size1 * cn); size_t nplanes = 1; diff --git a/hal/riscv-rvv/src/core/norm.cpp b/hal/riscv-rvv/src/core/norm.cpp index e07b34049a..b588b1b77c 100644 --- 
a/hal/riscv-rvv/src/core/norm.cpp +++ b/hal/riscv-rvv/src/core/norm.cpp @@ -999,7 +999,7 @@ int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, }, }; - int elem_size1 = CV_ELEM_SIZE1(type); + size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type)); bool src_continuous = (src_step == width * elem_size1 * cn || (src_step != width * elem_size1 * cn && height == 1)); bool mask_continuous = (mask_step == static_cast<size_t>(width)); size_t nplanes = 1; diff --git a/hal/riscv-rvv/src/core/norm_diff.cpp b/hal/riscv-rvv/src/core/norm_diff.cpp index adf2555694..918ce04f99 100644 --- a/hal/riscv-rvv/src/core/norm_diff.cpp +++ b/hal/riscv-rvv/src/core/norm_diff.cpp @@ -1111,7 +1111,7 @@ int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2 }, }; - int elem_size1 = CV_ELEM_SIZE1(type); + size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type)); bool src_continuous = (src1_step == width * elem_size1 * cn || (src1_step != width * elem_size1 * cn && height == 1)); src_continuous &= (src2_step == width * elem_size1 * cn || (src2_step != width * elem_size1 * cn && height == 1)); bool mask_continuous = (mask_step == static_cast<size_t>(width)); diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 9d5eb56b45..44b41a7673 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -24,6 +24,17 @@ void test_hal_intrin_float16(); //================================================================================================== +#if defined (__GNUC__) && defined(__has_warning) + #if __has_warning("-Wmaybe-uninitialized") + #define CV_DISABLE_GCC_MAYBE_UNINITIALIZED_WARNINGS + #endif +#endif + +#if defined (CV_DISABLE_GCC_MAYBE_UNINITIALIZED_WARNINGS) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + template <typename R> struct Data { typedef typename VTraits<R>::lane_type LaneType; @@ -2409,6 +2420,10 @@ void test_hal_intrin_float16() } 
#endif*/ +#if defined (CV_DISABLE_GCC_MAYBE_UNINITIALIZED_WARNINGS) +#pragma GCC diagnostic pop +#endif + #endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY //CV_CPU_OPTIMIZATION_NAMESPACE_END diff --git a/modules/dnn/src/layers/cpu_kernels/softmax.cpp b/modules/dnn/src/layers/cpu_kernels/softmax.cpp index fd55c1c1de..3b670232cc 100644 --- a/modules/dnn/src/layers/cpu_kernels/softmax.cpp +++ b/modules/dnn/src/layers/cpu_kernels/softmax.cpp @@ -37,8 +37,6 @@ void softmax(Mat &dst, const Mat &src, int axis, int axisBias, int axisStep){ #if (CV_SIMD || CV_SIMD_SCALABLE) const int nlanes = VTraits<v_float32>::vlanes(); - // the number of redundant dimension - size_t redundantDim = nlanes - axisStep % nlanes; #endif parallel_for_(Range(0, (int) totalTasks), [&](const Range &range) { @@ -50,61 +48,55 @@ void softmax(Mat &dst, const Mat &src, int axis, int axisBias, int axisStep){ size_t innerDim = i % innerSize; size_t srcOffset = outerDim * outerStep + innerDim; // copy data from src to buf along axis, since the data may not be continuous - for (size_t cnDim = 0; cnDim < axisStep; cnDim++) - axisBuf[cnDim] = srcPtr[srcOffset + (cnDim + axisBias) * cnStep]; + for (size_t _cnDim = 0; _cnDim < axisStep; _cnDim++) + axisBuf[_cnDim] = srcPtr[srcOffset + (_cnDim + axisBias) * cnStep]; - float s = 0.f; + float maxVal = -FLT_MAX; + int cnDim = 0; #if (CV_SIMD || CV_SIMD_SCALABLE) - // make the value of the redundant dimension to be -FLT_MAX - if (redundantDim != nlanes) { - for (size_t j = axisStep; j < axisStep + redundantDim; j++) - axisBuf[j] = -FLT_MAX; - } // calculate the max value along the axis - v_float32 vmax = vx_load(axisBuf); - for (size_t cnDim = nlanes; cnDim < axisStep; cnDim += nlanes) { + v_float32 vmax = vx_setall_f32(-FLT_MAX); + for (; cnDim < axisStep; cnDim += nlanes) { + if (cnDim > axisStep - nlanes) { + if (cnDim == 0) { break; } + cnDim = axisStep - nlanes; + } v_float32 val = vx_load(axisBuf + cnDim); vmax = v_max(vmax, val); } - float maxVal = v_reduce_max(vmax); 
+ maxVal = v_reduce_max(vmax); +#endif + for (; cnDim < axisStep; cnDim++) { + maxVal = std::max(maxVal, axisBuf[cnDim]); + } + float s = 0.f; + cnDim = 0; +#if (CV_SIMD || CV_SIMD_SCALABLE) // calculate the exp value along the axis v_float32 vs = vx_setzero_f32(); vmax = vx_setall_f32(maxVal); - v_float32 val; // calculate and sum all data along axis - for (size_t cnDim = 0; cnDim < axisStep; cnDim += nlanes) { - val = vx_load(axisBuf + cnDim); + for (; cnDim <= axisStep - nlanes; cnDim += nlanes) { + // cannot apply halide trick here due to axisBuf is constantly updated + v_float32 val = vx_load(axisBuf + cnDim); val = v_sub(val, vmax); val = v_exp(val); - vs = v_add(vs, val); v_store(axisBuf + cnDim, val); } - s = v_reduce_sum(vs); - // subtract the value of the redundant dimension - if (redundantDim != nlanes) { - float _val[VTraits<v_float32>::max_nlanes]; - v_store(_val, val); - for (size_t j = nlanes - redundantDim; j < nlanes; j++) - s -= _val[j]; - } -#else - float maxVal = axisBuf[0]; - for (size_t cnDim = 1; cnDim < axisStep; cnDim++) { - maxVal = std::max(maxVal, axisBuf[cnDim]); - } - for (size_t j = 0; j < axisStep; j++) { - axisBuf[j] = expf(axisBuf[j] - maxVal); - s += axisBuf[j]; - } #endif + for (; cnDim < axisStep; cnDim++) { + axisBuf[cnDim] = expf(axisBuf[cnDim] - maxVal); + s += axisBuf[cnDim]; + } + s = 1.f / s; // copy back the result to src - for (size_t cnDim = 0; cnDim < axisStep; cnDim++) - dstPtr[srcOffset + (cnDim + axisBias) * cnStep] = axisBuf[cnDim] * s; + for (size_t _cnDim = 0; _cnDim < axisStep; _cnDim++) + dstPtr[srcOffset + (_cnDim + axisBias) * cnStep] = axisBuf[_cnDim] * s; } }, nstripes); } diff --git a/modules/imgproc/src/color_hsv.simd.hpp b/modules/imgproc/src/color_hsv.simd.hpp index c450d609e5..8ae663dff4 100644 --- a/modules/imgproc/src/color_hsv.simd.hpp +++ b/modules/imgproc/src/color_hsv.simd.hpp @@ -850,7 +850,7 @@ struct RGB2HLS_b for ( ; j <= dn*bufChannels - nBlock*bufChannels; j += nBlock*bufChannels, src += nBlock*4) { - 
 v_uint8 rgb0, rgb1, rgb2, rgb3, dummy; + v_uint8 rgb0, rgb1, rgb2, dummy; v_load_deinterleave(src, rgb0, rgb1, rgb2, dummy); v_uint16 d0,d1,d2,d3,d4,d5; diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp index c5e1ced02a..b106def4bb 100644 --- a/modules/imgproc/src/pyramids.cpp +++ b/modules/imgproc/src/pyramids.cpp @@ -769,7 +769,6 @@ template <> int PyrUpVecVOneRow<int, uchar>(int** src, uchar* dst, int width) r20 = *(row2 + x); int _2r10 = r10 + r10; int d = r00 + r20 + (_2r10 + _2r10 + _2r10); - int d_shifted = (r10 + r20) << 2; // Similar to v_rshr_pack_u<6>(d, vx_setzero_s16()).get0() *(dst + x) = (int)((((unsigned int)d) + ((1 << (6 - 1)))) >> 6); }