diff --git a/hal/riscv-rvv/src/core/copy_mask.cpp b/hal/riscv-rvv/src/core/copy_mask.cpp index 8bde41759a..12a211af5b 100644 --- a/hal/riscv-rvv/src/core/copy_mask.cpp +++ b/hal/riscv-rvv/src/core/copy_mask.cpp @@ -98,7 +98,7 @@ static int copyToMasked_e64c4(const uchar *src_data, size_t src_step, using CopyToMaskedFunc = int (*)(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int); int copyToMasked(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int type, const uchar *mask_data, size_t mask_step, int mask_type) { - int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + int cn = CV_MAT_CN(type); int mdepth = CV_MAT_DEPTH(mask_type), mcn = CV_MAT_CN(mask_type); if (mcn > 1 || mdepth != CV_8U) { @@ -121,7 +121,7 @@ int copyToMasked(const uchar *src_data, size_t src_step, uchar *dst_data, size_t return CV_HAL_ERROR_NOT_IMPLEMENTED; } - int elem_size1 = CV_ELEM_SIZE1(type); + size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type)); bool src_continuous = (src_step == width * elem_size1 * cn || (src_step != width * elem_size1 * cn && height == 1)); bool dst_continuous = (dst_step == width * elem_size1 * cn || (dst_step != width * elem_size1 * cn && height == 1)); bool mask_continuous = (mask_step == static_cast<size_t>(width)); diff --git a/hal/riscv-rvv/src/core/dotprod.cpp b/hal/riscv-rvv/src/core/dotprod.cpp index 6c896a59e1..11a44697de 100644 --- a/hal/riscv-rvv/src/core/dotprod.cpp +++ b/hal/riscv-rvv/src/core/dotprod.cpp @@ -190,7 +190,7 @@ int dotprod(const uchar *a_data, size_t a_step, const uchar *b_data, size_t b_st return CV_HAL_ERROR_NOT_IMPLEMENTED; } - int elem_size1 = CV_ELEM_SIZE1(type); + size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type)); bool a_continuous = (a_step == width * elem_size1 * cn); bool b_continuous = (b_step == width * elem_size1 * cn); size_t nplanes = 1; diff --git a/hal/riscv-rvv/src/core/norm.cpp b/hal/riscv-rvv/src/core/norm.cpp index e07b34049a..b588b1b77c 100644 --- 
a/hal/riscv-rvv/src/core/norm.cpp +++ b/hal/riscv-rvv/src/core/norm.cpp @@ -999,7 +999,7 @@ int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, }, }; - int elem_size1 = CV_ELEM_SIZE1(type); + size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type)); bool src_continuous = (src_step == width * elem_size1 * cn || (src_step != width * elem_size1 * cn && height == 1)); bool mask_continuous = (mask_step == static_cast<size_t>(width)); size_t nplanes = 1; diff --git a/hal/riscv-rvv/src/core/norm_diff.cpp b/hal/riscv-rvv/src/core/norm_diff.cpp index adf2555694..918ce04f99 100644 --- a/hal/riscv-rvv/src/core/norm_diff.cpp +++ b/hal/riscv-rvv/src/core/norm_diff.cpp @@ -1111,7 +1111,7 @@ int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2 }, }; - int elem_size1 = CV_ELEM_SIZE1(type); + size_t elem_size1 = static_cast<size_t>(CV_ELEM_SIZE1(type)); bool src_continuous = (src1_step == width * elem_size1 * cn || (src1_step != width * elem_size1 * cn && height == 1)); src_continuous &= (src2_step == width * elem_size1 * cn || (src2_step != width * elem_size1 * cn && height == 1)); bool mask_continuous = (mask_step == static_cast<size_t>(width)); diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 9d5eb56b45..44b41a7673 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -24,6 +24,17 @@ void test_hal_intrin_float16(); //================================================================================================== +#if defined (__GNUC__) && defined(__has_warning) + #if __has_warning("-Wmaybe-uninitialized") + #define CV_DISABLE_GCC_MAYBE_UNINITIALIZED_WARNINGS + #endif +#endif + +#if defined (CV_DISABLE_GCC_MAYBE_UNINITIALIZED_WARNINGS) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + template <typename R> struct Data { typedef typename VTraits<R>::lane_type LaneType; @@ -2409,6 +2420,10 @@ void test_hal_intrin_float16() } 
#endif*/ +#if defined (CV_DISABLE_GCC_MAYBE_UNINITIALIZED_WARNINGS) +#pragma GCC diagnostic pop +#endif + #endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY //CV_CPU_OPTIMIZATION_NAMESPACE_END diff --git a/modules/dnn/src/layers/cpu_kernels/softmax.cpp b/modules/dnn/src/layers/cpu_kernels/softmax.cpp index fd55c1c1de..3b670232cc 100644 --- a/modules/dnn/src/layers/cpu_kernels/softmax.cpp +++ b/modules/dnn/src/layers/cpu_kernels/softmax.cpp @@ -37,8 +37,6 @@ void softmax(Mat &dst, const Mat &src, int axis, int axisBias, int axisStep){ #if (CV_SIMD || CV_SIMD_SCALABLE) const int nlanes = VTraits<v_float32>::vlanes(); - // the number of redundant dimension - size_t redundantDim = nlanes - axisStep % nlanes; #endif parallel_for_(Range(0, (int) totalTasks), [&](const Range &range) { @@ -50,61 +48,55 @@ void softmax(Mat &dst, const Mat &src, int axis, int axisBias, int axisStep){ size_t innerDim = i % innerSize; size_t srcOffset = outerDim * outerStep + innerDim; // copy data from src to buf along axis, since the data may not be continuous - for (size_t cnDim = 0; cnDim < axisStep; cnDim++) - axisBuf[cnDim] = srcPtr[srcOffset + (cnDim + axisBias) * cnStep]; + for (size_t _cnDim = 0; _cnDim < axisStep; _cnDim++) + axisBuf[_cnDim] = srcPtr[srcOffset + (_cnDim + axisBias) * cnStep]; - float s = 0.f; + float maxVal = -FLT_MAX; + int cnDim = 0; #if (CV_SIMD || CV_SIMD_SCALABLE) - // make the value of the redundant dimension to be -FLT_MAX - if (redundantDim != nlanes) { - for (size_t j = axisStep; j < axisStep + redundantDim; j++) - axisBuf[j] = -FLT_MAX; - } // calculate the max value along the axis - v_float32 vmax = vx_load(axisBuf); - for (size_t cnDim = nlanes; cnDim < axisStep; cnDim += nlanes) { + v_float32 vmax = vx_setall_f32(-FLT_MAX); + for (; cnDim < axisStep; cnDim += nlanes) { + if (cnDim > axisStep - nlanes) { + if (cnDim == 0) { break; } + cnDim = axisStep - nlanes; + } v_float32 val = vx_load(axisBuf + cnDim); vmax = v_max(vmax, val); } - float maxVal = v_reduce_max(vmax); 
+ maxVal = v_reduce_max(vmax); +#endif + for (; cnDim < axisStep; cnDim++) { + maxVal = std::max(maxVal, axisBuf[cnDim]); + } + float s = 0.f; + cnDim = 0; +#if (CV_SIMD || CV_SIMD_SCALABLE) // calculate the exp value along the axis v_float32 vs = vx_setzero_f32(); vmax = vx_setall_f32(maxVal); - v_float32 val; // calculate and sum all data along axis - for (size_t cnDim = 0; cnDim < axisStep; cnDim += nlanes) { - val = vx_load(axisBuf + cnDim); + for (; cnDim <= axisStep - nlanes; cnDim += nlanes) { + // cannot apply halide trick here due to axisBuf is constantly updated + v_float32 val = vx_load(axisBuf + cnDim); val = v_sub(val, vmax); val = v_exp(val); - vs = v_add(vs, val); v_store(axisBuf + cnDim, val); } - s = v_reduce_sum(vs); - // subtract the value of the redundant dimension - if (redundantDim != nlanes) { - float _val[VTraits<v_float32>::max_nlanes]; - v_store(_val, val); - for (size_t j = nlanes - redundantDim; j < nlanes; j++) - s -= _val[j]; - } -#else - float maxVal = axisBuf[0]; - for (size_t cnDim = 1; cnDim < axisStep; cnDim++) { - maxVal = std::max(maxVal, axisBuf[cnDim]); - } - for (size_t j = 0; j < axisStep; j++) { - axisBuf[j] = expf(axisBuf[j] - maxVal); - s += axisBuf[j]; - } #endif + for (; cnDim < axisStep; cnDim++) { + axisBuf[cnDim] = expf(axisBuf[cnDim] - maxVal); + s += axisBuf[cnDim]; + } + s = 1.f / s; // copy back the result to src - for (size_t cnDim = 0; cnDim < axisStep; cnDim++) - dstPtr[srcOffset + (cnDim + axisBias) * cnStep] = axisBuf[cnDim] * s; + for (size_t _cnDim = 0; _cnDim < axisStep; _cnDim++) + dstPtr[srcOffset + (_cnDim + axisBias) * cnStep] = axisBuf[_cnDim] * s; } }, nstripes); } diff --git a/modules/imgproc/src/color_hsv.simd.hpp b/modules/imgproc/src/color_hsv.simd.hpp index c450d609e5..8ae663dff4 100644 --- a/modules/imgproc/src/color_hsv.simd.hpp +++ b/modules/imgproc/src/color_hsv.simd.hpp @@ -850,7 +850,7 @@ struct RGB2HLS_b for ( ; j <= dn*bufChannels - nBlock*bufChannels; j += nBlock*bufChannels, src += nBlock*4) { - 
 v_uint8 rgb0, rgb1, rgb2, rgb3, dummy; + v_uint8 rgb0, rgb1, rgb2, dummy; v_load_deinterleave(src, rgb0, rgb1, rgb2, dummy); v_uint16 d0,d1,d2,d3,d4,d5; diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp index c5e1ced02a..b106def4bb 100644 --- a/modules/imgproc/src/pyramids.cpp +++ b/modules/imgproc/src/pyramids.cpp @@ -769,7 +769,6 @@ template <> int PyrUpVecVOneRow<int, uchar>(int** src, uchar* dst, int width) r20 = *(row2 + x); int _2r10 = r10 + r10; int d = r00 + r20 + (_2r10 + _2r10 + _2r10); - int d_shifted = (r10 + r20) << 2; // Similar to v_rshr_pack_u<6>(d, vx_setzero_s16()).get0() *(dst + x) = (int)((((unsigned int)d) + ((1 << (6 - 1)))) >> 6); }