Merge pull request #27385 from CodeLinaro:doc_update

Updating doc markdown to include API in FastCV gemm HAL
Merge pull request #27149 from liane-lin:4.x
2025-12-06 00:19:46 +01:00 · 2025-05-30 15:24:15 +03:00 · 2025-05-30 15:20:01 +03:00 · 2025-05-30 17:01:07 +05:30 · 2025-05-28 16:06:42 +03:00 · 2025-05-28 15:43:04 +05:30
674 changed files with 31150 additions and 25279 deletions
--- a/.github/workflows/PR-4.x.yaml
+++ b/.github/workflows/PR-4.x.yaml
@ -46,9 +46,6 @@ jobs:
  Android-SDK:
    uses: opencv/ci-gha-workflow/.github/workflows/OCV-4.x-Android-SDK.yaml@main

-  Android-Test:
-    uses: opencv/ci-gha-workflow/.github/workflows/OCV-PR-4.x-Android-Test.yaml@main
-
  TIM-VX:
    uses: opencv/ci-gha-workflow/.github/workflows/OCV-timvx-backend-tests-4.x.yml@main

--- a/3rdparty/fastcv/fastcv.cmake
+++ b/3rdparty/fastcv/fastcv.cmake
@ -1,23 +1,23 @@
 function(download_fastcv root_dir)

  # Commit SHA in the opencv_3rdparty repo
-  set(FASTCV_COMMIT "f4413cc2ab7233fdfc383a4cded402c072677fb0")
+  set(FASTCV_COMMIT "abe340d0fb7f19fa9315080e3c8616642e98a296")

  # Define actual FastCV versions
  if(ANDROID)
    if(AARCH64)
      message(STATUS "Download FastCV for Android aarch64")
-      set(FCV_PACKAGE_NAME  "fastcv_android_aarch64_2024_12_11.tgz")
-      set(FCV_PACKAGE_HASH  "9dac41e86597305f846212dae31a4a88")
+      set(FCV_PACKAGE_NAME  "fastcv_android_aarch64_2025_04_29.tgz")
+      set(FCV_PACKAGE_HASH  "d9172a9a3e5d92d080a4192cc5691001")
    else()
      message(STATUS "Download FastCV for Android armv7")
-      set(FCV_PACKAGE_NAME  "fastcv_android_arm32_2024_12_11.tgz")
-      set(FCV_PACKAGE_HASH  "fe2d30334180b17e3031eee92aac43b6")
+      set(FCV_PACKAGE_NAME  "fastcv_android_arm32_2025_04_29.tgz")
+      set(FCV_PACKAGE_HASH  "246b5253233391cd2c74d01d49aee9c3")
    endif()
  elseif(UNIX AND NOT APPLE AND NOT IOS AND NOT XROS)
    if(AARCH64)
-      set(FCV_PACKAGE_NAME  "fastcv_linux_aarch64_2025_02_12.tgz")
-      set(FCV_PACKAGE_HASH  "33ac2a59cf3e7d6402eee2e010de1202")
+      set(FCV_PACKAGE_NAME  "fastcv_linux_aarch64_2025_04_29.tgz")
+      set(FCV_PACKAGE_HASH  "e2ce60e25c8e4113a7af2bd243118f4c")
    else()
      message("FastCV: fastcv lib for 32-bit Linux is not supported for now!")
    endif()
--- a/3rdparty/hal_rvv/CMakeLists.txt
+++ b/3rdparty/hal_rvv/CMakeLists.txt
@ -1,9 +0,0 @@
-cmake_minimum_required(VERSION ${MIN_VER_CMAKE} FATAL_ERROR)
-
-set(HAL_LIB_NAME "")
-
-set(RVV_HAL_FOUND TRUE CACHE INTERNAL "")
-set(RVV_HAL_VERSION "0.0.1" CACHE INTERNAL "")
-set(RVV_HAL_LIBRARIES ${HAL_LIB_NAME} CACHE INTERNAL "")
-set(RVV_HAL_HEADERS "hal_rvv.hpp" CACHE INTERNAL "")
-set(RVV_HAL_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "")
--- a/3rdparty/hal_rvv/hal_rvv.hpp
+++ b/3rdparty/hal_rvv/hal_rvv.hpp
@ -1,33 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-#ifndef OPENCV_HAL_RVV_HPP_INCLUDED
-#define OPENCV_HAL_RVV_HPP_INCLUDED
-
-#include "opencv2/core/hal/interface.h"
-
-#ifndef CV_HAL_RVV_071_ENABLED
-#  if defined(__GNUC__) && __GNUC__ == 10 && __GNUC_MINOR__ == 4 && defined(__THEAD_VERSION__) && defined(__riscv_v) && __riscv_v == 7000
-#    define CV_HAL_RVV_071_ENABLED 1
-#  else
-#    define CV_HAL_RVV_071_ENABLED 0
-#  endif
-#endif
-
-#if CV_HAL_RVV_071_ENABLED
-#include "version/hal_rvv_071.hpp"
-#endif
-
-#if defined(__riscv_v) && __riscv_v == 1000000
-#include "hal_rvv_1p0/merge.hpp" // core
-#include "hal_rvv_1p0/mean.hpp" // core
-#include "hal_rvv_1p0/norm.hpp" // core
-#include "hal_rvv_1p0/norm_diff.hpp" // core
-#include "hal_rvv_1p0/convert_scale.hpp" // core
-#include "hal_rvv_1p0/minmax.hpp" // core
-#include "hal_rvv_1p0/atan.hpp" // core
-#include "hal_rvv_1p0/split.hpp" // core
-#endif
-
-#endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/atan.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/atan.hpp
@ -1,128 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level
-// directory of this distribution and at http://opencv.org/license.html.
-#pragma once
-
-#undef cv_hal_fastAtan32f
-#define cv_hal_fastAtan32f cv::cv_hal_rvv::fast_atan_32
-
-#undef cv_hal_fastAtan64f
-#define cv_hal_fastAtan64f cv::cv_hal_rvv::fast_atan_64
-
-#include <riscv_vector.h>
-
-#include <cfloat>
-
-namespace cv::cv_hal_rvv {
-
-namespace detail {
-// ref: mathfuncs_core.simd.hpp
-static constexpr float pi = CV_PI;
-static constexpr float atan2_p1 = 0.9997878412794807F * (180 / pi);
-static constexpr float atan2_p3 = -0.3258083974640975F * (180 / pi);
-static constexpr float atan2_p5 = 0.1555786518463281F * (180 / pi);
-static constexpr float atan2_p7 = -0.04432655554792128F * (180 / pi);
-
-__attribute__((always_inline)) inline vfloat32m4_t
-rvv_atan_f32(vfloat32m4_t vy, vfloat32m4_t vx, size_t vl, float p7,
-             vfloat32m4_t vp5, vfloat32m4_t vp3, vfloat32m4_t vp1,
-             float angle_90_deg) {
-    const auto ax = __riscv_vfabs(vx, vl);
-    const auto ay = __riscv_vfabs(vy, vl);
-    const auto c = __riscv_vfdiv(
-        __riscv_vfmin(ax, ay, vl),
-        __riscv_vfadd(__riscv_vfmax(ax, ay, vl), FLT_EPSILON, vl), vl);
-    const auto c2 = __riscv_vfmul(c, c, vl);
-
-    auto a = __riscv_vfmadd(c2, p7, vp5, vl);
-    a = __riscv_vfmadd(a, c2, vp3, vl);
-    a = __riscv_vfmadd(a, c2, vp1, vl);
-    a = __riscv_vfmul(a, c, vl);
-
-    const auto mask = __riscv_vmflt(ax, ay, vl);
-    a = __riscv_vfrsub_mu(mask, a, a, angle_90_deg, vl);
-
-    a = __riscv_vfrsub_mu(__riscv_vmflt(vx, 0.F, vl), a, a, angle_90_deg * 2,
-                          vl);
-    a = __riscv_vfrsub_mu(__riscv_vmflt(vy, 0.F, vl), a, a, angle_90_deg * 4,
-                          vl);
-
-    return a;
-}
-
-} // namespace detail
-
-inline int fast_atan_32(const float *y, const float *x, float *dst, size_t n,
-                        bool angle_in_deg) {
-    const float scale = angle_in_deg ? 1.f : CV_PI / 180.f;
-    const float p1 = detail::atan2_p1 * scale;
-    const float p3 = detail::atan2_p3 * scale;
-    const float p5 = detail::atan2_p5 * scale;
-    const float p7 = detail::atan2_p7 * scale;
-    const float angle_90_deg = 90.F * scale;
-
-    static size_t vlmax = __riscv_vsetvlmax_e32m4();
-    auto vp1 = __riscv_vfmv_v_f_f32m4(p1, vlmax);
-    auto vp3 = __riscv_vfmv_v_f_f32m4(p3, vlmax);
-    auto vp5 = __riscv_vfmv_v_f_f32m4(p5, vlmax);
-
-    for (size_t vl{}; n > 0; n -= vl) {
-        vl = __riscv_vsetvl_e32m4(n);
-
-        auto vy = __riscv_vle32_v_f32m4(y, vl);
-        auto vx = __riscv_vle32_v_f32m4(x, vl);
-
-        auto a =
-            detail::rvv_atan_f32(vy, vx, vl, p7, vp5, vp3, vp1, angle_90_deg);
-
-        __riscv_vse32(dst, a, vl);
-
-        x += vl;
-        y += vl;
-        dst += vl;
-    }
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int fast_atan_64(const double *y, const double *x, double *dst, size_t n,
-                        bool angle_in_deg) {
-    // this also uses float32 version, ref: mathfuncs_core.simd.hpp
-
-    const float scale = angle_in_deg ? 1.f : CV_PI / 180.f;
-    const float p1 = detail::atan2_p1 * scale;
-    const float p3 = detail::atan2_p3 * scale;
-    const float p5 = detail::atan2_p5 * scale;
-    const float p7 = detail::atan2_p7 * scale;
-    const float angle_90_deg = 90.F * scale;
-
-    static size_t vlmax = __riscv_vsetvlmax_e32m4();
-    auto vp1 = __riscv_vfmv_v_f_f32m4(p1, vlmax);
-    auto vp3 = __riscv_vfmv_v_f_f32m4(p3, vlmax);
-    auto vp5 = __riscv_vfmv_v_f_f32m4(p5, vlmax);
-
-    for (size_t vl{}; n > 0; n -= vl) {
-        vl = __riscv_vsetvl_e64m8(n);
-
-        auto wy = __riscv_vle64_v_f64m8(y, vl);
-        auto wx = __riscv_vle64_v_f64m8(x, vl);
-
-        auto vy = __riscv_vfncvt_f_f_w_f32m4(wy, vl);
-        auto vx = __riscv_vfncvt_f_f_w_f32m4(wx, vl);
-
-        auto a =
-            detail::rvv_atan_f32(vy, vx, vl, p7, vp5, vp3, vp1, angle_90_deg);
-
-        auto wa = __riscv_vfwcvt_f_f_v_f64m8(a, vl);
-
-        __riscv_vse64(dst, wa, vl);
-
-        x += vl;
-        y += vl;
-        dst += vl;
-    }
-
-    return CV_HAL_ERROR_OK;
-}
-
-} // namespace cv::cv_hal_rvv
--- a/3rdparty/hal_rvv/hal_rvv_1p0/merge.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/merge.hpp
@ -1,397 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-#ifndef OPENCV_HAL_RVV_MERGE_HPP_INCLUDED
-#define OPENCV_HAL_RVV_MERGE_HPP_INCLUDED
-
-#include <riscv_vector.h>
-
-namespace cv { namespace cv_hal_rvv {
-
-#undef cv_hal_merge8u
-#define cv_hal_merge8u cv::cv_hal_rvv::merge8u
-#undef cv_hal_merge16u
-#define cv_hal_merge16u cv::cv_hal_rvv::merge16u
-#undef cv_hal_merge32s
-#define cv_hal_merge32s cv::cv_hal_rvv::merge32s
-#undef cv_hal_merge64s
-#define cv_hal_merge64s cv::cv_hal_rvv::merge64s
-
-#if defined __GNUC__
-__attribute__((optimize("no-tree-vectorize")))
-#endif
-inline int merge8u(const uchar** src, uchar* dst, int len, int cn ) {
-    int k = cn % 4 ? cn % 4 : 4;
-    int i = 0;
-    int vl = __riscv_vsetvlmax_e8m1();
-    if( k == 1 )
-    {
-        const uchar* src0 = src[0];
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle8_v_u8m1(src0 + i, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn, sizeof(uchar)*cn, a, vl);
-        }
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++)
-            dst[i*cn] = src0[i];
-    }
-    else if( k == 2 )
-    {
-        const uchar *src0 = src[0], *src1 = src[1];
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle8_v_u8m1(src0 + i, vl);
-            auto b = __riscv_vle8_v_u8m1(src1 + i, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn, sizeof(uchar)*cn, a, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn + 1, sizeof(uchar)*cn, b, vl);
-        }
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++ )
-        {
-            dst[i*cn] = src0[i];
-            dst[i*cn+1] = src1[i];
-        }
-    }
-    else if( k == 3 )
-    {
-        const uchar *src0 = src[0], *src1 = src[1], *src2 = src[2];
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle8_v_u8m1(src0 + i, vl);
-            auto b = __riscv_vle8_v_u8m1(src1 + i, vl);
-            auto c = __riscv_vle8_v_u8m1(src2 + i, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn, sizeof(uchar)*cn, a, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn + 1, sizeof(uchar)*cn, b, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn + 2, sizeof(uchar)*cn, c, vl);
-        }
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++ )
-        {
-            dst[i*cn] = src0[i];
-            dst[i*cn+1] = src1[i];
-            dst[i*cn+2] = src2[i];
-        }
-    }
-    else
-    {
-        const uchar *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle8_v_u8m1(src0 + i, vl);
-            auto b = __riscv_vle8_v_u8m1(src1 + i, vl);
-            auto c = __riscv_vle8_v_u8m1(src2 + i, vl);
-            auto d = __riscv_vle8_v_u8m1(src3 + i, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn, sizeof(uchar)*cn, a, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn + 1, sizeof(uchar)*cn, b, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn + 2, sizeof(uchar)*cn, c, vl);
-            __riscv_vsse8_v_u8m1(dst + i*cn + 3, sizeof(uchar)*cn, d, vl);
-        }
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++ )
-        {
-            dst[i*cn] = src0[i];
-            dst[i*cn+1] = src1[i];
-            dst[i*cn+2] = src2[i];
-            dst[i*cn+3] = src3[i];
-        }
-    }
-    #if defined(__clang__)
-    #pragma clang loop vectorize(disable)
-    #endif
-    for( ; k < cn; k += 4 )
-    {
-        const uchar *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3];
-        i = 0;
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle8_v_u8m1(src0 + i, vl);
-            auto b = __riscv_vle8_v_u8m1(src1 + i, vl);
-            auto c = __riscv_vle8_v_u8m1(src2 + i, vl);
-            auto d = __riscv_vle8_v_u8m1(src3 + i, vl);
-            __riscv_vsse8_v_u8m1(dst + k+i*cn, sizeof(uchar)*cn, a, vl);
-            __riscv_vsse8_v_u8m1(dst + k+i*cn + 1, sizeof(uchar)*cn, b, vl);
-            __riscv_vsse8_v_u8m1(dst + k+i*cn + 2, sizeof(uchar)*cn, c, vl);
-            __riscv_vsse8_v_u8m1(dst + k+i*cn + 3, sizeof(uchar)*cn, d, vl);
-        }
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++ )
-        {
-            dst[k+i*cn] = src0[i];
-            dst[k+i*cn+1] = src1[i];
-            dst[k+i*cn+2] = src2[i];
-            dst[k+i*cn+3] = src3[i];
-        }
-    }
-    return CV_HAL_ERROR_OK;
-}
-
-#if defined __GNUC__
-__attribute__((optimize("no-tree-vectorize")))
-#endif
-inline int merge16u(const ushort** src, ushort* dst, int len, int cn ) {
-    int k = cn % 4 ? cn % 4 : 4;
-    int i = 0;
-    int vl = __riscv_vsetvlmax_e16m1();
-    if( k == 1 )
-    {
-        const ushort* src0 = src[0];
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle16_v_u16m1(src0 + i, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn, sizeof(ushort)*cn, a, vl);
-        }
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++)
-            dst[i*cn] = src0[i];
-    }
-    else if( k == 2 )
-    {
-        const ushort *src0 = src[0], *src1 = src[1];
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle16_v_u16m1(src0 + i, vl);
-            auto b = __riscv_vle16_v_u16m1(src1 + i, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn, sizeof(ushort)*cn, a, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn + 1, sizeof(ushort)*cn, b, vl);
-        }
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++ )
-        {
-            dst[i*cn] = src0[i];
-            dst[i*cn+1] = src1[i];
-        }
-    }
-    else if( k == 3 )
-    {
-        const ushort *src0 = src[0], *src1 = src[1], *src2 = src[2];
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle16_v_u16m1(src0 + i, vl);
-            auto b = __riscv_vle16_v_u16m1(src1 + i, vl);
-            auto c = __riscv_vle16_v_u16m1(src2 + i, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn, sizeof(ushort)*cn, a, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn + 1, sizeof(ushort)*cn, b, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn + 2, sizeof(ushort)*cn, c, vl);
-        }
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++ )
-        {
-            dst[i*cn] = src0[i];
-            dst[i*cn+1] = src1[i];
-            dst[i*cn+2] = src2[i];
-        }
-    }
-    else
-    {
-        const ushort *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle16_v_u16m1(src0 + i, vl);
-            auto b = __riscv_vle16_v_u16m1(src1 + i, vl);
-            auto c = __riscv_vle16_v_u16m1(src2 + i, vl);
-            auto d = __riscv_vle16_v_u16m1(src3 + i, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn, sizeof(ushort)*cn, a, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn + 1, sizeof(ushort)*cn, b, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn + 2, sizeof(ushort)*cn, c, vl);
-            __riscv_vsse16_v_u16m1(dst + i*cn + 3, sizeof(ushort)*cn, d, vl);
-        }
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++ )
-        {
-            dst[i*cn] = src0[i];
-            dst[i*cn+1] = src1[i];
-            dst[i*cn+2] = src2[i];
-            dst[i*cn+3] = src3[i];
-        }
-    }
-    #if defined(__clang__)
-    #pragma clang loop vectorize(disable)
-    #endif
-    for( ; k < cn; k += 4 )
-    {
-        const uint16_t *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3];
-        i = 0;
-        for( ; i <= len - vl; i += vl)
-        {
-            auto a = __riscv_vle16_v_u16m1(src0 + i, vl);
-            auto b = __riscv_vle16_v_u16m1(src1 + i, vl);
-            auto c = __riscv_vle16_v_u16m1(src2 + i, vl);
-            auto d = __riscv_vle16_v_u16m1(src3 + i, vl);
-            __riscv_vsse16_v_u16m1(dst + k+i*cn, sizeof(ushort)*cn, a, vl);
-            __riscv_vsse16_v_u16m1(dst + k+i*cn + 1, sizeof(ushort)*cn, b, vl);
-            __riscv_vsse16_v_u16m1(dst + k+i*cn + 2, sizeof(ushort)*cn, c, vl);
-            __riscv_vsse16_v_u16m1(dst + k+i*cn + 3, sizeof(ushort)*cn, d, vl);
-        }
-        for( ; i < len; i++ )
-        {
-            dst[k+i*cn] = src0[i];
-            dst[k+i*cn+1] = src1[i];
-            dst[k+i*cn+2] = src2[i];
-            dst[k+i*cn+3] = src3[i];
-        }
-    }
-    return CV_HAL_ERROR_OK;
-}
-
-#if defined __GNUC__
-__attribute__((optimize("no-tree-vectorize")))
-#endif
-inline int merge32s(const int** src, int* dst, int len, int cn ) {
-    int k = cn % 4 ? cn % 4 : 4;
-    int i, j;
-    if( k == 1 )
-    {
-        const int* src0 = src[0];
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( i = j = 0; i < len; i++, j += cn )
-            dst[j] = src0[i];
-    }
-    else if( k == 2 )
-    {
-        const int *src0 = src[0], *src1 = src[1];
-        i = j = 0;
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++, j += cn )
-        {
-            dst[j] = src0[i];
-            dst[j+1] = src1[i];
-        }
-    }
-    else if( k == 3 )
-    {
-        const int *src0 = src[0], *src1 = src[1], *src2 = src[2];
-        i = j = 0;
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++, j += cn )
-        {
-            dst[j] = src0[i];
-            dst[j+1] = src1[i];
-            dst[j+2] = src2[i];
-        }
-    }
-    else
-    {
-        const int *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
-        i = j = 0;
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++, j += cn )
-        {
-            dst[j] = src0[i]; dst[j+1] = src1[i];
-            dst[j+2] = src2[i]; dst[j+3] = src3[i];
-        }
-    }
-    #if defined(__clang__)
-    #pragma clang loop vectorize(disable)
-    #endif
-    for( ; k < cn; k += 4 )
-    {
-        const int *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3];
-        for( i = 0, j = k; i < len; i++, j += cn )
-        {
-            dst[j] = src0[i]; dst[j+1] = src1[i];
-            dst[j+2] = src2[i]; dst[j+3] = src3[i];
-        }
-    }
-    return CV_HAL_ERROR_OK;
-}
-
-#if defined __GNUC__
-__attribute__((optimize("no-tree-vectorize")))
-#endif
-inline int merge64s(const int64** src, int64* dst, int len, int cn ) {
-    int k = cn % 4 ? cn % 4 : 4;
-    int i, j;
-    if( k == 1 )
-    {
-        const int64* src0 = src[0];
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( i = j = 0; i < len; i++, j += cn )
-            dst[j] = src0[i];
-    }
-    else if( k == 2 )
-    {
-        const int64 *src0 = src[0], *src1 = src[1];
-        i = j = 0;
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++, j += cn )
-        {
-            dst[j] = src0[i];
-            dst[j+1] = src1[i];
-        }
-    }
-    else if( k == 3 )
-    {
-        const int64 *src0 = src[0], *src1 = src[1], *src2 = src[2];
-        i = j = 0;
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++, j += cn )
-        {
-            dst[j] = src0[i];
-            dst[j+1] = src1[i];
-            dst[j+2] = src2[i];
-        }
-    }
-    else
-    {
-        const int64 *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
-        i = j = 0;
-        #if defined(__clang__)
-        #pragma clang loop vectorize(disable)
-        #endif
-        for( ; i < len; i++, j += cn )
-        {
-            dst[j] = src0[i]; dst[j+1] = src1[i];
-            dst[j+2] = src2[i]; dst[j+3] = src3[i];
-        }
-    }
-    #if defined(__clang__)
-    #pragma clang loop vectorize(disable)
-    #endif
-    for( ; k < cn; k += 4 )
-    {
-        const int64 *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3];
-        for( i = 0, j = k; i < len; i++, j += cn )
-        {
-            dst[j] = src0[i]; dst[j+1] = src1[i];
-            dst[j+2] = src2[i]; dst[j+3] = src3[i];
-        }
-    }
-    return CV_HAL_ERROR_OK;
-}
-
-}}
-
-#endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp
@ -1,335 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-#ifndef OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED
-#define OPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED
-
-#include <riscv_vector.h>
-
-namespace cv { namespace cv_hal_rvv {
-
-#undef cv_hal_minMaxIdx
-#define cv_hal_minMaxIdx cv::cv_hal_rvv::minMaxIdx
-#undef cv_hal_minMaxIdxMaskStep
-#define cv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minMaxIdx
-
-namespace
-{
-    template<typename T> struct rvv;
-
-    #define HAL_RVV_GENERATOR(T, EEW, TYPE, IS_U, EMUL, M_EMUL, B_LEN) \
-    template<> struct rvv<T> \
-    { \
-        using vec_t = v##IS_U##int##EEW##EMUL##_t; \
-        using bool_t = vbool##B_LEN##_t; \
-        static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \
-        static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \
-        static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_vmv_v_x_##TYPE##EMUL(a, b); } \
-        static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
-        static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \
-        static inline vec_t vmin_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmin##IS_U##_tu(a, b, c, d); } \
-        static inline vec_t vmax_tu(vec_t a, vec_t b, vec_t c, size_t d) { return __riscv_vmax##IS_U##_tu(a, b, c, d); } \
-        static inline vec_t vmin_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmin##IS_U##_tumu(a, b, c, d, e); } \
-        static inline vec_t vmax_tumu(bool_t a, vec_t b, vec_t c, vec_t d, size_t e) { return __riscv_vmax##IS_U##_tumu(a, b, c, d, e); } \
-        static inline vec_t vredmin(vec_t a, vec_t b, size_t c) { return __riscv_vredmin##IS_U(a, b, c); } \
-        static inline vec_t vredmax(vec_t a, vec_t b, size_t c) { return __riscv_vredmax##IS_U(a, b, c); } \
-    };
-    HAL_RVV_GENERATOR(uchar , 8 , u8 , u, m1, m1 , 8 )
-    HAL_RVV_GENERATOR(schar , 8 , i8 ,  , m1, m1 , 8 )
-    HAL_RVV_GENERATOR(ushort, 16, u16, u, m1, mf2, 16)
-    HAL_RVV_GENERATOR(short , 16, i16,  , m1, mf2, 16)
-    #undef HAL_RVV_GENERATOR
-
-    #define HAL_RVV_GENERATOR(T, NAME, EEW, TYPE, IS_F, F_OR_S, F_OR_X, EMUL, M_EMUL, P_EMUL, B_LEN) \
-    template<> struct rvv<T> \
-    { \
-        using vec_t = v##NAME##EEW##EMUL##_t; \
-        using bool_t = vbool##B_LEN##_t; \
-        static inline size_t vsetvlmax() { return __riscv_vsetvlmax_e##EEW##EMUL(); } \
-        static inline size_t vsetvl(size_t a) { return __riscv_vsetvl_e##EEW##EMUL(a); } \
-        static inline vec_t vmv_v_x(T a, size_t b) { return __riscv_v##IS_F##mv_v_##F_OR_X##_##TYPE##EMUL(a, b); } \
-        static inline vuint32##P_EMUL##_t vid(size_t a) { return __riscv_vid_v_u32##P_EMUL(a); } \
-        static inline vuint32##P_EMUL##_t vundefined() { return __riscv_vundefined_u32##P_EMUL(); } \
-        static inline vec_t vle(const T* a, size_t b) { return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
-        static inline vuint8##M_EMUL##_t vle_mask(const uchar* a, size_t b) { return __riscv_vle8_v_u8##M_EMUL(a, b); } \
-        static inline bool_t vmlt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##lt(a, b, c); } \
-        static inline bool_t vmgt(vec_t a, vec_t b, size_t c) { return __riscv_vm##F_OR_S##gt(a, b, c); } \
-        static inline bool_t vmlt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##lt##_mu(a, b, c, d, e); } \
-        static inline bool_t vmgt_mu(bool_t a, bool_t b, vec_t c, vec_t d, size_t e) { return __riscv_vm##F_OR_S##gt##_mu(a, b, c, d, e); } \
-        static inline T vmv_x_s(vec_t a) { return __riscv_v##IS_F##mv_##F_OR_X(a); } \
-    };
-    HAL_RVV_GENERATOR(int   , int  , 32, i32,  , s, x, m4, m1 , m4, 8 )
-    HAL_RVV_GENERATOR(float , float, 32, f32, f, f, f, m4, m1 , m4, 8 )
-    HAL_RVV_GENERATOR(double, float, 64, f64, f, f, f, m4, mf2, m2, 16)
-    #undef HAL_RVV_GENERATOR
-}
-
-template<typename T>
-inline int minMaxIdxReadTwice(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal,
-                              int* minIdx, int* maxIdx, uchar* mask, size_t mask_step)
-{
-    int vlmax = rvv<T>::vsetvlmax();
-    auto vec_min = rvv<T>::vmv_v_x(std::numeric_limits<T>::max(), vlmax);
-    auto vec_max = rvv<T>::vmv_v_x(std::numeric_limits<T>::lowest(), vlmax);
-    T val_min, val_max;
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = rvv<T>::vsetvl(width - j);
-                auto vec_src = rvv<T>::vle(src_row + j, vl);
-                auto vec_mask = rvv<T>::vle_mask(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                vec_min = rvv<T>::vmin_tumu(bool_mask, vec_min, vec_min, vec_src, vl);
-                vec_max = rvv<T>::vmax_tumu(bool_mask, vec_max, vec_max, vec_src, vl);
-            }
-        }
-
-        auto sc_minval = rvv<T>::vmv_v_x(std::numeric_limits<T>::max(), vlmax);
-        auto sc_maxval = rvv<T>::vmv_v_x(std::numeric_limits<T>::lowest(), vlmax);
-        sc_minval = rvv<T>::vredmin(vec_min, sc_minval, vlmax);
-        sc_maxval = rvv<T>::vredmax(vec_max, sc_maxval, vlmax);
-        val_min = __riscv_vmv_x(sc_minval);
-        val_max = __riscv_vmv_x(sc_maxval);
-
-        bool found_min = !minIdx, found_max = !maxIdx;
-        for (int i = 0; i < height && (!found_min || !found_max); i++)
-        {
-            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width && (!found_min || !found_max); j += vl)
-            {
-                vl = rvv<T>::vsetvl(width - j);
-                auto vec_src = rvv<T>::vle(src_row + j, vl);
-                auto vec_mask = rvv<T>::vle_mask(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto bool_zero = __riscv_vmxor(bool_mask, bool_mask, vl);
-                if (!found_min)
-                {
-                    auto bool_minpos = __riscv_vmseq_mu(bool_mask, bool_zero, vec_src, val_min, vl);
-                    int index = __riscv_vfirst(bool_minpos, vl);
-                    if (index != -1)
-                    {
-                        found_min = true;
-                        minIdx[0] = i;
-                        minIdx[1] = j + index;
-                    }
-                }
-                if (!found_max)
-                {
-                    auto bool_maxpos = __riscv_vmseq_mu(bool_mask, bool_zero, vec_src, val_max, vl);
-                    int index = __riscv_vfirst(bool_maxpos, vl);
-                    if (index != -1)
-                    {
-                        found_max = true;
-                        maxIdx[0] = i;
-                        maxIdx[1] = j + index;
-                    }
-                }
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = rvv<T>::vsetvl(width - j);
-                auto vec_src = rvv<T>::vle(src_row + j, vl);
-                vec_min = rvv<T>::vmin_tu(vec_min, vec_min, vec_src, vl);
-                vec_max = rvv<T>::vmax_tu(vec_max, vec_max, vec_src, vl);
-            }
-        }
-
-        auto sc_minval = rvv<T>::vmv_v_x(std::numeric_limits<T>::max(), vlmax);
-        auto sc_maxval = rvv<T>::vmv_v_x(std::numeric_limits<T>::lowest(), vlmax);
-        sc_minval = rvv<T>::vredmin(vec_min, sc_minval, vlmax);
-        sc_maxval = rvv<T>::vredmax(vec_max, sc_maxval, vlmax);
-        val_min = __riscv_vmv_x(sc_minval);
-        val_max = __riscv_vmv_x(sc_maxval);
-
-        bool found_min = !minIdx, found_max = !maxIdx;
-        for (int i = 0; i < height && (!found_min || !found_max); i++)
-        {
-            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
-            int vl;
-            for (int j = 0; j < width && (!found_min || !found_max); j += vl)
-            {
-                vl = rvv<T>::vsetvl(width - j);
-                auto vec_src = rvv<T>::vle(src_row + j, vl);
-                if (!found_min)
-                {
-                    auto bool_minpos = __riscv_vmseq(vec_src, val_min, vl);
-                    int index = __riscv_vfirst(bool_minpos, vl);
-                    if (index != -1)
-                    {
-                        found_min = true;
-                        minIdx[0] = i;
-                        minIdx[1] = j + index;
-                    }
-                }
-                if (!found_max)
-                {
-                    auto bool_maxpos = __riscv_vmseq(vec_src, val_max, vl);
-                    int index = __riscv_vfirst(bool_maxpos, vl);
-                    if (index != -1)
-                    {
-                        found_max = true;
-                        maxIdx[0] = i;
-                        maxIdx[1] = j + index;
-                    }
-                }
-            }
-        }
-    }
-    if (minVal)
-    {
-        *minVal = val_min;
-    }
-    if (maxVal)
-    {
-        *maxVal = val_max;
-    }
-
-    return CV_HAL_ERROR_OK;
-}
-
-template<typename T>
-inline int minMaxIdxReadOnce(const uchar* src_data, size_t src_step, int width, int height, double* minVal, double* maxVal,
-                             int* minIdx, int* maxIdx, uchar* mask, size_t mask_step)
-{
-    int vlmax = rvv<T>::vsetvlmax();
-    auto vec_min = rvv<T>::vmv_v_x(std::numeric_limits<T>::max(), vlmax);
-    auto vec_max = rvv<T>::vmv_v_x(std::numeric_limits<T>::lowest(), vlmax);
-    auto vec_pos = rvv<T>::vid(vlmax);
-    auto vec_minpos = rvv<T>::vundefined(), vec_maxpos = rvv<T>::vundefined();
-    T val_min, val_max;
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = rvv<T>::vsetvl(width - j);
-                auto vec_src = rvv<T>::vle(src_row + j, vl);
-                auto vec_mask = rvv<T>::vle_mask(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto bool_zero = __riscv_vmxor(bool_mask, bool_mask, vl);
-
-                auto bool_minpos = rvv<T>::vmlt_mu(bool_mask, bool_zero, vec_src, vec_min, vl);
-                auto bool_maxpos = rvv<T>::vmgt_mu(bool_mask, bool_zero, vec_src, vec_max, vl);
-                vec_minpos = __riscv_vmerge_tu(vec_minpos, vec_minpos, vec_pos, bool_minpos, vl);
-                vec_maxpos = __riscv_vmerge_tu(vec_maxpos, vec_maxpos, vec_pos, bool_maxpos, vl);
-
-                vec_min = __riscv_vmerge_tu(vec_min, vec_min, vec_src, bool_minpos, vl);
-                vec_max = __riscv_vmerge_tu(vec_max, vec_max, vec_src, bool_maxpos, vl);
-                vec_pos = __riscv_vadd(vec_pos, vl, vlmax);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const T* src_row = reinterpret_cast<const T*>(src_data + i * src_step);
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = rvv<T>::vsetvl(width - j);
-                auto vec_src = rvv<T>::vle(src_row + j, vl);
-
-                auto bool_minpos = rvv<T>::vmlt(vec_src, vec_min, vl);
-                auto bool_maxpos = rvv<T>::vmgt(vec_src, vec_max, vl);
-                vec_minpos = __riscv_vmerge_tu(vec_minpos, vec_minpos, vec_pos, bool_minpos, vl);
-                vec_maxpos = __riscv_vmerge_tu(vec_maxpos, vec_maxpos, vec_pos, bool_maxpos, vl);
-
-                vec_min = __riscv_vmerge_tu(vec_min, vec_min, vec_src, bool_minpos, vl);
-                vec_max = __riscv_vmerge_tu(vec_max, vec_max, vec_src, bool_maxpos, vl);
-                vec_pos = __riscv_vadd(vec_pos, vl, vlmax);
-            }
-        }
-    }
-
-    val_min = std::numeric_limits<T>::max();
-    val_max = std::numeric_limits<T>::lowest();
-    for (int i = 0; i < vlmax; i++)
-    {
-        if (val_min > rvv<T>::vmv_x_s(vec_min))
-        {
-            val_min = rvv<T>::vmv_x_s(vec_min);
-            if (minIdx)
-            {
-                minIdx[0] = __riscv_vmv_x(vec_minpos) / width;
-                minIdx[1] = __riscv_vmv_x(vec_minpos) % width;
-            }
-        }
-        if (val_max < rvv<T>::vmv_x_s(vec_max))
-        {
-            val_max = rvv<T>::vmv_x_s(vec_max);
-            if (maxIdx)
-            {
-                maxIdx[0] = __riscv_vmv_x(vec_maxpos) / width;
-                maxIdx[1] = __riscv_vmv_x(vec_maxpos) % width;
-            }
-        }
-        vec_min = __riscv_vslidedown(vec_min, 1, vlmax);
-        vec_max = __riscv_vslidedown(vec_max, 1, vlmax);
-        vec_minpos = __riscv_vslidedown(vec_minpos, 1, vlmax);
-        vec_maxpos = __riscv_vslidedown(vec_maxpos, 1, vlmax);
-    }
-    if (minVal)
-    {
-        *minVal = val_min;
-    }
-    if (maxVal)
-    {
-        *maxVal = val_max;
-    }
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int minMaxIdx(const uchar* src_data, size_t src_step, int width, int height, int depth, double* minVal, double* maxVal,
-                     int* minIdx, int* maxIdx, uchar* mask, size_t mask_step = 0)
-{
-    if (!mask_step)
-        mask_step = src_step;
-
-    switch (depth)
-    {
-    case CV_8UC1:
-        return minMaxIdxReadTwice<uchar>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step);
-    case CV_8SC1:
-        return minMaxIdxReadTwice<schar>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step);
-    case CV_16UC1:
-        return minMaxIdxReadTwice<ushort>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step);
-    case CV_16SC1:
-        return minMaxIdxReadTwice<short>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step);
-    case CV_32SC1:
-        return minMaxIdxReadOnce<int>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step);
-    case CV_32FC1:
-        return minMaxIdxReadOnce<float>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step);
-    case CV_64FC1:
-        return minMaxIdxReadOnce<double>(src_data, src_step, width, height, minVal, maxVal, minIdx, maxIdx, mask, mask_step);
-    }
-
-    return CV_HAL_ERROR_NOT_IMPLEMENTED;
-}
-
-}}
-
-#endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp
@ -1,517 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-#ifndef OPENCV_HAL_RVV_NORM_HPP_INCLUDED
-#define OPENCV_HAL_RVV_NORM_HPP_INCLUDED
-
-#include <riscv_vector.h>
-
-namespace cv { namespace cv_hal_rvv {
-
-#undef cv_hal_norm
-#define cv_hal_norm cv::cv_hal_rvv::norm
-
-inline int normInf_8UC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m8();
-    auto vec_max = __riscv_vmv_v_x_u8m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m8(width - j);
-                auto vec_src = __riscv_vle8_v_u8m8(src_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m8(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                vec_max = __riscv_vmaxu_tumu(bool_mask, vec_max, vec_max, vec_src, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m8(width - j);
-                auto vec_src = __riscv_vle8_v_u8m8(src_row + j, vl);
-                vec_max = __riscv_vmaxu_tu(vec_max, vec_max, vec_src, vl);
-            }
-        }
-    }
-    auto sc_max = __riscv_vmv_s_x_u8m1(0, vlmax);
-    sc_max = __riscv_vredmaxu(vec_max, sc_max, vlmax);
-    *result = __riscv_vmv_x(sc_max);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normL1_8UC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m2();
-    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width - j);
-                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_zext = __riscv_vzext_vf4_u32m8_m(bool_mask, vec_src, vl);
-                vec_sum = __riscv_vadd_tumu(bool_mask, vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width - j);
-                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
-                auto vec_zext = __riscv_vzext_vf4(vec_src, vl);
-                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    auto sc_sum = __riscv_vmv_s_x_u32m1(0, vlmax);
-    sc_sum = __riscv_vredsum(vec_sum, sc_sum, vlmax);
-    *result = __riscv_vmv_x(sc_sum);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normL2Sqr_8UC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m2();
-    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-    int cnt = 0;
-    auto reduce = [&](int vl) {
-        if ((cnt += vl) < (1 << 16))
-            return;
-        cnt = vl;
-        for (int i = 0; i < vlmax; i++)
-        {
-            *result += __riscv_vmv_x(vec_sum);
-            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
-        }
-        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-    };
-
-    *result = 0;
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width - j);
-                reduce(vl);
-
-                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_mul = __riscv_vwmulu_vv_u16m4_m(bool_mask, vec_src, vec_src, vl);
-                auto vec_zext = __riscv_vzext_vf2_u32m8_m(bool_mask, vec_mul, vl);
-                vec_sum = __riscv_vadd_tumu(bool_mask, vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width - j);
-                reduce(vl);
-
-                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
-                auto vec_mul = __riscv_vwmulu(vec_src, vec_src, vl);
-                auto vec_zext = __riscv_vzext_vf2(vec_mul, vl);
-                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    reduce(1 << 16);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normInf_8UC4(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m8();
-    auto vec_max = __riscv_vmv_v_x_u8m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl, vlm;
-            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
-            {
-                vl = __riscv_vsetvl_e8m8(width * 4 - j);
-                vlm = __riscv_vsetvl_e8m2(width - jm);
-                auto vec_src = __riscv_vle8_v_u8m8(src_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + jm, vlm);
-                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
-                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m8(vec_mask_ext), 0, vl);
-                vec_max = __riscv_vmaxu_tumu(bool_mask_ext, vec_max, vec_max, vec_src, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            int vl;
-            for (int j = 0; j < width * 4; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m8(width * 4 - j);
-                auto vec_src = __riscv_vle8_v_u8m8(src_row + j, vl);
-                vec_max = __riscv_vmaxu_tu(vec_max, vec_max, vec_src, vl);
-            }
-        }
-    }
-    auto sc_max = __riscv_vmv_s_x_u8m1(0, vlmax);
-    sc_max = __riscv_vredmaxu(vec_max, sc_max, vlmax);
-    *result = __riscv_vmv_x(sc_max);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normL1_8UC4(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m2();
-    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl, vlm;
-            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
-            {
-                vl = __riscv_vsetvl_e8m2(width * 4 - j);
-                vlm = __riscv_vsetvl_e8mf2(width - jm);
-                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8mf2(mask_row + jm, vlm);
-                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
-                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m2(vec_mask_ext), 0, vl);
-                auto vec_zext = __riscv_vzext_vf4_u32m8_m(bool_mask_ext, vec_src, vl);
-                vec_sum = __riscv_vadd_tumu(bool_mask_ext, vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            int vl;
-            for (int j = 0; j < width * 4; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width * 4 - j);
-                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
-                auto vec_zext = __riscv_vzext_vf4(vec_src, vl);
-                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    auto sc_sum = __riscv_vmv_s_x_u32m1(0, vlmax);
-    sc_sum = __riscv_vredsum(vec_sum, sc_sum, vlmax);
-    *result = __riscv_vmv_x(sc_sum);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normL2Sqr_8UC4(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m2();
-    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-    int cnt = 0;
-    auto reduce = [&](int vl) {
-        if ((cnt += vl) < (1 << 16))
-            return;
-        cnt = vl;
-        for (int i = 0; i < vlmax; i++)
-        {
-            *result += __riscv_vmv_x(vec_sum);
-            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
-        }
-        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-    };
-
-    *result = 0;
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl, vlm;
-            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
-            {
-                vl = __riscv_vsetvl_e8m2(width * 4 - j);
-                vlm = __riscv_vsetvl_e8mf2(width - jm);
-                reduce(vl);
-
-                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8mf2(mask_row + jm, vlm);
-                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
-                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m2(vec_mask_ext), 0, vl);
-                auto vec_mul = __riscv_vwmulu_vv_u16m4_m(bool_mask_ext, vec_src, vec_src, vl);
-                auto vec_zext = __riscv_vzext_vf2_u32m8_m(bool_mask_ext, vec_mul, vl);
-                vec_sum = __riscv_vadd_tumu(bool_mask_ext, vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src_row = src + i * src_step;
-            int vl;
-            for (int j = 0; j < width * 4; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width * 4 - j);
-                reduce(vl);
-
-                auto vec_src = __riscv_vle8_v_u8m2(src_row + j, vl);
-                auto vec_mul = __riscv_vwmulu(vec_src, vec_src, vl);
-                auto vec_zext = __riscv_vzext_vf2(vec_mul, vl);
-                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    reduce(1 << 16);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normInf_32FC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e32m8();
-    auto vec_max = __riscv_vfmv_v_f_f32m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m8(width - j);
-                auto vec_src = __riscv_vle32_v_f32m8(src_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_abs = __riscv_vfabs_v_f32m8_m(bool_mask, vec_src, vl);
-                vec_max = __riscv_vfmax_tumu(bool_mask, vec_max, vec_max, vec_abs, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m8(width - j);
-                auto vec_src = __riscv_vle32_v_f32m8(src_row + j, vl);
-                auto vec_abs = __riscv_vfabs(vec_src, vl);
-                vec_max = __riscv_vfmax_tu(vec_max, vec_max, vec_abs, vl);
-            }
-        }
-    }
-    auto sc_max = __riscv_vfmv_s_f_f32m1(0, vlmax);
-    sc_max = __riscv_vfredmax(vec_max, sc_max, vlmax);
-    *result = __riscv_vfmv_f(sc_max);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normL1_32FC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e32m4();
-    auto vec_sum = __riscv_vfmv_v_f_f64m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m4(width - j);
-                auto vec_src = __riscv_vle32_v_f32m4(src_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m1(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_abs = __riscv_vfabs_v_f32m4_m(bool_mask, vec_src, vl);
-                auto vec_fext = __riscv_vfwcvt_f_f_v_f64m8_m(bool_mask, vec_abs, vl);
-                vec_sum = __riscv_vfadd_tumu(bool_mask, vec_sum, vec_sum, vec_fext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m4(width - j);
-                auto vec_src = __riscv_vle32_v_f32m4(src_row + j, vl);
-                auto vec_abs = __riscv_vfabs(vec_src, vl);
-                auto vec_fext = __riscv_vfwcvt_f_f_v_f64m8(vec_abs, vl);
-                vec_sum = __riscv_vfadd_tu(vec_sum, vec_sum, vec_fext, vl);
-            }
-        }
-    }
-    auto sc_sum = __riscv_vfmv_s_f_f64m1(0, vlmax);
-    sc_sum = __riscv_vfredosum(vec_sum, sc_sum, vlmax);
-    *result = __riscv_vfmv_f(sc_sum);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normL2Sqr_32FC1(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e32m4();
-    auto vec_sum = __riscv_vfmv_v_f_f64m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m4(width - j);
-                auto vec_src = __riscv_vle32_v_f32m4(src_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m1(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_mul = __riscv_vfwmul_vv_f64m8_m(bool_mask, vec_src, vec_src, vl);
-                vec_sum = __riscv_vfadd_tumu(bool_mask, vec_sum, vec_sum, vec_mul, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src_row = reinterpret_cast<const float*>(src + i * src_step);
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m4(width - j);
-                auto vec_src = __riscv_vle32_v_f32m4(src_row + j, vl);
-                auto vec_mul = __riscv_vfwmul(vec_src, vec_src, vl);
-                vec_sum = __riscv_vfadd_tu(vec_sum, vec_sum, vec_mul, vl);
-            }
-        }
-    }
-    auto sc_sum = __riscv_vfmv_s_f_f64m1(0, vlmax);
-    sc_sum = __riscv_vfredosum(vec_sum, sc_sum, vlmax);
-    *result = __riscv_vfmv_f(sc_sum);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mask_step, int width,
-                int height, int type, int norm_type, double* result)
-{
-    if (!result)
-        return CV_HAL_ERROR_OK;
-
-    switch (type)
-    {
-    case CV_8UC1:
-        switch (norm_type)
-        {
-        case NORM_INF:
-            return normInf_8UC1(src, src_step, mask, mask_step, width, height, result);
-        case NORM_L1:
-            return normL1_8UC1(src, src_step, mask, mask_step, width, height, result);
-        case NORM_L2SQR:
-            return normL2Sqr_8UC1(src, src_step, mask, mask_step, width, height, result);
-        case NORM_L2:
-            int ret = normL2Sqr_8UC1(src, src_step, mask, mask_step, width, height, result);
-            *result = std::sqrt(*result);
-            return ret;
-        }
-        return CV_HAL_ERROR_NOT_IMPLEMENTED;
-    case CV_8UC4:
-        switch (norm_type)
-        {
-        case NORM_INF:
-            return normInf_8UC4(src, src_step, mask, mask_step, width, height, result);
-        case NORM_L1:
-            return normL1_8UC4(src, src_step, mask, mask_step, width, height, result);
-        case NORM_L2SQR:
-            return normL2Sqr_8UC4(src, src_step, mask, mask_step, width, height, result);
-        case NORM_L2:
-            int ret = normL2Sqr_8UC4(src, src_step, mask, mask_step, width, height, result);
-            *result = std::sqrt(*result);
-            return ret;
-        }
-        return CV_HAL_ERROR_NOT_IMPLEMENTED;
-    case CV_32FC1:
-        switch (norm_type)
-        {
-        case NORM_INF:
-            return normInf_32FC1(src, src_step, mask, mask_step, width, height, result);
-        case NORM_L1:
-            return normL1_32FC1(src, src_step, mask, mask_step, width, height, result);
-        case NORM_L2SQR:
-            return normL2Sqr_32FC1(src, src_step, mask, mask_step, width, height, result);
-        case NORM_L2:
-            int ret = normL2Sqr_32FC1(src, src_step, mask, mask_step, width, height, result);
-            *result = std::sqrt(*result);
-            return ret;
-        }
-        return CV_HAL_ERROR_NOT_IMPLEMENTED;
-    }
-
-    return CV_HAL_ERROR_NOT_IMPLEMENTED;
-}
-
-}}
-
-#endif
--- a/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp
+++ b/3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp
@ -1,605 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-#ifndef OPENCV_HAL_RVV_NORM_DIFF_HPP_INCLUDED
-#define OPENCV_HAL_RVV_NORM_DIFF_HPP_INCLUDED
-
-#include <riscv_vector.h>
-
-namespace cv { namespace cv_hal_rvv {
-
-#undef cv_hal_normDiff
-#define cv_hal_normDiff cv::cv_hal_rvv::normDiff
-
-inline int normDiffInf_8UC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m8();
-    auto vec_max = __riscv_vmv_v_x_u8m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m8(width - j);
-                auto vec_src1 = __riscv_vle8_v_u8m8(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m8(src2_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m8(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_src = __riscv_vsub_vv_u8m8_m(bool_mask, __riscv_vmaxu_vv_u8m8_m(bool_mask, vec_src1, vec_src2, vl),
-                                                      __riscv_vminu_vv_u8m8_m(bool_mask, vec_src1, vec_src2, vl), vl);
-                vec_max = __riscv_vmaxu_tumu(bool_mask, vec_max, vec_max, vec_src, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m8(width - j);
-                auto vec_src1 = __riscv_vle8_v_u8m8(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m8(src2_row + j, vl);
-                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
-                vec_max = __riscv_vmaxu_tu(vec_max, vec_max, vec_src, vl);
-            }
-        }
-    }
-    auto sc_max = __riscv_vmv_s_x_u8m1(0, vlmax);
-    sc_max = __riscv_vredmaxu(vec_max, sc_max, vlmax);
-    *result = __riscv_vmv_x(sc_max);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normDiffL1_8UC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m2();
-    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width - j);
-                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_src = __riscv_vsub_vv_u8m2_m(bool_mask, __riscv_vmaxu_vv_u8m2_m(bool_mask, vec_src1, vec_src2, vl),
-                                                      __riscv_vminu_vv_u8m2_m(bool_mask, vec_src1, vec_src2, vl), vl);
-                auto vec_zext = __riscv_vzext_vf4_u32m8_m(bool_mask, vec_src, vl);
-                vec_sum = __riscv_vadd_tumu(bool_mask, vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width - j);
-                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
-                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
-                auto vec_zext = __riscv_vzext_vf4(vec_src, vl);
-                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    auto sc_sum = __riscv_vmv_s_x_u32m1(0, vlmax);
-    sc_sum = __riscv_vredsum(vec_sum, sc_sum, vlmax);
-    *result = __riscv_vmv_x(sc_sum);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normDiffL2Sqr_8UC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m2();
-    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-    int cnt = 0;
-    auto reduce = [&](int vl) {
-        if ((cnt += vl) < (1 << 16))
-            return;
-        cnt = vl;
-        for (int i = 0; i < vlmax; i++)
-        {
-            *result += __riscv_vmv_x(vec_sum);
-            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
-        }
-        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-    };
-
-    *result = 0;
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width - j);
-                reduce(vl);
-
-                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_src = __riscv_vsub_vv_u8m2_m(bool_mask, __riscv_vmaxu_vv_u8m2_m(bool_mask, vec_src1, vec_src2, vl),
-                                                      __riscv_vminu_vv_u8m2_m(bool_mask, vec_src1, vec_src2, vl), vl);
-                auto vec_mul = __riscv_vwmulu_vv_u16m4_m(bool_mask, vec_src, vec_src, vl);
-                auto vec_zext = __riscv_vzext_vf2_u32m8_m(bool_mask, vec_mul, vl);
-                vec_sum = __riscv_vadd_tumu(bool_mask, vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width - j);
-                reduce(vl);
-
-                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
-                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
-                auto vec_mul = __riscv_vwmulu(vec_src, vec_src, vl);
-                auto vec_zext = __riscv_vzext_vf2(vec_mul, vl);
-                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    reduce(1 << 16);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normDiffInf_8UC4(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m8();
-    auto vec_max = __riscv_vmv_v_x_u8m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl, vlm;
-            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
-            {
-                vl = __riscv_vsetvl_e8m8(width * 4 - j);
-                vlm = __riscv_vsetvl_e8m2(width - jm);
-                auto vec_src1 = __riscv_vle8_v_u8m8(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m8(src2_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + jm, vlm);
-                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
-                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m8(vec_mask_ext), 0, vl);
-                auto vec_src = __riscv_vsub_vv_u8m8_m(bool_mask_ext, __riscv_vmaxu_vv_u8m8_m(bool_mask_ext, vec_src1, vec_src2, vl),
-                                                      __riscv_vminu_vv_u8m8_m(bool_mask_ext, vec_src1, vec_src2, vl), vl);
-                vec_max = __riscv_vmaxu_tumu(bool_mask_ext, vec_max, vec_max, vec_src, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            int vl;
-            for (int j = 0; j < width * 4; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m8(width * 4 - j);
-                auto vec_src1 = __riscv_vle8_v_u8m8(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m8(src2_row + j, vl);
-                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
-                vec_max = __riscv_vmaxu_tu(vec_max, vec_max, vec_src, vl);
-            }
-        }
-    }
-    auto sc_max = __riscv_vmv_s_x_u8m1(0, vlmax);
-    sc_max = __riscv_vredmaxu(vec_max, sc_max, vlmax);
-    *result = __riscv_vmv_x(sc_max);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normDiffL1_8UC4(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m2();
-    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl, vlm;
-            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
-            {
-                vl = __riscv_vsetvl_e8m2(width * 4 - j);
-                vlm = __riscv_vsetvl_e8mf2(width - jm);
-                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8mf2(mask_row + jm, vlm);
-                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
-                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m2(vec_mask_ext), 0, vl);
-                auto vec_src = __riscv_vsub_vv_u8m2_m(bool_mask_ext, __riscv_vmaxu_vv_u8m2_m(bool_mask_ext, vec_src1, vec_src2, vl),
-                                                      __riscv_vminu_vv_u8m2_m(bool_mask_ext, vec_src1, vec_src2, vl), vl);
-                auto vec_zext = __riscv_vzext_vf4_u32m8_m(bool_mask_ext, vec_src, vl);
-                vec_sum = __riscv_vadd_tumu(bool_mask_ext, vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            int vl;
-            for (int j = 0; j < width * 4; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width * 4 - j);
-                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
-                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
-                auto vec_zext = __riscv_vzext_vf4(vec_src, vl);
-                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    auto sc_sum = __riscv_vmv_s_x_u32m1(0, vlmax);
-    sc_sum = __riscv_vredsum(vec_sum, sc_sum, vlmax);
-    *result = __riscv_vmv_x(sc_sum);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normDiffL2Sqr_8UC4(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e8m2();
-    auto vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-    int cnt = 0;
-    auto reduce = [&](int vl) {
-        if ((cnt += vl) < (1 << 16))
-            return;
-        cnt = vl;
-        for (int i = 0; i < vlmax; i++)
-        {
-            *result += __riscv_vmv_x(vec_sum);
-            vec_sum = __riscv_vslidedown(vec_sum, 1, vlmax);
-        }
-        vec_sum = __riscv_vmv_v_x_u32m8(0, vlmax);
-    };
-
-    *result = 0;
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            const uchar* mask_row = mask + i * mask_step;
-            int vl, vlm;
-            for (int j = 0, jm = 0; j < width * 4; j += vl, jm += vlm)
-            {
-                vl = __riscv_vsetvl_e8m2(width * 4 - j);
-                vlm = __riscv_vsetvl_e8mf2(width - jm);
-                reduce(vl);
-
-                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8mf2(mask_row + jm, vlm);
-                auto vec_mask_ext = __riscv_vmul(__riscv_vzext_vf4(__riscv_vminu(vec_mask, 1, vlm), vlm), 0x01010101, vlm);
-                auto bool_mask_ext = __riscv_vmsne(__riscv_vreinterpret_u8m2(vec_mask_ext), 0, vl);
-                auto vec_src = __riscv_vsub_vv_u8m2_m(bool_mask_ext, __riscv_vmaxu_vv_u8m2_m(bool_mask_ext, vec_src1, vec_src2, vl),
-                                                      __riscv_vminu_vv_u8m2_m(bool_mask_ext, vec_src1, vec_src2, vl), vl);
-                auto vec_mul = __riscv_vwmulu_vv_u16m4_m(bool_mask_ext, vec_src, vec_src, vl);
-                auto vec_zext = __riscv_vzext_vf2_u32m8_m(bool_mask_ext, vec_mul, vl);
-                vec_sum = __riscv_vadd_tumu(bool_mask_ext, vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const uchar* src1_row = src1 + i * src1_step;
-            const uchar* src2_row = src2 + i * src2_step;
-            int vl;
-            for (int j = 0; j < width * 4; j += vl)
-            {
-                vl = __riscv_vsetvl_e8m2(width * 4 - j);
-                reduce(vl);
-
-                auto vec_src1 = __riscv_vle8_v_u8m2(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle8_v_u8m2(src2_row + j, vl);
-                auto vec_src = __riscv_vsub(__riscv_vmaxu(vec_src1, vec_src2, vl), __riscv_vminu(vec_src1, vec_src2, vl), vl);
-                auto vec_mul = __riscv_vwmulu(vec_src, vec_src, vl);
-                auto vec_zext = __riscv_vzext_vf2(vec_mul, vl);
-                vec_sum = __riscv_vadd_tu(vec_sum, vec_sum, vec_zext, vl);
-            }
-        }
-    }
-    reduce(1 << 16);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normDiffInf_32FC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e32m8();
-    auto vec_max = __riscv_vfmv_v_f_f32m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
-            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m8(width - j);
-                auto vec_src1 = __riscv_vle32_v_f32m8(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle32_v_f32m8(src2_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m2(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_src = __riscv_vfsub_vv_f32m8_m(bool_mask, vec_src1, vec_src2, vl);
-                auto vec_abs = __riscv_vfabs_v_f32m8_m(bool_mask, vec_src, vl);
-                vec_max = __riscv_vfmax_tumu(bool_mask, vec_max, vec_max, vec_abs, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
-            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m8(width - j);
-                auto vec_src1 = __riscv_vle32_v_f32m8(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle32_v_f32m8(src2_row + j, vl);
-                auto vec_src = __riscv_vfsub(vec_src1, vec_src2, vl);
-                auto vec_abs = __riscv_vfabs(vec_src, vl);
-                vec_max = __riscv_vfmax_tu(vec_max, vec_max, vec_abs, vl);
-            }
-        }
-    }
-    auto sc_max = __riscv_vfmv_s_f_f32m1(0, vlmax);
-    sc_max = __riscv_vfredmax(vec_max, sc_max, vlmax);
-    *result = __riscv_vfmv_f(sc_max);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normDiffL1_32FC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e32m4();
-    auto vec_sum = __riscv_vfmv_v_f_f64m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
-            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m4(width - j);
-                auto vec_src1 = __riscv_vle32_v_f32m4(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle32_v_f32m4(src2_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m1(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_src = __riscv_vfsub_vv_f32m4_m(bool_mask, vec_src1, vec_src2, vl);
-                auto vec_abs = __riscv_vfabs_v_f32m4_m(bool_mask, vec_src, vl);
-                auto vec_fext = __riscv_vfwcvt_f_f_v_f64m8_m(bool_mask, vec_abs, vl);
-                vec_sum = __riscv_vfadd_tumu(bool_mask, vec_sum, vec_sum, vec_fext, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
-            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m4(width - j);
-                auto vec_src1 = __riscv_vle32_v_f32m4(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle32_v_f32m4(src2_row + j, vl);
-                auto vec_src = __riscv_vfsub(vec_src1, vec_src2, vl);
-                auto vec_abs = __riscv_vfabs(vec_src, vl);
-                auto vec_fext = __riscv_vfwcvt_f_f_v_f64m8(vec_abs, vl);
-                vec_sum = __riscv_vfadd_tu(vec_sum, vec_sum, vec_fext, vl);
-            }
-        }
-    }
-    auto sc_sum = __riscv_vfmv_s_f_f64m1(0, vlmax);
-    sc_sum = __riscv_vfredosum(vec_sum, sc_sum, vlmax);
-    *result = __riscv_vfmv_f(sc_sum);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normDiffL2Sqr_32FC1(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask, size_t mask_step, int width, int height, double* result)
-{
-    int vlmax = __riscv_vsetvlmax_e32m4();
-    auto vec_sum = __riscv_vfmv_v_f_f64m8(0, vlmax);
-
-    if (mask)
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
-            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
-            const uchar* mask_row = mask + i * mask_step;
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m4(width - j);
-                auto vec_src1 = __riscv_vle32_v_f32m4(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle32_v_f32m4(src2_row + j, vl);
-                auto vec_mask = __riscv_vle8_v_u8m1(mask_row + j, vl);
-                auto bool_mask = __riscv_vmsne(vec_mask, 0, vl);
-                auto vec_src = __riscv_vfsub_vv_f32m4_m(bool_mask, vec_src1, vec_src2, vl);
-                auto vec_mul = __riscv_vfwmul_vv_f64m8_m(bool_mask, vec_src, vec_src, vl);
-                vec_sum = __riscv_vfadd_tumu(bool_mask, vec_sum, vec_sum, vec_mul, vl);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++)
-        {
-            const float* src1_row = reinterpret_cast<const float*>(src1 + i * src1_step);
-            const float* src2_row = reinterpret_cast<const float*>(src2 + i * src2_step);
-            int vl;
-            for (int j = 0; j < width; j += vl)
-            {
-                vl = __riscv_vsetvl_e32m4(width - j);
-                auto vec_src1 = __riscv_vle32_v_f32m4(src1_row + j, vl);
-                auto vec_src2 = __riscv_vle32_v_f32m4(src2_row + j, vl);
-                auto vec_src = __riscv_vfsub(vec_src1, vec_src2, vl);
-                auto vec_mul = __riscv_vfwmul(vec_src, vec_src, vl);
-                vec_sum = __riscv_vfadd_tu(vec_sum, vec_sum, vec_mul, vl);
-            }
-        }
-    }
-    auto sc_sum = __riscv_vfmv_s_f_f64m1(0, vlmax);
-    sc_sum = __riscv_vfredosum(vec_sum, sc_sum, vlmax);
-    *result = __riscv_vfmv_f(sc_sum);
-
-    return CV_HAL_ERROR_OK;
-}
-
-inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size_t src2_step, const uchar* mask,
-                    size_t mask_step, int width, int height, int type, int norm_type, double* result)
-{
-    if (!result)
-        return CV_HAL_ERROR_OK;
-
-    int ret;
-    switch (type)
-    {
-    case CV_8UC1:
-        switch (norm_type & ~NORM_RELATIVE)
-        {
-        case NORM_INF:
-            ret = normDiffInf_8UC1(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            break;
-        case NORM_L1:
-            ret = normDiffL1_8UC1(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            break;
-        case NORM_L2SQR:
-            ret = normDiffL2Sqr_8UC1(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            break;
-        case NORM_L2:
-            ret = normDiffL2Sqr_8UC1(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            *result = std::sqrt(*result);
-            break;
-        default:
-            ret = CV_HAL_ERROR_NOT_IMPLEMENTED;
-        }
-        break;
-    case CV_8UC4:
-        switch (norm_type & ~NORM_RELATIVE)
-        {
-        case NORM_INF:
-            ret = normDiffInf_8UC4(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            break;
-        case NORM_L1:
-            ret = normDiffL1_8UC4(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            break;
-        case NORM_L2SQR:
-            ret = normDiffL2Sqr_8UC4(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            break;
-        case NORM_L2:
-            ret = normDiffL2Sqr_8UC4(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            *result = std::sqrt(*result);
-            break;
-        default:
-            ret = CV_HAL_ERROR_NOT_IMPLEMENTED;
-        }
-        break;
-    case CV_32FC1:
-        switch (norm_type & ~NORM_RELATIVE)
-        {
-        case NORM_INF:
-            ret = normDiffInf_32FC1(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            break;
-        case NORM_L1:
-            ret = normDiffL1_32FC1(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            break;
-        case NORM_L2SQR:
-            ret = normDiffL2Sqr_32FC1(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            break;
-        case NORM_L2:
-            ret = normDiffL2Sqr_32FC1(src1, src1_step, src2, src2_step, mask, mask_step, width, height, result);
-            *result = std::sqrt(*result);
-            break;
-        default:
-            ret = CV_HAL_ERROR_NOT_IMPLEMENTED;
-        }
-        break;
-    default:
-        ret = CV_HAL_ERROR_NOT_IMPLEMENTED;
-    }
-
-    if(ret == CV_HAL_ERROR_OK && (norm_type & NORM_RELATIVE))
-    {
-        double result_;
-        ret = cv::cv_hal_rvv::norm(src2, src2_step, mask, mask_step, width, height, type, norm_type & ~NORM_RELATIVE, &result_);
-        if(ret == CV_HAL_ERROR_OK)
-        {
-            *result /= result_ + DBL_EPSILON;
-        }
-    }
-
-    return ret;
-}
-
-}}
-
-#endif
--- a/3rdparty/hal_rvv/version/hal_rvv_071.hpp
+++ b/3rdparty/hal_rvv/version/hal_rvv_071.hpp
@ -1,109 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-#ifndef OPENCV_HAL_RVV_071_HPP_INCLUDED
-#define OPENCV_HAL_RVV_071_HPP_INCLUDED
-
-#include <riscv_vector.h>
-
-#include <limits>
-
-namespace cv { namespace cv_hal_rvv {
-
-#undef cv_hal_cvtBGRtoBGR
-#define cv_hal_cvtBGRtoBGR cv::cv_hal_rvv::cvtBGRtoBGR
-
-static const unsigned char index_array_32 [32]
-                        { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15, 18, 17, 16, 19, 22, 21, 20, 23, 26, 25, 24, 27, 30, 29, 28, 31  };
-
-static const unsigned char index_array_24 [24]
-                        { 2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17, 16, 15, 20, 19, 18, 23, 22, 21  };
-
-static void vBGRtoBGR(const unsigned char* src, unsigned char * dst, const unsigned char * index, int n, int scn, int dcn, int vsize_pixels, const int vsize)
-{
-    vuint8m2_t vec_index = vle8_v_u8m2(index, vsize);
-
-    int i = 0;
-
-    for ( ; i <= n-vsize; i += vsize_pixels, src += vsize, dst += vsize)
-    {
-        vuint8m2_t vec_src = vle8_v_u8m2(src, vsize);
-        vuint8m2_t vec_dst = vrgather_vv_u8m2(vec_src, vec_index, vsize);
-        vse8_v_u8m2(dst, vec_dst, vsize);
-    }
-
-    for ( ; i < n; i++, src += scn, dst += dcn )
-    {
-        unsigned char t0 = src[0], t1 = src[1], t2 = src[2];
-        dst[2] = t0;
-        dst[1] = t1;
-        dst[0] = t2;
-        if(dcn == 4)
-        {
-            unsigned char d = src[3];
-            dst[3] = d;
-        }
-    }
-}
-
-static void sBGRtoBGR(const unsigned char* src, unsigned char * dst, int n, int scn, int dcn, int bi)
-{
-    for (int i = 0; i < n; i++, src += scn, dst += dcn)
-    {
-        unsigned char t0 = src[0], t1 = src[1], t2 = src[2];
-        dst[bi  ] = t0;
-        dst[1]    = t1;
-        dst[bi^2] = t2;
-        if(dcn == 4)
-        {
-            unsigned char d = scn == 4 ? src[3] : std::numeric_limits<unsigned char>::max();
-            dst[3] = d;
-        }
-    }
-}
-
-static int cvtBGRtoBGR(const unsigned char * src_data, size_t src_step, unsigned char * dst_data, size_t dst_step, int width, int height, int depth, int scn, int dcn, bool swapBlue)
-{
-    if (depth != CV_8U)
-    {
-        return CV_HAL_ERROR_NOT_IMPLEMENTED;
-    }
-
-    const int blueIdx = swapBlue ? 2 : 0;
-    if (scn == dcn)
-    {
-        if (!swapBlue)
-        {
-            return CV_HAL_ERROR_NOT_IMPLEMENTED;
-        }
-
-        const int vsize_pixels = 8;
-
-        if (scn == 4)
-        {
-            for (int i = 0; i < height; i++, src_data += src_step, dst_data += dst_step)
-            {
-                vBGRtoBGR(src_data, dst_data, index_array_32, width, scn, dcn, vsize_pixels, 32);
-            }
-        }
-        else
-        {
-            for (int i = 0; i < height; i++, src_data += src_step, dst_data += dst_step)
-            {
-                vBGRtoBGR(src_data, dst_data, index_array_24, width, scn, dcn, vsize_pixels, 24);
-            }
-        }
-    }
-    else
-    {
-        for (int i = 0; i < height; i++, src_data += src_step, dst_data += dst_step)
-            sBGRtoBGR(src_data, dst_data, width, scn, dcn, blueIdx);
-    }
-
-    return CV_HAL_ERROR_OK;
-}
-
-}}
-
-#endif
--- a/3rdparty/libjpeg-turbo/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/CMakeLists.txt
@ -18,7 +18,7 @@ if(CV_GCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13)
  ocv_warnings_disable(CMAKE_C_FLAGS -Wstringop-overflow)
 endif()

-set(VERSION 3.0.3)
+set(VERSION 3.1.0)
 set(COPYRIGHT_YEAR "1991-2024")
 string(REPLACE "." ";" VERSION_TRIPLET ${VERSION})
 list(GET VERSION_TRIPLET 0 VERSION_MAJOR)
@ -203,7 +203,7 @@ check_type_size("size_t" SIZE_T)
 check_type_size("unsigned long" UNSIGNED_LONG)

 if(ENABLE_LIBJPEG_TURBO_SIMD)
-  add_subdirectory(src/simd)
+  add_subdirectory(simd)
  if(NEON_INTRINSICS)
    add_definitions(-DNEON_INTRINSICS)
  endif()
--- a/3rdparty/libjpeg-turbo/LICENSE.md
+++ b/3rdparty/libjpeg-turbo/LICENSE.md
@ -94,7 +94,7 @@ intended solely for clarification.
 The Modified (3-clause) BSD License
 ===================================

-Copyright (C)2009-2023 D. R. Commander.  All Rights Reserved.<br>
+Copyright (C)2009-2024 D. R. Commander.  All Rights Reserved.<br>
 Copyright (C)2015 Viktor Szathmáry.  All Rights Reserved.

 Redistribution and use in source and binary forms, with or without
--- a/3rdparty/libjpeg-turbo/README.ijg
+++ b/3rdparty/libjpeg-turbo/README.ijg
@ -36,16 +36,18 @@ TO DO               Plans for future IJG releases.
 Other documentation files in the distribution are:

 User documentation:
-  usage.txt         Usage instructions for cjpeg, djpeg, jpegtran,
-                    rdjpgcom, and wrjpgcom.
-  *.1               Unix-style man pages for programs (same info as usage.txt).
-  wizard.txt        Advanced usage instructions for JPEG wizards only.
-  change.log        Version-to-version change highlights.
+  doc/usage.txt         Usage instructions for cjpeg, djpeg, jpegtran,
+                        rdjpgcom, and wrjpgcom.
+  doc/*.1               Unix-style man pages for programs (same info as
+                        usage.txt).
+  doc/wizard.txt        Advanced usage instructions for JPEG wizards only.
+  doc/change.log        Version-to-version change highlights.
 Programmer and internal documentation:
-  libjpeg.txt       How to use the JPEG library in your own programs.
-  example.c         Sample code for calling the JPEG library.
-  structure.txt     Overview of the JPEG library's internal structure.
-  coderules.txt     Coding style rules --- please read if you contribute code.
+  doc/libjpeg.txt       How to use the JPEG library in your own programs.
+  src/example.c         Sample code for calling the JPEG library.
+  doc/structure.txt     Overview of the JPEG library's internal structure.
+  doc/coderules.txt     Coding style rules --- please read if you contribute
+                        code.

 Please read at least usage.txt.  Some information can also be found in the JPEG
 FAQ (Frequently Asked Questions) article.  See ARCHIVE LOCATIONS below to find
@ -89,9 +91,9 @@ The library is intended to be reused in other applications.
 In order to support file conversion and viewing software, we have included
 considerable functionality beyond the bare JPEG coding/decoding capability;
 for example, the color quantization modules are not strictly part of JPEG
-decoding, but they are essential for output to colormapped file formats or
-colormapped displays.  These extra functions can be compiled out of the
-library if not required for a particular application.
+decoding, but they are essential for output to colormapped file formats.  These
+extra functions can be compiled out of the library if not required for a
+particular application.

 We have also included "jpegtran", a utility for lossless transcoding between
 different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple
--- a/3rdparty/libjpeg-turbo/README.md
+++ b/3rdparty/libjpeg-turbo/README.md
@ -69,9 +69,12 @@ JPEG images:
  generating planar YUV images and performing multiple simultaneous lossless
  transforms on an image.  The Java interface for libjpeg-turbo is written on
  top of the TurboJPEG API.  The TurboJPEG API is recommended for first-time
-  users of libjpeg-turbo.  Refer to [tjexample.c](tjexample.c) and
-  [TJExample.java](java/TJExample.java) for examples of its usage and to
-  <http://libjpeg-turbo.org/Documentation/Documentation> for API documentation.
+  users of libjpeg-turbo.  Refer to [tjcomp.c](src/tjcomp.c),
+  [tjdecomp.c](src/tjdecomp.c), [tjtran.c](src/tjtran.c),
+  [TJComp.java](java/TJComp.java), [TJDecomp.java](java/TJDecomp.java), and
+  [TJTran.java](java/TJTran.java) for examples of its usage and to
+  <https://libjpeg-turbo.org/Documentation/Documentation> for API
+  documentation.

 - **libjpeg API**<br>
  This is the de facto industry-standard API for compressing and decompressing
@ -79,8 +82,9 @@ JPEG images:
  more powerful.  The libjpeg API implementation in libjpeg-turbo is both
  API/ABI-compatible and mathematically compatible with libjpeg v6b.  It can
  also optionally be configured to be API/ABI-compatible with libjpeg v7 and v8
-  (see below.)  Refer to [cjpeg.c](cjpeg.c) and [djpeg.c](djpeg.c) for examples
-  of its usage and to [libjpeg.txt](libjpeg.txt) for API documentation.
+  (see below.)  Refer to [cjpeg.c](src/cjpeg.c) and [djpeg.c](src/djpeg.c) for
+  examples of its usage and to [libjpeg.txt](doc/libjpeg.txt) for API
+  documentation.

 There is no significant performance advantage to either API when both are used
 to perform similar operations.
@ -132,9 +136,9 @@ extensions at compile time with:

    #ifdef JCS_ALPHA_EXTENSIONS

-[jcstest.c](jcstest.c), located in the libjpeg-turbo source tree, demonstrates
-how to check for the existence of the colorspace extensions at compile time and
-run time.
+[jcstest.c](src/jcstest.c), located in the libjpeg-turbo source tree,
+demonstrates how to check for the existence of the colorspace extensions at
+compile time and run time.

 libjpeg v7 and v8 API/ABI Emulation
 -----------------------------------
@ -199,7 +203,7 @@ supported and which aren't.
 NOTE:  As of this writing, extensive research has been conducted into the
 usefulness of DCT scaling as a means of data reduction and SmartScale as a
 means of quality improvement.  Readers are invited to peruse the research at
-<http://www.libjpeg-turbo.org/About/SmartScale> and draw their own conclusions,
+<https://libjpeg-turbo.org/About/SmartScale> and draw their own conclusions,
 but it is the general belief of our project that these features have not
 demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo.

--- a/3rdparty/libjpeg-turbo/src/simd/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/src/simd/CMakeLists.txt
@ -273,48 +273,33 @@ endif()
 check_c_source_compiles("
  #include <arm_neon.h>
  int main(int argc, char **argv) {
-    int16_t input[] = {
-      (int16_t)argc, (int16_t)argc, (int16_t)argc, (int16_t)argc,
-      (int16_t)argc, (int16_t)argc, (int16_t)argc, (int16_t)argc,
-      (int16_t)argc, (int16_t)argc, (int16_t)argc, (int16_t)argc
-    };
-    int16x4x3_t output = vld1_s16_x3(input);
+    int16_t input[12];
+    int16x4x3_t output;
+    int i;
+    for (i = 0; i < 12; i++) input[i] = (int16_t)argc;
+    output = vld1_s16_x3(input);
    vst3_s16(input, output);
    return (int)input[0];
  }" HAVE_VLD1_S16_X3)
 check_c_source_compiles("
  #include <arm_neon.h>
  int main(int argc, char **argv) {
-    uint16_t input[] = {
-      (uint16_t)argc, (uint16_t)argc, (uint16_t)argc, (uint16_t)argc,
-      (uint16_t)argc, (uint16_t)argc, (uint16_t)argc, (uint16_t)argc
-    };
-    uint16x4x2_t output = vld1_u16_x2(input);
+    uint16_t input[8];
+    uint16x4x2_t output;
+    int i;
+    for (i = 0; i < 8; i++) input[i] = (uint16_t)argc;
+    output = vld1_u16_x2(input);
    vst2_u16(input, output);
    return (int)input[0];
  }" HAVE_VLD1_U16_X2)
 check_c_source_compiles("
  #include <arm_neon.h>
  int main(int argc, char **argv) {
-    uint8_t input[] = {
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc,
-      (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc
-    };
-    uint8x16x4_t output = vld1q_u8_x4(input);
+    uint8_t input[64];
+    uint8x16x4_t output;
+    int i;
+    for (i = 0; i < 64; i++) input[i] = (uint8_t)argc;
+    output = vld1q_u8_x4(input);
    vst4q_u8(input, output);
    return (int)input[0];
  }" HAVE_VLD1Q_U8_X4)
@ -369,7 +354,8 @@ if(NOT NEON_INTRINSICS)
  separate_arguments(CMAKE_ASM_FLAGS_SEP UNIX_COMMAND "${CMAKE_ASM_FLAGS}")
  execute_process(COMMAND ${CMAKE_ASM_COMPILER} ${CMAKE_ASM_FLAGS_SEP}
      -x assembler-with-cpp -c ${CMAKE_CURRENT_BINARY_DIR}/gastest.S
-    RESULT_VARIABLE RESULT OUTPUT_VARIABLE OUTPUT ERROR_VARIABLE ERROR)
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} RESULT_VARIABLE RESULT
+    OUTPUT_VARIABLE OUTPUT ERROR_VARIABLE ERROR)
  if(NOT RESULT EQUAL 0)
    message(WARNING "GAS appears to be broken.  Using the full Neon SIMD intrinsics implementation.")
    set(NEON_INTRINSICS 1 CACHE INTERNAL "" FORCE)
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jccolext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jccolext-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jchuff-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jchuff-neon.c
@ -2,6 +2,7 @@
 * jchuff-neon.c - Huffman entropy encoding (32-bit Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -24,11 +25,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
 #include "../../jsimd.h"
 #include "../jchuff.h"
 #include "neon-compat.h"
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd.c
@ -3,7 +3,7 @@
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, 2024, D. R. Commander.
 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
 * Copyright (C) 2019, Google LLC.
 * Copyright (C) 2020, Arm Limited.
@ -18,11 +18,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
 #include "../../jsimd.h"

 #include <ctype.h>
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd_neon.S
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd_neon.S
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jccolext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jccolext-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jchuff-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jchuff-neon.c
@ -2,7 +2,7 @@
 * jchuff-neon.c - Huffman entropy encoding (64-bit Arm Neon)
 *
 * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, 2022, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2022, 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -25,11 +25,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
 #include "../../jsimd.h"
 #include "../align.h"
 #include "../jchuff.h"
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd.c
@ -3,7 +3,8 @@
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, 2024,
+ *           D. R. Commander.
 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
 * Copyright (C) 2020, Arm Limited.
 *
@ -17,11 +18,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
 #include "../../jsimd.h"

 #include <ctype.h>
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd_neon.S
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd_neon.S
--- a/3rdparty/libjpeg-turbo/src/simd/arm/align.h
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/align.h
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jccolor-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jccolor-neon.c
@ -2,7 +2,7 @@
 * jccolor-neon.c - colorspace conversion (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -22,11 +22,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
 #include "neon-compat.h"
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jcgray-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jcgray-neon.c
@ -2,6 +2,7 @@
 * jcgray-neon.c - grayscale colorspace conversion (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -21,13 +22,14 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"

 #include <arm_neon.h>

--- a/3rdparty/libjpeg-turbo/src/simd/arm/jcgryext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jcgryext-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jchuff.h
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jchuff.h
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jcphuff-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jcphuff-neon.c
@ -3,7 +3,7 @@
 *
 * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
 * Copyright (C) 2022, Matthieu Darbois.  All Rights Reserved.
- * Copyright (C) 2022, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2022, 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -23,11 +23,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "neon-compat.h"

--- a/3rdparty/libjpeg-turbo/src/simd/arm/jcsample-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jcsample-neon.c
@ -2,6 +2,7 @@
 * jcsample-neon.c - downsampling (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -21,13 +22,14 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"

 #include <arm_neon.h>

--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdcolext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdcolext-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdcolor-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdcolor-neon.c
@ -2,6 +2,7 @@
 * jdcolor-neon.c - colorspace conversion (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -21,13 +22,14 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"

 #include <arm_neon.h>

--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdmerge-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdmerge-neon.c
@ -2,6 +2,7 @@
 * jdmerge-neon.c - merged upsampling/color conversion (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -21,13 +22,14 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"

 #include <arm_neon.h>

--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdmrgext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdmrgext-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdsample-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdsample-neon.c
@ -2,7 +2,7 @@
 * jdsample-neon.c - upsampling (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -22,12 +22,13 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
+#include "neon-compat.h"

 #include <arm_neon.h>

--- a/3rdparty/libjpeg-turbo/src/simd/arm/jfdctfst-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jfdctfst-neon.c
@ -2,6 +2,7 @@
 * jfdctfst-neon.c - fast integer FDCT (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -21,13 +22,14 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"

 #include <arm_neon.h>

--- a/3rdparty/libjpeg-turbo/src/simd/arm/jfdctint-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jfdctint-neon.c
@ -2,7 +2,7 @@
 * jfdctint-neon.c - accurate integer FDCT (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -22,11 +22,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
 #include "neon-compat.h"
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jidctfst-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jidctfst-neon.c
@ -2,6 +2,7 @@
 * jidctfst-neon.c - fast integer IDCT (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -21,13 +22,14 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"

 #include <arm_neon.h>

--- a/3rdparty/libjpeg-turbo/src/simd/arm/jidctint-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jidctint-neon.c
@ -2,7 +2,7 @@
 * jidctint-neon.c - accurate integer IDCT (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -22,11 +22,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
 #include "neon-compat.h"
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jidctred-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jidctred-neon.c
@ -2,7 +2,7 @@
 * jidctred-neon.c - reduced-size IDCT (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -22,11 +22,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
 #include "neon-compat.h"
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jquanti-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jquanti-neon.c
@ -2,6 +2,7 @@
 * jquanti-neon.c - sample data conversion and quantization (Arm Neon)
 *
 * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
@ -21,12 +22,13 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
+#include "neon-compat.h"

 #include <arm_neon.h>

--- a/3rdparty/libjpeg-turbo/src/simd/arm/neon-compat.h.in
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/neon-compat.h.in
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
 * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
@ -35,3 +35,11 @@
 #else
 #error "Unknown compiler"
 #endif
+
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wdeclaration-after-statement"
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-avx2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-sse2.asm
@ -7,11 +7,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-avx2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-sse2.asm
@ -7,11 +7,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-avx2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-sse2.asm
@ -7,11 +7,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-avx2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-sse2.asm
@ -7,11 +7,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jchuff-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jchuff-sse2.asm
@ -9,11 +9,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains an SSE2 implementation for Huffman coding of one block.
 ; The following code is based on jchuff.c; see jchuff.c for more details.
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcphuff-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcphuff-sse2.asm
@ -7,11 +7,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains an SSE2 implementation of data preparation for progressive
 ; Huffman encoding.  See jcphuff.c for more details.
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-avx2.asm
@ -9,11 +9,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-avx2.asm
@ -9,11 +9,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-avx2.asm
@ -9,11 +9,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-avx2.asm
@ -9,11 +9,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-avx2.asm
@ -9,11 +9,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jcolsamp.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-avx2.asm
@ -9,11 +9,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-3dn.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-3dn.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the forward DCT
 ; (Discrete Cosine Transform). The following code is based directly on
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-sse.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the forward DCT
 ; (Discrete Cosine Transform). The following code is based directly on
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the forward DCT (Discrete Cosine Transform). The following code is
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the forward DCT (Discrete Cosine Transform). The following code is
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-avx2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; forward DCT (Discrete Cosine Transform). The following code is based
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; forward DCT (Discrete Cosine Transform). The following code is based
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; forward DCT (Discrete Cosine Transform). The following code is based
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-3dn.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-3dn.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the inverse DCT
 ; (Discrete Cosine Transform). The following code is based directly on
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the inverse DCT
 ; (Discrete Cosine Transform). The following code is based directly on
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the inverse DCT
 ; (Discrete Cosine Transform). The following code is based directly on
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the inverse DCT (Discrete Cosine Transform). The following code is
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the inverse DCT (Discrete Cosine Transform). The following code is
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-avx2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; inverse DCT (Discrete Cosine Transform). The following code is based
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; inverse DCT (Discrete Cosine Transform). The following code is based
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; inverse DCT (Discrete Cosine Transform). The following code is based
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains inverse-DCT routines that produce reduced-size
 ; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains inverse-DCT routines that produce reduced-size
 ; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-3dn.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-3dn.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"
 %include "jdct.inc"
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-mmx.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"
 %include "jdct.inc"
@ -120,8 +116,8 @@ EXTN(jsimd_convsamp_mmx):
 ; Quantize/descale the coefficients, and store into coef_block
 ;
 ; This implementation is based on an algorithm described in
-;   "How to optimize for the Pentium family of microprocessors"
-;   (http://www.agner.org/assem/).
+;   "Optimizing subroutines in assembly language:
+;   An optimization guide for x86 platforms" (https://agner.org/optimize).
 ;
 ; GLOBAL(void)
 ; jsimd_quantize_mmx(JCOEFPTR coef_block, DCTELEM *divisors,
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-sse.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"
 %include "jdct.inc"
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquantf-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquantf-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"
 %include "jdct.inc"
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-avx2.asm
@ -2,18 +2,14 @@
 ; jquanti.asm - sample data conversion and quantization (AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, 2018, D. R. Commander.
+; Copyright (C) 2016, 2018, 2024, D. R. Commander.
 ; Copyright (C) 2016, Matthieu Darbois.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"
 %include "jdct.inc"
@ -107,8 +103,8 @@ EXTN(jsimd_convsamp_avx2):
 ; Quantize/descale the coefficients, and store into coef_block
 ;
 ; This implementation is based on an algorithm described in
-;   "How to optimize for the Pentium family of microprocessors"
-;   (http://www.agner.org/assem/).
+;   "Optimizing subroutines in assembly language:
+;   An optimization guide for x86 platforms" (https://agner.org/optimize).
 ;
 ; GLOBAL(void)
 ; jsimd_quantize_avx2(JCOEFPTR coef_block, DCTELEM *divisors,
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-sse2.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"
 %include "jdct.inc"
@ -98,8 +94,8 @@ EXTN(jsimd_convsamp_sse2):
 ; Quantize/descale the coefficients, and store into coef_block
 ;
 ; This implementation is based on an algorithm described in
-;   "How to optimize for the Pentium family of microprocessors"
-;   (http://www.agner.org/assem/).
+;   "Optimizing subroutines in assembly language:
+;   An optimization guide for x86 platforms" (https://agner.org/optimize).
 ;
 ; GLOBAL(void)
 ; jsimd_quantize_sse2(JCOEFPTR coef_block, DCTELEM *divisors,
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jsimd.c
@ -2,7 +2,7 @@
 * jsimd_i386.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2024, D. R. Commander.
 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
@ -15,11 +15,11 @@
 */

 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"

 /*
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jsimdcpu.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jsimdcpu.asm
@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.

 %include "jsimdext.inc"

--- a/3rdparty/libjpeg-turbo/src/simd/jsimd.h
+++ b/3rdparty/libjpeg-turbo/src/simd/jsimd.h
--- a/Show More
+++ b/Show More