Merge pull request #27734 from cudawarped:cuda_double4_dep

[cuda] Add compatibility layer for vector types due for deprecation in CUDA 14.0
2025-12-06 12:19:50 +01:00 · 2025-09-09 14:49:48 +03:00 · 2025-09-09 14:49:48 +03:00 · efea09120b
commit efea09120b
parent 3a21ed56e3 ca35ed2f1c
5 changed files with 54 additions and 4 deletions
--- a/modules/core/include/opencv2/core/cuda/cuda_compat.hpp
+++ b/modules/core/include/opencv2/core/cuda/cuda_compat.hpp
@ -0,0 +1,38 @@
 // This file is part of OpenCV project.
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 #ifndef OPENCV_CUDA_CUDA_COMPAT_HPP
 #define OPENCV_CUDA_CUDA_COMPAT_HPP
 #include <cuda.h>
 namespace cv { namespace cuda { namespace device { namespace compat
 {
    // Version-independent names for the 4-component ulonglong/double vector types.
    // When CUDA_VERSION >= 13000 they refer to the `_16a`-suffixed toolkit types,
    // otherwise to the plain global ::ulonglong4 / ::double4.
 #if CUDA_VERSION >= 13000
    typedef ::ulonglong4_16a ulonglong4;
    typedef ::double4_16a double4;
 #else
    typedef ::ulonglong4 ulonglong4;
    typedef ::double4 double4;
 #endif
    // Explicitly named "Compat" spellings of the very same aliases.
    typedef ulonglong4 ulonglong4Compat;
    typedef double4 double4Compat;
    // Builds a compat::double4 from its four components by forwarding to the
    // toolkit factory that matches the alias selected above.
    __host__ __device__ __forceinline__
    double4 make_double4(double x, double y, double z, double w)
    {
 #if CUDA_VERSION >= 13000
        return ::make_double4_16a(x, y, z, w);
 #else
        return ::make_double4(x, y, z, w);
 #endif
    }
    // Factory for double4Compat; simply delegates to compat::make_double4.
    __host__ __device__ __forceinline__
    double4Compat make_double4_compat(double x, double y, double z, double w)
    {
        return make_double4(x, y, z, w);
    }
 }}}}
 #endif // OPENCV_CUDA_CUDA_COMPAT_HPP
--- a/modules/core/include/opencv2/core/cuda/vec_math.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_math.hpp
@ -45,6 +45,7 @@
 #include "vec_traits.hpp"
 #include "saturate_cast.hpp"
 #include "cuda_compat.hpp"
 /** @file
 * @deprecated Use @ref cudev instead.
@ -54,6 +55,8 @@
 namespace cv { namespace cuda { namespace device
 {
    using cv::cuda::device::compat::double4;
    using cv::cuda::device::compat::make_double4;
 // saturate_cast
--- a/modules/core/include/opencv2/core/cuda/vec_traits.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_traits.hpp
@ -44,6 +44,7 @@
 #define OPENCV_CUDA_VEC_TRAITS_HPP
 #include "common.hpp"
 #include "cuda_compat.hpp"
 /** @file
 * @deprecated Use @ref cudev instead.
@ -53,6 +54,9 @@
 namespace cv { namespace cuda { namespace device
 {
    using cv::cuda::device::compat::double4;
    using cv::cuda::device::compat::make_double4;
    template<typename T, int N> struct TypeVec;
    struct __align__(8) uchar8
--- a/modules/core/src/cuda/gpu_mat.cu
+++ b/modules/core/src/cuda/gpu_mat.cu
@ -51,10 +51,12 @@
 #include "opencv2/core/cuda.hpp"
 #include "opencv2/cudev.hpp"
 #include "opencv2/core/cuda/utility.hpp"
 #include "opencv2/core/cuda/cuda_compat.hpp"
 using namespace cv;
 using namespace cv::cuda;
 using namespace cv::cudev;
 using cv::cuda::device::compat::double4Compat;
 device::ThrustAllocator::~ThrustAllocator()
 {
@ -341,7 +343,7 @@ void cv::cuda::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& stream
        {0,0,0,0},
        {0,0,0,0},
        {0,0,0,0},
-        {copyWithMask<double>, copyWithMask<double2>, copyWithMask<double3>, copyWithMask<double4>}
+        {copyWithMask<double>, copyWithMask<double2>, copyWithMask<double3>, copyWithMask<double4Compat>}
    };
    if (mask.channels() == channels())
@ -424,7 +426,7 @@ GpuMat& cv::cuda::GpuMat::setTo(Scalar value, Stream& stream)
        {setToWithOutMask<short>,setToWithOutMask<short2>,setToWithOutMask<short3>,setToWithOutMask<short4>},
        {setToWithOutMask<int>,setToWithOutMask<int2>,setToWithOutMask<int3>,setToWithOutMask<int4>},
        {setToWithOutMask<float>,setToWithOutMask<float2>,setToWithOutMask<float3>,setToWithOutMask<float4>},
-        {setToWithOutMask<double>,setToWithOutMask<double2>,setToWithOutMask<double3>,setToWithOutMask<double4>}
+        {setToWithOutMask<double>,setToWithOutMask<double2>,setToWithOutMask<double3>,setToWithOutMask<double4Compat>}
    };
    funcs[depth()][channels() - 1](*this, value, stream);
@ -455,7 +457,7 @@ GpuMat& cv::cuda::GpuMat::setTo(Scalar value, InputArray _mask, Stream& stream)
        {setToWithMask<short>,setToWithMask<short2>,setToWithMask<short3>,setToWithMask<short4>},
        {setToWithMask<int>,setToWithMask<int2>,setToWithMask<int3>,setToWithMask<int4>},
        {setToWithMask<float>,setToWithMask<float2>,setToWithMask<float3>,setToWithMask<float4>},
-        {setToWithMask<double>,setToWithMask<double2>,setToWithMask<double3>,setToWithMask<double4>}
+        {setToWithMask<double>,setToWithMask<double2>,setToWithMask<double3>,setToWithMask<double4Compat>}
    };
    funcs[depth()][channels() - 1](*this, mask, value, stream);
--- a/modules/dnn/src/cuda/vector_traits.hpp
+++ b/modules/dnn/src/cuda/vector_traits.hpp
@ -11,6 +11,7 @@
 #include "memory.hpp"
 #include "../cuda4dnn/csl/pointer.hpp"
 #include "opencv2/core/cuda/cuda_compat.hpp"
 #include <type_traits>
@ -34,9 +35,11 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace de
     * v_store(output_vPtr, vec);
     */
    using cv::cuda::device::compat::ulonglong4Compat;
    namespace detail {
        template <size_type N> struct raw_type_ { };
-        template <> struct raw_type_<256> { typedef ulonglong4 type; };
+        template <> struct raw_type_<256> { typedef ulonglong4Compat type; };
        template <> struct raw_type_<128> { typedef uint4 type; };
        template <> struct raw_type_<64> { typedef uint2 type; };
        template <> struct raw_type_<32> { typedef uint1 type; };