Merge pull request #27636 from jmackay2:cuda_13

Cuda 13.0 compatibility #27636 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake ### Issue CUDA 13 deprecated some fields, resulting in build failures with CUDA 13. This updates to use the replacement API. The reference to the deprecated features is here: https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#id6 ### Testing This was tested by building on the following configurations: OS: Ubuntu 24.04; CUDA: 12.9, 13.0
2025-12-06 12:19:50 +01:00 · 2025-08-11 05:15:24 -04:00 · 2025-08-11 05:15:24 -04:00 · f0888a10e8
commit f0888a10e8
parent e31ff00104
1 changed files with 31 additions and 9 deletions
--- a/modules/core/src/cuda_info.cpp
+++ b/modules/core/src/cuda_info.cpp
@ -424,7 +424,9 @@ int cv::cuda::DeviceInfo::clockRate() const
 #ifndef HAVE_CUDA
    throw_no_cuda();
 #else
-    return deviceProps().get(device_id_)->clockRate;
+    int clockRate;
    cudaSafeCall(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device_id_));
    return clockRate;
 #endif
 }
@ -487,7 +489,9 @@ bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
 #ifndef HAVE_CUDA
    throw_no_cuda();
 #else
-    return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
+    int kernelExecTimeoutEnabled;
    cudaSafeCall(cudaDeviceGetAttribute(&kernelExecTimeoutEnabled, cudaDevAttrKernelExecTimeout, device_id_));
    return kernelExecTimeoutEnabled != 0;
 #endif
 }
@ -522,7 +526,9 @@ DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
        ComputeModeExclusiveProcess
    };
-    return tbl[deviceProps().get(device_id_)->computeMode];
+    int computeMode;
    cudaSafeCall(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, device_id_));
    return tbl[computeMode];
 #endif
 }
@ -554,7 +560,14 @@ int cv::cuda::DeviceInfo::maxTexture1DLinear() const
 #ifndef HAVE_CUDA
    throw_no_cuda();
 #else
    #if CUDA_VERSION >= 13000
        size_t maxWidthInElements;
        cudaChannelFormatDesc fmtDesc = cudaCreateChannelDesc<float4>();
        cudaSafeCall(cudaDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, &fmtDesc, device_id_));
        return maxWidthInElements;
    #else
        return deviceProps().get(device_id_)->maxTexture1DLinear;
    #endif
 #endif
 }
@ -793,7 +806,9 @@ int cv::cuda::DeviceInfo::memoryClockRate() const
 #ifndef HAVE_CUDA
    throw_no_cuda();
 #else
-    return deviceProps().get(device_id_)->memoryClockRate;
+    int memoryClockRate;
    cudaSafeCall(cudaDeviceGetAttribute(&memoryClockRate, cudaDevAttrMemoryClockRate, device_id_));
    return memoryClockRate;
 #endif
 }
@ -933,7 +948,9 @@ void cv::cuda::printCudaDeviceInfo(int device)
        if (cores > 0)
            printf("  (%2d) Multiprocessors x (%2d) CUDA Cores/MP:     %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);
-        printf("  GPU Clock Speed:                               %.2f GHz\n", prop.clockRate * 1e-6f);
+        int clockRate;
        cudaSafeCall(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, dev));
        printf("  GPU Clock Speed:                               %.2f GHz\n", clockRate * 1e-6f);
        printf("  Max Texture Dimension Size (x,y,z)             1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
            prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
@ -952,8 +969,10 @@ void cv::cuda::printCudaDeviceInfo(int device)
        printf("  Maximum memory pitch:                          %u bytes\n", (int)prop.memPitch);
        printf("  Texture alignment:                             %u bytes\n", (int)prop.textureAlignment);
-        printf("  Concurrent copy and execution:                 %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
+        printf("  Concurrent copy and execution:                 %s with %d copy engine(s)\n", (prop.asyncEngineCount ? "Yes" : "No"), prop.asyncEngineCount);
-        printf("  Run time limit on kernels:                     %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
+        int kernelExecTimeoutEnabled;
        cudaSafeCall(cudaDeviceGetAttribute(&kernelExecTimeoutEnabled, cudaDevAttrKernelExecTimeout, dev));
        printf("  Run time limit on kernels:                     %s\n", kernelExecTimeoutEnabled ? "Yes" : "No");
        printf("  Integrated GPU sharing Host Memory:            %s\n", prop.integrated ? "Yes" : "No");
        printf("  Support host page-locked memory mapping:       %s\n", prop.canMapHostMemory ? "Yes" : "No");
@ -963,8 +982,11 @@ void cv::cuda::printCudaDeviceInfo(int device)
        printf("  Device is using TCC driver mode:               %s\n", prop.tccDriver ? "Yes" : "No");
        printf("  Device supports Unified Addressing (UVA):      %s\n", prop.unifiedAddressing ? "Yes" : "No");
        printf("  Device PCI Bus ID / PCI location ID:           %d / %d\n", prop.pciBusID, prop.pciDeviceID );
        int propComputeMode;
        cudaSafeCall(cudaDeviceGetAttribute(&propComputeMode, cudaDevAttrComputeMode, dev));
        printf("  Compute Mode:\n");
-        printf("      %s \n", computeMode[prop.computeMode]);
+        printf("      %s \n", computeMode[propComputeMode]);
    }
    printf("\n");