From f0888a10e8266b2202d930c6974433a421e6f9a7 Mon Sep 17 00:00:00 2001 From: jmackay2 <1.732mackay@gmail.com> Date: Mon, 11 Aug 2025 05:15:24 -0400 Subject: [PATCH] Merge pull request #27636 from jmackay2:cuda_13 Cuda 13.0 compatibility #27636 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake ### Issue CUDA 13 deprecated some fields, resulting in build failures with CUDA 13. This updates to use the replacement API. 
The reference to the deprecated features is here: https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#id6 ### Testing This was tested by building on the following configurations: OS: Ubuntu 24.04 CUDA: 12.9, 13.0 --- modules/core/src/cuda_info.cpp | 40 ++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/modules/core/src/cuda_info.cpp b/modules/core/src/cuda_info.cpp index 2558ec8ea5..d97fadd4ec 100644 --- a/modules/core/src/cuda_info.cpp +++ b/modules/core/src/cuda_info.cpp @@ -424,7 +424,9 @@ int cv::cuda::DeviceInfo::clockRate() const #ifndef HAVE_CUDA throw_no_cuda(); #else - return deviceProps().get(device_id_)->clockRate; + int clockRate; + cudaSafeCall(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device_id_)); + return clockRate; #endif } @@ -487,7 +489,9 @@ bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const #ifndef HAVE_CUDA throw_no_cuda(); #else - return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0; + int kernelExecTimeoutEnabled; + cudaSafeCall(cudaDeviceGetAttribute(&kernelExecTimeoutEnabled, cudaDevAttrKernelExecTimeout, device_id_)); + return kernelExecTimeoutEnabled != 0; #endif } @@ -522,7 +526,9 @@ DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const ComputeModeExclusiveProcess }; - return tbl[deviceProps().get(device_id_)->computeMode]; + int computeMode; + cudaSafeCall(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, device_id_)); + return tbl[computeMode]; #endif } @@ -554,7 +560,14 @@ int cv::cuda::DeviceInfo::maxTexture1DLinear() const #ifndef HAVE_CUDA throw_no_cuda(); #else - return deviceProps().get(device_id_)->maxTexture1DLinear; + #if CUDA_VERSION >= 13000 + size_t maxWidthInElements; + cudaChannelFormatDesc fmtDesc = cudaCreateChannelDesc(); + cudaSafeCall(cudaDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, &fmtDesc, device_id_)); + return maxWidthInElements; + #else + return 
deviceProps().get(device_id_)->maxTexture1DLinear; + #endif #endif } @@ -793,7 +806,9 @@ int cv::cuda::DeviceInfo::memoryClockRate() const #ifndef HAVE_CUDA throw_no_cuda(); #else - return deviceProps().get(device_id_)->memoryClockRate; + int memoryClockRate; + cudaSafeCall(cudaDeviceGetAttribute(&memoryClockRate, cudaDevAttrMemoryClockRate, device_id_)); + return memoryClockRate; #endif } @@ -933,7 +948,9 @@ void cv::cuda::printCudaDeviceInfo(int device) if (cores > 0) printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount); - printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f); + int clockRate; + cudaSafeCall(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, dev)); + printf(" GPU Clock Speed: %.2f GHz\n", clockRate * 1e-6f); printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n", prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], @@ -952,8 +969,10 @@ void cv::cuda::printCudaDeviceInfo(int device) printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch); printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment); - printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount); - printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No"); + printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.asyncEngineCount ? "Yes" : "No"), prop.asyncEngineCount); + int kernelExecTimeoutEnabled; + cudaSafeCall(cudaDeviceGetAttribute(&kernelExecTimeoutEnabled, cudaDevAttrKernelExecTimeout, dev)); + printf(" Run time limit on kernels: %s\n", kernelExecTimeoutEnabled ? "Yes" : "No"); printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No"); printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? 
"Yes" : "No"); @@ -963,8 +982,11 @@ void cv::cuda::printCudaDeviceInfo(int device) printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No"); printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No"); printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID ); + + int propComputeMode; + cudaSafeCall(cudaDeviceGetAttribute(&propComputeMode, cudaDevAttrComputeMode, dev)); printf(" Compute Mode:\n"); - printf(" %s \n", computeMode[prop.computeMode]); + printf(" %s \n", computeMode[propComputeMode]); } printf("\n");