mirror of
https://github.com/zebrajr/opencv.git
synced 2025-12-06 12:19:50 +01:00
Merge pull request #27636 from jmackay2:cuda_13
CUDA 13.0 compatibility #27636 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake ### Issue CUDA 13 deprecated some fields, resulting in build failures with CUDA 13. This change updates the code to use the replacement API. The reference to the deprecated features is here: https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#id6 ### Testing This was tested by building on the following configurations: OS: Ubuntu 24.04 CUDA: 12.9, 13.0
This commit is contained in:
parent
e31ff00104
commit
f0888a10e8
|
|
@ -424,7 +424,9 @@ int cv::cuda::DeviceInfo::clockRate() const
|
||||||
#ifndef HAVE_CUDA
|
#ifndef HAVE_CUDA
|
||||||
throw_no_cuda();
|
throw_no_cuda();
|
||||||
#else
|
#else
|
||||||
return deviceProps().get(device_id_)->clockRate;
|
int clockRate;
|
||||||
|
cudaSafeCall(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device_id_));
|
||||||
|
return clockRate;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -487,7 +489,9 @@ bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
|
||||||
#ifndef HAVE_CUDA
|
#ifndef HAVE_CUDA
|
||||||
throw_no_cuda();
|
throw_no_cuda();
|
||||||
#else
|
#else
|
||||||
return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
|
int kernelExecTimeoutEnabled;
|
||||||
|
cudaSafeCall(cudaDeviceGetAttribute(&kernelExecTimeoutEnabled, cudaDevAttrKernelExecTimeout, device_id_));
|
||||||
|
return kernelExecTimeoutEnabled != 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -522,7 +526,9 @@ DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
|
||||||
ComputeModeExclusiveProcess
|
ComputeModeExclusiveProcess
|
||||||
};
|
};
|
||||||
|
|
||||||
return tbl[deviceProps().get(device_id_)->computeMode];
|
int computeMode;
|
||||||
|
cudaSafeCall(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, device_id_));
|
||||||
|
return tbl[computeMode];
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -554,7 +560,14 @@ int cv::cuda::DeviceInfo::maxTexture1DLinear() const
|
||||||
#ifndef HAVE_CUDA
|
#ifndef HAVE_CUDA
|
||||||
throw_no_cuda();
|
throw_no_cuda();
|
||||||
#else
|
#else
|
||||||
|
#if CUDA_VERSION >= 13000
|
||||||
|
size_t maxWidthInElements;
|
||||||
|
cudaChannelFormatDesc fmtDesc = cudaCreateChannelDesc<float4>();
|
||||||
|
cudaSafeCall(cudaDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, &fmtDesc, device_id_));
|
||||||
|
return maxWidthInElements;
|
||||||
|
#else
|
||||||
return deviceProps().get(device_id_)->maxTexture1DLinear;
|
return deviceProps().get(device_id_)->maxTexture1DLinear;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -793,7 +806,9 @@ int cv::cuda::DeviceInfo::memoryClockRate() const
|
||||||
#ifndef HAVE_CUDA
|
#ifndef HAVE_CUDA
|
||||||
throw_no_cuda();
|
throw_no_cuda();
|
||||||
#else
|
#else
|
||||||
return deviceProps().get(device_id_)->memoryClockRate;
|
int memoryClockRate;
|
||||||
|
cudaSafeCall(cudaDeviceGetAttribute(&memoryClockRate, cudaDevAttrMemoryClockRate, device_id_));
|
||||||
|
return memoryClockRate;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -933,7 +948,9 @@ void cv::cuda::printCudaDeviceInfo(int device)
|
||||||
if (cores > 0)
|
if (cores > 0)
|
||||||
printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);
|
printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);
|
||||||
|
|
||||||
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
|
int clockRate;
|
||||||
|
cudaSafeCall(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, dev));
|
||||||
|
printf(" GPU Clock Speed: %.2f GHz\n", clockRate * 1e-6f);
|
||||||
|
|
||||||
printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
|
printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
|
||||||
prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
|
prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
|
||||||
|
|
@ -952,8 +969,10 @@ void cv::cuda::printCudaDeviceInfo(int device)
|
||||||
printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
|
printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
|
||||||
printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);
|
printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);
|
||||||
|
|
||||||
printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
|
printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.asyncEngineCount ? "Yes" : "No"), prop.asyncEngineCount);
|
||||||
printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
|
int kernelExecTimeoutEnabled;
|
||||||
|
cudaSafeCall(cudaDeviceGetAttribute(&kernelExecTimeoutEnabled, cudaDevAttrKernelExecTimeout, dev));
|
||||||
|
printf(" Run time limit on kernels: %s\n", kernelExecTimeoutEnabled ? "Yes" : "No");
|
||||||
printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
|
printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
|
||||||
printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
|
printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
|
||||||
|
|
||||||
|
|
@ -963,8 +982,11 @@ void cv::cuda::printCudaDeviceInfo(int device)
|
||||||
printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
|
printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
|
||||||
printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
|
printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
|
||||||
printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
|
printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
|
||||||
|
|
||||||
|
int propComputeMode;
|
||||||
|
cudaSafeCall(cudaDeviceGetAttribute(&propComputeMode, cudaDevAttrComputeMode, dev));
|
||||||
printf(" Compute Mode:\n");
|
printf(" Compute Mode:\n");
|
||||||
printf(" %s \n", computeMode[prop.computeMode]);
|
printf(" %s \n", computeMode[propComputeMode]);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user