mirror of
https://github.com/zebrajr/opencv.git
synced 2025-12-07 00:19:53 +01:00
Fix CUDA for old GPUs without FP16 support #25880
Fixes #21461
~This is a build-time solution that reflects https://github.com/opencv/opencv/blob/4.10.0/modules/dnn/src/cuda4dnn/init.hpp#L68-L82.~
~We shouldn't add an invalid target while building with `CUDA_ARCH_BIN` < 53.~
_(please see [this discussion](https://github.com/opencv/opencv/pull/25880#discussion_r1668074505))_
This is a run-time solution that basically reverts [these lines](https://github.com/opencv/opencv/commit/d0fe6ad109#diff-757c5ab6ddf2f99cdd09f851e3cf17abff203aff4107d908c7ad3d0466f39604L245-R245).
I've debugged these changes, [coupled with other fixes](https://github.com/gentoo/gentoo/pull/37479), on [Gentoo Linux](https://www.gentoo.org/) and [related tests passed](https://github.com/user-attachments/files/16135391/opencv-4.10.0.20240708-224733.log.gz) on my laptop with `GeForce GTX 960M`.
Alternative solution:
- #21462
_Best regards!_
### Pull Request Readiness Checklist
- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV.
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] `n/a` There are accuracy tests, performance tests, and test data in the opencv_extra repository, if applicable
- [ ] `n/a` The feature is well documented and sample code can be built with the project CMake
192 lines
5.4 KiB
C++
192 lines
5.4 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
|
|
#include "precomp.hpp"
|
|
|
|
#include "op_halide.hpp"
|
|
#include "op_inf_engine.hpp"
|
|
#include "ie_ngraph.hpp"
|
|
#include "op_vkcom.hpp"
|
|
#include "op_cuda.hpp"
|
|
#include "op_webnn.hpp"
|
|
#include "op_timvx.hpp"
|
|
#include "op_cann.hpp"
|
|
|
|
#include "halide_scheduler.hpp"
|
|
|
|
#include "backend.hpp"
|
|
#include "factory.hpp"
|
|
|
|
#ifdef HAVE_CUDA
|
|
#include "cuda4dnn/init.hpp"
|
|
#endif
|
|
|
|
namespace cv {
|
|
namespace dnn {
|
|
CV__DNN_INLINE_NS_BEGIN
|
|
|
|
|
|
class BackendRegistry
|
|
{
|
|
public:
|
|
typedef std::vector< std::pair<Backend, Target> > BackendsList;
|
|
const BackendsList & getBackends() const { return backends; }
|
|
static BackendRegistry & getRegistry()
|
|
{
|
|
static BackendRegistry impl;
|
|
return impl;
|
|
}
|
|
|
|
|
|
private:
|
|
BackendRegistry()
|
|
{
|
|
#ifdef HAVE_HALIDE
|
|
backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
|
|
#ifdef HAVE_OPENCL
|
|
if (cv::ocl::useOpenCL())
|
|
backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
|
|
#endif
|
|
#endif // HAVE_HALIDE
|
|
|
|
bool haveBackendOpenVINO = false;
|
|
#ifdef HAVE_INF_ENGINE
|
|
haveBackendOpenVINO = true;
|
|
#elif defined(ENABLE_PLUGINS)
|
|
{
|
|
auto factory = dnn_backend::createPluginDNNBackendFactory("openvino");
|
|
if (factory)
|
|
{
|
|
auto backend = factory->createNetworkBackend();
|
|
if (backend)
|
|
haveBackendOpenVINO = true;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
bool haveBackendCPU_FP16 = false;
|
|
#if defined(__arm64__) && __arm64__
|
|
haveBackendCPU_FP16 = true;
|
|
#endif
|
|
|
|
if (haveBackendOpenVINO && openvino::checkTarget(DNN_TARGET_CPU))
|
|
{
|
|
backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
|
|
}
|
|
if (haveBackendOpenVINO && openvino::checkTarget(DNN_TARGET_MYRIAD))
|
|
{
|
|
backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD));
|
|
}
|
|
if (haveBackendOpenVINO && openvino::checkTarget(DNN_TARGET_HDDL))
|
|
{
|
|
backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL));
|
|
}
|
|
#ifdef HAVE_OPENCL
|
|
if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
|
|
{
|
|
if (haveBackendOpenVINO && openvino::checkTarget(DNN_TARGET_OPENCL))
|
|
{
|
|
backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL));
|
|
}
|
|
if (haveBackendOpenVINO && openvino::checkTarget(DNN_TARGET_OPENCL_FP16))
|
|
{
|
|
backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16));
|
|
}
|
|
}
|
|
#endif // HAVE_OPENCL
|
|
|
|
#ifdef HAVE_WEBNN
|
|
if (haveWebnn())
|
|
{
|
|
backends.push_back(std::make_pair(DNN_BACKEND_WEBNN, DNN_TARGET_CPU));
|
|
}
|
|
#endif // HAVE_WEBNN
|
|
|
|
#ifdef HAVE_OPENCL
|
|
if (cv::ocl::useOpenCL())
|
|
{
|
|
backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
|
|
backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
|
|
}
|
|
#endif
|
|
|
|
backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
|
|
|
|
if (haveBackendCPU_FP16)
|
|
backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU_FP16));
|
|
|
|
#ifdef HAVE_VULKAN
|
|
if (haveVulkan())
|
|
backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN));
|
|
#endif
|
|
|
|
#ifdef HAVE_CUDA
|
|
cuda4dnn::checkVersions();
|
|
|
|
bool hasCudaCompatible = false;
|
|
bool hasCudaFP16 = false;
|
|
for (int i = 0; i < cuda4dnn::getDeviceCount(); i++)
|
|
{
|
|
if (cuda4dnn::isDeviceCompatible(i))
|
|
{
|
|
hasCudaCompatible = true;
|
|
if (cuda4dnn::doesDeviceSupportFP16(i))
|
|
{
|
|
hasCudaFP16 = true;
|
|
break; // we already have all we need here
|
|
}
|
|
}
|
|
}
|
|
|
|
if (hasCudaCompatible)
|
|
{
|
|
backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
|
|
if (hasCudaFP16)
|
|
backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
|
|
}
|
|
#endif
|
|
|
|
#ifdef HAVE_TIMVX
|
|
if (haveTimVX())
|
|
{
|
|
backends.push_back(std::make_pair(DNN_BACKEND_TIMVX, DNN_TARGET_NPU));
|
|
}
|
|
#endif
|
|
|
|
#ifdef HAVE_CANN
|
|
backends.push_back(std::make_pair(DNN_BACKEND_CANN, DNN_TARGET_NPU));
|
|
#endif
|
|
}
|
|
|
|
BackendsList backends;
|
|
};
|
|
|
|
|
|
std::vector<std::pair<Backend, Target>> getAvailableBackends()
|
|
{
|
|
return BackendRegistry::getRegistry().getBackends();
|
|
}
|
|
|
|
/**
 * Lists the targets registered for one backend.
 *
 * @param be Backend to query. DNN_BACKEND_DEFAULT is first resolved through
 *           getParam_DNN_BACKEND_DEFAULT(); DNN_BACKEND_INFERENCE_ENGINE is
 *           treated as DNN_BACKEND_INFERENCE_ENGINE_NGRAPH.
 * @return Targets paired with that backend, in registration order; empty when
 *         the backend has no registered entry.
 */
std::vector<Target> getAvailableTargets(Backend be)
{
    if (be == DNN_BACKEND_DEFAULT)
        be = static_cast<Backend>(getParam_DNN_BACKEND_DEFAULT());

    if (be == DNN_BACKEND_INFERENCE_ENGINE)
        be = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;

    const BackendRegistry::BackendsList registered = getAvailableBackends();

    std::vector<Target> matching;
    for (const auto & entry : registered)
    {
        if (entry.first != be)
            continue;
        matching.push_back(entry.second);
    }
    return matching;
}
|
|
|
|
|
|
CV__DNN_INLINE_NS_END
|
|
}} // namespace cv::dnn
|