Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions modules/dnn/src/cuda4dnn/init.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

namespace cv { namespace dnn { namespace cuda4dnn {

void checkVersions()
inline void checkVersions()
{
// https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#programming-model
// cuDNN API Compatibility
Expand Down Expand Up @@ -44,21 +44,23 @@ namespace cv { namespace dnn { namespace cuda4dnn {
}
}

int getDeviceCount()
inline int getDeviceCount()
{
return cuda::getCudaEnabledDeviceCount();
}

int getDevice()
inline int getDevice()
{
int device_id = -1;
CUDA4DNN_CHECK_CUDA(cudaGetDevice(&device_id));
return device_id;
}

bool isDeviceCompatible()
inline bool isDeviceCompatible(int device_id = -1)
{
int device_id = getDevice();
if (device_id < 0)
device_id = getDevice();

if (device_id < 0)
return false;

Expand All @@ -76,9 +78,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
return false;
}

bool doesDeviceSupportFP16()
inline bool doesDeviceSupportFP16(int device_id = -1)
{
int device_id = getDevice();
if (device_id < 0)
device_id = getDevice();

if (device_id < 0)
return false;

Expand All @@ -87,9 +91,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
CUDA4DNN_CHECK_CUDA(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_id));

int version = major * 10 + minor;
if (version < 53)
return false;
return true;
return (version >= 53);
}

}}} /* namespace cv::dnn::cuda4dnn */
Expand Down
14 changes: 14 additions & 0 deletions modules/dnn/src/net_impl_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
#include "backend.hpp"
#include "factory.hpp"

#ifdef HAVE_CUDA
#include "cuda4dnn/init.hpp"
#endif

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
Expand Down Expand Up @@ -242,6 +246,16 @@ void Net::Impl::setPreferableTarget(int targetId)
#endif
}

if (IS_DNN_CUDA_TARGET(targetId))
{
preferableTarget = DNN_TARGET_CPU;
#ifdef HAVE_CUDA
if (cuda4dnn::doesDeviceSupportFP16() && targetId == DNN_TARGET_CUDA_FP16)
preferableTarget = DNN_TARGET_CUDA_FP16;
else
preferableTarget = DNN_TARGET_CUDA;
#endif
}
#if !defined(__arm64__) || !__arm64__
if (targetId == DNN_TARGET_CPU_FP16)
{
Expand Down
26 changes: 24 additions & 2 deletions modules/dnn/src/registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
#include "backend.hpp"
#include "factory.hpp"

#ifdef HAVE_CUDA
#include "cuda4dnn/init.hpp"
#endif

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
Expand Down Expand Up @@ -118,10 +122,28 @@ class BackendRegistry
#endif

#ifdef HAVE_CUDA
if (haveCUDA())
cuda4dnn::checkVersions();

bool hasCudaCompatible = false;
bool hasCudaFP16 = false;
for (int i = 0; i < cuda4dnn::getDeviceCount(); i++)
{
if (cuda4dnn::isDeviceCompatible(i))
{
hasCudaCompatible = true;
if (cuda4dnn::doesDeviceSupportFP16(i))
{
hasCudaFP16 = true;
break; // we already have all we need here
}
}
}

if (hasCudaCompatible)
{
backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
if (hasCudaFP16)
backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion modules/dnn/test/test_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ class DNNTestLayer : public TestWithParam<tuple<Backend, Target> >
if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
{
hasFallbacks = true;
std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to has backend implementation" << endl;
std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to have backend implementation" << endl;
}
}
if (hasFallbacks && raiseError)
Expand Down
2 changes: 1 addition & 1 deletion modules/dnn/test/test_onnx_conformance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,7 @@ class Test_ONNX_conformance : public TestWithParam<ONNXConfParams>
if ((!l->supportBackend(backend) || l->preferableTarget != target) && !fused)
{
hasFallbacks = true;
std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to has backend implementation" << endl;
std::cout << "FALLBACK: Layer [" << l->type << "]:[" << l->name << "] is expected to have backend implementation" << endl;
}
}
return hasFallbacks;
Expand Down