opencv · asmorkalov · Feb 12, 2024 · Jan 8, 2024 · Jan 13, 2024 · Jan 24, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -65,6 +65,10 @@ if(POLICY CMP0068)
   cmake_policy(SET CMP0068 NEW)  # CMake 3.9+: `RPATH` settings on macOS do not affect `install_name`.
 endif()
 
+if(POLICY CMP0071)
+  cmake_policy(SET CMP0071 NEW)  # CMake 3.10+: Let `AUTOMOC` and `AUTOUIC` process `GENERATED` files.
+endif()
+
 if(POLICY CMP0075)
   cmake_policy(SET CMP0075 NEW)  # CMake 3.12+: Include file check macros honor `CMAKE_REQUIRED_LIBRARIES`
 endif()
@@ -1870,7 +1874,7 @@ if(BUILD_JAVA)
     status("    JNI:"         JNI_INCLUDE_DIRS    THEN "${JNI_INCLUDE_DIRS}"                                       ELSE NO)
   endif()
   status("    Java wrappers:" HAVE_opencv_java                                                            THEN "YES (${OPENCV_JAVA_SDK_BUILD_TYPE})" ELSE NO)
-  status("    Java tests:"    BUILD_TESTS AND opencv_test_java_BINARY_DIR                                 THEN YES ELSE NO)
+  status("    Java tests:"    BUILD_TESTS AND (opencv_test_java_BINARY_DIR OR opencv_test_android_BINARY_DIR) THEN YES ELSE NO)
 endif()
 
 # ========================== Objective-C =======================

diff --git a/cmake/android/android_gradle_projects.cmake b/cmake/android/android_gradle_projects.cmake
@@ -141,6 +141,8 @@ if (gradle.opencv_source == 'sdk_path') {
 ")
 
 ocv_check_environment_variables(OPENCV_GRADLE_VERBOSE_OPTIONS)
+ocv_update(OPENCV_GRADLE_VERBOSE_OPTIONS "-i")
+separate_arguments(OPENCV_GRADLE_VERBOSE_OPTIONS UNIX_COMMAND "${OPENCV_GRADLE_VERBOSE_OPTIONS}")
 
 macro(add_android_project target path)
   get_filename_component(__dir "${path}" NAME)
@@ -175,7 +177,6 @@ include ':${__dir}'
   if (BUILD_ANDROID_EXAMPLES)
     # build apk
     set(APK_FILE "${ANDROID_BUILD_BASE_DIR}/${__dir}/build/outputs/apk/release/${__dir}-${ANDROID_ABI}-release-unsigned.apk")
-    ocv_update(OPENCV_GRADLE_VERBOSE_OPTIONS "-i")
     add_custom_command(
         OUTPUT "${APK_FILE}" "${OPENCV_DEPHELPER}/android_sample_${__dir}"
         COMMAND ./gradlew ${OPENCV_GRADLE_VERBOSE_OPTIONS} "${__dir}:assemble"

diff --git a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
@@ -9,46 +9,224 @@ YOLO DNNs  {#tutorial_dnn_yolo}
 |    |    |
 | -: | :- |
 | Original author | Alessandro de Oliveira Faria |
-| Compatibility | OpenCV >= 3.3.1 |
+| Extended by     | Abduragim Shtanchaev |
+| Compatibility   | OpenCV >= 4.9.0 |
+
+
+Running pre-trained YOLO model in OpenCV
+----------------------------------------
+
+Deploying pre-trained models is a common task in machine learning, particularly when working with
+hardware that does not support certain frameworks like PyTorch. This guide provides a comprehensive
+overview of exporting pre-trained YOLO family models from PyTorch and deploying them using OpenCV's
+DNN framework. For demonstration purposes, we will focus on the [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX/blob/main)
+model, but the methodology applies to other supported models.
+
+@note Currently, OpenCV supports the following YOLO models:
+- [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX/blob/main),
+- [YoloNas](https://github.com/Deci-AI/super-gradients/tree/master),
+- [YOLOv8](https://github.com/ultralytics/ultralytics/tree/main),
+- [YOLOv7](https://github.com/WongKinYiu/yolov7/tree/main),
+- [YOLOv6](https://github.com/meituan/YOLOv6/blob/main),
+- [YOLOv5](https://github.com/ultralytics/yolov5),
+- [YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4).
+
+This support includes pre and post-processing routines specific to these models. While other older
+version of YOLO are also supported by OpenCV in Darknet format, they are out of the scope of this tutorial.
+
+
+Assuming that we have successfully trained YOLOX model, the subsequent step involves exporting and
+running this model with OpenCV. There are several critical considerations to address before
+proceeding with this process. Let's delve into these aspects.
+
+### YOLO's Pre-proccessing & Output
+
+Understanding the nature of inputs and outputs associated with YOLO family detectors is pivotal.
+These detectors, akin to most Deep Neural Networks (DNN), typically exhibit variation in input
+sizes contingent upon the model's scale.
+
+| Model Scale  | Input Size   |
+|--------------|--------------|
+| Small Models <sup>[1](https://github.com/Megvii-BaseDetection/YOLOX/tree/main#standard-models)</sup>| 416x416      |
+| Midsize Models <sup>[2](https://github.com/Megvii-BaseDetection/YOLOX/tree/main#standard-models)</sup>| 640x640    |
+| Large Models <sup>[3](https://github.com/meituan/YOLOv6/tree/main#benchmark)</sup>| 1280x1280    |
+
+This table provides a quick reference to understand the different input dimensions commonly used in
+various YOLO models inputs. These are standard input shapes. Make sure you use input size that you
+trained model with, if it is differed from from the size mentioned in the table.
+
+The next critical element in the process involves understanding the specifics of image pre-processing
+for YOLO detectors. While the fundamental pre-processing approach remains consistent across the YOLO
+family, there are subtle yet crucial differences that must be accounted for to avoid any degradation
+in performance. Key among these are the `resize type` and the `padding value` applied post-resize.
+For instance, the [YOLOX model](https://github.com/Megvii-BaseDetection/YOLOX/blob/ac58e0a5e68e57454b7b9ac822aced493b553c53/yolox/data/data_augment.py#L142)
+utilizes a `LetterBox` resize method and a padding value of `114.0`. It is imperative to ensure that
+these parameters, along with the normalization constants, are appropriately matched to the model being
+exported.
+
+Regarding the model's output, it typically takes the form of a tensor with dimensions [BxNxC+5] or
+[BxNxC+4], where 'B' represents the batch size, 'N' denotes the number of anchors, and 'C' signifies
+the number of classes (for instance, 80 classes if the model is trained on the COCO dataset).
+The additional 5 in the former tensor structure corresponds to the objectness score (obj), confidence
+score (conf), and the bounding box coordinates (cx, cy, w, h). Notably, the YOLOv8 model's output
+is shaped as [BxNxC+4], where there is no explicit objectness score, and the object score is directly
+inferred from the class score. For the YOLOX model, specifically, it is also necessary to incorporate
+anchor points to rescale predictions back to the image domain. This step will be integrated into
+the ONNX graph, a process that we will detail further in the subsequent sections.
+
+
+### PyTorch Model Export
+
+Now that we know know the parameters of the pre-precessing we can go on and export the model from
+Pytorch to ONNX graph. Since in this tutorial we are using YOLOX as our sample model, lets use its
+export for demonstration purposes (the process is  identical for the rest of the YOLO detectors).
+To exporting YOLOX we can just use [export script](https://github.com/Megvii-BaseDetection/YOLOX/blob/ac58e0a5e68e57454b7b9ac822aced493b553c53/tools/export_onnx.py). Particularly we need following commands:
 
-Introduction
-------------
-
-In this text you will learn how to use opencv_dnn module using yolo_object_detection (Sample of using OpenCV dnn module in real time with device capture, video and image).
+@code{.bash}
+git clone https://github.com/Megvii-BaseDetection/YOLOX.git
+cd YOLOX
+wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth # download pre-trained weights
+python3 -m tools.export_onnx --output-name yolox_s.onnx -n yolox-s -c yolox_s.pth --decode_in_inference
+@endcode
 
-We will demonstrate results of this example on the following picture.
-![Picture example](images/yolo.jpg)
+**NOTE:** Here `--decode_in_inference` is to include anchor box creation in the ONNX graph itself.
+It sets [this value](https://github.com/Megvii-BaseDetection/YOLOX/blob/ac58e0a5e68e57454b7b9ac822aced493b553c53/yolox/models/yolo_head.py#L210C16-L210C39)
+to `True`, which subsequently includes anchor generation function.
+
+Below we demonstrated the minimal version of the export script (which could be used for models other
+than YOLOX) in case it is needed. However, usually each YOLO repository has predefined export script.
+
+@code{.py}
+    import onnx
+    import torch
+    from onnxsim import simplify
+
+    # load the model state dict
+    ckpt = torch.load(ckpt_file, map_location="cpu")
+    model.load_state_dict(ckpt)
+
+    # prepare dummy input
+    dummy_input = torch.randn(args.batch_size, 3, exp.test_size[0], exp.test_size[1])
+
+    #export the model
+    torch.onnx._export(
+        model,
+        dummy_input,
+        "yolox.onnx",
+        input_names=["input"],
+        output_names=["output"],
+        dynamic_axes={"input": {0: 'batch'},
+                      "output": {0: 'batch'}})
+
+    # use onnx-simplifier to reduce reduent model.
+    onnx_model = onnx.load(args.output_name)
+    model_simp, check = simplify(onnx_model)
+    assert check, "Simplified ONNX model could not be validated"
+    onnx.save(model_simp, args.output_name)
+@endcode
 
-Examples
---------
+### Running Yolo ONNX detector with OpenCV Sample
+
+Once we have our ONNX graph of the model, we just simply can run with OpenCV's sample. To that we need to make sure:
+
+1. OpenCV is build with -DBUILD_EXAMLES=ON flag.
+2. Navigate to the OpenCV's `build` directory
+3. Run the following command:
+
+@code{.cpp}
+./bin/example_dnn_yolo_detector --input=<path_to_your_input_file> \
+                                --classes=<path_to_class_names_file> \
+                                --thr=<confidence_threshold> \
+                                --nms=<non_maximum_suppression_threshold> \
+                                --mean=<mean_normalization_value> \
+                                --scale=<scale_factor> \
+                                --yolo=<yolo_model_version> \
+                                --padvalue=<padding_value> \
+                                --paddingmode=<padding_mode> \
+                                --backend=<computation_backend> \
+                                --target=<target_computation_device>
+@endcode
 
 VIDEO DEMO:
 @youtube{NHtRlndE2cg}
 
-Source Code
------------
+- --input: File path to your input image or video. If omitted, it will capture frames from a camera.
+- --classes: File path to a text file containing class names for object detection.
+- --thr: Confidence threshold for detection (e.g., 0.5).
+- --nms: Non-maximum suppression threshold (e.g., 0.4).
+- --mean: Mean normalization value (e.g., 0.0 for no mean normalization).
+- --scale: Scale factor for input normalization (e.g., 1.0).
+- --yolo: YOLO model version (e.g., YOLOv3, YOLOv4, etc.).
+- --padvalue: Padding value used in pre-processing (e.g., 114.0).
+- --paddingmode: Method for handling image resizing and padding. Options: 0 (resize without extra processing), 1 (crop after resize), 2 (resize with aspect ratio preservation).
+- --backend: Selection of computation backend (0 for automatic, 1 for Halide, 2 for OpenVINO, etc.).
+- --target: Selection of target computation device (0 for CPU, 1 for OpenCL, etc.).
+- --device: Camera device number (0 for default camera). If `--input` is not provided camera with index 0 will used by default.
+
+Here `mean`, `scale`, `padvalue`, `paddingmode` should exactly match those that we discussed
+in pre-processing section in order for the model to match result in PyTorch
+
+To demonstrate how to run OpenCV YOLO samples without your own pretrained model, follow these instructions:
+
+1. Ensure Python is installed on your platform.
+2. Confirm that OpenCV is built with the `-DBUILD_EXAMPLES=ON` flag.
+
+Run the YOLOX detector(with default values):
+
+@code{.sh}
+git clone https://github.com/opencv/opencv_extra.git
+cd opencv_extra/testdata/dnn
+python download_models.py yolox_s_inf_decoder
+cd ..
+export OPENCV_TEST_DATA_PATH=$(pwd)
+cd <build directory of OpenCV>
+./bin/example_dnn_yolo_detector
+@endcode
 
-Use a universal sample for object detection models written
-[in C++](https://github.com/opencv/opencv/blob/5.x/samples/dnn/object_detection.cpp) and
-[in Python](https://github.com/opencv/opencv/blob/5.x/samples/dnn/object_detection.py) languages
+This will execute the YOLOX detector with your camera. For YOLOv8 (for instance), follow these additional steps:
 
-Usage examples
---------------
+@code{.sh}
+cd opencv_extra/testdata/dnn
+python download_models.py yolov8
+cd ..
+export OPENCV_TEST_DATA_PATH=$(pwd)
+cd <build directory of OpenCV>
 
-Execute in webcam:
+./bin/example_dnn_yolo_detector --model=onnx/models/yolov8n.onnx --yolo=yolov8 --mean=0.0 --scale=0.003921568627 --paddingmode=2 --padvalue=144.0 --thr=0.5 --nms=0.4 --rgb=0
+@endcode
 
-@code{.bash}
 
-$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392 --rgb
+### Building a Custom Pipeline
 
-@endcode
+Sometimes there is a need to make some custom adjustments in the inference pipeline. With OpenCV DNN
+module this is also quite easy to achieve. Below we will outline the sample implementation details:
 
-Execute with image or video file:
+- Import required libraries
 
-@code{.bash}
+@snippet samples/dnn/yolo_detector.cpp includes
 
-$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392 --input=[PATH-TO-IMAGE-OR-VIDEO-FILE] --rgb
+- Read ONNX graph and create neural network model:
 
-@endcode
+@snippet samples/dnn/yolo_detector.cpp read_net
+
+- Read image and pre-process it:
+
+@snippet samples/dnn/yolo_detector.cpp preprocess_params
+@snippet samples/dnn/yolo_detector.cpp preprocess_call
+@snippet samples/dnn/yolo_detector.cpp preprocess_call_func
+
+- Inference:
+
+@snippet samples/dnn/yolo_detector.cpp forward_buffers
+@snippet samples/dnn/yolo_detector.cpp forward
+
+- Post-Processing
+
+All post-processing steps are implemented in function `yoloPostProcess`. Please pay attention,
+that NMS step is not included into onnx graph. Sample uses OpenCV function for it.
+
+@snippet samples/dnn/yolo_detector.cpp postprocess
+
+- Draw predicted boxes
 
-Questions and suggestions email to: Alessandro de Oliveira Faria [email protected] or OpenCV Team.
+@snippet samples/dnn/yolo_detector.cpp draw_boxes
diff --git a/modules/calib/src/calibinit.cpp b/modules/calib/src/calibinit.cpp
@@ -156,7 +156,8 @@ struct ChessBoardQuad
     float edge_len; // quad edge len, in pix^2
     // neighbors and corners are synced, i.e., neighbor 0 shares corner 0
     ChessBoardCorner *corners[4]; // Coordinates of quad corners
-    struct ChessBoardQuad *neighbors[4]; // Pointers of quad neighbors
+    struct ChessBoardQuad *neighbors[4]; // Pointers of quad neighbors. M.b. sparse.
+    // Each neighbors element corresponds to quad corner, but not just sequential index.
 
     ChessBoardQuad(int group_idx_ = -1) :
         count(0),
@@ -1701,12 +1702,12 @@ void ChessBoardDetector::findQuadNeighbors()
                     continue;
 
                 // Check that each corner is a neighbor of different quads
-                for(j = 0; j < closest_quad->count; j++ )
+                for(j = 0; j < 4; j++ )
                 {
                     if (closest_quad->neighbors[j] == &cur_quad)
                         break;
                 }
-                if (j < closest_quad->count)
+                if (j < 4)
                     continue;
 
                 // check whether the closest corner to closest_corner

diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
@@ -556,6 +556,8 @@ The format of half precision floating point is defined in IEEE 754-2008.
 
 @param src input array.
 @param dst output array.
+
+@deprecated Use Mat::convertTo with CV_16F instead.
 */
 CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst);
 

diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp
@@ -271,11 +271,11 @@ enum BorderTypes {
     BORDER_REFLECT     = 2, //!< `fedcba|abcdefgh|hgfedcb`
     BORDER_WRAP        = 3, //!< `cdefgh|abcdefgh|abcdefg`
     BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
-    BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
+    BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno` - Treats outliers as transparent.
 
     BORDER_REFLECT101  = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
     BORDER_DEFAULT     = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
-    BORDER_ISOLATED    = 16 //!< do not look outside of ROI
+    BORDER_ISOLATED    = 16 //!< Interpolation restricted within the ROI boundaries.
 };
 
 //! @} core_array

diff --git a/modules/core/include/opencv2/core/hal/intrin_lsx.hpp b/modules/core/include/opencv2/core/hal/intrin_lsx.hpp
@@ -860,6 +860,7 @@ class v_lsx_palignr_u8_class<imm, false, true, false, false, false>
 public:
     inline __m128i operator()(const __m128i& a, const __m128i& b) const
     {
+        CV_UNUSED(b);
         return a;
     }
 };
@@ -880,6 +881,7 @@ class v_lsx_palignr_u8_class<imm, false, false, false, true, false>
 public:
     inline __m128i operator()(const __m128i& a, const __m128i& b) const
     {
+        CV_UNUSED(a);
         return b;
     }
 };