diff --git a/3rdparty/carotene/hal/tegra_hal.hpp b/3rdparty/carotene/hal/tegra_hal.hpp index 31182a029a84..2e07b7f52669 100644 --- a/3rdparty/carotene/hal/tegra_hal.hpp +++ b/3rdparty/carotene/hal/tegra_hal.hpp @@ -1962,4 +1962,20 @@ inline int TEGRA_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_ste #define cv_hal_LKOpticalFlowLevel TEGRA_LKOpticalFlowLevel #endif // __ARM_ARCH=7 +#if 0 // OpenCV provides faster parallel implementation +inline int TEGRA_ScharrDeriv(const uchar* src_data, size_t src_step, + short* dst_data, size_t dst_step, + int width, int height, int cn) +{ + if (!CAROTENE_NS::isSupportedConfiguration()) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + + CAROTENE_NS::ScharrDeriv(CAROTENE_NS::Size2D(width, height), cn, src_data, src_step, dst_data, dst_step); + return CV_HAL_ERROR_OK; +} + +#undef cv_hal_ScharrDeriv +#define cv_hal_ScharrDeriv TEGRA_ScharrDeriv +#endif + #endif diff --git a/3rdparty/hal_rvv/hal_rvv.hpp b/3rdparty/hal_rvv/hal_rvv.hpp index e10a3258e90f..4765a15518a5 100644 --- a/3rdparty/hal_rvv/hal_rvv.hpp +++ b/3rdparty/hal_rvv/hal_rvv.hpp @@ -19,4 +19,8 @@ #include "version/hal_rvv_071.hpp" #endif -#endif \ No newline at end of file +#if defined(__riscv_v) && __riscv_v == 1000000 +#include "hal_rvv_1p0/merge.hpp" // core +#endif + +#endif diff --git a/3rdparty/hal_rvv/hal_rvv_1p0/merge.hpp b/3rdparty/hal_rvv/hal_rvv_1p0/merge.hpp new file mode 100644 index 000000000000..5278680eaaf5 --- /dev/null +++ b/3rdparty/hal_rvv/hal_rvv_1p0/merge.hpp @@ -0,0 +1,363 @@ +#ifndef OPENCV_HAL_RVV_MERGE_HPP_INCLUDED +#define OPENCV_HAL_RVV_MERGE_HPP_INCLUDED + +#include <riscv_vector.h> + +namespace cv { namespace cv_hal_rvv { + +#undef cv_hal_merge8u +#define cv_hal_merge8u cv::cv_hal_rvv::merge8u +#undef cv_hal_merge16u +#define cv_hal_merge16u cv::cv_hal_rvv::merge16u +#undef cv_hal_merge32s +#define cv_hal_merge32s cv::cv_hal_rvv::merge32s +#undef cv_hal_merge64s +#define cv_hal_merge64s cv::cv_hal_rvv::merge64s + +#if defined __GNUC__ 
+__attribute__((optimize("no-tree-vectorize"))) +#endif +static int merge8u(const uchar** src, uchar* dst, int len, int cn ) { + int k = cn % 4 ? cn % 4 : 4; + int i = 0, j; + int vl = __riscv_vsetvlmax_e8m1(); + if( k == 1 ) + { + const uchar* src0 = src[0]; + for( ; i <= len - vl; i += vl) + { + auto a = __riscv_vle8_v_u8m1(src0 + i, vl); + __riscv_vsse8_v_u8m1(dst + i*cn, sizeof(uchar)*cn, a, vl); // byte stride is the full pixel pitch (cn elements), not k + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++) + dst[i*cn] = src0[i]; + } + else if( k == 2 ) + { + const uchar *src0 = src[0], *src1 = src[1]; + for( ; i <= len - vl; i += vl) + { + auto a = __riscv_vle8_v_u8m1(src0 + i, vl); + auto b = __riscv_vle8_v_u8m1(src1 + i, vl); + __riscv_vsse8_v_u8m1(dst + i*cn, sizeof(uchar)*cn, a, vl); + __riscv_vsse8_v_u8m1(dst + i*cn + 1, sizeof(uchar)*cn, b, vl); + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++ ) + { + dst[i*cn] = src0[i]; + dst[i*cn+1] = src1[i]; + } + } + else if( k == 3 ) + { + const uchar *src0 = src[0], *src1 = src[1], *src2 = src[2]; + for( ; i <= len - vl; i += vl) + { + auto a = __riscv_vle8_v_u8m1(src0 + i, vl); + auto b = __riscv_vle8_v_u8m1(src1 + i, vl); + auto c = __riscv_vle8_v_u8m1(src2 + i, vl); + __riscv_vsse8_v_u8m1(dst + i*cn, sizeof(uchar)*cn, a, vl); + __riscv_vsse8_v_u8m1(dst + i*cn + 1, sizeof(uchar)*cn, b, vl); + __riscv_vsse8_v_u8m1(dst + i*cn + 2, sizeof(uchar)*cn, c, vl); + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++ ) + { + dst[i*cn] = src0[i]; + dst[i*cn+1] = src1[i]; + dst[i*cn+2] = src2[i]; + } + } + else + { + const uchar *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3]; + for( ; i <= len - vl; i += vl) + { + auto a = __riscv_vle8_v_u8m1(src0 + i, vl); + auto b = __riscv_vle8_v_u8m1(src1 + i, vl); + auto c = __riscv_vle8_v_u8m1(src2 + i, vl); + auto d = __riscv_vle8_v_u8m1(src3 + i, vl); + __riscv_vsse8_v_u8m1(dst + i*cn, 
sizeof(uchar)*cn, a, vl); + __riscv_vsse8_v_u8m1(dst + i*cn + 1, sizeof(uchar)*cn, b, vl); + __riscv_vsse8_v_u8m1(dst + i*cn + 2, sizeof(uchar)*cn, c, vl); + __riscv_vsse8_v_u8m1(dst + i*cn + 3, sizeof(uchar)*cn, d, vl); + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++ ) + { + dst[i*cn] = src0[i]; + dst[i*cn+1] = src1[i]; + dst[i*cn+2] = src2[i]; + dst[i*cn+3] = src3[i]; + } + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; k < cn; k += 4 ) + { + const uchar *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3]; + for( i = 0, j = k; i < len; i++, j += cn ) + { + dst[j] = src0[i]; dst[j+1] = src1[i]; + dst[j+2] = src2[i]; dst[j+3] = src3[i]; + } + } + return CV_HAL_ERROR_OK; +} + +#if defined __GNUC__ +__attribute__((optimize("no-tree-vectorize"))) +#endif +static int merge16u(const ushort** src, ushort* dst, int len, int cn ) { + int k = cn % 4 ? cn % 4 : 4; + int i = 0, j; + int vl = __riscv_vsetvlmax_e16m1(); + if( k == 1 ) + { + const ushort* src0 = src[0]; + for( ; i <= len - vl; i += vl) + { + auto a = __riscv_vle16_v_u16m1(src0 + i, vl); + __riscv_vsse16_v_u16m1(dst + i*cn, sizeof(ushort)*cn, a, vl); // byte stride is the full pixel pitch (cn elements), not k + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++) + dst[i*cn] = src0[i]; + } + else if( k == 2 ) + { + const ushort *src0 = src[0], *src1 = src[1]; + for( ; i <= len - vl; i += vl) + { + auto a = __riscv_vle16_v_u16m1(src0 + i, vl); + auto b = __riscv_vle16_v_u16m1(src1 + i, vl); + __riscv_vsse16_v_u16m1(dst + i*cn, sizeof(ushort)*cn, a, vl); + __riscv_vsse16_v_u16m1(dst + i*cn + 1, sizeof(ushort)*cn, b, vl); + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++ ) + { + dst[i*cn] = src0[i]; + dst[i*cn+1] = src1[i]; + } + } + else if( k == 3 ) + { + const ushort *src0 = src[0], *src1 = src[1], *src2 = src[2]; + for( ; i <= len - vl; i += vl) + { + auto a = 
__riscv_vle16_v_u16m1(src0 + i, vl); + auto b = __riscv_vle16_v_u16m1(src1 + i, vl); + auto c = __riscv_vle16_v_u16m1(src2 + i, vl); + __riscv_vsse16_v_u16m1(dst + i*cn, sizeof(ushort)*cn, a, vl); + __riscv_vsse16_v_u16m1(dst + i*cn + 1, sizeof(ushort)*cn, b, vl); + __riscv_vsse16_v_u16m1(dst + i*cn + 2, sizeof(ushort)*cn, c, vl); + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++ ) + { + dst[i*cn] = src0[i]; + dst[i*cn+1] = src1[i]; + dst[i*cn+2] = src2[i]; + } + } + else + { + const ushort *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3]; + for( ; i <= len - vl; i += vl) + { + auto a = __riscv_vle16_v_u16m1(src0 + i, vl); + auto b = __riscv_vle16_v_u16m1(src1 + i, vl); + auto c = __riscv_vle16_v_u16m1(src2 + i, vl); + auto d = __riscv_vle16_v_u16m1(src3 + i, vl); + __riscv_vsse16_v_u16m1(dst + i*cn, sizeof(ushort)*cn, a, vl); + __riscv_vsse16_v_u16m1(dst + i*cn + 1, sizeof(ushort)*cn, b, vl); + __riscv_vsse16_v_u16m1(dst + i*cn + 2, sizeof(ushort)*cn, c, vl); + __riscv_vsse16_v_u16m1(dst + i*cn + 3, sizeof(ushort)*cn, d, vl); + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++ ) + { + dst[i*cn] = src0[i]; + dst[i*cn+1] = src1[i]; + dst[i*cn+2] = src2[i]; + dst[i*cn+3] = src3[i]; + } + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; k < cn; k += 4 ) + { + const ushort *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3]; + for( i = 0, j = k; i < len; i++, j += cn ) + { + dst[j] = src0[i]; dst[j+1] = src1[i]; + dst[j+2] = src2[i]; dst[j+3] = src3[i]; + } + } + return CV_HAL_ERROR_OK; +} + +#if defined __GNUC__ +__attribute__((optimize("no-tree-vectorize"))) +#endif +static int merge32s(const int** src, int* dst, int len, int cn ) { + int k = cn % 4 ? 
cn % 4 : 4; + int i, j; + if( k == 1 ) + { + const int* src0 = src[0]; + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( i = j = 0; i < len; i++, j += cn ) + dst[j] = src0[i]; + } + else if( k == 2 ) + { + const int *src0 = src[0], *src1 = src[1]; + i = j = 0; + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++, j += cn ) + { + dst[j] = src0[i]; + dst[j+1] = src1[i]; + } + } + else if( k == 3 ) + { + const int *src0 = src[0], *src1 = src[1], *src2 = src[2]; + i = j = 0; + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++, j += cn ) + { + dst[j] = src0[i]; + dst[j+1] = src1[i]; + dst[j+2] = src2[i]; + } + } + else + { + const int *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3]; + i = j = 0; + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++, j += cn ) + { + dst[j] = src0[i]; dst[j+1] = src1[i]; + dst[j+2] = src2[i]; dst[j+3] = src3[i]; + } + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; k < cn; k += 4 ) + { + const int *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3]; + for( i = 0, j = k; i < len; i++, j += cn ) + { + dst[j] = src0[i]; dst[j+1] = src1[i]; + dst[j+2] = src2[i]; dst[j+3] = src3[i]; + } + } + return CV_HAL_ERROR_OK; +} + +#if defined __GNUC__ +__attribute__((optimize("no-tree-vectorize"))) +#endif +static int merge64s(const int64** src, int64* dst, int len, int cn ) { + int k = cn % 4 ? 
cn % 4 : 4; + int i, j; + if( k == 1 ) + { + const int64* src0 = src[0]; + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( i = j = 0; i < len; i++, j += cn ) + dst[j] = src0[i]; + } + else if( k == 2 ) + { + const int64 *src0 = src[0], *src1 = src[1]; + i = j = 0; + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++, j += cn ) + { + dst[j] = src0[i]; + dst[j+1] = src1[i]; + } + } + else if( k == 3 ) + { + const int64 *src0 = src[0], *src1 = src[1], *src2 = src[2]; + i = j = 0; + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++, j += cn ) + { + dst[j] = src0[i]; + dst[j+1] = src1[i]; + dst[j+2] = src2[i]; + } + } + else + { + const int64 *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3]; + i = j = 0; + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; i < len; i++, j += cn ) + { + dst[j] = src0[i]; dst[j+1] = src1[i]; + dst[j+2] = src2[i]; dst[j+3] = src3[i]; + } + } + #if defined(__clang__) + #pragma clang loop vectorize(disable) + #endif + for( ; k < cn; k += 4 ) + { + const int64 *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3]; + for( i = 0, j = k; i < len; i++, j += cn ) + { + dst[j] = src0[i]; dst[j+1] = src1[i]; + dst[j+2] = src2[i]; dst[j+3] = src3[i]; + } + } + return CV_HAL_ERROR_OK; +} + +}} + +#endif diff --git a/3rdparty/kleidicv/CMakeLists.txt b/3rdparty/kleidicv/CMakeLists.txt index 26e485441603..a7f7c1a37c94 100644 --- a/3rdparty/kleidicv/CMakeLists.txt +++ b/3rdparty/kleidicv/CMakeLists.txt @@ -1,8 +1,8 @@ project(kleidicv_hal) set(KLEIDICV_SOURCE_PATH "" CACHE PATH "Directory containing KleidiCV sources") -ocv_update(KLEIDICV_SRC_COMMIT "0.1.0") -ocv_update(KLEIDICV_SRC_HASH "9388f28cf2fbe3338197b2b57d491468") +ocv_update(KLEIDICV_SRC_COMMIT "0.2.0") +ocv_update(KLEIDICV_SRC_HASH "dabe522e8f55ac342d07a787391dab80") if(KLEIDICV_SOURCE_PATH) set(THE_ROOT 
"${KLEIDICV_SOURCE_PATH}") diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bc9cbe03824..9e983bad79b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,10 @@ if(POLICY CMP0056) cmake_policy(SET CMP0056 NEW) # try_compile(): link flags endif() +if(POLICY CMP0057) + cmake_policy(SET CMP0057 NEW) # CMake 3.3: if(IN_LIST) support +endif() + if(POLICY CMP0066) cmake_policy(SET CMP0066 NEW) # CMake 3.7: try_compile(): use per-config flags, like CMAKE_CXX_FLAGS_RELEASE endif() @@ -217,7 +221,7 @@ OCV_OPTION(WITH_1394 "Include IEEE1394 support" OFF OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O (iOS/visionOS/Mac)" ON VISIBLE_IF APPLE VERIFY HAVE_AVFOUNDATION) -OCV_OPTION(WITH_AVIF "Enable AVIF support" OFF +OCV_OPTION(WITH_AVIF "Enable AVIF support" ON VERIFY HAVE_AVIF) OCV_OPTION(WITH_CAP_IOS "Enable iOS video capture" ON VISIBLE_IF IOS diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 5344b1597469..f94235038f43 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -353,23 +353,23 @@ function(ocv_target_include_directories target) #ocv_debug_message("ocv_target_include_directories(${target} ${ARGN})") _ocv_fix_target(target) set(__params "") - if(CV_GCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND - ";${ARGN};" MATCHES "/usr/include;") - return() # workaround for GCC 6.x bug - endif() - set(__params "") set(__system_params "") set(__var_name __params) foreach(dir ${ARGN}) if("${dir}" STREQUAL "SYSTEM") set(__var_name __system_params) else() - get_filename_component(__abs_dir "${dir}" ABSOLUTE) - ocv_is_opencv_directory(__is_opencv_dir "${dir}") - if(__is_opencv_dir) - list(APPEND ${__var_name} "${__abs_dir}") + if(CV_GCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND + dir MATCHES "/usr/include$") + # workaround for GCC 6.x bug else() - list(APPEND ${__var_name} "${dir}") + get_filename_component(__abs_dir "${dir}" ABSOLUTE) + ocv_is_opencv_directory(__is_opencv_dir "${dir}") + 
if(__is_opencv_dir) + list(APPEND ${__var_name} "${__abs_dir}") + else() + list(APPEND ${__var_name} "${dir}") + endif() endif() endif() endforeach() diff --git a/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown b/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown index 30ed9185762c..03e152f3866b 100644 --- a/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown +++ b/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown @@ -4,9 +4,9 @@ Arithmetic Operations on Images {#tutorial_js_image_arithmetics} Goal ---- -- Learn several arithmetic operations on images like addition, subtraction, bitwise operations +- Learn several arithmetic operations on images like addition, subtraction, bitwise operations, etc. -- You will learn these functions : **cv.add()**, **cv.subtract()** etc. +- You will learn these functions : **cv.add()**, **cv.subtract()**, etc. Image Addition -------------- diff --git a/doc/tutorial-utils.js b/doc/tutorial-utils.js index 3f08f1d2c1ff..9703e227795e 100644 --- a/doc/tutorial-utils.js +++ b/doc/tutorial-utils.js @@ -72,12 +72,23 @@ function buttonsToAdd($elements, $heading, $type) { } function addTutorialsButtons() { - $("h1").each(function() { - var $elements = $(this).nextUntil("h1") + // See https://github.com/opencv/opencv/issues/26339 + var $lastHeader = undefined + $("h1,h2,h3,div.newInnerHTML").each(function() { + if( this.tagName.startsWith("H") ) { + $lastHeader = $(this) + return true // loop-continue + } + if( $lastHeader === undefined ) { + return true // loop-continue + } + var $toggleHeader = $lastHeader.prop("tagName") // $lastHeader is a jQuery object, so read the DOM property via .prop() + var $elements = $lastHeader.nextUntil($toggleHeader) var $lower = $elements.find("div.newInnerHTML") $elements = $elements.add($lower) $elements = $elements.filter("div.newInnerHTML") - buttonsToAdd($elements, $(this), "h1") + buttonsToAdd($elements, $lastHeader, $toggleHeader) + $lastHeader = undefined }); 
$(".toggleable_button").first().click(); var $clickDefault = $('.toggleable_button.label_python').first(); diff --git a/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.markdown b/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.markdown index acd66bdeecf9..197292808f81 100644 --- a/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.markdown +++ b/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.markdown @@ -1,4 +1,4 @@ -File Input and Output using XML and YAML files {#tutorial_file_input_output_with_xml_yml} +File Input and Output using XML / YAML / JSON files {#tutorial_file_input_output_with_xml_yml} ============================================== @tableofcontents @@ -14,12 +14,12 @@ File Input and Output using XML and YAML files {#tutorial_file_input_output_with Goal ---- -You'll find answers for the following questions: +You'll find answers to the following questions: -- How to print and read text entries to a file and OpenCV using YAML or XML files? -- How to do the same for OpenCV data structures? -- How to do this for your data structures? -- Usage of OpenCV data structures such as @ref cv::FileStorage , @ref cv::FileNode or @ref +- How do you print and read text entries to a file in OpenCV using YAML, XML, or JSON files? +- How can you perform the same operations for OpenCV data structures? +- How can this be done for your custom data structures? +- How do you use OpenCV data structures, such as @ref cv::FileStorage , @ref cv::FileNode or @ref cv::FileNodeIterator . Source code @@ -49,14 +49,14 @@ Here's a sample code of how to achieve all the stuff enumerated at the goal list Explanation ----------- -Here we talk only about XML and YAML file inputs. Your output (and its respective input) file may +Here we talk only about XML, YAML and JSON file inputs. 
Your output (and its respective input) file may have only one of these extensions and the structure coming from this. They are two kinds of data structures you may serialize: *mappings* (like the STL map and the Python dictionary) and *element sequence* (like the STL vector). The difference between these is that in a map every element has a unique name through what you may access it. For sequences you need to go through them to query a specific item. --# **XML/YAML File Open and Close.** Before you write any content to such file you need to open it - and at the end to close it. The XML/YAML data structure in OpenCV is @ref cv::FileStorage . To +-# **XML/YAML/JSON File Open and Close.** Before you write any content to such file you need to open it + and at the end to close it. The XML/YAML/JSON data structure in OpenCV is @ref cv::FileStorage . To specify that this structure to which file binds on your hard drive you can use either its constructor or the *open()* function of this: @add_toggle_cpp diff --git a/modules/3d/src/sqpnp.cpp b/modules/3d/src/sqpnp.cpp index ef5d2470ad0e..b66998e68fa4 100644 --- a/modules/3d/src/sqpnp.cpp +++ b/modules/3d/src/sqpnp.cpp @@ -1,3 +1,10 @@ +// Implementation of SQPnP as described in the paper: +// +// "A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem" by G. Terzakis and M. Lourakis +// a) Paper: https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123460460.pdf +// b) Supplementary: https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123460460-supp.pdf + + // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html @@ -39,6 +46,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "precomp.hpp" #include "sqpnp.hpp" +#ifdef HAVE_EIGEN +#include +#endif + namespace cv { namespace sqpnp { @@ -52,8 +63,8 @@ const double PoseSolver::POINT_VARIANCE_THRESHOLD = 1e-5; const double PoseSolver::SQRT3 = std::sqrt(3); const int PoseSolver::SQP_MAX_ITERATION = 15; -//No checking done here for overflow, since this is not public all call instances -//are assumed to be valid +// No checking done here for overflow, since this is not public all call instances +// are assumed to be valid template void set(int row, int col, cv::Matx& dest, @@ -78,7 +89,7 @@ PoseSolver::PoseSolver() void PoseSolver::solve(InputArray objectPoints, InputArray imagePoints, OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs) { - //Input checking + // Input checking int objType = objectPoints.getMat().type(); CV_CheckType(objType, objType == CV_32FC3 || objType == CV_64FC3, "Type of objectPoints must be CV_32FC3 or CV_64FC3"); @@ -158,12 +169,12 @@ void PoseSolver::computeOmega(InputArray objectPoints, InputArray imagePoints) sum_img += img_pt; sum_obj += obj_pt; - const double& x = img_pt.x, & y = img_pt.y; - const double& X = obj_pt.x, & Y = obj_pt.y, & Z = obj_pt.z; + const double x = img_pt.x, y = img_pt.y; + const double X = obj_pt.x, Y = obj_pt.y, Z = obj_pt.z; double sq_norm = x * x + y * y; sq_norm_sum += sq_norm; - double X2 = X * X, + const double X2 = X * X, XY = X * Y, XZ = X * Z, Y2 = Y * Y, @@ -178,47 +189,47 @@ void PoseSolver::computeOmega(InputArray objectPoints, InputArray imagePoints) omega_(2, 2) += Z2; - //Populating this manually saves operations by only calculating upper triangle - omega_(0, 6) += -x * X2; omega_(0, 7) += -x * XY; omega_(0, 8) += -x * XZ; - omega_(1, 7) += -x * Y2; omega_(1, 8) += -x * YZ; - omega_(2, 8) += -x * Z2; + // Populating this manually saves operations by only calculating upper triangle + omega_(0, 6) -= x * X2; omega_(0, 7) -= x * XY; omega_(0, 8) -= x * XZ; + omega_(1, 7) -= x * Y2; omega_(1, 8) -= x * YZ; + omega_(2, 8) 
-= x * Z2; - omega_(3, 6) += -y * X2; omega_(3, 7) += -y * XY; omega_(3, 8) += -y * XZ; - omega_(4, 7) += -y * Y2; omega_(4, 8) += -y * YZ; - omega_(5, 8) += -y * Z2; + omega_(3, 6) -= y * X2; omega_(3, 7) -= y * XY; omega_(3, 8) -= y * XZ; + omega_(4, 7) -= y * Y2; omega_(4, 8) -= y * YZ; + omega_(5, 8) -= y * Z2; omega_(6, 6) += sq_norm * X2; omega_(6, 7) += sq_norm * XY; omega_(6, 8) += sq_norm * XZ; omega_(7, 7) += sq_norm * Y2; omega_(7, 8) += sq_norm * YZ; omega_(8, 8) += sq_norm * Z2; - //Compute qa_sum. Certain pairs of elements are equal, so filling them outside the loop saves some operations + // Compute qa_sum. Certain pairs of elements are equal, so filling them outside the loop saves some operations qa_sum(0, 0) += X; qa_sum(0, 1) += Y; qa_sum(0, 2) += Z; - qa_sum(0, 6) += -x * X; qa_sum(0, 7) += -x * Y; qa_sum(0, 8) += -x * Z; - qa_sum(1, 6) += -y * X; qa_sum(1, 7) += -y * Y; qa_sum(1, 8) += -y * Z; + qa_sum(0, 6) -= x * X; qa_sum(0, 7) -= x * Y; qa_sum(0, 8) -= x * Z; + qa_sum(1, 6) -= y * X; qa_sum(1, 7) -= y * Y; qa_sum(1, 8) -= y * Z; qa_sum(2, 6) += sq_norm * X; qa_sum(2, 7) += sq_norm * Y; qa_sum(2, 8) += sq_norm * Z; } - //Complete qa_sum + // Complete qa_sum qa_sum(1, 3) = qa_sum(0, 0); qa_sum(1, 4) = qa_sum(0, 1); qa_sum(1, 5) = qa_sum(0, 2); qa_sum(2, 0) = qa_sum(0, 6); qa_sum(2, 1) = qa_sum(0, 7); qa_sum(2, 2) = qa_sum(0, 8); qa_sum(2, 3) = qa_sum(1, 6); qa_sum(2, 4) = qa_sum(1, 7); qa_sum(2, 5) = qa_sum(1, 8); - //lower triangles of omega_'s off-diagonal blocks (0:2, 6:8), (3:5, 6:8) and (6:8, 6:8) + // lower triangles of omega_'s off-diagonal blocks (0:2, 6:8), (3:5, 6:8) and (6:8, 6:8) omega_(1, 6) = omega_(0, 7); omega_(2, 6) = omega_(0, 8); omega_(2, 7) = omega_(1, 8); omega_(4, 6) = omega_(3, 7); omega_(5, 6) = omega_(3, 8); omega_(5, 7) = omega_(4, 8); omega_(7, 6) = omega_(6, 7); omega_(8, 6) = omega_(6, 8); omega_(8, 7) = omega_(7, 8); - //upper triangle of omega_'s block (3:5, 3:5) + // upper triangle of omega_'s block (3:5, 3:5) 
omega_(3, 3) = omega_(0, 0); omega_(3, 4) = omega_(0, 1); omega_(3, 5) = omega_(0, 2); omega_(4, 4) = omega_(1, 1); omega_(4, 5) = omega_(1, 2); omega_(5, 5) = omega_(2, 2); - //Mirror omega_'s upper triangle to lower triangle - //Note that elements (7, 6), (8, 6) & (8, 7) have already been assigned above + // Mirror omega_'s upper triangle to lower triangle + // Note that elements (7, 6), (8, 6) & (8, 7) have already been assigned above omega_(1, 0) = omega_(0, 1); omega_(2, 0) = omega_(0, 2); omega_(2, 1) = omega_(1, 2); omega_(3, 0) = omega_(0, 3); omega_(3, 1) = omega_(1, 3); omega_(3, 2) = omega_(2, 3); @@ -240,12 +251,26 @@ void PoseSolver::computeOmega(InputArray objectPoints, InputArray imagePoints) CV_Assert(point_coordinate_variance >= POINT_VARIANCE_THRESHOLD); Matx q_inv; - analyticalInverse3x3Symm(q, q_inv); + if (!invertSPD3x3(q, q_inv)) analyticalInverse3x3Symm(q, q_inv); p_ = -q_inv * qa_sum; omega_ += qa_sum.t() * p_; +#ifdef HAVE_EIGEN + // Rank revealing QR nullspace computation with full pivoting. 
+ // This is slightly less accurate compared to SVD but x2-x3 faster + Eigen::Matrix omega_eig, tmp_eig; + cv::cv2eigen(omega_, omega_eig); + Eigen::FullPivHouseholderQR > rrqr(omega_eig); + tmp_eig = rrqr.matrixQ(); + cv::eigen2cv(tmp_eig, u_); + + tmp_eig = rrqr.matrixQR().template triangularView(); // R + Eigen::Matrix S_eig = tmp_eig.diagonal().array().abs(); + cv::eigen2cv(S_eig, s_); +#else + // Use OpenCV's SVD cv::SVD omega_svd(omega_, cv::SVD::FULL_UV); s_ = omega_svd.w; u_ = cv::Mat(omega_svd.vt.t()); @@ -255,6 +280,8 @@ void PoseSolver::computeOmega(InputArray objectPoints, InputArray imagePoints) u_ = u_.t(); // eigenvectors were returned as rows #endif +#endif // HAVE_EIGEN + CV_Assert(s_(0) >= 1e-7); while (s_(7 - num_null_vectors_) < RANK_TOLERANCE) num_null_vectors_++; @@ -276,7 +303,7 @@ void PoseSolver::solveInternal(InputArray objectPoints) SQPSolution solutions[2]; - //If e is orthogonal, we can skip SQP + // If e is orthogonal, we can skip SQP if (orthogonality_sq_err < ORTHOGONALITY_SQUARED_ERROR_THRESHOLD) { solutions[0].r_hat = det3x3(e) * e; @@ -393,6 +420,77 @@ void PoseSolver::solveSQPSystem(const cv::Matx& r, cv::Matx& A, cv::Matx& A1) +{ + double L[3*3], D[3], v[2], x[3]; + + v[0]=D[0]=A(0, 0); + if(v[0]<=1E-10) return false; + v[1]=1.0/v[0]; + L[3]=A(1, 0)*v[1]; + L[6]=A(2, 0)*v[1]; + //L[0]=1.0; + //L[1]=L[2]=0.0; + + v[0]=L[3]*D[0]; + v[1]=D[1]=A(1, 1)-L[3]*v[0]; + if(v[1]<=1E-10) return false; + L[7]=(A(2, 1)-L[6]*v[0])/v[1]; + //L[4]=1.0; + //L[5]=0.0; + + v[0]=L[6]*D[0]; + v[1]=L[7]*D[1]; + D[2]=A(2, 2)-L[6]*v[0]-L[7]*v[1]; + if(D[2]<=1E-10) return false; + //L[8]=1.0; + + D[0]=1.0/D[0]; + D[1]=1.0/D[1]; + D[2]=1.0/D[2]; + + /* Forward solve Lx = e0 */ + //x[0]=1.0; + x[1]=-L[3]; + x[2]=-L[6]+L[7]*L[3]; + + /* Backward solve D*L'x = y */ + A1(0, 2)=x[2]=x[2]*D[2]; + A1(0, 1)=x[1]=x[1]*D[1]-L[7]*x[2]; + A1(0, 0) = D[0]-L[3]*x[1]-L[6]*x[2]; + + /* Forward solve Lx = e1 */ + //x[0]=0.0; + //x[1]=1.0; + x[2]=-L[7]; + + /* Backward 
solve D*L'x = y */ + A1(1, 2)=x[2]=x[2]*D[2]; + A1(1, 1)=x[1]= D[1]-L[7]*x[2]; + A1(1, 0) = -L[3]*x[1]-L[6]*x[2]; + + /* Forward solve Lx = e2 */ + //x[0]=0.0; + //x[1]=0.0; + //x[2]=1.0; + + /* Backward solve D*L'x = y */ + A1(2, 2)=x[2]=D[2]; + A1(2, 1)=x[1]= -L[7]*x[2]; + A1(2, 0) = -L[3]*x[1]-L[6]*x[2]; + + return true; +} + bool PoseSolver::analyticalInverse3x3Symm(const cv::Matx& Q, cv::Matx& Qinv, const double& threshold) @@ -411,7 +509,7 @@ bool PoseSolver::analyticalInverse3x3Symm(const cv::Matx& Q, t12 = c * c; double det = -t4 * f + a * t2 + t7 * f - 2.0 * t9 * e + t12 * d; - if (fabs(det) < threshold) return false; + if (fabs(det) < threshold) { cv::invert(Q, Qinv, cv::DECOMP_SVD); return false; } // fall back to pseudoinverse // 3. Inverse double t15, t20, t24, t30; @@ -502,7 +600,7 @@ void PoseSolver::computeRowAndNullspace(const cv::Matx& r, H(6, 4) = r(3) - dot_j5q3 * H(6, 2); H(7, 4) = r(4) - dot_j5q3 * H(7, 2); H(8, 4) = r(5) - dot_j5q3 * H(8, 2); Matx q4 = H.col(4); - q4 /= cv::norm(q4); + q4 *= (1.0 / cv::norm(q4)); set(0, 4, H, q4); K(4, 0) = 0; @@ -531,7 +629,7 @@ void PoseSolver::computeRowAndNullspace(const cv::Matx& r, H(8, 5) = r(2) - dot_j6q3 * H(8, 2) - dot_j6q5 * H(8, 4); Matx q5 = H.col(5); - q5 /= cv::norm(q5); + q5 *= (1.0 / cv::norm(q5)); set(0, 5, H, q5); K(5, 0) = r(6) * H(0, 0) + r(7) * H(1, 0) + r(8) * H(2, 0); @@ -573,10 +671,11 @@ void PoseSolver::computeRowAndNullspace(const cv::Matx& r, Matx v1 = Pn.col(index1); v1 /= max_norm1; set(0, 0, N, v1); + col_norms[index1] = -1.0; // mark to avoid use in subsequent loops for (int i = 0; i < 9; i++) { - if (i == index1) continue; + //if (i == index1) continue; if (col_norms[i] >= norm_threshold) { double cos_v1_x_col = fabs(Pn.col(i).dot(v1) / col_norms[i]); @@ -592,16 +691,18 @@ void PoseSolver::computeRowAndNullspace(const cv::Matx& r, Matx v2 = Pn.col(index2); Matx n0 = N.col(0); v2 -= v2.dot(n0) * n0; - v2 /= cv::norm(v2); + v2 *= (1.0 / cv::norm(v2)); set(0, 1, N, v2); + 
col_norms[index2] = -1.0; // mark to avoid use in subsequent loops for (int i = 0; i < 9; i++) { - if (i == index2 || i == index1) continue; + //if (i == index2 || i == index1) continue; if (col_norms[i] >= norm_threshold) { - double cos_v1_x_col = fabs(Pn.col(i).dot(v1) / col_norms[i]); - double cos_v2_x_col = fabs(Pn.col(i).dot(v2) / col_norms[i]); + double inv_norm = 1.0 / col_norms[i]; + double cos_v1_x_col = fabs(Pn.col(i).dot(v1) * inv_norm); + double cos_v2_x_col = fabs(Pn.col(i).dot(v2) * inv_norm); if (cos_v1_x_col + cos_v2_x_col <= min_dot1323) { @@ -614,7 +715,7 @@ void PoseSolver::computeRowAndNullspace(const cv::Matx& r, Matx v3 = Pn.col(index3); Matx n1 = N.col(1); v3 -= (v3.dot(n1)) * n1 - (v3.dot(n0)) * n0; - v3 /= cv::norm(v3); + v3 *= (1.0 / cv::norm(v3)); set(0, 2, N, v3); } @@ -635,17 +736,17 @@ void PoseSolver::nearestRotationMatrixSVD(const cv::Matx& e, // Faster nearest rotation computation based on FOAM. See M. Lourakis: "An Efficient Solution to Absolute Orientation", ICPR 2016 // and M. Lourakis, G. Terzakis: "Efficient Absolute Orientation Revisited", IROS 2018. /* Solve the nearest orthogonal approximation problem - * i.e., given e, find R minimizing ||R-e||_F - * - * The computation borrows from Markley's FOAM algorithm - * "Attitude Determination Using Vector Observations: A Fast Optimal Matrix Algorithm", J. Astronaut. Sci. 1993. - * - * See also M. Lourakis: "An Efficient Solution to Absolute Orientation", ICPR 2016 - * - * Copyright (C) 2019 Manolis Lourakis (lourakis **at** ics forth gr) - * Institute of Computer Science, Foundation for Research & Technology - Hellas - * Heraklion, Crete, Greece. - */ + * i.e., given e, find R minimizing ||R-e||_F + * + * The computation borrows from Markley's FOAM algorithm + * "Attitude Determination Using Vector Observations: A Fast Optimal Matrix Algorithm", J. Astronaut. Sci. 1993. + * + * See also M. 
Lourakis: "An Efficient Solution to Absolute Orientation", ICPR 2016 + * + * Copyright (C) 2019 Manolis Lourakis (lourakis **at** ics forth gr) + * Institute of Computer Science, Foundation for Research & Technology - Hellas + * Heraklion, Crete, Greece. + */ void PoseSolver::nearestRotationMatrixFOAM(const cv::Matx& e, cv::Matx& r) { @@ -653,7 +754,7 @@ void PoseSolver::nearestRotationMatrixFOAM(const cv::Matx& e, double l, lprev, det_e, e_sq, adj_e_sq, adj_e[9]; // det(e) - det_e = e(0) * e(4) * e(8) - e(0) * e(5) * e(7) - e(1) * e(3) * e(8) + e(2) * e(3) * e(7) + e(1) * e(6) * e(5) - e(2) * e(6) * e(4); + det_e = ( e(0) * e(4) * e(8) - e(0) * e(5) * e(7) - e(1) * e(3) * e(8) ) + ( e(2) * e(3) * e(7) + e(1) * e(6) * e(5) - e(2) * e(6) * e(4) ); if (fabs(det_e) < 1E-04) { // singular, handle it with SVD PoseSolver::nearestRotationMatrixSVD(e, r); return; @@ -665,8 +766,8 @@ void PoseSolver::nearestRotationMatrixFOAM(const cv::Matx& e, adj_e[6] = e(3) * e(7) - e(4) * e(6); adj_e[7] = e(1) * e(6) - e(0) * e(7); adj_e[8] = e(0) * e(4) - e(1) * e(3); // ||e||^2, ||adj(e)||^2 - e_sq = e(0) * e(0) + e(1) * e(1) + e(2) * e(2) + e(3) * e(3) + e(4) * e(4) + e(5) * e(5) + e(6) * e(6) + e(7) * e(7) + e(8) * e(8); - adj_e_sq = adj_e[0] * adj_e[0] + adj_e[1] * adj_e[1] + adj_e[2] * adj_e[2] + adj_e[3] * adj_e[3] + adj_e[4] * adj_e[4] + adj_e[5] * adj_e[5] + adj_e[6] * adj_e[6] + adj_e[7] * adj_e[7] + adj_e[8] * adj_e[8]; + e_sq = ( e(0) * e(0) + e(1) * e(1) + e(2) * e(2) ) + ( e(3) * e(3) + e(4) * e(4) + e(5) * e(5) ) + ( e(6) * e(6) + e(7) * e(7) + e(8) * e(8) ); + adj_e_sq = ( adj_e[0] * adj_e[0] + adj_e[1] * adj_e[1] + adj_e[2] * adj_e[2] ) + ( adj_e[3] * adj_e[3] + adj_e[4] * adj_e[4] + adj_e[5] * adj_e[5] ) + ( adj_e[6] * adj_e[6] + adj_e[7] * adj_e[7] + adj_e[8] * adj_e[8] ); // compute l_max with Newton-Raphson from FOAM's characteristic polynomial, i.e. 
eq.(23) - (26) l = 0.5*(e_sq + 3.0); // 1/2*(trace(mat(e)*mat(e)') + trace(eye(3))) @@ -733,8 +834,8 @@ void PoseSolver::nearestRotationMatrixFOAM(const cv::Matx& e, double PoseSolver::det3x3(const cv::Matx& e) { - return e(0) * e(4) * e(8) + e(1) * e(5) * e(6) + e(2) * e(3) * e(7) - - e(6) * e(4) * e(2) - e(7) * e(5) * e(0) - e(8) * e(3) * e(1); + return ( e(0) * e(4) * e(8) + e(1) * e(5) * e(6) + e(2) * e(3) * e(7) ) + - ( e(6) * e(4) * e(2) + e(7) * e(5) * e(0) + e(8) * e(3) * e(1) ); } inline bool PoseSolver::positiveDepth(const SQPSolution& solution) const @@ -815,8 +916,8 @@ double PoseSolver::orthogonalityError(const cv::Matx& e) double dot_e1e3 = e(0) * e(6) + e(1) * e(7) + e(2) * e(8); double dot_e2e3 = e(3) * e(6) + e(4) * e(7) + e(5) * e(8); - return (sq_norm_e1 - 1) * (sq_norm_e1 - 1) + (sq_norm_e2 - 1) * (sq_norm_e2 - 1) + (sq_norm_e3 - 1) * (sq_norm_e3 - 1) + - 2 * (dot_e1e2 * dot_e1e2 + dot_e1e3 * dot_e1e3 + dot_e2e3 * dot_e2e3); + return ( (sq_norm_e1 - 1) * (sq_norm_e1 - 1) + (sq_norm_e2 - 1) * (sq_norm_e2 - 1) ) + ( (sq_norm_e3 - 1) * (sq_norm_e3 - 1) + + 2 * (dot_e1e2 * dot_e1e2 + dot_e1e3 * dot_e1e3 + dot_e2e3 * dot_e2e3) ); } } diff --git a/modules/3d/src/sqpnp.hpp b/modules/3d/src/sqpnp.hpp index 078c07e906cf..ba44b8e86765 100644 --- a/modules/3d/src/sqpnp.hpp +++ b/modules/3d/src/sqpnp.hpp @@ -1,3 +1,10 @@ +// Implementation of SQPnP as described in the paper: +// +// "A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem" by G. Terzakis and M. Lourakis +// a) Paper: https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123460460.pdf +// b) Supplementary: https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123460460-supp.pdf + + // This file is part of OpenCV project. 
// It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html @@ -158,6 +165,13 @@ class PoseSolver { */ void solveSQPSystem(const cv::Matx& r, cv::Matx& delta); + /* + * @brief Inverse of SPD 3x3 A via lower triangular sqrt-free Cholesky: A = L*D*L' + * @param A The input matrix + * @param A1 The inverse + */ + static bool invertSPD3x3(const cv::Matx& A, cv::Matx& A1); + /* * @brief Analytically computes the inverse of a symmetric 3x3 matrix using the * lower triangle. diff --git a/modules/core/doc/intro.markdown b/modules/core/doc/intro.markdown index 3a0ba76cd40e..55bcbe998f79 100644 --- a/modules/core/doc/intro.markdown +++ b/modules/core/doc/intro.markdown @@ -14,6 +14,9 @@ libraries. The following modules are available: - @ref imgproc (**imgproc**) - an image processing module that includes linear and non-linear image filtering, geometrical image transformations (resize, affine and perspective warping, generic table-based remapping), color space conversion, histograms, and so on. +- @ref imgcodecs (**imgcodecs**) - includes functions for reading and writing image files in various formats. +- @ref videoio (**videoio**) - an easy-to-use interface to video capturing and video codecs. +- @ref highgui (**highgui**) - an easy-to-use interface to simple UI capabilities. - @ref video (**video**) - a video analysis module that includes motion estimation, background subtraction, and object tracking algorithms. - @ref _3d "3d" (**3d**) - basic multiple-view geometry algorithms, object pose estimation and elements of 3D reconstruction. @@ -24,6 +27,9 @@ libraries. The following modules are available: - @ref stereo (**stereo**) - stereo correspondence algorithms - @ref highgui (**highgui**) - an easy-to-use interface to simple UI capabilities. - @ref videoio (**videoio**) - an easy-to-use interface to video capturing and video codecs. 
+- @ref dnn (**dnn**) - Deep Neural Network module. +- @ref photo (**photo**) - advanced photo processing techniques like denoising, inpainting. +- @ref stitching (**stitching**) - functions for image stitching and panorama creation. - ... some other helper modules, such as FLANN and Google test wrappers, Python bindings, and others. diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index a9ed18c2d204..6cfdec1b5915 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -60,11 +60,16 @@ /** @defgroup core Core functionality + +The Core module is the backbone of OpenCV, offering fundamental data structures, matrix operations, +and utility functions that other modules depend on. It’s essential for handling image data, +performing mathematical computations, and managing memory efficiently within the OpenCV ecosystem. + @{ @defgroup core_basic Basic structures @defgroup core_array Operations on arrays @defgroup core_async Asynchronous API - @defgroup core_xml XML/YAML Persistence + @defgroup core_xml XML/YAML/JSON Persistence @defgroup core_cluster Clustering @defgroup core_utils Utility and system functions and macros @{ @@ -76,7 +81,6 @@ @defgroup core_utils_samples Utility functions for OpenCV samples @} @defgroup core_opengl OpenGL interoperability - @defgroup core_ipp Intel IPP Asynchronous C/C++ Converters @defgroup core_optim Optimization Algorithms @defgroup core_directx DirectX interoperability @defgroup core_eigen Eigen support @@ -96,12 +100,13 @@ @{ @defgroup core_parallel_backend Parallel backends API @} + @defgroup core_quaternion Quaternion @} */ namespace cv { -//! @addtogroup core +//! @addtogroup core_utils //! @{ enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independently @@ -115,6 +120,11 @@ enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independe //!< mutually exclusive. }; +//! @} core_utils + +//! 
@addtogroup core_array +//! @{ + //! Covariation flags enum CovarFlags { /** The output covariance matrix is calculated as: @@ -151,27 +161,6 @@ enum CovarFlags { COVAR_COLS = 16 }; -//! @addtogroup core_cluster -//! @{ - -//! k-Means flags -enum KmeansFlags { - /** Select random initial centers in each attempt.*/ - KMEANS_RANDOM_CENTERS = 0, - /** Use kmeans++ center initialization by Arthur and Vassilvitskii [Arthur2007].*/ - KMEANS_PP_CENTERS = 2, - /** During the first (and possibly the only) attempt, use the - user-supplied labels instead of computing them from the initial centers. For the second and - further attempts, use the random or semi-random centers. Use one of KMEANS_\*_CENTERS flag - to specify the exact method.*/ - KMEANS_USE_INITIAL_LABELS = 1 -}; - -//! @} core_cluster - -//! @addtogroup core_array -//! @{ - enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix. REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix. REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix. @@ -179,19 +168,12 @@ enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/column REDUCE_SUM2 = 4 //!< the output is the sum of all squared rows/columns of the matrix. }; -//! @} core_array - /** @brief Swaps two matrices */ CV_EXPORTS void swap(Mat& a, Mat& b); /** @overload */ CV_EXPORTS void swap( UMat& a, UMat& b ); -//! @} core - -//! @addtogroup core_array -//! @{ - /** @brief Computes the source location of an extrapolated pixel. 
The function computes and returns the coordinate of a donor pixel corresponding to the specified @@ -492,6 +474,10 @@ For example: CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst, double alpha = 1, double beta = 0); +/** @example samples/cpp/tutorial_code/core/how_to_scan_images/how_to_scan_images.cpp +Check @ref tutorial_how_to_scan_images "the corresponding tutorial" for more details +*/ + /** @brief Performs a look-up table transform of an array. The function LUT fills the output array with values from the look-up table. Indices of the entries @@ -3007,6 +2993,19 @@ class CV_EXPORTS RNG_MT19937 //! @addtogroup core_cluster //! @{ +//! k-means flags +enum KmeansFlags { + /** Select random initial centers in each attempt.*/ + KMEANS_RANDOM_CENTERS = 0, + /** Use kmeans++ center initialization by Arthur and Vassilvitskii [Arthur2007].*/ + KMEANS_PP_CENTERS = 2, + /** During the first (and possibly the only) attempt, use the + user-supplied labels instead of computing them from the initial centers. For the second and + further attempts, use the random or semi-random centers. Use one of KMEANS_\*_CENTERS flag + to specify the exact method.*/ + KMEANS_USE_INITIAL_LABELS = 1 +}; + /** @example samples/cpp/snippets/kmeans.cpp An example on k-means clustering */ @@ -3021,7 +3020,7 @@ and groups the input samples around the clusters. As an output, \f$\texttt{bestL 0-based cluster index for the sample stored in the \f$i^{th}\f$ row of the samples matrix. @note -- (Python) An example on K-means clustering can be found at +- (Python) An example on k-means clustering can be found at opencv_source_code/samples/python/kmeans.py @param data Data for clustering. An array of N-Dimensional points with float coordinates is needed. 
Examples of this array can be: diff --git a/modules/core/include/opencv2/core/affine.hpp b/modules/core/include/opencv2/core/affine.hpp index 1806382e99ae..1aebf2b5071a 100644 --- a/modules/core/include/opencv2/core/affine.hpp +++ b/modules/core/include/opencv2/core/affine.hpp @@ -51,7 +51,7 @@ namespace cv { -//! @addtogroup core +//! @addtogroup core_eigen //! @{ /** @brief Affine transform diff --git a/modules/core/include/opencv2/core/bufferpool.hpp b/modules/core/include/opencv2/core/bufferpool.hpp index 4698e5da167d..e835ad025ca9 100644 --- a/modules/core/include/opencv2/core/bufferpool.hpp +++ b/modules/core/include/opencv2/core/bufferpool.hpp @@ -15,7 +15,7 @@ namespace cv { -//! @addtogroup core +//! @addtogroup core_opencl //! @{ class BufferPoolController diff --git a/modules/core/include/opencv2/core/dualquaternion.hpp b/modules/core/include/opencv2/core/dualquaternion.hpp index 1f644e9dc83b..4fec990461c3 100644 --- a/modules/core/include/opencv2/core/dualquaternion.hpp +++ b/modules/core/include/opencv2/core/dualquaternion.hpp @@ -30,7 +30,7 @@ #include namespace cv{ -//! @addtogroup core +//! @addtogroup core_quaternion //! @{ template class DualQuat; diff --git a/modules/core/include/opencv2/core/exception.hpp b/modules/core/include/opencv2/core/exception.hpp index 117cd47a55d6..2d189241bca7 100644 --- a/modules/core/include/opencv2/core/exception.hpp +++ b/modules/core/include/opencv2/core/exception.hpp @@ -127,28 +127,28 @@ CV_EXPORTS CV_NORETURN void error(const Exception& exc); By default the function prints information about the error to stderr, then it either stops if setBreakOnError() had been called before or raises the exception. It is possible to alternate error processing by using redirectError(). -@param _code - error code (Error::Code) -@param _err - error description -@param _func - function name. 
Available only when the compiler supports getting it -@param _file - source file name where the error has occurred -@param _line - line number in the source file where the error has occurred +@param code - error code (Error::Code) +@param err - error description +@param func - function name. Available only when the compiler supports getting it +@param file - source file name where the error has occurred +@param line - line number in the source file where the error has occurred @see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert */ -CV_EXPORTS CV_NORETURN void error(Error::Code _code, const String& _err, const char* _func, const char* _file, int _line); +CV_EXPORTS CV_NORETURN void error(Error::Code code, const String& err, const char* func, const char* file, int line); /*! @brief Signals an error and terminate application. By default the function prints information about the error to stderr, then it terminates application with std::terminate. The function is designed for invariants check in functions and methods with noexcept attribute. -@param _code - error code (Error::Code) -@param _err - error description -@param _func - function name. Available only when the compiler supports getting it -@param _file - source file name where the error has occurred -@param _line - line number in the source file where the error has occurred +@param code - error code (Error::Code) +@param err - error description +@param func - function name. 
Available only when the compiler supports getting it +@param file - source file name where the error has occurred +@param line - line number in the source file where the error has occurred @see CV_AssertTerminate */ -CV_EXPORTS CV_NORETURN void terminate(Error::Code _code, const String& _err, const char* _func, const char* _file, int _line) CV_NOEXCEPT; +CV_EXPORTS CV_NORETURN void terminate(Error::Code code, const String& err, const char* func, const char* file, int line) CV_NOEXCEPT; #ifdef CV_STATIC_ANALYSIS diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 745a9dc40f90..2e78134992ff 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -194,6 +194,20 @@ CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double) #endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +template inline _VecTp v_setzero_(); +template inline _VecTp v_setall_(uchar); +template inline _VecTp v_setall_(schar); +template inline _VecTp v_setall_(ushort); +template inline _VecTp v_setall_(short); +template inline _VecTp v_setall_(unsigned); +template inline _VecTp v_setall_(int); +template inline _VecTp v_setall_(uint64); +template inline _VecTp v_setall_(int64); +template inline _VecTp v_setall_(float); +template inline _VecTp v_setall_(double); +template inline _VecTp v_setall_(hfloat); + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 3a8505a297b5..f2525f0b24c2 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -447,6 +447,10 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float64x4, double, pd, __m128d) { return _Tpvec(_mm256_setzero_si256()); } \ inline _Tpvec 
v256_setall_##suffix(_Tp v) \ { return _Tpvec(_mm256_set1_##ssuffix((ctype_s)v)); } \ + template <> inline _Tpvec v_setzero_() \ + { return v256_setzero_##suffix(); } \ + template <> inline _Tpvec v_setall_(_Tp v) \ + { return v256_setall_##suffix(v); } \ OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, OPENCV_HAL_NOP) \ OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, OPENCV_HAL_NOP) \ OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, OPENCV_HAL_NOP) \ @@ -472,6 +476,10 @@ OPENCV_HAL_IMPL_AVX_INIT(v_int64x4, int64, s64, epi64x, int64) { return _Tpvec(_mm256_setzero_##zsuffix()); } \ inline _Tpvec v256_setall_##suffix(_Tp v) \ { return _Tpvec(_mm256_set1_##zsuffix(v)); } \ + template <> inline _Tpvec v_setzero_() \ + { return v256_setzero_##suffix(); } \ + template <> inline _Tpvec v_setall_(_Tp v) \ + { return v256_setall_##suffix(v); } \ OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, cast) \ OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, cast) \ OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, cast) \ @@ -3179,6 +3187,20 @@ inline void v_pack_store(bfloat* ptr, const v_float32x8& a) inline void v256_cleanup() { _mm256_zeroall(); } +#include "intrin_math.hpp" +inline v_float32x8 v_exp(const v_float32x8& x) { return v_exp_default_32f(x); } +inline v_float32x8 v_log(const v_float32x8& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x8& x, v_float32x8& s, v_float32x8& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x8 v_sin(const v_float32x8& x) { return v_sin_default_32f(x); } +inline v_float32x8 v_cos(const v_float32x8& x) { return v_cos_default_32f(x); } +inline v_float32x8 v_erf(const v_float32x8& x) { return v_erf_default_32f(x); } + +inline v_float64x4 v_exp(const v_float64x4& x) { return v_exp_default_64f(x); } +inline v_float64x4 v_log(const v_float64x4& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x4& x, v_float64x4& s, v_float64x4& c) { v_sincos_default_64f(x, s, c); } 
+inline v_float64x4 v_sin(const v_float64x4& x) { return v_sin_default_64f(x); } +inline v_float64x4 v_cos(const v_float64x4& x) { return v_cos_default_64f(x); } + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! @endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp index 64dab6b3ae0e..077b4d17a75a 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp @@ -458,6 +458,10 @@ OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float64x8, double, pd, __m256d) { return _Tpvec(_mm512_setzero_si512()); } \ inline _Tpvec v512_setall_##suffix(_Tp v) \ { return _Tpvec(_mm512_set1_##ssuffix((ctype_s)v)); } \ + template <> inline _Tpvec v_setzero_() \ + { return v512_setzero_##suffix(); } \ + template <> inline _Tpvec v_setall_(_Tp v) \ + { return v512_setall_##suffix(v); } \ OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, OPENCV_HAL_NOP) \ OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, OPENCV_HAL_NOP) \ OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, OPENCV_HAL_NOP) \ @@ -483,6 +487,10 @@ OPENCV_HAL_IMPL_AVX512_INIT(v_int64x8, int64, s64, epi64, int64) { return _Tpvec(_mm512_setzero_##zsuffix()); } \ inline _Tpvec v512_setall_##suffix(_Tp v) \ { return _Tpvec(_mm512_set1_##zsuffix(v)); } \ + template <> inline _Tpvec v_setzero_() \ + { return v512_setzero_##suffix(); } \ + template <> inline _Tpvec v_setall_(_Tp v) \ + { return v512_setall_##suffix(v); } \ OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, cast) \ OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, cast) \ OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, cast) \ @@ -3070,6 +3078,20 @@ inline int v_scan_forward(const v_float64x8& a) { return trailingZeros32(v_signm inline void v512_cleanup() { _mm256_zeroall(); } +#include "intrin_math.hpp" +inline v_float32x16 v_exp(const v_float32x16& x) { return v_exp_default_32f(x); } +inline 
v_float32x16 v_log(const v_float32x16& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x16& x, v_float32x16& s, v_float32x16& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x16 v_sin(const v_float32x16& x) { return v_sin_default_32f(x); } +inline v_float32x16 v_cos(const v_float32x16& x) { return v_cos_default_32f(x); } +inline v_float32x16 v_erf(const v_float32x16& x) { return v_erf_default_32f(x); } + +inline v_float64x8 v_exp(const v_float64x8& x) { return v_exp_default_64f(x); } +inline v_float64x8 v_log(const v_float64x8& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x8& x, v_float64x8& s, v_float64x8& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x8 v_sin(const v_float64x8& x) { return v_sin_default_64f(x); } +inline v_float64x8 v_cos(const v_float64x8& x) { return v_cos_default_64f(x); } + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! @endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index fed7cc261a78..7eeed2ce9b5d 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -263,8 +263,8 @@ Most of these operations return only one value. ### Other math -- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp, - @ref v_erf +- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp, @ref v_log, + @ref v_erf, @ref v_sin, @ref v_cos - Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs ### Conversions @@ -366,6 +366,7 @@ Floating point: |broadcast_element | x | | |exp | x | x | |log | x | x | +|sin, cos | x | x | @{ */ @@ -745,10 +746,41 @@ OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) */ OPENCV_HAL_IMPL_MATH_FUNC(v_erf, std::erf, _Tp) -//! 
@cond IGNORED +/** + * @brief Compute sine \f$ sin(x) \f$ and cosine \f$ cos(x) \f$ of elements at the same time + * + * Only for floating point types. Core implementation steps: + * 1. Input Normalization: Scale the periodicity from 2π to 4 and reduce the angle to the range \f$ [0, \frac{\pi}{4}] \f$ using periodicity and trigonometric identities. + * 2. Polynomial Approximation for \f$ sin(x) \f$ and \f$ cos(x) \f$: + * - For float16 and float32, use a Taylor series with 4 terms for sine and 5 terms for cosine. + * - For float64, use a Taylor series with 7 terms for sine and 8 terms for cosine. + * 3. Select Results: select and convert the final sine and cosine values for the original input angle. + * + * @note The precision of the calculation depends on the implementation and the data type of the input vector. + */ +template +inline void v_sincos(const v_reg<_Tp, n>& x, v_reg<_Tp, n>& s, v_reg<_Tp, n>& c) +{ + for( int i = 0; i < n; i++ ) + { + s.s[i] = std::sin(x.s[i]); + c.s[i] = std::cos(x.s[i]); + } +} + +/** + * @brief Sine \f$ sin(x) \f$ of elements + * + * Only for floating point types. The core implementation is the same as @ref v_sincos. + */ OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) + +/** + * @brief Cosine \f$ cos(x) \f$ of elements + * + * Only for floating point types. The core implementation is the same as @ref v_sincos. + */ OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) -//! @endcond /** @brief Absolute value of elements @@ -2801,7 +2833,8 @@ inline void v_transpose4x4( v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, //! @brief Helper macro //! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \ -inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); } +inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); } \ +template <> inline _Tpvec v_setzero_() { return _Tpvec::zero(); } //! @name Init with zero //! @{ @@ -2847,7 +2880,8 @@ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x8, v512, s64) //!
@brief Helper macro //! @ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \ -inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); } +inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); } \ +template <> inline _Tpvec v_setall_(_Tp val) { return _Tpvec::all(val); } //! @name Init with value //! @{ diff --git a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp index 45f53de8a248..68d08b2ef43e 100644 --- a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp @@ -557,6 +557,10 @@ inline __m256i _lasx_256_castpd_si256(const __m256d& v) { return _Tpvec(__lasx_xvreplgr2vr_d(0)); } \ inline _Tpvec v256_setall_##suffix(_Tp v) \ { return _Tpvec(__lasx_xvreplgr2vr_##ssuffix((ctype_s)v)); } \ + template <> inline _Tpvec v_setzero_() \ + { return v256_setzero_##suffix(); } \ + template <> inline _Tpvec v_setall_(_Tp v) \ + { return v256_setall_##suffix(v); } \ OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_uint8x32, suffix, OPENCV_HAL_NOP) \ OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_int8x32, suffix, OPENCV_HAL_NOP) \ OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_uint16x16, suffix, OPENCV_HAL_NOP) \ @@ -588,7 +592,11 @@ inline __m256d _lasx_256_castsi256_pd(const __m256i &v) inline _Tpvec v256_setzero_##suffix() \ { return _Tpvec(__lasx_xvreplgr2vr_d(0)); } \ inline _Tpvec v256_setall_##suffix(_Tp v) \ - { return _Tpvec(_v256_setall_##zsuffix(v)); } \ + { return _Tpvec(_v256_setall_##zsuffix(v)); } \ + template <> inline _Tpvec v_setzero_() \ + { return v256_setzero_##suffix(); } \ + template <> inline _Tpvec v_setall_(_Tp v) \ + { return v256_setall_##suffix(v); } \ OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_uint8x32, suffix, cast) \ OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_int8x32, suffix, cast) \ OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_uint16x16, suffix, cast) \ @@ -3005,6 +3013,20 @@ inline void 
v_pack_store(hfloat* ptr, const v_float32x8& a) inline void v256_cleanup() {} +#include "intrin_math.hpp" +inline v_float32x8 v_exp(const v_float32x8& x) { return v_exp_default_32f(x); } +inline v_float32x8 v_log(const v_float32x8& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x8& x, v_float32x8& s, v_float32x8& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x8 v_sin(const v_float32x8& x) { return v_sin_default_32f(x); } +inline v_float32x8 v_cos(const v_float32x8& x) { return v_cos_default_32f(x); } +inline v_float32x8 v_erf(const v_float32x8& x) { return v_erf_default_32f(x); } + +inline v_float64x4 v_exp(const v_float64x4& x) { return v_exp_default_64f(x); } +inline v_float64x4 v_log(const v_float64x4& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x4& x, v_float64x4& s, v_float64x4& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x4 v_sin(const v_float64x4& x) { return v_sin_default_64f(x); } +inline v_float64x4 v_cos(const v_float64x4& x) { return v_cos_default_64f(x); } + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! 
@endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_lsx.hpp b/modules/core/include/opencv2/core/hal/intrin_lsx.hpp index aa997070c359..a2f23d6abe44 100644 --- a/modules/core/include/opencv2/core/hal/intrin_lsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_lsx.hpp @@ -417,6 +417,10 @@ inline __m128i _lsx_128_castpd_si128(const __m128d& v) { return _Tpvec(__lsx_vldi(0)); } \ inline _Tpvec v_setall_##suffix(_Tp v) \ { return _Tpvec(__lsx_vreplgr2vr_##ssuffix((ctype_s)v)); } \ + template <> inline _Tpvec v_setzero_() \ + { return v_setzero_##suffix(); } \ + template <> inline _Tpvec v_setall_(_Tp v) \ + { return v_setall_##suffix(v); } \ OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_uint8x16, suffix, OPENCV_HAL_NOP) \ OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_int8x16, suffix, OPENCV_HAL_NOP) \ OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_uint16x8, suffix, OPENCV_HAL_NOP) \ @@ -448,6 +452,10 @@ inline __m128d _lsx_128_castsi128_pd(const __m128i &v) { return _Tpvec(__lsx_vldi(0)); } \ inline _Tpvec v_setall_##suffix(_Tp v) \ { return _Tpvec(_v128_setall_##zsuffix(v)); } \ + template <> inline _Tpvec v_setzero_() \ + { return v_setzero_##suffix(); } \ + template <> inline _Tpvec v_setall_(_Tp v) \ + { return v_setall_##suffix(v); } \ OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_uint8x16, suffix, cast) \ OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_int8x16, suffix, cast) \ OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_uint16x8, suffix, cast) \ @@ -2515,6 +2523,20 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& a) inline void v_cleanup() {} +#include "intrin_math.hpp" +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } 
+inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! @endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_math.hpp b/modules/core/include/opencv2/core/hal/intrin_math.hpp index eaf3b3b78ba8..b7e649e74477 100644 --- a/modules/core/include/opencv2/core/hal/intrin_math.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_math.hpp @@ -2,10 +2,6 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html -// This header is not standalone. Don't include directly, use "intrin.hpp" instead. -#ifdef OPENCV_HAL_INTRIN_HPP // defined in intrin.hpp - -namespace CV__SIMD_NAMESPACE { /* Universal Intrinsics implementation of sin, cos, exp and log @@ -34,434 +30,658 @@ namespace CV__SIMD_NAMESPACE { (this is the zlib license) */ - -#ifndef OPENCV_HAL_MATH_HAVE_EXP +#ifndef OPENCV_HAL_INTRIN_MATH_HPP +#define OPENCV_HAL_INTRIN_MATH_HPP //! @name Exponential //! @{ -#if defined(CV_SIMD_FP16) && CV_SIMD_FP16 - // Implementation is the same as float32 vector. 
- inline v_float16 v_exp(const v_float16 &x) { - const v_float16 _vexp_lo_f16 = vx_setall_f16(hfloat(-10.7421875f)); - const v_float16 _vexp_hi_f16 = vx_setall_f16(hfloat(11.f)); - const v_float16 _vexp_half_fp16 = vx_setall_f16(hfloat(0.5f)); - const v_float16 _vexp_one_fp16 = vx_setall_f16(hfloat(1.f)); - const v_float16 _vexp_LOG2EF_f16 = vx_setall_f16(hfloat(1.44269504088896341f)); - const v_float16 _vexp_C1_f16 = vx_setall_f16(hfloat(-6.93359375E-1f)); - const v_float16 _vexp_C2_f16 = vx_setall_f16(hfloat(2.12194440E-4f)); - const v_float16 _vexp_p0_f16 = vx_setall_f16(hfloat(1.9875691500E-4f)); - const v_float16 _vexp_p1_f16 = vx_setall_f16(hfloat(1.3981999507E-3f)); - const v_float16 _vexp_p2_f16 = vx_setall_f16(hfloat(8.3334519073E-3f)); - const v_float16 _vexp_p3_f16 = vx_setall_f16(hfloat(4.1665795894E-2f)); - const v_float16 _vexp_p4_f16 = vx_setall_f16(hfloat(1.6666665459E-1f)); - const v_float16 _vexp_p5_f16 = vx_setall_f16(hfloat(5.0000001201E-1f)); - const v_int16 _vexp_bias_s16 = vx_setall_s16(0xf); - - v_float16 _vexp_, _vexp_x, _vexp_y, _vexp_xx; - v_int16 _vexp_mm; - - // compute exponential of x - _vexp_x = v_max(x, _vexp_lo_f16); - _vexp_x = v_min(_vexp_x, _vexp_hi_f16); - - _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f16, _vexp_half_fp16); - _vexp_mm = v_floor(_vexp_); - _vexp_ = v_cvt_f16(_vexp_mm); - _vexp_mm = v_add(_vexp_mm, _vexp_bias_s16); - _vexp_mm = v_shl(_vexp_mm, 10); - - _vexp_x = v_fma(_vexp_, _vexp_C1_f16, _vexp_x); - _vexp_x = v_fma(_vexp_, _vexp_C2_f16, _vexp_x); - _vexp_xx = v_mul(_vexp_x, _vexp_x); - - _vexp_y = v_fma(_vexp_x, _vexp_p0_f16, _vexp_p1_f16); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f16); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f16); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f16); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f16); - - _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x); - _vexp_y = v_add(_vexp_y, _vexp_one_fp16); - _vexp_y = v_mul(_vexp_y, v_reinterpret_as_f16(_vexp_mm)); - - // exp(NAN) -> NAN - 
v_float16 mask_not_nan = v_not_nan(x); - return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f16(vx_setall_s16(0x7e00))); - } -#endif - - inline v_float32 v_exp(const v_float32 &x) { - const v_float32 _vexp_lo_f32 = vx_setall_f32(-88.3762626647949f); - const v_float32 _vexp_hi_f32 = vx_setall_f32(89.f); - const v_float32 _vexp_half_fp32 = vx_setall_f32(0.5f); - const v_float32 _vexp_one_fp32 = vx_setall_f32(1.f); - const v_float32 _vexp_LOG2EF_f32 = vx_setall_f32(1.44269504088896341f); - const v_float32 _vexp_C1_f32 = vx_setall_f32(-6.93359375E-1f); - const v_float32 _vexp_C2_f32 = vx_setall_f32(2.12194440E-4f); - const v_float32 _vexp_p0_f32 = vx_setall_f32(1.9875691500E-4f); - const v_float32 _vexp_p1_f32 = vx_setall_f32(1.3981999507E-3f); - const v_float32 _vexp_p2_f32 = vx_setall_f32(8.3334519073E-3f); - const v_float32 _vexp_p3_f32 = vx_setall_f32(4.1665795894E-2f); - const v_float32 _vexp_p4_f32 = vx_setall_f32(1.6666665459E-1f); - const v_float32 _vexp_p5_f32 = vx_setall_f32(5.0000001201E-1f); - const v_int32 _vexp_bias_s32 = vx_setall_s32(0x7f); - - v_float32 _vexp_, _vexp_x, _vexp_y, _vexp_xx; - v_int32 _vexp_mm; - - // compute exponential of x - _vexp_x = v_max(x, _vexp_lo_f32); - _vexp_x = v_min(_vexp_x, _vexp_hi_f32); - - _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f32, _vexp_half_fp32); - _vexp_mm = v_floor(_vexp_); - _vexp_ = v_cvt_f32(_vexp_mm); - _vexp_mm = v_add(_vexp_mm, _vexp_bias_s32); - _vexp_mm = v_shl(_vexp_mm, 23); - - _vexp_x = v_fma(_vexp_, _vexp_C1_f32, _vexp_x); - _vexp_x = v_fma(_vexp_, _vexp_C2_f32, _vexp_x); - _vexp_xx = v_mul(_vexp_x, _vexp_x); - - _vexp_y = v_fma(_vexp_x, _vexp_p0_f32, _vexp_p1_f32); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f32); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f32); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f32); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f32); - - _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x); - _vexp_y = v_add(_vexp_y, _vexp_one_fp32); - _vexp_y = v_mul(_vexp_y, 
v_reinterpret_as_f32(_vexp_mm)); - - // exp(NAN) -> NAN - v_float32 mask_not_nan = v_not_nan(x); - return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f32(vx_setall_s32(0x7fc00000))); - } - -#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F - inline v_float64 v_exp(const v_float64 &x) { - const v_float64 _vexp_lo_f64 = vx_setall_f64(-709.43613930310391424428); - const v_float64 _vexp_hi_f64 = vx_setall_f64(710.); - const v_float64 _vexp_half_f64 = vx_setall_f64(0.5); - const v_float64 _vexp_one_f64 = vx_setall_f64(1.0); - const v_float64 _vexp_two_f64 = vx_setall_f64(2.0); - const v_float64 _vexp_LOG2EF_f64 = vx_setall_f64(1.44269504088896340736); - const v_float64 _vexp_C1_f64 = vx_setall_f64(-6.93145751953125E-1); - const v_float64 _vexp_C2_f64 = vx_setall_f64(-1.42860682030941723212E-6); - const v_float64 _vexp_p0_f64 = vx_setall_f64(1.26177193074810590878E-4); - const v_float64 _vexp_p1_f64 = vx_setall_f64(3.02994407707441961300E-2); - const v_float64 _vexp_p2_f64 = vx_setall_f64(9.99999999999999999910E-1); - const v_float64 _vexp_q0_f64 = vx_setall_f64(3.00198505138664455042E-6); - const v_float64 _vexp_q1_f64 = vx_setall_f64(2.52448340349684104192E-3); - const v_float64 _vexp_q2_f64 = vx_setall_f64(2.27265548208155028766E-1); - const v_float64 _vexp_q3_f64 = vx_setall_f64(2.00000000000000000009E0); - const v_int64 _vexp_bias_s64 = vx_setall_s64(0x3ff); - - v_float64 _vexp_, _vexp_x, _vexp_y, _vexp_z, _vexp_xx; - v_int64 _vexp_mm; - - // compute exponential of x - _vexp_x = v_max(x, _vexp_lo_f64); - _vexp_x = v_min(_vexp_x, _vexp_hi_f64); - - _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f64, _vexp_half_f64); - _vexp_mm = v_expand_low(v_floor(_vexp_)); - _vexp_ = v_cvt_f64(_vexp_mm); - _vexp_mm = v_add(_vexp_mm, _vexp_bias_s64); - _vexp_mm = v_shl(_vexp_mm, 52); - - _vexp_x = v_fma(_vexp_, _vexp_C1_f64, _vexp_x); - _vexp_x = v_fma(_vexp_, _vexp_C2_f64, _vexp_x); - _vexp_xx = v_mul(_vexp_x, _vexp_x); - - _vexp_y = v_fma(_vexp_xx, _vexp_p0_f64, _vexp_p1_f64); - _vexp_y = 
v_fma(_vexp_y, _vexp_xx, _vexp_p2_f64); - _vexp_y = v_mul(_vexp_y, _vexp_x); - - _vexp_z = v_fma(_vexp_xx, _vexp_q0_f64, _vexp_q1_f64); - _vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q2_f64); - _vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q3_f64); - - _vexp_z = v_div(_vexp_y, v_sub(_vexp_z, _vexp_y)); - _vexp_z = v_fma(_vexp_two_f64, _vexp_z, _vexp_one_f64); - _vexp_z = v_mul(_vexp_z, v_reinterpret_as_f64(_vexp_mm)); - - // exp(NAN) -> NAN - v_float64 mask_not_nan = v_not_nan(x); - return v_select(mask_not_nan, _vexp_z, v_reinterpret_as_f64(vx_setall_s64(0x7FF8000000000000))); - } -#endif - -#define OPENCV_HAL_MATH_HAVE_EXP 1 -//! @} -#endif +// Implementation is the same as float32 vector. +template +inline _TpVec16F v_exp_default_16f(const _TpVec16F &x) { + const _TpVec16F _vexp_lo_f16 = v_setall_<_TpVec16F>(-10.7421875f); + const _TpVec16F _vexp_hi_f16 = v_setall_<_TpVec16F>(11.f); + const _TpVec16F _vexp_half_fp16 = v_setall_<_TpVec16F>(0.5f); + const _TpVec16F _vexp_one_fp16 = v_setall_<_TpVec16F>(1.f); + const _TpVec16F _vexp_LOG2EF_f16 = v_setall_<_TpVec16F>(1.44269504088896341f); + const _TpVec16F _vexp_C1_f16 = v_setall_<_TpVec16F>(-6.93359375E-1f); + const _TpVec16F _vexp_C2_f16 = v_setall_<_TpVec16F>(2.12194440E-4f); + const _TpVec16F _vexp_p0_f16 = v_setall_<_TpVec16F>(1.9875691500E-4f); + const _TpVec16F _vexp_p1_f16 = v_setall_<_TpVec16F>(1.3981999507E-3f); + const _TpVec16F _vexp_p2_f16 = v_setall_<_TpVec16F>(8.3334519073E-3f); + const _TpVec16F _vexp_p3_f16 = v_setall_<_TpVec16F>(4.1665795894E-2f); + const _TpVec16F _vexp_p4_f16 = v_setall_<_TpVec16F>(1.6666665459E-1f); + const _TpVec16F _vexp_p5_f16 = v_setall_<_TpVec16F>(5.0000001201E-1f); + + _TpVec16F _vexp_, _vexp_x, _vexp_y, _vexp_xx; + _TpVec16S _vexp_mm; + const _TpVec16S _vexp_bias_s16 = v_setall_<_TpVec16S>((short)0xf); + + // compute exponential of x + _vexp_x = v_max(x, _vexp_lo_f16); + _vexp_x = v_min(_vexp_x, _vexp_hi_f16); + + _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f16, _vexp_half_fp16); + 
_vexp_mm = v_floor(_vexp_); + _vexp_ = v_cvt_f16(_vexp_mm); + _vexp_mm = v_add(_vexp_mm, _vexp_bias_s16); + _vexp_mm = v_shl(_vexp_mm, 10); + + _vexp_x = v_fma(_vexp_, _vexp_C1_f16, _vexp_x); + _vexp_x = v_fma(_vexp_, _vexp_C2_f16, _vexp_x); + _vexp_xx = v_mul(_vexp_x, _vexp_x); + + _vexp_y = v_fma(_vexp_x, _vexp_p0_f16, _vexp_p1_f16); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f16); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f16); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f16); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f16); + + _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x); + _vexp_y = v_add(_vexp_y, _vexp_one_fp16); + _vexp_y = v_mul(_vexp_y, v_reinterpret_as_f16(_vexp_mm)); + + // exp(NAN) -> NAN + _TpVec16F mask_not_nan = v_not_nan(x); + return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7e00))); +} -#ifndef OPENCV_HAL_MATH_HAVE_LOG +template +inline _TpVec32F v_exp_default_32f(const _TpVec32F &x) { + const _TpVec32F _vexp_lo_f32 = v_setall_<_TpVec32F>(-88.3762626647949f); + const _TpVec32F _vexp_hi_f32 = v_setall_<_TpVec32F>(89.f); + const _TpVec32F _vexp_half_fp32 = v_setall_<_TpVec32F>(0.5f); + const _TpVec32F _vexp_one_fp32 = v_setall_<_TpVec32F>(1.f); + const _TpVec32F _vexp_LOG2EF_f32 = v_setall_<_TpVec32F>(1.44269504088896341f); + const _TpVec32F _vexp_C1_f32 = v_setall_<_TpVec32F>(-6.93359375E-1f); + const _TpVec32F _vexp_C2_f32 = v_setall_<_TpVec32F>(2.12194440E-4f); + const _TpVec32F _vexp_p0_f32 = v_setall_<_TpVec32F>(1.9875691500E-4f); + const _TpVec32F _vexp_p1_f32 = v_setall_<_TpVec32F>(1.3981999507E-3f); + const _TpVec32F _vexp_p2_f32 = v_setall_<_TpVec32F>(8.3334519073E-3f); + const _TpVec32F _vexp_p3_f32 = v_setall_<_TpVec32F>(4.1665795894E-2f); + const _TpVec32F _vexp_p4_f32 = v_setall_<_TpVec32F>(1.6666665459E-1f); + const _TpVec32F _vexp_p5_f32 = v_setall_<_TpVec32F>(5.0000001201E-1f); + + _TpVec32F _vexp_, _vexp_x, _vexp_y, _vexp_xx; + _TpVec32S _vexp_mm; + const _TpVec32S _vexp_bias_s32 = 
v_setall_<_TpVec32S>((int)0x7f); + + // compute exponential of x + _vexp_x = v_max(x, _vexp_lo_f32); + _vexp_x = v_min(_vexp_x, _vexp_hi_f32); + + _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f32, _vexp_half_fp32); + _vexp_mm = v_floor(_vexp_); + _vexp_ = v_cvt_f32(_vexp_mm); + _vexp_mm = v_add(_vexp_mm, _vexp_bias_s32); + _vexp_mm = v_shl(_vexp_mm, 23); + + _vexp_x = v_fma(_vexp_, _vexp_C1_f32, _vexp_x); + _vexp_x = v_fma(_vexp_, _vexp_C2_f32, _vexp_x); + _vexp_xx = v_mul(_vexp_x, _vexp_x); + + _vexp_y = v_fma(_vexp_x, _vexp_p0_f32, _vexp_p1_f32); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f32); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f32); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f32); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f32); + + _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x); + _vexp_y = v_add(_vexp_y, _vexp_one_fp32); + _vexp_y = v_mul(_vexp_y, v_reinterpret_as_f32(_vexp_mm)); + + // exp(NAN) -> NAN + _TpVec32F mask_not_nan = v_not_nan(x); + return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7fc00000))); +} + +template +inline _TpVec64F v_exp_default_64f(const _TpVec64F &x) { + const _TpVec64F _vexp_lo_f64 = v_setall_<_TpVec64F>(-709.43613930310391424428); + const _TpVec64F _vexp_hi_f64 = v_setall_<_TpVec64F>(710.); + const _TpVec64F _vexp_half_f64 = v_setall_<_TpVec64F>(0.5); + const _TpVec64F _vexp_one_f64 = v_setall_<_TpVec64F>(1.0); + const _TpVec64F _vexp_two_f64 = v_setall_<_TpVec64F>(2.0); + const _TpVec64F _vexp_LOG2EF_f64 = v_setall_<_TpVec64F>(1.44269504088896340736); + const _TpVec64F _vexp_C1_f64 = v_setall_<_TpVec64F>(-6.93145751953125E-1); + const _TpVec64F _vexp_C2_f64 = v_setall_<_TpVec64F>(-1.42860682030941723212E-6); + const _TpVec64F _vexp_p0_f64 = v_setall_<_TpVec64F>(1.26177193074810590878E-4); + const _TpVec64F _vexp_p1_f64 = v_setall_<_TpVec64F>(3.02994407707441961300E-2); + const _TpVec64F _vexp_p2_f64 = v_setall_<_TpVec64F>(9.99999999999999999910E-1); + const _TpVec64F _vexp_q0_f64 = 
v_setall_<_TpVec64F>(3.00198505138664455042E-6); + const _TpVec64F _vexp_q1_f64 = v_setall_<_TpVec64F>(2.52448340349684104192E-3); + const _TpVec64F _vexp_q2_f64 = v_setall_<_TpVec64F>(2.27265548208155028766E-1); + const _TpVec64F _vexp_q3_f64 = v_setall_<_TpVec64F>(2.00000000000000000009E0); + + _TpVec64F _vexp_, _vexp_x, _vexp_y, _vexp_z, _vexp_xx; + _TpVec64S _vexp_mm; + const _TpVec64S _vexp_bias_s64 = v_setall_<_TpVec64S>((int64)0x3ff); + + // compute exponential of x + _vexp_x = v_max(x, _vexp_lo_f64); + _vexp_x = v_min(_vexp_x, _vexp_hi_f64); + + _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f64, _vexp_half_f64); + _vexp_mm = v_expand_low(v_floor(_vexp_)); + _vexp_ = v_cvt_f64(_vexp_mm); + _vexp_mm = v_add(_vexp_mm, _vexp_bias_s64); + _vexp_mm = v_shl(_vexp_mm, 52); + + _vexp_x = v_fma(_vexp_, _vexp_C1_f64, _vexp_x); + _vexp_x = v_fma(_vexp_, _vexp_C2_f64, _vexp_x); + _vexp_xx = v_mul(_vexp_x, _vexp_x); + + _vexp_y = v_fma(_vexp_xx, _vexp_p0_f64, _vexp_p1_f64); + _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_p2_f64); + _vexp_y = v_mul(_vexp_y, _vexp_x); + + _vexp_z = v_fma(_vexp_xx, _vexp_q0_f64, _vexp_q1_f64); + _vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q2_f64); + _vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q3_f64); + + _vexp_z = v_div(_vexp_y, v_sub(_vexp_z, _vexp_y)); + _vexp_z = v_fma(_vexp_two_f64, _vexp_z, _vexp_one_f64); + _vexp_z = v_mul(_vexp_z, v_reinterpret_as_f64(_vexp_mm)); + + // exp(NAN) -> NAN + _TpVec64F mask_not_nan = v_not_nan(x); + return v_select(mask_not_nan, _vexp_z, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7FF8000000000000))); +} +//! @} //! @name Natural Logarithm //! 
@{ -#if defined(CV_SIMD_FP16) && CV_SIMD_FP16 - inline v_float16 v_log(const v_float16 &x) { - const v_float16 _vlog_one_fp16 = vx_setall_f16(hfloat(1.0f)); - const v_float16 _vlog_SQRTHF_fp16 = vx_setall_f16(hfloat(0.707106781186547524f)); - const v_float16 _vlog_q1_fp16 = vx_setall_f16(hfloat(-2.12194440E-4f)); - const v_float16 _vlog_q2_fp16 = vx_setall_f16(hfloat(0.693359375f)); - const v_float16 _vlog_p0_fp16 = vx_setall_f16(hfloat(7.0376836292E-2f)); - const v_float16 _vlog_p1_fp16 = vx_setall_f16(hfloat(-1.1514610310E-1f)); - const v_float16 _vlog_p2_fp16 = vx_setall_f16(hfloat(1.1676998740E-1f)); - const v_float16 _vlog_p3_fp16 = vx_setall_f16(hfloat(-1.2420140846E-1f)); - const v_float16 _vlog_p4_fp16 = vx_setall_f16(hfloat(1.4249322787E-1f)); - const v_float16 _vlog_p5_fp16 = vx_setall_f16(hfloat(-1.6668057665E-1f)); - const v_float16 _vlog_p6_fp16 = vx_setall_f16(hfloat(2.0000714765E-1f)); - const v_float16 _vlog_p7_fp16 = vx_setall_f16(hfloat(-2.4999993993E-1f)); - const v_float16 _vlog_p8_fp16 = vx_setall_f16(hfloat(3.3333331174E-1f)); - const v_int16 _vlog_inv_mant_mask_s16 = vx_setall_s16(~0x7c00); - - v_float16 _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp; - v_int16 _vlog_ux, _vlog_emm0; - - _vlog_ux = v_reinterpret_as_s16(x); - _vlog_emm0 = v_shr(_vlog_ux, 10); - - _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s16); - _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s16(vx_setall_f16(hfloat(0.5f)))); - _vlog_x = v_reinterpret_as_f16(_vlog_ux); - - _vlog_emm0 = v_sub(_vlog_emm0, vx_setall_s16(0xf)); - _vlog_e = v_cvt_f16(_vlog_emm0); - - _vlog_e = v_add(_vlog_e, _vlog_one_fp16); - - v_float16 _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp16); - _vlog_tmp = v_and(_vlog_x, _vlog_mask); - _vlog_x = v_sub(_vlog_x, _vlog_one_fp16); - _vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp16, _vlog_mask)); - _vlog_x = v_add(_vlog_x, _vlog_tmp); - - _vlog_z = v_mul(_vlog_x, _vlog_x); - - _vlog_y = v_fma(_vlog_p0_fp16, _vlog_x, _vlog_p1_fp16); - _vlog_y = v_fma(_vlog_y, 
_vlog_x, _vlog_p2_fp16); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp16); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp16); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp16); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p6_fp16); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p7_fp16); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p8_fp16); - _vlog_y = v_mul(_vlog_y, _vlog_x); - _vlog_y = v_mul(_vlog_y, _vlog_z); - - _vlog_y = v_fma(_vlog_e, _vlog_q1_fp16, _vlog_y); - - _vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, vx_setall_f16(hfloat(0.5f)))); - - _vlog_x = v_add(_vlog_x, _vlog_y); - _vlog_x = v_fma(_vlog_e, _vlog_q2_fp16, _vlog_x); - // log(0) -> -INF - v_float16 mask_zero = v_eq(x, vx_setzero_f16()); - _vlog_x = v_select(mask_zero, v_reinterpret_as_f16(vx_setall_s16(0xfc00)), _vlog_x); - // log(NEG), log(NAN) -> NAN - v_float16 mask_not_nan = v_ge(x, vx_setzero_f16()); - _vlog_x = v_select(mask_not_nan, _vlog_x, v_reinterpret_as_f16(vx_setall_s16(0x7e00))); - // log(INF) -> INF - v_float16 mask_inf = v_eq(x, v_reinterpret_as_f16(vx_setall_s16(0x7c00))); - _vlog_x = v_select(mask_inf, x, _vlog_x); - return _vlog_x; - } -#endif - - inline v_float32 v_log(const v_float32 &x) { - const v_float32 _vlog_one_fp32 = vx_setall_f32(1.0f); - const v_float32 _vlog_SQRTHF_fp32 = vx_setall_f32(0.707106781186547524f); - const v_float32 _vlog_q1_fp32 = vx_setall_f32(-2.12194440E-4f); - const v_float32 _vlog_q2_fp32 = vx_setall_f32(0.693359375f); - const v_float32 _vlog_p0_fp32 = vx_setall_f32(7.0376836292E-2f); - const v_float32 _vlog_p1_fp32 = vx_setall_f32(-1.1514610310E-1f); - const v_float32 _vlog_p2_fp32 = vx_setall_f32(1.1676998740E-1f); - const v_float32 _vlog_p3_fp32 = vx_setall_f32(-1.2420140846E-1f); - const v_float32 _vlog_p4_fp32 = vx_setall_f32(1.4249322787E-1f); - const v_float32 _vlog_p5_fp32 = vx_setall_f32(-1.6668057665E-1f); - const v_float32 _vlog_p6_fp32 = vx_setall_f32(2.0000714765E-1f); - const v_float32 _vlog_p7_fp32 = vx_setall_f32(-2.4999993993E-1f); - const 
v_float32 _vlog_p8_fp32 = vx_setall_f32(3.3333331174E-1f); - const v_int32 _vlog_inv_mant_mask_s32 = vx_setall_s32(~0x7f800000); - - v_float32 _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp; - v_int32 _vlog_ux, _vlog_emm0; - - _vlog_ux = v_reinterpret_as_s32(x); - _vlog_emm0 = v_shr(_vlog_ux, 23); - - _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s32); - _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s32(vx_setall_f32(0.5f))); - _vlog_x = v_reinterpret_as_f32(_vlog_ux); - - _vlog_emm0 = v_sub(_vlog_emm0, vx_setall_s32(0x7f)); - _vlog_e = v_cvt_f32(_vlog_emm0); - - _vlog_e = v_add(_vlog_e, _vlog_one_fp32); - - v_float32 _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp32); - _vlog_tmp = v_and(_vlog_x, _vlog_mask); - _vlog_x = v_sub(_vlog_x, _vlog_one_fp32); - _vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp32, _vlog_mask)); - _vlog_x = v_add(_vlog_x, _vlog_tmp); - - _vlog_z = v_mul(_vlog_x, _vlog_x); - - _vlog_y = v_fma(_vlog_p0_fp32, _vlog_x, _vlog_p1_fp32); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp32); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp32); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp32); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp32); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p6_fp32); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p7_fp32); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p8_fp32); - _vlog_y = v_mul(_vlog_y, _vlog_x); - _vlog_y = v_mul(_vlog_y, _vlog_z); - - _vlog_y = v_fma(_vlog_e, _vlog_q1_fp32, _vlog_y); - - _vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, vx_setall_f32(0.5))); - - _vlog_x = v_add(_vlog_x, _vlog_y); - _vlog_x = v_fma(_vlog_e, _vlog_q2_fp32, _vlog_x); - // log(0) -> -INF - v_float32 mask_zero = v_eq(x, vx_setzero_f32()); - _vlog_x = v_select(mask_zero, v_reinterpret_as_f32(vx_setall_s32(0xff800000)), _vlog_x); - // log(NEG), log(NAN) -> NAN - v_float32 mask_not_nan = v_ge(x, vx_setzero_f32()); - _vlog_x = v_select(mask_not_nan, _vlog_x, v_reinterpret_as_f32(vx_setall_s32(0x7fc00000))); - // log(INF) -> INF - v_float32 mask_inf = v_eq(x, 
v_reinterpret_as_f32(vx_setall_s32(0x7f800000))); - _vlog_x = v_select(mask_inf, x, _vlog_x); - return _vlog_x; - } - -#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F - inline v_float64 v_log(const v_float64 &x) { - const v_float64 _vlog_one_fp64 = vx_setall_f64(1.0); - const v_float64 _vlog_SQRTHF_fp64 = vx_setall_f64(0.7071067811865475244); - const v_float64 _vlog_p0_fp64 = vx_setall_f64(1.01875663804580931796E-4); - const v_float64 _vlog_p1_fp64 = vx_setall_f64(4.97494994976747001425E-1); - const v_float64 _vlog_p2_fp64 = vx_setall_f64(4.70579119878881725854); - const v_float64 _vlog_p3_fp64 = vx_setall_f64(1.44989225341610930846E1); - const v_float64 _vlog_p4_fp64 = vx_setall_f64(1.79368678507819816313E1); - const v_float64 _vlog_p5_fp64 = vx_setall_f64(7.70838733755885391666); - const v_float64 _vlog_q0_fp64 = vx_setall_f64(1.12873587189167450590E1); - const v_float64 _vlog_q1_fp64 = vx_setall_f64(4.52279145837532221105E1); - const v_float64 _vlog_q2_fp64 = vx_setall_f64(8.29875266912776603211E1); - const v_float64 _vlog_q3_fp64 = vx_setall_f64(7.11544750618563894466E1); - const v_float64 _vlog_q4_fp64 = vx_setall_f64(2.31251620126765340583E1); - - const v_float64 _vlog_C0_fp64 = vx_setall_f64(2.121944400546905827679e-4); - const v_float64 _vlog_C1_fp64 = vx_setall_f64(0.693359375); - const v_int64 _vlog_inv_mant_mask_s64 = vx_setall_s64(~0x7ff0000000000000); - - v_float64 _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp, _vlog_xx; - v_int64 _vlog_ux, _vlog_emm0; - - _vlog_ux = v_reinterpret_as_s64(x); - _vlog_emm0 = v_shr(_vlog_ux, 52); - - _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s64); - _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s64(vx_setall_f64(0.5))); - _vlog_x = v_reinterpret_as_f64(_vlog_ux); - - _vlog_emm0 = v_sub(_vlog_emm0, vx_setall_s64(0x3ff)); - _vlog_e = v_cvt_f64(_vlog_emm0); - - _vlog_e = v_add(_vlog_e, _vlog_one_fp64); - - v_float64 _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp64); - _vlog_tmp = v_and(_vlog_x, _vlog_mask); - _vlog_x = v_sub(_vlog_x, 
_vlog_one_fp64); - _vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp64, _vlog_mask)); - _vlog_x = v_add(_vlog_x, _vlog_tmp); - - _vlog_xx = v_mul(_vlog_x, _vlog_x); - - _vlog_y = v_fma(_vlog_p0_fp64, _vlog_x, _vlog_p1_fp64); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp64); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp64); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp64); - _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp64); - _vlog_y = v_mul(_vlog_y, _vlog_x); - _vlog_y = v_mul(_vlog_y, _vlog_xx); - - _vlog_z = v_add(_vlog_x, _vlog_q0_fp64); - _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q1_fp64); - _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q2_fp64); - _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q3_fp64); - _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q4_fp64); - - _vlog_z = v_div(_vlog_y, _vlog_z); - _vlog_z = v_sub(_vlog_z, v_mul(_vlog_e, _vlog_C0_fp64)); - _vlog_z = v_sub(_vlog_z, v_mul(_vlog_xx, vx_setall_f64(0.5))); - - _vlog_z = v_add(_vlog_z, _vlog_x); - _vlog_z = v_fma(_vlog_e, _vlog_C1_fp64, _vlog_z); - - // log(0) -> -INF - v_float64 mask_zero = v_eq(x, vx_setzero_f64()); - _vlog_z = v_select(mask_zero, v_reinterpret_as_f64(vx_setall_s64(0xfff0000000000000)), _vlog_z); - // log(NEG), log(NAN) -> NAN - v_float64 mask_not_nan = v_ge(x, vx_setzero_f64()); - _vlog_z = v_select(mask_not_nan, _vlog_z, v_reinterpret_as_f64(vx_setall_s64(0x7ff8000000000000))); - // log(INF) -> INF - v_float64 mask_inf = v_eq(x, v_reinterpret_as_f64(vx_setall_s64(0x7ff0000000000000))); - _vlog_z = v_select(mask_inf, x, _vlog_z); - return _vlog_z; - } -#endif - -#define OPENCV_HAL_MATH_HAVE_LOG 1 -//! 
@} -#endif +template +inline _TpVec16F v_log_default_16f(const _TpVec16F &x) { + const _TpVec16F _vlog_one_fp16 = v_setall_<_TpVec16F>(1.0f); + const _TpVec16F _vlog_SQRTHF_fp16 = v_setall_<_TpVec16F>(0.707106781186547524f); + const _TpVec16F _vlog_q1_fp16 = v_setall_<_TpVec16F>(-2.12194440E-4f); + const _TpVec16F _vlog_q2_fp16 = v_setall_<_TpVec16F>(0.693359375f); + const _TpVec16F _vlog_p0_fp16 = v_setall_<_TpVec16F>(7.0376836292E-2f); + const _TpVec16F _vlog_p1_fp16 = v_setall_<_TpVec16F>(-1.1514610310E-1f); + const _TpVec16F _vlog_p2_fp16 = v_setall_<_TpVec16F>(1.1676998740E-1f); + const _TpVec16F _vlog_p3_fp16 = v_setall_<_TpVec16F>(-1.2420140846E-1f); + const _TpVec16F _vlog_p4_fp16 = v_setall_<_TpVec16F>(1.4249322787E-1f); + const _TpVec16F _vlog_p5_fp16 = v_setall_<_TpVec16F>(-1.6668057665E-1f); + const _TpVec16F _vlog_p6_fp16 = v_setall_<_TpVec16F>(2.0000714765E-1f); + const _TpVec16F _vlog_p7_fp16 = v_setall_<_TpVec16F>(-2.4999993993E-1f); + const _TpVec16F _vlog_p8_fp16 = v_setall_<_TpVec16F>(3.3333331174E-1f); + + _TpVec16F _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp; + _TpVec16S _vlog_ux, _vlog_emm0; + const _TpVec16S _vlog_inv_mant_mask_s16 = v_setall_<_TpVec16S>((short)~0x7c00); + + _vlog_ux = v_reinterpret_as_s16(x); + _vlog_emm0 = v_shr(_vlog_ux, 10); + + _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s16); + _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s16(v_setall_<_TpVec16F>(0.5f))); + _vlog_x = v_reinterpret_as_f16(_vlog_ux); + + _vlog_emm0 = v_sub(_vlog_emm0, v_setall_<_TpVec16S>((short)0xf)); + _vlog_e = v_cvt_f16(_vlog_emm0); + + _vlog_e = v_add(_vlog_e, _vlog_one_fp16); + + _TpVec16F _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp16); + _vlog_tmp = v_and(_vlog_x, _vlog_mask); + _vlog_x = v_sub(_vlog_x, _vlog_one_fp16); + _vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp16, _vlog_mask)); + _vlog_x = v_add(_vlog_x, _vlog_tmp); + + _vlog_z = v_mul(_vlog_x, _vlog_x); + + _vlog_y = v_fma(_vlog_p0_fp16, _vlog_x, _vlog_p1_fp16); + _vlog_y = v_fma(_vlog_y, 
_vlog_x, _vlog_p2_fp16); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp16); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp16); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp16); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p6_fp16); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p7_fp16); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p8_fp16); + _vlog_y = v_mul(_vlog_y, _vlog_x); + _vlog_y = v_mul(_vlog_y, _vlog_z); + + _vlog_y = v_fma(_vlog_e, _vlog_q1_fp16, _vlog_y); + + _vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, v_setall_<_TpVec16F>(0.5f))); + + _vlog_x = v_add(_vlog_x, _vlog_y); + _vlog_x = v_fma(_vlog_e, _vlog_q2_fp16, _vlog_x); + // log(0) -> -INF + _TpVec16F mask_zero = v_eq(x, v_setzero_<_TpVec16F>()); + _vlog_x = v_select(mask_zero, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0xfc00)), _vlog_x); + // log(NEG), log(NAN) -> NAN + _TpVec16F mask_not_nan = v_ge(x, v_setzero_<_TpVec16F>()); + _vlog_x = v_select(mask_not_nan, _vlog_x, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7e00))); + // log(INF) -> INF + _TpVec16F mask_inf = v_eq(x, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7c00))); + _vlog_x = v_select(mask_inf, x, _vlog_x); + return _vlog_x; +} -/* This implementation is derived from the approximation approach of Error Function (Erf) from PyTorch - https://github.com/pytorch/pytorch/blob/9c50ecc84b9a6e699a7f058891b889aafbf976c7/aten/src/ATen/cpu/vec/vec512/vec512_float.h#L189-L220 -*/ +template +inline _TpVec32F v_log_default_32f(const _TpVec32F &x) { + const _TpVec32F _vlog_one_fp32 = v_setall_<_TpVec32F>(1.0f); + const _TpVec32F _vlog_SQRTHF_fp32 = v_setall_<_TpVec32F>(0.707106781186547524f); + const _TpVec32F _vlog_q1_fp32 = v_setall_<_TpVec32F>(-2.12194440E-4f); + const _TpVec32F _vlog_q2_fp32 = v_setall_<_TpVec32F>(0.693359375f); + const _TpVec32F _vlog_p0_fp32 = v_setall_<_TpVec32F>(7.0376836292E-2f); + const _TpVec32F _vlog_p1_fp32 = v_setall_<_TpVec32F>(-1.1514610310E-1f); + const _TpVec32F _vlog_p2_fp32 = 
v_setall_<_TpVec32F>(1.1676998740E-1f); + const _TpVec32F _vlog_p3_fp32 = v_setall_<_TpVec32F>(-1.2420140846E-1f); + const _TpVec32F _vlog_p4_fp32 = v_setall_<_TpVec32F>(1.4249322787E-1f); + const _TpVec32F _vlog_p5_fp32 = v_setall_<_TpVec32F>(-1.6668057665E-1f); + const _TpVec32F _vlog_p6_fp32 = v_setall_<_TpVec32F>(2.0000714765E-1f); + const _TpVec32F _vlog_p7_fp32 = v_setall_<_TpVec32F>(-2.4999993993E-1f); + const _TpVec32F _vlog_p8_fp32 = v_setall_<_TpVec32F>(3.3333331174E-1f); + + _TpVec32F _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp; + _TpVec32S _vlog_ux, _vlog_emm0; + const _TpVec32S _vlog_inv_mant_mask_s32 = v_setall_<_TpVec32S>((int)~0x7f800000); + + _vlog_ux = v_reinterpret_as_s32(x); + _vlog_emm0 = v_shr(_vlog_ux, 23); + + _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s32); + _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s32(v_setall_<_TpVec32F>(0.5f))); + _vlog_x = v_reinterpret_as_f32(_vlog_ux); + + _vlog_emm0 = v_sub(_vlog_emm0, v_setall_<_TpVec32S>((int)0x7f)); + _vlog_e = v_cvt_f32(_vlog_emm0); + + _vlog_e = v_add(_vlog_e, _vlog_one_fp32); + + _TpVec32F _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp32); + _vlog_tmp = v_and(_vlog_x, _vlog_mask); + _vlog_x = v_sub(_vlog_x, _vlog_one_fp32); + _vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp32, _vlog_mask)); + _vlog_x = v_add(_vlog_x, _vlog_tmp); + + _vlog_z = v_mul(_vlog_x, _vlog_x); + + _vlog_y = v_fma(_vlog_p0_fp32, _vlog_x, _vlog_p1_fp32); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp32); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp32); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp32); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp32); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p6_fp32); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p7_fp32); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p8_fp32); + _vlog_y = v_mul(_vlog_y, _vlog_x); + _vlog_y = v_mul(_vlog_y, _vlog_z); + + _vlog_y = v_fma(_vlog_e, _vlog_q1_fp32, _vlog_y); + + _vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, v_setall_<_TpVec32F>(0.5f))); + + _vlog_x = 
v_add(_vlog_x, _vlog_y); + _vlog_x = v_fma(_vlog_e, _vlog_q2_fp32, _vlog_x); + // log(0) -> -INF + _TpVec32F mask_zero = v_eq(x, v_setzero_<_TpVec32F>()); + _vlog_x = v_select(mask_zero, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0xff800000)), _vlog_x); + // log(NEG), log(NAN) -> NAN + _TpVec32F mask_not_nan = v_ge(x, v_setzero_<_TpVec32F>()); + _vlog_x = v_select(mask_not_nan, _vlog_x, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7fc00000))); + // log(INF) -> INF + _TpVec32F mask_inf = v_eq(x, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7f800000))); + _vlog_x = v_select(mask_inf, x, _vlog_x); + return _vlog_x; +} -#ifndef OPENCV_HAL_MATH_HAVE_ERF +template +inline _TpVec64F v_log_default_64f(const _TpVec64F &x) { + const _TpVec64F _vlog_one_fp64 = v_setall_<_TpVec64F>(1.0); + const _TpVec64F _vlog_SQRTHF_fp64 = v_setall_<_TpVec64F>(0.7071067811865475244); + const _TpVec64F _vlog_p0_fp64 = v_setall_<_TpVec64F>(1.01875663804580931796E-4); + const _TpVec64F _vlog_p1_fp64 = v_setall_<_TpVec64F>(4.97494994976747001425E-1); + const _TpVec64F _vlog_p2_fp64 = v_setall_<_TpVec64F>(4.70579119878881725854); + const _TpVec64F _vlog_p3_fp64 = v_setall_<_TpVec64F>(1.44989225341610930846E1); + const _TpVec64F _vlog_p4_fp64 = v_setall_<_TpVec64F>(1.79368678507819816313E1); + const _TpVec64F _vlog_p5_fp64 = v_setall_<_TpVec64F>(7.70838733755885391666); + const _TpVec64F _vlog_q0_fp64 = v_setall_<_TpVec64F>(1.12873587189167450590E1); + const _TpVec64F _vlog_q1_fp64 = v_setall_<_TpVec64F>(4.52279145837532221105E1); + const _TpVec64F _vlog_q2_fp64 = v_setall_<_TpVec64F>(8.29875266912776603211E1); + const _TpVec64F _vlog_q3_fp64 = v_setall_<_TpVec64F>(7.11544750618563894466E1); + const _TpVec64F _vlog_q4_fp64 = v_setall_<_TpVec64F>(2.31251620126765340583E1); + + const _TpVec64F _vlog_C0_fp64 = v_setall_<_TpVec64F>(2.121944400546905827679e-4); + const _TpVec64F _vlog_C1_fp64 = v_setall_<_TpVec64F>(0.693359375); + + _TpVec64F _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp, 
_vlog_xx; + _TpVec64S _vlog_ux, _vlog_emm0; + const _TpVec64S _vlog_inv_mant_mask_s64 = v_setall_<_TpVec64S>((int64)~0x7ff0000000000000); + + _vlog_ux = v_reinterpret_as_s64(x); + _vlog_emm0 = v_shr(_vlog_ux, 52); + + _vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s64); + _vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s64(v_setall_<_TpVec64F>(0.5))); + _vlog_x = v_reinterpret_as_f64(_vlog_ux); + + _vlog_emm0 = v_sub(_vlog_emm0, v_setall_<_TpVec64S>((int64)0x3ff)); + _vlog_e = v_cvt_f64(_vlog_emm0); + + _vlog_e = v_add(_vlog_e, _vlog_one_fp64); + + _TpVec64F _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp64); + _vlog_tmp = v_and(_vlog_x, _vlog_mask); + _vlog_x = v_sub(_vlog_x, _vlog_one_fp64); + _vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp64, _vlog_mask)); + _vlog_x = v_add(_vlog_x, _vlog_tmp); + + _vlog_xx = v_mul(_vlog_x, _vlog_x); + + _vlog_y = v_fma(_vlog_p0_fp64, _vlog_x, _vlog_p1_fp64); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp64); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp64); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp64); + _vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp64); + _vlog_y = v_mul(_vlog_y, _vlog_x); + _vlog_y = v_mul(_vlog_y, _vlog_xx); + + _vlog_z = v_add(_vlog_x, _vlog_q0_fp64); + _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q1_fp64); + _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q2_fp64); + _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q3_fp64); + _vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q4_fp64); + + _vlog_z = v_div(_vlog_y, _vlog_z); + _vlog_z = v_sub(_vlog_z, v_mul(_vlog_e, _vlog_C0_fp64)); + _vlog_z = v_sub(_vlog_z, v_mul(_vlog_xx, v_setall_<_TpVec64F>(0.5))); + + _vlog_z = v_add(_vlog_z, _vlog_x); + _vlog_z = v_fma(_vlog_e, _vlog_C1_fp64, _vlog_z); + + // log(0) -> -INF + _TpVec64F mask_zero = v_eq(x, v_setzero_<_TpVec64F>()); + _vlog_z = v_select(mask_zero, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0xfff0000000000000)), _vlog_z); + // log(NEG), log(NAN) -> NAN + _TpVec64F mask_not_nan = v_ge(x, v_setzero_<_TpVec64F>()); + _vlog_z = 
v_select(mask_not_nan, _vlog_z, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff8000000000000))); + // log(INF) -> INF + _TpVec64F mask_inf = v_eq(x, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff0000000000000))); + _vlog_z = v_select(mask_inf, x, _vlog_z); + return _vlog_z; +} +//! @} -//! @name Error Function +//! @name Sine and Cosine //! @{ +template +inline void v_sincos_default_16f(const _TpVec16F &x, _TpVec16F &ysin, _TpVec16F &ycos) { + const _TpVec16F v_cephes_FOPI = v_setall_<_TpVec16F>(hfloat(1.27323954473516f)); // 4 / M_PI + const _TpVec16F v_minus_DP1 = v_setall_<_TpVec16F>(hfloat(-0.78515625f)); + const _TpVec16F v_minus_DP2 = v_setall_<_TpVec16F>(hfloat(-2.4187564849853515625E-4f)); + const _TpVec16F v_minus_DP3 = v_setall_<_TpVec16F>(hfloat(-3.77489497744594108E-8f)); + const _TpVec16F v_sincof_p0 = v_setall_<_TpVec16F>(hfloat(-1.9515295891E-4f)); + const _TpVec16F v_sincof_p1 = v_setall_<_TpVec16F>(hfloat(8.3321608736E-3f)); + const _TpVec16F v_sincof_p2 = v_setall_<_TpVec16F>(hfloat(-1.6666654611E-1f)); + const _TpVec16F v_coscof_p0 = v_setall_<_TpVec16F>(hfloat(2.443315711809948E-5f)); + const _TpVec16F v_coscof_p1 = v_setall_<_TpVec16F>(hfloat(-1.388731625493765E-3f)); + const _TpVec16F v_coscof_p2 = v_setall_<_TpVec16F>(hfloat(4.166664568298827E-2f)); + const _TpVec16F v_nan = v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7e00)); + const _TpVec16F v_neg_zero = v_setall_<_TpVec16F>(hfloat(-0.f)); + + _TpVec16F _vx, _vy, sign_mask_sin, sign_mask_cos; + _TpVec16S emm2; + + sign_mask_sin = v_lt(x, v_setzero_<_TpVec16F>()); + _vx = v_abs(x); + _vy = v_mul(_vx, v_cephes_FOPI); + + emm2 = v_trunc(_vy); + emm2 = v_add(emm2, v_setall_<_TpVec16S>((short)1)); + emm2 = v_and(emm2, v_setall_<_TpVec16S>((short)~1)); + _vy = v_cvt_f16(emm2); + + _TpVec16F poly_mask = v_reinterpret_as_f16(v_eq(v_and(emm2, v_setall_<_TpVec16S>((short)2)), v_setall_<_TpVec16S>((short)0))); + + _vx = v_fma(_vy, v_minus_DP1, _vx); + _vx = v_fma(_vy, 
v_minus_DP2, _vx); + _vx = v_fma(_vy, v_minus_DP3, _vx); + + sign_mask_sin = v_xor(sign_mask_sin, v_reinterpret_as_f16(v_eq(v_and(emm2, v_setall_<_TpVec16S>((short)4)), v_setall_<_TpVec16S>((short)0)))); + sign_mask_cos = v_reinterpret_as_f16(v_eq(v_and(v_sub(emm2, v_setall_<_TpVec16S>((short)2)), v_setall_<_TpVec16S>((short)4)), v_setall_<_TpVec16S>((short)0))); + + _TpVec16F _vxx = v_mul(_vx, _vx); + _TpVec16F y1, y2; + + y1 = v_fma(v_coscof_p0, _vxx, v_coscof_p1); + y1 = v_fma(y1, _vxx, v_coscof_p2); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec16F>(hfloat(-0.5f))); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec16F>(hfloat(1.f))); + + y2 = v_fma(v_sincof_p0, _vxx, v_sincof_p1); + y2 = v_fma(y2, _vxx, v_sincof_p2); + y2 = v_mul(y2, _vxx); + y2 = v_fma(y2, _vx, _vx); + + ysin = v_select(poly_mask, y2, y1); + ycos = v_select(poly_mask, y1, y2); + ysin = v_select(sign_mask_sin, ysin, v_xor(v_neg_zero, ysin)); + ycos = v_select(sign_mask_cos, v_xor(v_neg_zero, ycos), ycos); + + // sincos(NAN) -> NAN, sincos(±INF) -> NAN + _TpVec16F mask_inf = v_eq(_vx, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7c00))); + _TpVec16F mask_nan = v_or(mask_inf, v_ne(x, x)); + ysin = v_select(mask_nan, v_nan, ysin); + ycos = v_select(mask_nan, v_nan, ycos); +} - inline v_float32 v_erf(const v_float32 &v) { - const v_float32 coef0 = vx_setall_f32(0.3275911f), - coef1 = vx_setall_f32(1.061405429f), - coef2 = vx_setall_f32(-1.453152027f), - coef3 = vx_setall_f32(1.421413741f), - coef4 = vx_setall_f32(-0.284496736f), - coef5 = vx_setall_f32(0.254829592f), - ones = vx_setall_f32(1.0f), - neg_zeros = vx_setall_f32(-0.f); - v_float32 t = v_abs(v); - // sign(v) - v_float32 sign_mask = v_and(neg_zeros, v); - - t = v_div(ones, v_fma(coef0, t, ones)); - v_float32 r = v_fma(coef1, t, coef2); - r = v_fma(r, t, coef3); - r = v_fma(r, t, coef4); - r = v_fma(r, t, coef5); - // - v * v - v_float32 pow_2 = v_mul(v, v); - v_float32 neg_pow_2 = v_xor(neg_zeros, pow_2); - // - exp(- v * v) - v_float32 exp = 
v_exp(neg_pow_2); - v_float32 neg_exp = v_xor(neg_zeros, exp); - v_float32 res = v_mul(t, neg_exp); - res = v_fma(r, res, ones); - return v_xor(sign_mask, res); - } - -#define OPENCV_HAL_MATH_HAVE_ERF 1 +template +inline _TpVec16F v_sin_default_16f(const _TpVec16F &x) { + _TpVec16F ysin, ycos; + v_sincos_default_16f<_TpVec16F, _TpVec16S>(x, ysin, ycos); + return ysin; +} + +template +inline _TpVec16F v_cos_default_16f(const _TpVec16F &x) { + _TpVec16F ysin, ycos; + v_sincos_default_16f<_TpVec16F, _TpVec16S>(x, ysin, ycos); + return ycos; +} + + +template +inline void v_sincos_default_32f(const _TpVec32F &x, _TpVec32F &ysin, _TpVec32F &ycos) { + const _TpVec32F v_cephes_FOPI = v_setall_<_TpVec32F>(1.27323954473516f); // 4 / M_PI + const _TpVec32F v_minus_DP1 = v_setall_<_TpVec32F>(-0.78515625f); + const _TpVec32F v_minus_DP2 = v_setall_<_TpVec32F>(-2.4187564849853515625E-4f); + const _TpVec32F v_minus_DP3 = v_setall_<_TpVec32F>(-3.77489497744594108E-8f); + const _TpVec32F v_sincof_p0 = v_setall_<_TpVec32F>(-1.9515295891E-4f); + const _TpVec32F v_sincof_p1 = v_setall_<_TpVec32F>(8.3321608736E-3f); + const _TpVec32F v_sincof_p2 = v_setall_<_TpVec32F>(-1.6666654611E-1f); + const _TpVec32F v_coscof_p0 = v_setall_<_TpVec32F>(2.443315711809948E-5f); + const _TpVec32F v_coscof_p1 = v_setall_<_TpVec32F>(-1.388731625493765E-3f); + const _TpVec32F v_coscof_p2 = v_setall_<_TpVec32F>(4.166664568298827E-2f); + const _TpVec32F v_nan = v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7fc00000)); + const _TpVec32F v_neg_zero = v_setall_<_TpVec32F>(-0.f); + + _TpVec32F _vx, _vy, sign_mask_sin, sign_mask_cos; + _TpVec32S emm2; + + sign_mask_sin = v_lt(x, v_setzero_<_TpVec32F>()); + _vx = v_abs(x); + _vy = v_mul(_vx, v_cephes_FOPI); + + emm2 = v_trunc(_vy); + emm2 = v_add(emm2, v_setall_<_TpVec32S>(1)); + emm2 = v_and(emm2, v_setall_<_TpVec32S>(~1)); + _vy = v_cvt_f32(emm2); + + _TpVec32F poly_mask = v_reinterpret_as_f32(v_eq(v_and(emm2, v_setall_<_TpVec32S>(2)), 
v_setall_<_TpVec32S>(0))); + + _vx = v_fma(_vy, v_minus_DP1, _vx); + _vx = v_fma(_vy, v_minus_DP2, _vx); + _vx = v_fma(_vy, v_minus_DP3, _vx); + + sign_mask_sin = v_xor(sign_mask_sin, v_reinterpret_as_f32(v_eq(v_and(emm2, v_setall_<_TpVec32S>(4)), v_setall_<_TpVec32S>(0)))); + sign_mask_cos = v_reinterpret_as_f32(v_eq(v_and(v_sub(emm2, v_setall_<_TpVec32S>(2)), v_setall_<_TpVec32S>(4)), v_setall_<_TpVec32S>(0))); + + _TpVec32F _vxx = v_mul(_vx, _vx); + _TpVec32F y1, y2; + + y1 = v_fma(v_coscof_p0, _vxx, v_coscof_p1); + y1 = v_fma(y1, _vxx, v_coscof_p2); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec32F>(-0.5f)); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec32F>(1.f)); + + y2 = v_fma(v_sincof_p0, _vxx, v_sincof_p1); + y2 = v_fma(y2, _vxx, v_sincof_p2); + y2 = v_mul(y2, _vxx); + y2 = v_fma(y2, _vx, _vx); + + ysin = v_select(poly_mask, y2, y1); + ycos = v_select(poly_mask, y1, y2); + ysin = v_select(sign_mask_sin, ysin, v_xor(v_neg_zero, ysin)); + ycos = v_select(sign_mask_cos, v_xor(v_neg_zero, ycos), ycos); + + // sincos(NAN) -> NAN, sincos(±INF) -> NAN + _TpVec32F mask_inf = v_eq(_vx, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7f800000))); + _TpVec32F mask_nan = v_or(mask_inf, v_ne(x, x)); + ysin = v_select(mask_nan, v_nan, ysin); + ycos = v_select(mask_nan, v_nan, ycos); +} + +template +inline _TpVec32F v_sin_default_32f(const _TpVec32F &x) { + _TpVec32F ysin, ycos; + v_sincos_default_32f<_TpVec32F, _TpVec32S>(x, ysin, ycos); + return ysin; +} + +template +inline _TpVec32F v_cos_default_32f(const _TpVec32F &x) { + _TpVec32F ysin, ycos; + v_sincos_default_32f<_TpVec32F, _TpVec32S>(x, ysin, ycos); + return ycos; +} + +template +inline void v_sincos_default_64f(const _TpVec64F &x, _TpVec64F &ysin, _TpVec64F &ycos) { + const _TpVec64F v_cephes_FOPI = v_setall_<_TpVec64F>(1.2732395447351626861510701069801148); // 4 / M_PI + const _TpVec64F v_minus_DP1 = v_setall_<_TpVec64F>(-7.853981554508209228515625E-1); + const _TpVec64F v_minus_DP2 = 
v_setall_<_TpVec64F>(-7.94662735614792836714E-9); + const _TpVec64F v_minus_DP3 = v_setall_<_TpVec64F>(-3.06161699786838294307E-17); + const _TpVec64F v_sin_C1 = v_setall_<_TpVec64F>(1.58962301576546568060E-10); + const _TpVec64F v_sin_C2 = v_setall_<_TpVec64F>(-2.50507477628578072866E-8); + const _TpVec64F v_sin_C3 = v_setall_<_TpVec64F>(2.75573136213857245213E-6); + const _TpVec64F v_sin_C4 = v_setall_<_TpVec64F>(-1.98412698295895385996E-4); + const _TpVec64F v_sin_C5 = v_setall_<_TpVec64F>(8.33333333332211858878E-3); + const _TpVec64F v_sin_C6 = v_setall_<_TpVec64F>(-1.66666666666666307295E-1); + const _TpVec64F v_cos_C1 = v_setall_<_TpVec64F>(-1.13585365213876817300E-11); + const _TpVec64F v_cos_C2 = v_setall_<_TpVec64F>(2.08757008419747316778E-9); + const _TpVec64F v_cos_C3 = v_setall_<_TpVec64F>(-2.75573141792967388112E-7); + const _TpVec64F v_cos_C4 = v_setall_<_TpVec64F>(2.48015872888517045348E-5); + const _TpVec64F v_cos_C5 = v_setall_<_TpVec64F>(-1.38888888888730564116E-3); + const _TpVec64F v_cos_C6 = v_setall_<_TpVec64F>(4.16666666666665929218E-2); + const _TpVec64F v_nan = v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff8000000000000)); + const _TpVec64F v_neg_zero = v_setall_<_TpVec64F>(-0.0); + + _TpVec64F _vx, _vy, sign_mask_sin, sign_mask_cos; + _TpVec64S emm2; + + sign_mask_sin = v_lt(x, v_setzero_<_TpVec64F>()); + _vx = v_abs(x); + _vy = v_mul(_vx, v_cephes_FOPI); + + emm2 = v_expand_low(v_trunc(_vy)); + emm2 = v_add(emm2, v_setall_<_TpVec64S>((int64)1)); + emm2 = v_and(emm2, v_setall_<_TpVec64S>((int64)~1)); + _vy = v_cvt_f64(emm2); + + _TpVec64F poly_mask = v_reinterpret_as_f64(v_eq(v_and(emm2, v_setall_<_TpVec64S>((int64)2)), v_setall_<_TpVec64S>((int64)0))); + + _vx = v_fma(_vy, v_minus_DP1, _vx); + _vx = v_fma(_vy, v_minus_DP2, _vx); + _vx = v_fma(_vy, v_minus_DP3, _vx); + + sign_mask_sin = v_xor(sign_mask_sin, v_reinterpret_as_f64(v_eq(v_and(emm2, v_setall_<_TpVec64S>((int64)4)), v_setall_<_TpVec64S>((int64)0)))); + sign_mask_cos = 
v_reinterpret_as_f64(v_eq(v_and(v_sub(emm2, v_setall_<_TpVec64S>((int64)2)), v_setall_<_TpVec64S>((int64)4)), v_setall_<_TpVec64S>((int64)0))); + + _TpVec64F _vxx = v_mul(_vx, _vx); + _TpVec64F y1, y2; + + y1 = v_fma(v_cos_C1, _vxx, v_cos_C2); + y1 = v_fma(y1, _vxx, v_cos_C3); + y1 = v_fma(y1, _vxx, v_cos_C4); + y1 = v_fma(y1, _vxx, v_cos_C5); + y1 = v_fma(y1, _vxx, v_cos_C6); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec64F>(-0.5)); + y1 = v_fma(y1, _vxx, v_setall_<_TpVec64F>(1.0)); + + y2 = v_fma(v_sin_C1, _vxx, v_sin_C2); + y2 = v_fma(y2, _vxx, v_sin_C3); + y2 = v_fma(y2, _vxx, v_sin_C4); + y2 = v_fma(y2, _vxx, v_sin_C5); + y2 = v_fma(y2, _vxx, v_sin_C6); + y2 = v_mul(y2, _vxx); + y2 = v_fma(y2, _vx, _vx); + + ysin = v_select(poly_mask, y2, y1); + ycos = v_select(poly_mask, y1, y2); + ysin = v_select(sign_mask_sin, ysin, v_xor(v_neg_zero, ysin)); + ycos = v_select(sign_mask_cos, v_xor(v_neg_zero, ycos), ycos); + + // sincos(NAN) -> NAN, sincos(±INF) -> NAN + _TpVec64F mask_inf = v_eq(_vx, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff0000000000000))); + _TpVec64F mask_nan = v_or(mask_inf, v_ne(x, x)); + ysin = v_select(mask_nan, v_nan, ysin); + ycos = v_select(mask_nan, v_nan, ycos); +} + +template +inline _TpVec64F v_sin_default_64f(const _TpVec64F &x) { + _TpVec64F ysin, ycos; + v_sincos_default_64f<_TpVec64F, _TpVec64S>(x, ysin, ycos); + return ysin; +} + +template +inline _TpVec64F v_cos_default_64f(const _TpVec64F &x) { + _TpVec64F ysin, ycos; + v_sincos_default_64f<_TpVec64F, _TpVec64S>(x, ysin, ycos); + return ycos; +} //! @} -#endif // OPENCV_HAL_MATH_HAVE_ERF +/* This implementation is derived from the approximation approach of Error Function (Erf) from PyTorch + https://github.com/pytorch/pytorch/blob/9c50ecc84b9a6e699a7f058891b889aafbf976c7/aten/src/ATen/cpu/vec/vec512/vec512_float.h#L189-L220 +*/ +//! @name Error Function +//! 
@{ +template +inline _TpVec32F v_erf_default_32f(const _TpVec32F &v) { + const _TpVec32F coef0 = v_setall_<_TpVec32F>(0.3275911f), + coef1 = v_setall_<_TpVec32F>(1.061405429f), + coef2 = v_setall_<_TpVec32F>(-1.453152027f), + coef3 = v_setall_<_TpVec32F>(1.421413741f), + coef4 = v_setall_<_TpVec32F>(-0.284496736f), + coef5 = v_setall_<_TpVec32F>(0.254829592f), + ones = v_setall_<_TpVec32F>(1.0f), + neg_zeros = v_setall_<_TpVec32F>(-0.f); + _TpVec32F t = v_abs(v); + // sign(v) + _TpVec32F sign_mask = v_and(neg_zeros, v); + + t = v_div(ones, v_fma(coef0, t, ones)); + _TpVec32F r = v_fma(coef1, t, coef2); + r = v_fma(r, t, coef3); + r = v_fma(r, t, coef4); + r = v_fma(r, t, coef5); + // - v * v + _TpVec32F v2 = v_mul(v, v); + _TpVec32F mv2 = v_xor(neg_zeros, v2); + // - exp(- v * v) + _TpVec32F exp = v_exp_default_32f<_TpVec32F, _TpVec32S>(mv2); + _TpVec32F neg_exp = v_xor(neg_zeros, exp); + _TpVec32F res = v_mul(t, neg_exp); + res = v_fma(r, res, ones); + return v_xor(sign_mask, res); } -#endif // OPENCV_HAL_INTRIN_HPP +//! 
@} + +#endif // OPENCV_HAL_INTRIN_MATH_HPP diff --git a/modules/core/include/opencv2/core/hal/intrin_msa.hpp b/modules/core/include/opencv2/core/hal/intrin_msa.hpp index 8d2c22b08760..3917faa292cd 100644 --- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp @@ -235,6 +235,8 @@ struct v_float64x2 #define OPENCV_HAL_IMPL_MSA_INIT(_Tpv, _Tp, suffix) \ inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(msa_dupq_n_##suffix((_Tp)0)); } \ inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(msa_dupq_n_##suffix(v)); } \ +template <> inline v_##_Tpv v_setzero_() { return v_setzero_##suffix(); } \ +template <> inline v_##_Tpv v_setall_(_Tp v) { return v_setall_##suffix(v); } \ inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(MSA_TPV_REINTERPRET(v16u8, v.val)); } \ inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(MSA_TPV_REINTERPRET(v16i8, v.val)); } \ inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(MSA_TPV_REINTERPRET(v8u16, v.val)); } \ @@ -1861,6 +1863,20 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& v) inline void v_cleanup() {} +#include "intrin_math.hpp" +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { 
v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! @endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 7685b435bf83..d42d48ee8296 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -414,6 +414,8 @@ struct v_float64x2 #define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, _TpCast, suffix) \ inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_TpCast)0)); } \ inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix((_TpCast)v)); } \ +template <> inline v_##_Tpv v_setzero_() { return v_setzero_##suffix(); } \ +template <> inline v_##_Tpv v_setall_(_Tp v) { return v_setall_##suffix(v); } \ inline _Tpv##_t vreinterpretq_##suffix##_##suffix(_Tpv##_t v) { return v; } \ inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(vreinterpretq_u8_##suffix(v.val)); } \ inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(vreinterpretq_s8_##suffix(v.val)); } \ @@ -435,6 +437,7 @@ OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, uint64, u64) OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, int64, s64) #if CV_SIMD128_FP16 OPENCV_HAL_IMPL_NEON_INIT(float16x8, hfloat, __fp16, f16); +template <> inline v_float16x8 v_setall_(float v) { return v_setall_f16((hfloat)v); } #define OPENCV_HAL_IMPL_NEON_INIT_FP16(_Tpv, suffix) \ inline v_float16x8 v_reinterpret_as_f16(const v_##_Tpv& v) { return v_float16x8(vreinterpretq_f16_##suffix(v.val)); } OPENCV_HAL_IMPL_NEON_INIT_FP16(uint8x16, u8) @@ -3030,6 +3033,28 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& v) inline void v_cleanup() {} +#include "intrin_math.hpp" +#if CV_SIMD128_FP16 +inline v_float16x8 v_exp(const 
v_float16x8& x) { return v_exp_default_16f(x); } +inline v_float16x8 v_log(const v_float16x8& x) { return v_log_default_16f(x); } +inline void v_sincos(const v_float16x8& x, v_float16x8& s, v_float16x8& c) { v_sincos_default_16f(x, s, c); } +inline v_float16x8 v_sin(const v_float16x8& x) { return v_sin_default_16f(x); } +inline v_float16x8 v_cos(const v_float16x8& x) { return v_cos_default_16f(x); } +#endif +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } +#if CV_SIMD128_64F +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } +#endif + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! 
@endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp index 4900418df3ce..146335dc017d 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp @@ -355,10 +355,12 @@ inline v_float64x2 v_reinterpret_as_f64(const v_float64x2& v) { return v_float64 #define OPENCV_HAL_IMPL_RISCVV_INIT_SET(__Tp, _Tp, suffix, len, num) \ inline v_##_Tp##x##num v_setzero_##suffix() { return v_##_Tp##x##num(vmv_v_x_##len##m1(0, num)); } \ -inline v_##_Tp##x##num v_setall_##suffix(__Tp v) { return v_##_Tp##x##num(vmv_v_x_##len##m1(v, num)); } +inline v_##_Tp##x##num v_setall_##suffix(__Tp v) { return v_##_Tp##x##num(vmv_v_x_##len##m1(v, num)); } \ +template <> inline v_##_Tp##x##num v_setzero_() { return v_setzero_##suffix(); } \ +template <> inline v_##_Tp##x##num v_setall_(__Tp v) { return v_setall_##suffix(v); } OPENCV_HAL_IMPL_RISCVV_INIT_SET(uchar, uint8, u8, u8, 16) -OPENCV_HAL_IMPL_RISCVV_INIT_SET(char, int8, s8, i8, 16) +OPENCV_HAL_IMPL_RISCVV_INIT_SET(schar, int8, s8, i8, 16) OPENCV_HAL_IMPL_RISCVV_INIT_SET(ushort, uint16, u16, u16, 8) OPENCV_HAL_IMPL_RISCVV_INIT_SET(short, int16, s16, i16, 8) OPENCV_HAL_IMPL_RISCVV_INIT_SET(unsigned int, uint32, u32, u32, 4) @@ -371,6 +373,11 @@ inline v_float32x4 v_setall_f32(float v) { return v_float32x4(vfmv_v_f_f32m1(v, inline v_float64x2 v_setzero_f64() { return v_float64x2(vfmv_v_f_f64m1(0, 2)); } inline v_float64x2 v_setall_f64(double v) { return v_float64x2(vfmv_v_f_f64m1(v, 2)); } +template <> inline v_float32x4 v_setzero_() { return v_setzero_f32(); } +template <> inline v_float32x4 v_setall_(float v) { return v_setall_f32(v); } + +template <> inline v_float64x2 v_setzero_() { return v_setzero_f64(); } +template <> inline v_float64x2 v_setall_(double v) { return v_setall_f64(v); } #define OPENCV_HAL_IMPL_RISCVV_BIN_OP(bin_op, _Tpvec, intrin) \ inline _Tpvec bin_op(const _Tpvec& a, 
const _Tpvec& b) \ @@ -2859,6 +2866,20 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& v) inline void v_cleanup() {} +#include "intrin_math.hpp" +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! 
@endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp index 13c616b046a9..76288166051d 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp @@ -182,6 +182,14 @@ inline v_##_Tpvec v_setzero_##suffix1() \ inline v_##_Tpvec v_setall_##suffix1(_Tp v) \ { \ return __riscv_vmv_v_x_##suffix2##m1(v, vl); \ +} \ +template <> inline v_##_Tpvec v_setzero_() \ +{ \ + return v_setzero_##suffix1(); \ +} \ +template <> inline v_##_Tpvec v_setall_(_Tp v) \ +{ \ + return v_setall_##suffix1(v); \ } OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8, uchar, u8, u8, VTraits::vlanes()) @@ -201,6 +209,14 @@ inline v_##_Tpv v_setzero_##suffix() \ inline v_##_Tpv v_setall_##suffix(_Tp v) \ { \ return __riscv_vfmv_v_f_##suffix##m1(v, vl); \ +} \ +template <> inline v_##_Tpv v_setzero_() \ +{ \ + return v_setzero_##suffix(); \ +} \ +template <> inline v_##_Tpv v_setall_(_Tp v) \ +{ \ + return v_setall_##suffix(v); \ } #if CV_SIMD_SCALABLE_FP16 @@ -2471,6 +2487,20 @@ inline v_float32 v_matmuladd(const v_float32& v, const v_float32& m0, inline void v_cleanup() {} +#include "intrin_math.hpp" +inline v_float32 v_exp(const v_float32& x) { return v_exp_default_32f(x); } +inline v_float32 v_log(const v_float32& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32& x, v_float32& s, v_float32& c) { v_sincos_default_32f(x, s, c); } +inline v_float32 v_sin(const v_float32& x) { return v_sin_default_32f(x); } +inline v_float32 v_cos(const v_float32& x) { return v_cos_default_32f(x); } +inline v_float32 v_erf(const v_float32& x) { return v_erf_default_32f(x); } + +inline v_float64 v_exp(const v_float64& x) { return v_exp_default_64f(x); } +inline v_float64 v_log(const v_float64& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64& x, v_float64& s, v_float64& c) { v_sincos_default_64f(x, s, 
c); } +inline v_float64 v_sin(const v_float64& x) { return v_sin_default_64f(x); } +inline v_float64 v_cos(const v_float64& x) { return v_cos_default_64f(x); } + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! @endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index ee4545db6bd6..26ea34026382 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -347,6 +347,8 @@ namespace hal_sse_internal #define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast) \ inline _Tpvec v_setzero_##suffix() { return _Tpvec(_mm_setzero_##zsuffix()); } \ inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps)v)); } \ +template <> inline _Tpvec v_setzero_() { return v_setzero_##suffix(); } \ +template <> inline _Tpvec v_setall_(_Tp v) { return v_setall_##suffix(v); } \ template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ { return _Tpvec(cast(a.val)); } @@ -364,6 +366,11 @@ inline v_int64x2 v_setzero_s64() { return v_int64x2(_mm_setzero_si128()); } inline v_uint64x2 v_setall_u64(uint64 val) { return v_uint64x2(val, val); } inline v_int64x2 v_setall_s64(int64 val) { return v_int64x2(val, val); } +template <> inline v_uint64x2 v_setzero_() { return v_setzero_u64(); } +template <> inline v_int64x2 v_setzero_() { return v_setzero_s64(); } +template <> inline v_uint64x2 v_setall_(uint64 val) { return v_setall_u64(val); } +template <> inline v_int64x2 v_setall_(int64 val) { return v_setall_s64(val); } + template inline v_uint64x2 v_reinterpret_as_u64(const _Tpvec& a) { return v_uint64x2(a.val); } template inline @@ -3462,6 +3469,21 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& v) inline void v_cleanup() {} +#include "intrin_math.hpp" +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return 
v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } + + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! @endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp index fbe690461a5e..2157e1e87063 100644 --- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp @@ -261,6 +261,8 @@ OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float64x2, double) #define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast) \ inline _Tpvec v_setzero_##suffix() { return _Tpvec(vec_splats((_Tp)0)); } \ inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));} \ +template <> inline _Tpvec v_setzero_() { return v_setzero_##suffix(); } \ +template <> inline _Tpvec v_setall_(_Tp v) { return v_setall_##suffix(v); } /* forward the value v; writing a declaration here is not a valid call argument */ \ template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \ { return _Tpvec((cast)a.val); } @@ -1594,6 +1596,19 @@ template inline Tvec v_broadcast_element(const Tvec& v) { return Tvec(vec_splat(v.val, i)); } +#include "intrin_math.hpp" +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return
v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END diff --git a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp index 3a8069ca911e..70198451c084 100644 --- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp @@ -8,9 +8,18 @@ #include #include #include -#include #include "opencv2/core/saturate.hpp" + +// Emscripten v2.0.13 (latest officially supported, as of 07/30/2024): +// __EMSCRIPTEN_major__, __EMSCRIPTEN_minor__ and __EMSCRIPTEN_tiny__ are defined via commandline in +// https://github.com/emscripten-core/emscripten/blob/1690a5802cd1241adc9714fb7fa2f633d38860dc/tools/shared.py#L506-L515 +// +// See https://github.com/opencv/opencv/pull/25909 +#ifndef __EMSCRIPTEN_major__ +#include +#endif + #define CV_SIMD128 1 #define CV_SIMD128_64F 0 // Now all implementation of f64 use fallback, so disable it. 
#define CV_SIMD128_FP16 0 @@ -392,6 +401,8 @@ inline v128_t v128_cvti32x4_i64x2_high(const v128_t& a) #define OPENCV_HAL_IMPL_WASM_INITVEC(_Tpvec, _Tp, suffix, zsuffix, _Tps) \ inline _Tpvec v_setzero_##suffix() { return _Tpvec(wasm_##zsuffix##_splat((_Tps)0)); } \ inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(wasm_##zsuffix##_splat((_Tps)v)); } \ +template <> inline _Tpvec v_setzero_() { return v_setzero_##suffix(); } \ +template <> inline _Tpvec v_setall_(_Tp v) { return v_setall_##suffix(v); } \ template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ { return _Tpvec(a.val); } @@ -2767,6 +2778,20 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& v) inline void v_cleanup() {} +#include "intrin_math.hpp" +inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f(x); } +inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f(x); } +inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f(x, s, c); } +inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f(x); } +inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f(x); } +inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f(x); } + +inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f(x); } +inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f(x); } +inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f(x, s, c); } +inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f(x); } +inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f(x); } + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! 
@endcond diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index c6a4d9c7286e..a9a90e998964 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -568,6 +568,22 @@ typedef OutputArray OutputArrayOfArrays; typedef const _InputOutputArray& InputOutputArray; typedef InputOutputArray InputOutputArrayOfArrays; +/** @brief Returns an empty InputArray or OutputArray. + + This function is used to provide an "empty" or "null" array when certain functions + take optional input or output arrays that you don't want to provide. + + Many OpenCV functions accept optional arguments as `cv::InputArray` or `cv::OutputArray`. + When you don't want to pass any data for these optional parameters, you can use `cv::noArray()` + to indicate that you are omitting them. + + @return An empty `cv::InputArray` or `cv::OutputArray` that can be used as a placeholder. + + @note This is often used when a function has optional arrays, and you do not want to + provide a specific input or output array. 
+ + @see cv::InputArray, cv::OutputArray + */ CV_EXPORTS InputOutputArray noArray(); /////////////////////////////////// MatAllocator ////////////////////////////////////// diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index 221f2fee1df3..649ebd265726 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -155,10 +155,10 @@ inline Size _InputArray::getSz() const { return sz; } inline _InputArray::_InputArray() { init(0 + NONE, 0); } inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); } -inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); } -inline _InputArray::_InputArray(const std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); } -inline _InputArray::_InputArray(const UMat& m) { init(UMAT+ACCESS_READ, &m); } -inline _InputArray::_InputArray(const std::vector& vec) { init(STD_VECTOR_UMAT+ACCESS_READ, &vec); } +inline _InputArray::_InputArray(const Mat& m) { init(+MAT+ACCESS_READ, &m); } +inline _InputArray::_InputArray(const std::vector& vec) { init(+STD_VECTOR_MAT+ACCESS_READ, &vec); } +inline _InputArray::_InputArray(const UMat& m) { init(+UMAT+ACCESS_READ, &m); } +inline _InputArray::_InputArray(const std::vector& vec) { init(+STD_VECTOR_UMAT+ACCESS_READ, &vec); } template inline _InputArray::_InputArray(const std::vector<_Tp>& vec) @@ -170,7 +170,7 @@ _InputArray::_InputArray(const std::array<_Tp, _Nm>& arr) template inline _InputArray::_InputArray(const std::array& arr) -{ init(STD_ARRAY_MAT + ACCESS_READ, arr.data(), Size(1, _Nm)); } +{ init(+STD_ARRAY_MAT + ACCESS_READ, arr.data(), Size(1, _Nm)); } inline _InputArray::_InputArray(const std::vector& vec) @@ -200,16 +200,16 @@ inline _InputArray::_InputArray(const double& val) { init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); } inline _InputArray::_InputArray(const cuda::GpuMat& d_mat) -{ init(CUDA_GPU_MAT 
+ ACCESS_READ, &d_mat); } +{ init(+CUDA_GPU_MAT + ACCESS_READ, &d_mat); } inline _InputArray::_InputArray(const std::vector& d_mat) -{ init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_READ, &d_mat);} +{ init(+STD_VECTOR_CUDA_GPU_MAT + ACCESS_READ, &d_mat);} inline _InputArray::_InputArray(const ogl::Buffer& buf) -{ init(OPENGL_BUFFER + ACCESS_READ, &buf); } +{ init(+OPENGL_BUFFER + ACCESS_READ, &buf); } inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem) -{ init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); } +{ init(+CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); } template inline _InputArray _InputArray::rawIn(const std::vector<_Tp>& vec) @@ -253,12 +253,12 @@ inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray:: //////////////////////////////////////////////////////////////////////////////////////// -inline _OutputArray::_OutputArray() { init(NONE + ACCESS_WRITE, 0); } +inline _OutputArray::_OutputArray() { init(+NONE + ACCESS_WRITE, 0); } inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags + ACCESS_WRITE, _obj); } -inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); } -inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_MAT + ACCESS_WRITE, &vec); } -inline _OutputArray::_OutputArray(UMat& m) { init(UMAT + ACCESS_WRITE, &m); } -inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_UMAT + ACCESS_WRITE, &vec); } +inline _OutputArray::_OutputArray(Mat& m) { init(+MAT+ACCESS_WRITE, &m); } +inline _OutputArray::_OutputArray(std::vector& vec) { init(+STD_VECTOR_MAT + ACCESS_WRITE, &vec); } +inline _OutputArray::_OutputArray(UMat& m) { init(+UMAT + ACCESS_WRITE, &m); } +inline _OutputArray::_OutputArray(std::vector& vec) { init(+STD_VECTOR_UMAT + ACCESS_WRITE, &vec); } template inline _OutputArray::_OutputArray(std::vector<_Tp>& vec) @@ -270,7 +270,7 @@ _OutputArray::_OutputArray(std::array<_Tp, _Nm>& arr) template inline _OutputArray::_OutputArray(std::array& arr) -{ 
init(STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } +{ init(+STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } template inline _OutputArray::_OutputArray(std::vector >& vec) @@ -325,16 +325,16 @@ _OutputArray::_OutputArray(const _Tp* vec, int n) { init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); } inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat) -{ init(CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); } +{ init(+CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); } inline _OutputArray::_OutputArray(std::vector& d_mat) -{ init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_WRITE, &d_mat);} +{ init(+STD_VECTOR_CUDA_GPU_MAT + ACCESS_WRITE, &d_mat);} inline _OutputArray::_OutputArray(ogl::Buffer& buf) -{ init(OPENGL_BUFFER + ACCESS_WRITE, &buf); } +{ init(+OPENGL_BUFFER + ACCESS_WRITE, &buf); } inline _OutputArray::_OutputArray(cuda::HostMem& cuda_mem) -{ init(CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); } +{ init(+CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); } inline _OutputArray::_OutputArray(const Mat& m) { init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_WRITE, &m); } @@ -403,10 +403,10 @@ std::vector >& _OutputArray::getVecVecRef() const inline _InputOutputArray::_InputOutputArray() { init(0+ACCESS_RW, 0); } inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags+ACCESS_RW, _obj); } -inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); } -inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); } -inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); } -inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(STD_VECTOR_UMAT+ACCESS_RW, &vec); } +inline _InputOutputArray::_InputOutputArray(Mat& m) { init(+MAT+ACCESS_RW, &m); } +inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(+STD_VECTOR_MAT+ACCESS_RW, &vec); } +inline _InputOutputArray::_InputOutputArray(UMat& m) { init(+UMAT+ACCESS_RW, 
&m); } +inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(+STD_VECTOR_UMAT+ACCESS_RW, &vec); } template inline _InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec) @@ -418,7 +418,7 @@ _InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr) template inline _InputOutputArray::_InputOutputArray(std::array& arr) -{ init(STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); } +{ init(+STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); } template inline _InputOutputArray::_InputOutputArray(std::vector >& vec) @@ -473,13 +473,13 @@ _InputOutputArray::_InputOutputArray(const _Tp* vec, int n) { init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); } inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat) -{ init(CUDA_GPU_MAT + ACCESS_RW, &d_mat); } +{ init(+CUDA_GPU_MAT + ACCESS_RW, &d_mat); } inline _InputOutputArray::_InputOutputArray(ogl::Buffer& buf) -{ init(OPENGL_BUFFER + ACCESS_RW, &buf); } +{ init(+OPENGL_BUFFER + ACCESS_RW, &buf); } inline _InputOutputArray::_InputOutputArray(cuda::HostMem& cuda_mem) -{ init(CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); } +{ init(+CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); } inline _InputOutputArray::_InputOutputArray(const Mat& m) { init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_RW, &m); } @@ -622,7 +622,7 @@ Mat::Mat(const Vec<_Tp, n>& vec, bool copyData) template inline Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData) - : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0), + : flags(+MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) { if( !copyData ) diff --git a/modules/core/include/opencv2/core/operations.hpp b/modules/core/include/opencv2/core/operations.hpp index d345e24dc55d..1001d0460d2d 100644 --- a/modules/core/include/opencv2/core/operations.hpp +++ 
b/modules/core/include/opencv2/core/operations.hpp @@ -524,7 +524,7 @@ The generic function partition implements an \f$O(N^2)\f$ algorithm for splittin into one or more equivalency classes, as described in . The function returns the number of equivalency classes. -@param _vec Set of elements stored as a vector. +@param vec Set of elements stored as a vector. @param labels Output vector of labels. It contains as many elements as vec. Each label labels[i] is a 0-based cluster index of `vec[i]`. @param predicate Equivalence predicate (pointer to a boolean function of two arguments or an @@ -534,11 +534,11 @@ may or may not be in the same class. @ingroup core_cluster */ template int -partition( const std::vector<_Tp>& _vec, std::vector& labels, +partition( const std::vector<_Tp>& vec, std::vector& labels, _EqPredicate predicate=_EqPredicate()) { - int i, j, N = (int)_vec.size(); - const _Tp* vec = &_vec[0]; + int i, j, N = (int)vec.size(); + const _Tp* _vec = &vec[0]; const int PARENT=0; const int RANK=1; @@ -564,7 +564,7 @@ partition( const std::vector<_Tp>& _vec, std::vector& labels, for( j = 0; j < N; j++ ) { - if( i == j || !predicate(vec[i], vec[j])) + if( i == j || !predicate(_vec[i], _vec[j])) continue; int root2 = j; diff --git a/modules/core/include/opencv2/core/persistence.hpp b/modules/core/include/opencv2/core/persistence.hpp index 9c4f33fb1457..33f24d62e504 100644 --- a/modules/core/include/opencv2/core/persistence.hpp +++ b/modules/core/include/opencv2/core/persistence.hpp @@ -53,50 +53,6 @@ # error persistence.hpp header must be compiled as C++ #endif -//! @addtogroup core_c -//! @{ - -/** @brief "black box" representation of the file storage associated with a file on disk. - -Several functions that are described below take CvFileStorage\* as inputs and allow the user to -save or to load hierarchical collections that consist of scalar values, standard CXCore objects -(such as matrices, sequences, graphs), and user-defined objects. 
- -OpenCV can read and write data in XML (), YAML () or -JSON () formats. Below is an example of 3x3 floating-point identity matrix A, -stored in XML and YAML files -using CXCore functions: -XML: -@code{.xml} - - - - 3 - 3 -
f
- 1. 0. 0. 0. 1. 0. 0. 0. 1. -
-
-@endcode -YAML: -@code{.yaml} - %YAML:1.0 - A: !!opencv-matrix - rows: 3 - cols: 3 - dt: f - data: [ 1., 0., 0., 0., 1., 0., 0., 0., 1.] -@endcode -As it can be seen from the examples, XML uses nested tags to represent hierarchy, while YAML uses -indentation for that purpose (similar to the Python programming language). - -The same functions can read and write data in both formats; the particular format is determined by -the extension of the opened file, ".xml" for XML files, ".yml" or ".yaml" for YAML and ".json" for -JSON. - */ - -//! @} core_c - #include "opencv2/core/types.hpp" #include "opencv2/core/mat.hpp" @@ -283,13 +239,14 @@ element is a structure of 2 integers, followed by a single-precision floating-po equivalent notations of the above specification are `iif`, `2i1f` and so forth. Other examples: `u` means that the array consists of bytes, and `2d` means the array consists of pairs of doubles. -@see @ref samples/cpp/filestorage.cpp +@see @ref samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp */ //! @{ -/** @example samples/cpp/filestorage.cpp +/** @example samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp A complete example using the FileStorage interface +Check @ref tutorial_file_input_output_with_xml_yml "the corresponding tutorial" for more details */ ////////////////////////// XML & YAML I/O ////////////////////////// @@ -322,10 +279,10 @@ class CV_EXPORTS_W FileStorage }; enum State { - UNDEFINED = 0, - VALUE_EXPECTED = 1, - NAME_EXPECTED = 2, - INSIDE_MAP = 4 + UNDEFINED = 0, //!< Initial or uninitialized state. + VALUE_EXPECTED = 1, //!< Expecting a value in the current position. + NAME_EXPECTED = 2, //!< Expecting a key/name in the current position. + INSIDE_MAP = 4 //!< Indicates being inside a map (a set of key-value pairs). }; /** @brief The constructors. 
diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp index 36edd8ab31f9..39f2ddcdeb3d 100644 --- a/modules/core/include/opencv2/core/private.cuda.hpp +++ b/modules/core/include/opencv2/core/private.cuda.hpp @@ -134,6 +134,36 @@ namespace cv { namespace cuda template<> struct NPPTypeTraits { typedef Npp32f npp_type; }; template<> struct NPPTypeTraits { typedef Npp64f npp_type; }; +#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func) +// NppStreamContext is introduced in NPP version 10100 included in CUDA toolkit 10.1 (CUDA_VERSION == 10010) however not all of the NPP functions called internally by OpenCV +// - have an NppStreamContext argument (e.g. nppiHistogramEvenGetBufferSize_8u_C1R_Ctx in CUDA 12.3) and/or +// - have a corresponding function in the supplied library (e.g. nppiEvenLevelsHost_32s_Ctx is not present in nppist.lib or libnppist.so as of CUDA 12.6) +// Because support for these functions has gradually been introduced without being mentioned in the release notes this flag is set to a version of NPP (version 12205 included in CUDA toolkit 12.4) which is known to work. 
+#define USE_NPP_STREAM_CTX NPP_VERSION >= 12205 +#if USE_NPP_STREAM_CTX + class NppStreamHandler + { + public: + inline explicit NppStreamHandler(cudaStream_t newStream) + { + nppStreamContext = {}; + nppSafeCall(nppGetStreamContext(&nppStreamContext)); + nppStreamContext.hStream = newStream; + cudaSafeCall(cudaStreamGetFlags(nppStreamContext.hStream, &nppStreamContext.nStreamFlags)); + } + + inline explicit NppStreamHandler(Stream& newStream) : NppStreamHandler(StreamAccessor::getStream(newStream)) {} + + inline operator NppStreamContext() const { + return nppStreamContext; + } + + inline NppStreamContext get() { return nppStreamContext; } + + private: + NppStreamContext nppStreamContext; + }; +#else class NppStreamHandler { public: @@ -157,9 +187,9 @@ namespace cv { namespace cuda private: cudaStream_t oldStream; }; +#endif }} -#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func) #define cuSafeCall(expr) cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, CV_Func) #endif // HAVE_CUDA diff --git a/modules/core/include/opencv2/core/quaternion.hpp b/modules/core/include/opencv2/core/quaternion.hpp index 9e3e44332f60..e39065020c5f 100644 --- a/modules/core/include/opencv2/core/quaternion.hpp +++ b/modules/core/include/opencv2/core/quaternion.hpp @@ -31,7 +31,7 @@ #include namespace cv { -//! @addtogroup core +//! @addtogroup core_quaternion //! @{ //! Unit quaternion flag diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index 5eed688c830d..cfe98401dbd5 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -176,7 +176,38 @@ extern "C" typedef int (*ErrorCallback)( int status, const char* func_name, */ CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdata=0, void** prevUserdata=0); +/** @brief Generates a unique temporary file name. 
+ +This function generates a full, unique file path for a temporary file, +which can be used to create temporary files for various purposes. + +@param suffix (optional) The desired file extension or suffix for the temporary file (e.g., ".png", ".txt"). +If no suffix is provided (suffix = 0), the file will not have a specific extension. + +@return cv::String A full unique path for the temporary file. + +@note +- The function does not create the file, it only generates the name. +- The file name is unique for the system session. +- Works cross-platform (Windows, Linux, macOS). + */ CV_EXPORTS String tempfile( const char* suffix = 0); + +/** @brief Searches for files matching the specified pattern in a directory. + +This function searches for files that match a given pattern (e.g., `*.jpg`) +in the specified directory. The search can be limited to the directory itself +or be recursive, including subdirectories. + +@param pattern The file search pattern, which can include wildcards like `*` +(for matching multiple characters) or `?` (for matching a single character). + +@param result Output vector where the file paths matching the search +pattern will be stored. +@param recursive (optional) Boolean flag indicating whether to search +subdirectories recursively. If true, the search will include all subdirectories. +The default value is `false`. + */ CV_EXPORTS void glob(String pattern, std::vector& result, bool recursive = false); /** @brief OpenCV will try to set the number of threads for subsequent parallel regions. @@ -309,11 +340,12 @@ class CV_EXPORTS_W TickMeter //! stops counting ticks. CV_WRAP void stop() { - int64 time = cv::getTickCount(); + const int64 time = cv::getTickCount(); if (startTime == 0) return; ++counter; - sumTime += (time - startTime); + lastTime = time - startTime; + sumTime += lastTime; startTime = 0; } @@ -336,11 +368,35 @@ class CV_EXPORTS_W TickMeter } //! returns passed time in seconds. 
- CV_WRAP double getTimeSec() const + CV_WRAP double getTimeSec() const { return (double)getTimeTicks() / getTickFrequency(); } + //! returns counted ticks of the last iteration. + CV_WRAP int64 getLastTimeTicks() const + { + return lastTime; + } + + //! returns passed time of the last iteration in microseconds. + CV_WRAP double getLastTimeMicro() const + { + return getLastTimeMilli()*1e3; + } + + //! returns passed time of the last iteration in milliseconds. + CV_WRAP double getLastTimeMilli() const + { + return getLastTimeSec()*1e3; + } + + //! returns passed time of the last iteration in seconds. + CV_WRAP double getLastTimeSec() const + { + return (double)getLastTimeTicks() / getTickFrequency(); + } + //! returns internal counter value. CV_WRAP int64 getCounter() const { @@ -373,15 +429,17 @@ class CV_EXPORTS_W TickMeter //! resets internal values. CV_WRAP void reset() { - startTime = 0; - sumTime = 0; counter = 0; + sumTime = 0; + startTime = 0; + lastTime = 0; } private: int64 counter; int64 sumTime; int64 startTime; + int64 lastTime; }; /** @brief output operator diff --git a/modules/core/src/opengl.cpp b/modules/core/src/opengl.cpp index 45aa121a4aaf..83be34f1477c 100644 --- a/modules/core/src/opengl.cpp +++ b/modules/core/src/opengl.cpp @@ -42,6 +42,12 @@ #include "precomp.hpp" +#if defined (__APPLE__) || defined(MACOSX) + #define GL_SHARING_EXTENSION "cl_APPLE_gl_sharing" +#else + #define GL_SHARING_EXTENSION "cl_khr_gl_sharing" +#endif + #ifdef HAVE_OPENGL # include "gl_core_3_1.hpp" # ifdef HAVE_CUDA @@ -1635,94 +1641,148 @@ Context& initializeContextFromGL() #elif !defined(HAVE_OPENCL_OPENGL_SHARING) NO_OPENCL_SHARING_ERROR; #else - cl_uint numPlatforms; - cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); + cl_uint platformsCnt = 0; + cl_uint devCnt = 0; + cl_device_id* devices = nullptr; + cl_uint devUsed = 0; + cl_context context = nullptr; + + cl_int status = clGetPlatformIDs(0, NULL, &platformsCnt); if (status != CL_SUCCESS) 
CV_Error_(cv::Error::OpenCLInitError, ("OpenCL: Can't get number of platforms: %d", status)); - if (numPlatforms == 0) + if (platformsCnt == 0) CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms"); - std::vector platforms(numPlatforms); - status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL); + std::vector platforms(platformsCnt); + status = clGetPlatformIDs(platformsCnt, &platforms[0], NULL); if (status != CL_SUCCESS) - CV_Error_(cv::Error::OpenCLInitError, ("OpenCL: Can't get number of platforms: %d", status)); + CV_Error_(cv::Error::OpenCLInitError, ("OpenCL: Can't get platforms: %d", status)); + // TODO Filter platforms by name from OPENCV_OPENCL_DEVICE + bool sharingSupported = false; - int found = -1; - cl_device_id device = NULL; - cl_context context = NULL; + for (unsigned int i = 0; (!sharingSupported && (i < platformsCnt)); ++i) { + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &devCnt); + if (status != CL_SUCCESS) + CV_Error_(cv::Error::OpenCLInitError, ("OpenCL: No devices available: %d", status)); - for (int i = 0; i < (int)numPlatforms; i++) - { - // query platform extension: presence of "cl_khr_gl_sharing" extension is required - { - AutoBuffer extensionStr; + try { + devices = new cl_device_id[devCnt]; - size_t extensionSize; - status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &extensionSize); - if (status == CL_SUCCESS) - { - extensionStr.allocate(extensionSize+1); - status = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, extensionSize, (char*)extensionStr.data(), NULL); - } + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, devCnt, devices, NULL); if (status != CL_SUCCESS) - CV_Error_(cv::Error::OpenCLInitError, ("OpenCL: Can't get platform extension string: %d", status)); - - if (!strstr((const char*)extensionStr.data(), "cl_khr_gl_sharing")) - continue; - } - - clGetGLContextInfoKHR_fn clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn) - 
clGetExtensionFunctionAddressForPlatform(platforms[i], "clGetGLContextInfoKHR"); - if (!clGetGLContextInfoKHR) + CV_Error_(cv::Error::OpenCLInitError, ("OpenCL: Can't get platform devices: %d", status)); + + for (unsigned int j = 0; (!sharingSupported && (j < devCnt)); ++j) { + size_t extensionSize; + status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, 0, NULL, &extensionSize ); + if (status != CL_SUCCESS) + CV_Error_(cv::Error::OpenCLInitError, ("OpenCL: No devices available: %d", status)); + + if(extensionSize > 0) + { + char* extensions = nullptr; + + try { + extensions = new char[extensionSize]; + + status = clGetDeviceInfo(devices[j], CL_DEVICE_EXTENSIONS, extensionSize, extensions, &extensionSize); + if (status != CL_SUCCESS) + continue; + } catch(...) { + CV_Error(cv::Error::OpenCLInitError, "OpenCL: Exception thrown during device extensions gathering"); + } + + std::string devString; + + if(extensions != nullptr) { + devString = extensions; + delete[] extensions; + } + else { + CV_Error(cv::Error::OpenCLInitError, "OpenCL: Unexpected error during device extensions gathering"); + } + + size_t oldPos = 0; + size_t spacePos = devString.find(' ', oldPos); // extensions string is space delimited + while (spacePos != devString.npos) { + if (strcmp(GL_SHARING_EXTENSION, + devString.substr(oldPos, spacePos - oldPos).c_str()) + == 0) { + // Device supports context sharing with OpenGL + devUsed = i; + sharingSupported = true; + break; + } + do { + oldPos = spacePos + 1; + spacePos = devString.find(' ', oldPos); + } while (spacePos == oldPos); + } + } + } + } catch(...) 
{ + CV_Error(cv::Error::OpenCLInitError, "OpenCL: Exception thrown during device information gathering"); + if(devices != nullptr) { + delete[] devices; + } continue; + } - cl_context_properties properties[] = - { -#if defined(_WIN32) - CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i], - CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), - CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), + if (sharingSupported) { + // Define OS-specific context properties and create the OpenCL context +#if defined (__APPLE__) + CGLContextObj cglContext = CGLGetCurrentContext(); + CGLShareGroupObj cglShareGroup = CGLGetShareGroup(cglContext); + cl_context_properties props[] = + { + CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)cglShareGroup, + 0 + }; + context = clCreateContext(props, 0,0, NULL, NULL, &ciErrNum); #elif defined(__ANDROID__) - CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i], - CL_GL_CONTEXT_KHR, (cl_context_properties)eglGetCurrentContext(), - CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), + cl_context_properties props[] = + { + CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(), + CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(), + CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i], + 0 + }; + context = clCreateContext(props, 1, &devices[devUsed], NULL, NULL, &status); +#elif defined(_WIN32) + cl_context_properties props[] = + { + CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), + CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), + CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i], + 0 + }; + context = clCreateContext(props, 1, &devices[devUsed], NULL, NULL, &status); #elif defined(__linux__) - CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i], - CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(), - CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(), + cl_context_properties props[] 
= + { + CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(), + CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(), + CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i], + 0 + }; + context = clCreateContext(props, 1, &devices[devUsed], NULL, NULL, &status); #endif - 0 - }; - - // query device - device = NULL; - status = clGetGLContextInfoKHR(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), (void*)&device, NULL); - if (status != CL_SUCCESS) - continue; + } - // create context - context = clCreateContext(properties, 1, &device, NULL, NULL, &status); if (status != CL_SUCCESS) - { - clReleaseDevice(device); - } + CV_Error_(cv::Error::OpenCLInitError, ("OpenCL: Can't create context for OpenGL interop: %d", status)); else - { - found = i; break; - } } - if (found < 0) - CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for OpenGL interop"); - cl_platform_id platform = platforms[found]; + cl_platform_id platform = platforms[devUsed]; std::string platformName = PlatformInfo(&platform).name(); - OpenCLExecutionContext clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device); - clReleaseDevice(device); + OpenCLExecutionContext clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, devices[devUsed]); + clReleaseDevice(devices[devUsed]); clReleaseContext(context); clExecCtx.bind(); return const_cast(clExecCtx.getContext()); diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 92b9dff2b1da..d42652a33575 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -307,14 +307,20 @@ template struct TheTest #else #error "Configuration error" #endif + R setall_res3 = v_setall_((LaneType)7); + R setall_resz = v_setzero_(); #if CV_SIMD_WIDTH > 0 Data setall_res1_; v_store(setall_res1_.d, setall_res1); Data setall_res2_; v_store(setall_res2_.d, setall_res2); + Data 
setall_res3_; v_store(setall_res3_.d, setall_res3); + Data setall_resz_; v_store(setall_resz_.d, setall_resz); for (int i = 0; i < VTraits::vlanes(); ++i) { SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((LaneType)5, setall_res1_[i]); EXPECT_EQ((LaneType)6, setall_res2_[i]); + EXPECT_EQ((LaneType)7, setall_res3_[i]); + EXPECT_EQ((LaneType)0, setall_resz_[i]); } #endif @@ -2084,6 +2090,100 @@ template struct TheTest return *this; } + + void __test_sincos(LaneType diff_thr, LaneType flt_min) { + int n = VTraits::vlanes(); + // Test each value for a period, from -PI to PI + const LaneType step = (LaneType) 0.01; + for (LaneType i = (LaneType)0; i <= (LaneType)M_PI;) { + Data dataPosPI, dataNegPI; + for (int j = 0; j < n; ++j) { + dataPosPI[j] = i; + dataNegPI[j] = LaneType(-1*i); + i = LaneType(i + step); + } + R posPI = dataPosPI, negPI = dataNegPI, sinPos, cosPos, sinNeg, cosNeg; + v_sincos(posPI, sinPos, cosPos); + v_sincos(negPI, sinNeg, cosNeg); + Data resSinPos = sinPos, resCosPos = cosPos, resSinNeg = sinNeg, resCosNeg = cosNeg; + for (int j = 0; j < n; ++j) { + LaneType std_sin_pos = (LaneType) std::sin(dataPosPI[j]); + LaneType std_cos_pos = (LaneType) std::cos(dataPosPI[j]); + LaneType std_sin_neg = (LaneType) std::sin(dataNegPI[j]); + LaneType std_cos_neg = (LaneType) std::cos(dataNegPI[j]); + SCOPED_TRACE(cv::format("Period test value: %lf and %lf", (double) dataPosPI[j], (double) dataNegPI[j])); + EXPECT_LT(std::abs(resSinPos[j] - std_sin_pos), diff_thr * (std::abs(std_sin_pos) + flt_min * 100)); + EXPECT_LT(std::abs(resCosPos[j] - std_cos_pos), diff_thr * (std::abs(std_cos_pos) + flt_min * 100)); + EXPECT_LT(std::abs(resSinNeg[j] - std_sin_neg), diff_thr * (std::abs(std_sin_neg) + flt_min * 100)); + EXPECT_LT(std::abs(resCosNeg[j] - std_cos_neg), diff_thr * (std::abs(std_cos_neg) + flt_min * 100)); + } + } + + // Test special values + std::vector specialValues = {(LaneType) 0, (LaneType) M_PI, (LaneType) (M_PI / 2), (LaneType) INFINITY, (LaneType) 
-INFINITY, (LaneType) NAN}; + const int testRandNum = 10000; + const double specialValueProbability = 0.1; // 10% chance to insert a special value + cv::RNG_MT19937 rng; + + for (int i = 0; i < testRandNum; i++) { + Data dataRand; + for (int j = 0; j < n; ++j) { + if (rng.uniform(0.f, 1.f) <= specialValueProbability) { + // Insert a special value + int specialValueIndex = rng.uniform(0, (int) specialValues.size()); + dataRand[j] = specialValues[specialValueIndex]; + } else { + // Generate uniform random data in [-1000, 1000] + dataRand[j] = (LaneType) rng.uniform(-1000, 1000); + } + } + + // Compare with std::sin and std::cos + R x = dataRand, s, c; + v_sincos(x, s, c); + Data resSin = s, resCos = c; + for (int j = 0; j < n; ++j) { + SCOPED_TRACE(cv::format("Random test value: %lf", (double) dataRand[j])); + LaneType std_sin = (LaneType) std::sin(dataRand[j]); + LaneType std_cos = (LaneType) std::cos(dataRand[j]); + // input NaN, +INF, -INF -> output NaN + if (std::isnan(dataRand[j]) || std::isinf(dataRand[j])) { + EXPECT_TRUE(std::isnan(resSin[j])); + EXPECT_TRUE(std::isnan(resCos[j])); + } else if(dataRand[j] == 0) { + // sin(0) -> 0, cos(0) -> 1 + EXPECT_EQ(resSin[j], 0); + EXPECT_EQ(resCos[j], 1); + } else { + EXPECT_LT(std::abs(resSin[j] - std_sin), diff_thr * (std::abs(std_sin) + flt_min * 100)); + EXPECT_LT(std::abs(resCos[j] - std_cos), diff_thr * (std::abs(std_cos) + flt_min * 100)); + } + } + } + } + + // BUG: https://github.com/opencv/opencv/issues/26362 + TheTest &test_sincos_fp16() { +#if 0 // CV_SIMD_FP16 + hfloat flt16_min; + uint16_t flt16_min_hex = 0x0400; + std::memcpy(&flt16_min, &flt16_min_hex, sizeof(hfloat)); + __test_sincos((hfloat) 1e-3, flt16_min); +#endif + return *this; + } + + TheTest &test_sincos_fp32() { + __test_sincos(1e-6f, FLT_MIN); + return *this; + } + + TheTest &test_sincos_fp64() { +#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F + __test_sincos(1e-11, DBL_MIN); +#endif + return *this; + } }; #define DUMP_ENTRY(type) printf("SIMD%d: 
%s\n", 8*VTraits::vlanes(), CV__TRACE_FUNCTION); @@ -2399,6 +2499,7 @@ void test_hal_intrin_float32() .test_pack_triplets() .test_exp_fp32() .test_log_fp32() + .test_sincos_fp32() .test_erf_fp32() #if CV_SIMD_WIDTH == 32 .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>() @@ -2433,6 +2534,7 @@ void test_hal_intrin_float64() .test_extract_highest() .test_exp_fp64() .test_log_fp64() + .test_sincos_fp64() //.test_broadcast_element<0>().test_broadcast_element<1>() #if CV_SIMD_WIDTH == 32 .test_extract<2>().test_extract<3>() @@ -2476,6 +2578,7 @@ void test_hal_intrin_float16() .test_extract_n<0>().test_extract_n<1>() .test_exp_fp16() .test_log_fp16() + .test_sincos_fp16() #else std::cout << "SKIP: CV_SIMD_FP16 || CV_SIMD_SCALABLE_FP16 is not available" << std::endl; #endif diff --git a/modules/gapi/cmake/DownloadADE.cmake b/modules/gapi/cmake/DownloadADE.cmake index 871f99b419c1..8ddaadb51190 100644 --- a/modules/gapi/cmake/DownloadADE.cmake +++ b/modules/gapi/cmake/DownloadADE.cmake @@ -1,7 +1,7 @@ set(ade_src_dir "${OpenCV_BINARY_DIR}/3rdparty/ade") -set(ade_filename "v0.1.2d.zip") -set(ade_subdir "ade-0.1.2d") -set(ade_md5 "dbb095a8bf3008e91edbbf45d8d34885") +set(ade_filename "v0.1.2e.zip") +set(ade_subdir "ade-0.1.2e") +set(ade_md5 "962ce79e0b95591f226431f7b5f152cd") ocv_download(FILENAME ${ade_filename} HASH ${ade_md5} URL diff --git a/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp b/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp index 0b6dab6a9d41..f7bb2599242d 100644 --- a/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp @@ -54,6 +54,9 @@ class GAPI_EXPORTS_W_SIMPLE PyParams { GAPI_WRAP PyParams& cfgSessionOptions(const std::map& options); + GAPI_WRAP + PyParams& cfgOptLevel(const int opt_level); + GBackend backend() const; std::string tag() const; cv::util::any params() const; diff --git a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp 
b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp index fd0f69a768e4..eb6316b44681 100644 --- a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp @@ -15,6 +15,7 @@ #include #include +#include #include // GAPI_EXPORTS #include // GKernelPackage @@ -354,6 +355,7 @@ struct ParamDesc { std::map session_options; std::vector execution_providers; bool disable_mem_pattern; + cv::util::optional opt_level; }; } // namespace detail @@ -648,6 +650,17 @@ template class Params { return *this; } + /** @brief Configures optimization level for ONNX Runtime. + + @param opt_level [optimization level]: Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all). + Please see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels. + @return the reference on modified object. + */ + Params& cfgOptLevel(const int opt_level) { + desc.opt_level = cv::util::make_optional(opt_level); + return *this; + } + // BEGIN(G-API's network parametrization API) GBackend backend() const { return cv::gapi::onnx::backend(); } std::string tag() const { return Net::tag(); } @@ -675,7 +688,7 @@ class Params { @param model_path path to model file (.onnx file). */ Params(const std::string& tag, const std::string& model_path) - : desc{model_path, 0u, 0u, {}, {}, {}, {}, {}, {}, {}, {}, {}, true, {}, {}, {}, {}, false}, m_tag(tag) {} + : desc{ model_path, 0u, 0u, {}, {}, {}, {}, {}, {}, {}, {}, {}, true, {}, {}, {}, {}, false, {} }, m_tag(tag) {} /** @see onnx::Params::cfgMeanStdDev. */ void cfgMeanStdDev(const std::string &layer, @@ -724,6 +737,11 @@ class Params { desc.session_options.insert(options.begin(), options.end()); } +/** @see onnx::Params::cfgOptLevel. 
*/ + void cfgOptLevel(const int opt_level) { + desc.opt_level = cv::util::make_optional(opt_level); + } + // BEGIN(G-API's network parametrization API) GBackend backend() const { return cv::gapi::onnx::backend(); } std::string tag() const { return m_tag; } diff --git a/modules/gapi/src/backends/onnx/bindings_onnx.cpp b/modules/gapi/src/backends/onnx/bindings_onnx.cpp index 294ad8a3cc21..5a2e3d2f6ded 100644 --- a/modules/gapi/src/backends/onnx/bindings_onnx.cpp +++ b/modules/gapi/src/backends/onnx/bindings_onnx.cpp @@ -63,6 +63,12 @@ cv::gapi::onnx::PyParams::cfgSessionOptions(const std::mapcfgOptLevel(opt_level); + return *this; +} + cv::gapi::GBackend cv::gapi::onnx::PyParams::backend() const { return m_priv->backend(); } diff --git a/modules/gapi/src/backends/onnx/gonnxbackend.cpp b/modules/gapi/src/backends/onnx/gonnxbackend.cpp index 0d9a16a7bd7d..fc9b12b081f4 100644 --- a/modules/gapi/src/backends/onnx/gonnxbackend.cpp +++ b/modules/gapi/src/backends/onnx/gonnxbackend.cpp @@ -701,6 +701,26 @@ namespace cv { namespace gimpl { namespace onnx { +static GraphOptimizationLevel convertToGraphOptimizationLevel(const int opt_level) { + switch (opt_level) { + case ORT_DISABLE_ALL: + return ORT_DISABLE_ALL; + case ORT_ENABLE_BASIC: + return ORT_ENABLE_BASIC; + case ORT_ENABLE_EXTENDED: + return ORT_ENABLE_EXTENDED; + case ORT_ENABLE_ALL: + return ORT_ENABLE_ALL; + default: + if (opt_level > ORT_ENABLE_ALL) { // relax constraint + return ORT_ENABLE_ALL; + } + else { + cv::util::throw_error(std::invalid_argument("Invalid argument opt_level = " + std::to_string(opt_level))); + } + } +} + ONNXCompiled::ONNXCompiled(const gapi::onnx::detail::ParamDesc &pp) : params(pp) { // Validate input parameters before allocating any resources @@ -726,6 +746,10 @@ ONNXCompiled::ONNXCompiled(const gapi::onnx::detail::ParamDesc &pp) if (pp.disable_mem_pattern) { session_options.DisableMemPattern(); } + + if (pp.opt_level.has_value()) { + 
session_options.SetGraphOptimizationLevel(convertToGraphOptimizationLevel(pp.opt_level.value())); + } this_env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, ""); #ifndef _WIN32 this_session = Ort::Session(this_env, params.model_path.data(), session_options); diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index 9063aafe2c0f..6e317f585b7c 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -405,7 +405,7 @@ The function imencode compresses the image and stores it in the memory buffer th result. See cv::imwrite for the list of supported formats and flags description. @param ext File extension that defines the output format. Must include a leading period. -@param img Image to be written. +@param img Image to be compressed. @param buf Output buffer resized to fit the compressed image. @param params Format-specific parameters. See cv::imwrite and cv::ImwriteFlags. */ @@ -413,6 +413,20 @@ CV_EXPORTS_W bool imencode( const String& ext, InputArray img, CV_OUT std::vector& buf, const std::vector& params = std::vector()); +/** @brief Encodes an array of images into a memory buffer. + +The function is analogous to cv::imencode for in-memory multi-page image compression. +See cv::imwrite for the list of supported formats and flags description. + +@param ext File extension that defines the output format. Must include a leading period. +@param imgs Vector of images to be written. +@param buf Output buffer resized to fit the compressed data. +@param params Format-specific parameters. See cv::imwrite and cv::ImwriteFlags. +*/ +CV_EXPORTS_W bool imencodemulti( const String& ext, InputArrayOfArrays imgs, + CV_OUT std::vector& buf, + const std::vector& params = std::vector()); + /** @brief Checks if the specified image file can be decoded by OpenCV. The function haveImageReader checks if OpenCV is capable of reading the specified file.
diff --git a/modules/imgcodecs/src/grfmt_avif.cpp b/modules/imgcodecs/src/grfmt_avif.cpp index 98ddb7336268..4752c6ece41b 100644 --- a/modules/imgcodecs/src/grfmt_avif.cpp +++ b/modules/imgcodecs/src/grfmt_avif.cpp @@ -142,8 +142,7 @@ static constexpr size_t kAvifSignatureSize = 500; AvifDecoder::AvifDecoder() { m_buf_supported = true; channels_ = 0; - decoder_ = avifDecoderCreate(); - decoder_->strictFlags = AVIF_STRICT_DISABLED; + decoder_ = nullptr; } AvifDecoder::~AvifDecoder() { @@ -181,6 +180,11 @@ bool AvifDecoder::checkSignature(const String &signature) const { ImageDecoder AvifDecoder::newDecoder() const { return makePtr(); } bool AvifDecoder::readHeader() { + if (decoder_) + return true; + + decoder_ = avifDecoderCreate(); + decoder_->strictFlags = AVIF_STRICT_DISABLED; if (!m_buf.empty()) { CV_Assert(m_buf.type() == CV_8UC1); CV_Assert(m_buf.rows == 1); diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index e2184663aafd..ccc6579a012d 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -171,7 +171,7 @@ class TiffDecoderBufHelper { n = size - pos; } - memcpy(buffer, buf.ptr() + pos, n); + std::memcpy(buffer, buf.ptr() + pos, n); helper->m_buf_pos += n; return n; } @@ -848,9 +848,9 @@ bool TiffDecoder::readData( Mat& img ) switch ( convert_flag ) { case MAKE_FLAG( 1, 1 ): // GRAY to GRAY - memcpy( (void*) img_line_buffer, - (void*) bstart, - tile_width * sizeof(uchar) ); + std::memcpy( (void*) img_line_buffer, + (void*) bstart, + tile_width * sizeof(uchar) ); break; case MAKE_FLAG( 1, 3 ): // GRAY to BGR @@ -867,9 +867,9 @@ bool TiffDecoder::readData( Mat& img ) case MAKE_FLAG( 3, 3 ): // RGB to BGR if (m_use_rgb) - memcpy( (void*) img_line_buffer, - (void*) bstart, - tile_width * sizeof(uchar) ); + std::memcpy( (void*) img_line_buffer, + (void*) bstart, + tile_width * sizeof(uchar) ); else icvCvt_BGR2RGB_8u_C3R( bstart, 0, img_line_buffer, 0, @@ -979,7 +979,7 @@ bool 
TiffDecoder::readData( Mat& img ) { CV_CheckEQ(wanted_channels, 3, ""); if (m_use_rgb) - memcpy(buffer16, img.ptr(img_y + i, x), tile_width * sizeof(ushort)); + std::memcpy(buffer16, img.ptr(img_y + i, x), tile_width * sizeof(ushort)); else icvCvt_RGB2BGR_16u_C3R(buffer16, 0, img.ptr(img_y + i, x), 0, @@ -1011,9 +1011,9 @@ bool TiffDecoder::readData( Mat& img ) CV_CheckEQ(wanted_channels, 1, ""); if( ncn == 1 ) { - memcpy(img.ptr(img_y + i, x), - buffer16, - tile_width*sizeof(ushort)); + std::memcpy(img.ptr(img_y + i, x), + buffer16, + tile_width*sizeof(ushort)); } else { @@ -1118,10 +1118,16 @@ class TiffEncoderBufHelper /*map=*/0, /*unmap=*/0 ); } - static tmsize_t read( thandle_t /*handle*/, void* /*buffer*/, tmsize_t /*n*/ ) + static tmsize_t read( thandle_t handle, void* buffer, tmsize_t n ) { - // Not used for encoding. - return 0; + // Used for imencodemulti() to stores multi-images. + TiffEncoderBufHelper *helper = reinterpret_cast(handle); + size_t begin = (size_t)helper->m_buf_pos; + size_t end = begin + n; + CV_CheckGT( helper->m_buf->size(), end , "do not be over-run buffer"); + std::memcpy(buffer, &(*helper->m_buf)[begin], n); + helper->m_buf_pos = end; + return n; } static tmsize_t write( thandle_t handle, void* buffer, tmsize_t n ) @@ -1133,7 +1139,7 @@ class TiffEncoderBufHelper { helper->m_buf->resize(end); } - memcpy(&(*helper->m_buf)[begin], buffer, n); + std::memcpy(&(*helper->m_buf)[begin], buffer, n); helper->m_buf_pos = end; return n; } @@ -1350,7 +1356,7 @@ bool TiffEncoder::writeLibTiff( const std::vector& img_vec, const std::vect { case 1: { - memcpy(buffer, img.ptr(y), scanlineSize); + std::memcpy(buffer, img.ptr(y), scanlineSize); break; } diff --git a/modules/imgcodecs/src/loadsave.cpp b/modules/imgcodecs/src/loadsave.cpp index 745b8633cc24..4f8d894aeeaa 100644 --- a/modules/imgcodecs/src/loadsave.cpp +++ b/modules/imgcodecs/src/loadsave.cpp @@ -723,6 +723,7 @@ static bool imwrite_( const String& filename, const std::vector& img_vec, 
Mat temp; if( !encoder->isFormatSupported(image.depth()) ) { + CV_LOG_ONCE_WARNING(NULL, "Unsupported depth image for selected encoder is fallbacked to CV_8U."); CV_Assert( encoder->isFormatSupported(CV_8U) ); image.convertTo( temp, CV_8U ); image = temp; @@ -769,10 +770,12 @@ static bool imwrite_( const String& filename, const std::vector& img_vec, catch (const cv::Exception& e) { CV_LOG_ERROR(NULL, "imwrite_('" << filename << "'): can't write data: " << e.what()); + code = false; } catch (...) { CV_LOG_ERROR(NULL, "imwrite_('" << filename << "'): can't write data: unknown exception"); + code = false; } return code; @@ -960,7 +963,7 @@ imdecodemulti_(const Mat& buf, int flags, std::vector& mats, int start, int ImageDecoder decoder = findDecoder(buf_row); if (!decoder) - return 0; + return false; // Try to decode image by RGB instead of BGR. if (flags & IMREAD_COLOR_RGB && flags != IMREAD_UNCHANGED) @@ -977,7 +980,7 @@ imdecodemulti_(const Mat& buf, int flags, std::vector& mats, int start, int filename = tempfile(); FILE* f = fopen(filename.c_str(), "wb"); if (!f) - return 0; + return false; size_t bufSize = buf_row.total() * buf.elemSize(); if (fwrite(buf_row.ptr(), 1, bufSize, f) != bufSize) { @@ -1103,49 +1106,80 @@ bool imdecodemulti(InputArray _buf, int flags, CV_OUT std::vector& mats, co } } -bool imencode( const String& ext, InputArray _image, +bool imencode( const String& ext, InputArray _img, std::vector& buf, const std::vector& params ) { CV_TRACE_FUNCTION(); - Mat image = _image.getMat(); - CV_Assert(!image.empty()); - - int channels = image.channels(); - CV_Assert( channels == 1 || channels == 3 || channels == 4 ); - ImageEncoder encoder = findEncoder( ext ); if( !encoder ) CV_Error( Error::StsError, "could not find encoder for the specified extension" ); - if( !encoder->isFormatSupported(image.depth()) ) + std::vector img_vec; + CV_Assert(!_img.empty()); + if (_img.isMatVector() || _img.isUMatVector()) + _img.getMatVector(img_vec); + else + 
img_vec.push_back(_img.getMat()); + + CV_Assert(!img_vec.empty()); + const bool isMultiImg = img_vec.size() > 1; + + std::vector write_vec; + for (size_t page = 0; page < img_vec.size(); page++) { - CV_Assert( encoder->isFormatSupported(CV_8U) ); + Mat image = img_vec[page]; + CV_Assert(!image.empty()); + + const int channels = image.channels(); + CV_Assert( channels == 1 || channels == 3 || channels == 4 ); + Mat temp; - image.convertTo(temp, CV_8U); - image = temp; + if( !encoder->isFormatSupported(image.depth()) ) + { + CV_LOG_ONCE_WARNING(NULL, "Unsupported depth image for selected encoder is fallbacked to CV_8U."); + CV_Assert( encoder->isFormatSupported(CV_8U) ); + image.convertTo( temp, CV_8U ); + image = temp; + } + + write_vec.push_back(image); } CV_Check(params.size(), (params.size() & 1) == 0, "Encoding 'params' must be key-value pairs"); CV_CheckLE(params.size(), (size_t)(CV_IO_MAX_IMAGE_PARAMS*2), ""); - bool code; - if( encoder->setDestination(buf) ) - { - code = encoder->write(image, params); - encoder->throwOnEror(); - CV_Assert( code ); - } - else + bool code = false; + String filename; + if( !encoder->setDestination(buf) ) { - String filename = tempfile(); + filename = tempfile(); code = encoder->setDestination(filename); CV_Assert( code ); + } + + try { + if (!isMultiImg) + code = encoder->write(write_vec[0], params); + else + code = encoder->writemulti(write_vec, params); - code = encoder->write(image, params); encoder->throwOnEror(); CV_Assert( code ); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "imencode(): can't encode data: " << e.what()); + code = false; + } + catch (...) 
+ { + CV_LOG_ERROR(NULL, "imencode(): can't encode data: unknown exception"); + code = false; + } + if( !filename.empty() && code ) + { FILE* f = fopen( filename.c_str(), "rb" ); CV_Assert(f != 0); fseek( f, 0, SEEK_END ); @@ -1159,6 +1193,12 @@ bool imencode( const String& ext, InputArray _image, return code; } +bool imencodemulti( const String& ext, InputArrayOfArrays imgs, + std::vector& buf, const std::vector& params) +{ + return imencode(ext, imgs, buf, params); +} + bool haveImageReader( const String& filename ) { ImageDecoder decoder = cv::findDecoder(filename); diff --git a/modules/imgcodecs/test/test_avif.cpp b/modules/imgcodecs/test/test_avif.cpp index 0d8a718756e6..d94e5d458c96 100644 --- a/modules/imgcodecs/test/test_avif.cpp +++ b/modules/imgcodecs/test/test_avif.cpp @@ -161,14 +161,14 @@ TEST_P(Imgcodecs_Avif_Image_EncodeDecodeSuite, imencode_imdecode) { // Encode. std::vector buf; - if (!IsBitDepthValid()) { - EXPECT_THROW(cv::imencode(".avif", img_original, buf, encoding_params_), - cv::Exception); - return; - } bool result = true; EXPECT_NO_THROW( result = cv::imencode(".avif", img_original, buf, encoding_params_);); + + if (!IsBitDepthValid()) { + EXPECT_FALSE(result); + return; + } EXPECT_TRUE(result); // Read back. 
@@ -337,11 +337,20 @@ TEST_P(Imgcodecs_Avif_Animation_WriteDecodeSuite, encode_decode) { std::vector buf(size); EXPECT_TRUE(file.read(reinterpret_cast(buf.data()), size)); file.close(); - EXPECT_EQ(0, remove(output.c_str())); std::vector anim; ASSERT_TRUE(cv::imdecodemulti(buf, imread_mode_, anim)); ValidateRead(anim_original, anim); + + if (imread_mode_ == IMREAD_UNCHANGED) { + ImageCollection collection(output, IMREAD_UNCHANGED); + anim.clear(); + for (auto&& i : collection) + anim.push_back(i); + ValidateRead(anim_original, anim); + } + + EXPECT_EQ(0, remove(output.c_str())); } INSTANTIATE_TEST_CASE_P( diff --git a/modules/imgcodecs/test/test_exr.impl.hpp b/modules/imgcodecs/test/test_exr.impl.hpp index c8cda11a6374..32984ff731c7 100644 --- a/modules/imgcodecs/test/test_exr.impl.hpp +++ b/modules/imgcodecs/test/test_exr.impl.hpp @@ -314,4 +314,27 @@ TEST(Imgcodecs_EXR, read_RGBA_unchanged) EXPECT_EQ(0, remove(filenameOutput.c_str())); } +// See https://github.com/opencv/opencv/pull/26211 +// ( related with https://github.com/opencv/opencv/issues/26207 ) +TEST(Imgcodecs_EXR, imencode_regression_26207_extra) +{ + // CV_8U is not supported depth for EXR Encoder. + const cv::Mat src(100, 100, CV_8UC1, cv::Scalar::all(0)); + std::vector buf; + bool ret = false; + EXPECT_ANY_THROW(ret = imencode(".exr", src, buf)); + EXPECT_FALSE(ret); +} +TEST(Imgcodecs_EXR, imwrite_regression_26207_extra) +{ + // CV_8U is not supported depth for EXR Encoder. 
+ const cv::Mat src(100, 100, CV_8UC1, cv::Scalar::all(0)); + const string filename = cv::tempfile(".exr"); + bool ret = false; + EXPECT_ANY_THROW(ret = imwrite(filename, src)); + EXPECT_FALSE(ret); + remove(filename.c_str()); +} + + }} // namespace diff --git a/modules/imgcodecs/test/test_read_write.cpp b/modules/imgcodecs/test/test_read_write.cpp index 255f819a9a94..7dfd02c67ca9 100644 --- a/modules/imgcodecs/test/test_read_write.cpp +++ b/modules/imgcodecs/test/test_read_write.cpp @@ -520,8 +520,78 @@ TEST(ImgCodecs, multipage_collection_two_iterator_operatorpp) EXPECT_TRUE(cv::norm(img1, img[i], NORM_INF) == 0); } } + +// See https://github.com/opencv/opencv/issues/26207 +TEST(Imgcodecs, imencodemulti_regression_26207) +{ + vector imgs; + const cv::Mat img(100, 100, CV_8UC1, cv::Scalar::all(0)); + imgs.push_back(img); + std::vector buf; + bool ret = false; + + // Encode single image + EXPECT_NO_THROW(ret = imencode(".tiff", img, buf)); + EXPECT_TRUE(ret); + EXPECT_NO_THROW(ret = imencode(".tiff", imgs, buf)); + EXPECT_TRUE(ret); + EXPECT_NO_THROW(ret = imencodemulti(".tiff", imgs, buf)); + EXPECT_TRUE(ret); + + // Encode multiple images + imgs.push_back(img.clone()); + EXPECT_NO_THROW(ret = imencode(".tiff", imgs, buf)); + EXPECT_TRUE(ret); + EXPECT_NO_THROW(ret = imencodemulti(".tiff", imgs, buf)); + EXPECT_TRUE(ret); + + // Count stored images from buffer. + // imcount() doesn't support buffer, so encoded buffer outputs to file temporary. + const size_t len = buf.size(); + const string filename = cv::tempfile(".tiff"); + FILE *f = fopen(filename.c_str(), "wb"); + EXPECT_NE(f, nullptr); + EXPECT_EQ(len, fwrite(&buf[0], 1, len, f)); + fclose(f); + + EXPECT_EQ(2, (int)imcount(filename)); + EXPECT_EQ(0, remove(filename.c_str())); +} #endif +// See https://github.com/opencv/opencv/pull/26211 +// ( related with https://github.com/opencv/opencv/issues/26207 ) +TEST(Imgcodecs, imencode_regression_26207_extra) +{ + // CV_32F is not supported depth for BMP Encoder. 
+ // Encoded buffer contains CV_8U image which is fallbacked. + const cv::Mat src(100, 100, CV_32FC1, cv::Scalar::all(0)); + std::vector buf; + bool ret = false; + EXPECT_NO_THROW(ret = imencode(".bmp", src, buf)); + EXPECT_TRUE(ret); + + cv::Mat dst; + EXPECT_NO_THROW(dst = imdecode(buf, IMREAD_GRAYSCALE)); + EXPECT_FALSE(dst.empty()); + EXPECT_EQ(CV_8UC1, dst.type()); +} +TEST(Imgcodecs, imwrite_regression_26207_extra) +{ + // CV_32F is not supported depth for BMP Encoder. + // Encoded buffer contains CV_8U image which is fallbacked. + const cv::Mat src(100, 100, CV_32FC1, cv::Scalar::all(0)); + const string filename = cv::tempfile(".bmp"); + bool ret = false; + EXPECT_NO_THROW(ret = imwrite(filename, src)); + EXPECT_TRUE(ret); + + cv::Mat dst; + EXPECT_NO_THROW(dst = imread(filename, IMREAD_GRAYSCALE)); + EXPECT_FALSE(dst.empty()); + EXPECT_EQ(CV_8UC1, dst.type()); + EXPECT_EQ(0, remove(filename.c_str())); +} TEST(Imgcodecs_Params, imwrite_regression_22752) { diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 16af2b969cad..7a80e1c2c452 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -48,7 +48,7 @@ /** @defgroup imgproc Image Processing -This module includes image-processing functions. +This module offers a comprehensive suite of image processing functions, enabling tasks such as those listed above. 
@{ @defgroup imgproc_filter Image Filtering diff --git a/modules/imgproc/src/drawing.cpp b/modules/imgproc/src/drawing.cpp index 8d20a57a8767..ea10da3bb682 100644 --- a/modules/imgproc/src/drawing.cpp +++ b/modules/imgproc/src/drawing.cpp @@ -64,7 +64,7 @@ CollectPolyEdges( Mat& img, const Point2l* v, int npts, int shift, Point offset=Point() ); static void -FillEdgeCollection( Mat& img, std::vector& edges, const void* color, int line_type); +FillEdgeCollection( Mat& img, std::vector& edges, const void* color ); static void PolyLine( Mat& img, const Point2l* v, int npts, bool closed, @@ -1051,7 +1051,7 @@ EllipseEx( Mat& img, Point2l center, Size2l axes, v.push_back(center); std::vector edges; CollectPolyEdges( img, &v[0], (int)v.size(), edges, color, line_type, XY_SHIFT ); - FillEdgeCollection( img, edges, color, line_type ); + FillEdgeCollection( img, edges, color ); } } @@ -1299,15 +1299,11 @@ CollectPolyEdges( Mat& img, const Point2l* v, int count, std::vector& if (t0.y != t1.y) { pt0c.y = t0.y; pt1c.y = t1.y; - pt0c.x = (int64)(t0.x) << XY_SHIFT; - pt1c.x = (int64)(t1.x) << XY_SHIFT; } } - else - { - pt0c.x += XY_ONE >> 1; - pt1c.x += XY_ONE >> 1; - } + + pt0c.x = (int64)(t0.x) << XY_SHIFT; + pt1c.x = (int64)(t1.x) << XY_SHIFT; } else { @@ -1349,7 +1345,7 @@ struct CmpEdges /**************** helper macros and functions for sequence/contour processing ***********/ static void -FillEdgeCollection( Mat& img, std::vector& edges, const void* color, int line_type) +FillEdgeCollection( Mat& img, std::vector& edges, const void* color ) { PolyEdge tmp; int i, y, total = (int)edges.size(); @@ -1358,12 +1354,7 @@ FillEdgeCollection( Mat& img, std::vector& edges, const void* color, i int y_max = INT_MIN, y_min = INT_MAX; int64 x_max = 0xFFFFFFFFFFFFFFFF, x_min = 0x7FFFFFFFFFFFFFFF; int pix_size = (int)img.elemSize(); - int delta; - - if (line_type < LINE_AA) - delta = 0; - else - delta = XY_ONE - 1; + int delta = XY_ONE - 1; if( total < 2 ) return; @@ -2051,7 +2042,7 @@ 
void fillPoly( InputOutputArray _img, const Point** pts, const int* npts, int nc } } - FillEdgeCollection(img, edges, buf, line_type); + FillEdgeCollection(img, edges, buf); } void polylines( InputOutputArray _img, const Point* const* pts, const int* npts, int ncontours, bool isClosed, diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index c1dd68d74988..8da46d26da46 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2508,7 +2508,7 @@ class ipp_warpAffineParallel: public ParallelLoopBody #endif -static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation, int borderType, InputArray _M, int flags ) +static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation, int borderType, const Scalar & borderValue, InputArray _M, int flags ) { #ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP(); @@ -2527,7 +2527,7 @@ static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation Mat dst = _dst.getMat(); ::ipp::IwiImage iwSrc = ippiGetImage(src); ::ipp::IwiImage iwDst = ippiGetImage(dst); - ::ipp::IwiBorderType ippBorder(ippiGetBorderType(borderType)); + ::ipp::IwiBorderType ippBorder(ippiGetBorderType(borderType), ippiGetValue(borderValue)); IwTransDirection iwTransDirection; if(!ippBorder) return false; @@ -2570,7 +2570,7 @@ static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation return true; #else CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(interpolation); - CV_UNUSED(borderType); CV_UNUSED(_M); CV_UNUSED(flags); + CV_UNUSED(borderType); CV_UNUSED(borderValue); CV_UNUSED(_M); CV_UNUSED(flags); return false; #endif } @@ -2795,7 +2795,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 ); M0.convertTo(matM, matM.type()); - CV_IPP_RUN_FAST(ipp_warpAffine(src, dst, interpolation, borderType, matM, flags)); + CV_IPP_RUN_FAST(ipp_warpAffine(src, 
dst, interpolation, borderType, borderValue, matM, flags)); if( !(flags & WARP_INVERSE_MAP) ) { diff --git a/modules/imgproc/test/test_drawing.cpp b/modules/imgproc/test/test_drawing.cpp index 12784e0bdbcd..b0b1b47080e1 100755 --- a/modules/imgproc/test/test_drawing.cpp +++ b/modules/imgproc/test/test_drawing.cpp @@ -857,6 +857,74 @@ TEST(Drawing, ttf_text) } #endif +TEST(Drawing, fillpoly_contours) +{ + const int imgSize = 50; + const int type = CV_8UC1; + const int shift = 0; + const Scalar cl = Scalar::all(255); + const cv::LineTypes lineType = LINE_8; + + // check that contours of fillPoly and polylines match + { + cv::Mat img(imgSize, imgSize, type); + img = 0; + std::vector> polygonPoints{ + { {44, 27}, {7, 37}, {7, 19}, {38, 19} } + }; + cv::fillPoly(img, polygonPoints, cl, lineType, shift); + cv::polylines(img, polygonPoints, true, 0, 1, lineType, shift); + + { + cv::Mat labelImage(img.size(), CV_32S); + int labels = cv::connectedComponents(img, labelImage, 4); + EXPECT_EQ(2, labels) << "filling went over the border"; + } + } + + // check that line generated with fillPoly and polylines match + { + cv::Mat img1(imgSize, imgSize, type), img2(imgSize, imgSize, type); + img1 = 0; + img2 = 0; + std::vector> polygonPoints{ + { {44, 27}, {38, 19} } + }; + cv::fillPoly(img1, polygonPoints, cl, lineType, shift); + cv::polylines(img2, polygonPoints, true, cl, 1, lineType, shift); + EXPECT_MAT_N_DIFF(img1, img2, 0); + } +} + +TEST(Drawing, fillpoly_match_lines) +{ + const int imgSize = 49; + const int type = CV_8UC1; + const int shift = 0; + const Scalar cl = Scalar::all(255); + const cv::LineTypes lineType = LINE_8; + cv::Mat img1(imgSize, imgSize, type), img2(imgSize, imgSize, type); + for (int x1 = 0; x1 < imgSize; x1 += imgSize / 2) + { + for (int y1 = 0; y1 < imgSize; y1 += imgSize / 2) + { + for (int x2 = 0; x2 < imgSize; x2++) + { + for (int y2 = 0; y2 < imgSize; y2++) + { + img1 = 0; + img2 = 0; + std::vector> polygonPoints{ + { {x1, y1}, {x2, y2} } + }; + 
cv::fillPoly(img1, polygonPoints, cl, lineType, shift); + cv::polylines(img2, polygonPoints, true, cl, 1, lineType, shift); + EXPECT_MAT_N_DIFF(img1, img2, 0); + } + } + } + } +} TEST(Drawing, fillpoly_fully) { diff --git a/modules/js/perf/package.json b/modules/js/perf/package.json index 04607ddffe56..01a2a8a4ccb6 100644 --- a/modules/js/perf/package.json +++ b/modules/js/perf/package.json @@ -1,19 +1,19 @@ { "name": "opencv_js_perf", - "description": "Perfermance tests for opencv js bindings", + "description": "Performance tests for opencv js bindings", "version": "1.0.0", - "dependencies" : { - "benchmark" : "latest" + "dependencies": { + "benchmark": "latest" }, "repository": { - "type": "git", - "url": "https://github.com/opencv/opencv.git" + "type": "git", + "url": "https://github.com/opencv/opencv.git" }, "keywords": [], "author": "", "license": "Apache 2.0 License", "bugs": { - "url": "https://github.com/opencv/opencv/issues" + "url": "https://github.com/opencv/opencv/issues" }, "homepage": "https://github.com/opencv/opencv" - } \ No newline at end of file +} diff --git a/modules/objdetect/include/opencv2/objdetect/face.hpp b/modules/objdetect/include/opencv2/objdetect/face.hpp index cf09c79d50dd..566204f7f926 100644 --- a/modules/objdetect/include/opencv2/objdetect/face.hpp +++ b/modules/objdetect/include/opencv2/objdetect/face.hpp @@ -155,6 +155,22 @@ class CV_EXPORTS_W FaceRecognizerSF * @param target_id the id of target device */ CV_WRAP static Ptr create(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config, int backend_id = 0, int target_id = 0); + + /** + * @brief Creates an instance of this class from a buffer containing the model weights and configuration. + * @param framework Name of the framework (ONNX, etc.) + * @param bufferModel A buffer containing the binary model weights. + * @param bufferConfig A buffer containing the network configuration. + * @param backend_id The id of the backend. 
+ * @param target_id The id of the target device. + * + * @return A pointer to the created instance of FaceRecognizerSF. + */ + CV_WRAP static Ptr create(const String& framework, + const std::vector& bufferModel, + const std::vector& bufferConfig, + int backend_id = 0, + int target_id = 0); }; //! @} diff --git a/modules/objdetect/src/face_recognize.cpp b/modules/objdetect/src/face_recognize.cpp index 8183573ce982..a5f4641da306 100644 --- a/modules/objdetect/src/face_recognize.cpp +++ b/modules/objdetect/src/face_recognize.cpp @@ -26,6 +26,19 @@ class FaceRecognizerSFImpl : public FaceRecognizerSF net.setPreferableBackend(backend_id); net.setPreferableTarget(target_id); } + + FaceRecognizerSFImpl(const String& framework, + const std::vector& bufferModel, + const std::vector& bufferConfig, + int backend_id, int target_id) + { + net = dnn::readNet(framework, bufferModel, bufferConfig); + CV_Assert(!net.empty()); + + net.setPreferableBackend(backend_id); + net.setPreferableTarget(target_id); + } + void alignCrop(InputArray _src_img, InputArray _face_mat, OutputArray _aligned_img) const override { Mat face_mat = _face_mat.getMat(); @@ -189,4 +202,17 @@ Ptr FaceRecognizerSF::create(const String& model, const String #endif } +Ptr FaceRecognizerSF::create(const String& framework, + const std::vector& bufferModel, + const std::vector& bufferConfig, + int backend_id, int target_id) +{ +#ifdef HAVE_OPENCV_DNN + return makePtr(framework, bufferModel, bufferConfig, backend_id, target_id); +#else + CV_UNUSED(bufferModel); CV_UNUSED(bufferConfig); CV_UNUSED(backend_id); CV_UNUSED(target_id); + CV_Error(cv::Error::StsNotImplemented, "cv::FaceRecognizerSF requires enabled 'dnn' module"); +#endif +} + } // namespace cv diff --git a/modules/video/src/hal_replacement.hpp b/modules/video/src/hal_replacement.hpp index 8d10ab39d1f3..396fa9a2d548 100644 --- a/modules/video/src/hal_replacement.hpp +++ b/modules/video/src/hal_replacement.hpp @@ -27,7 +27,9 @@ //! 
@{ /** -@brief Lucas-Kanade optical flow for single pyramid layer. See calcOpticalFlowPyrLK +@brief Lucas-Kanade optical flow for single pyramid layer. See calcOpticalFlowPyrLK. +@note OpenCV builds pyramid levels with `win_size` padding. Out-of-bound access to source +image data is legal within `+-win_size` range. @param prev_data previous frame image data @param prev_data_step previous frame image data step @param prev_deriv_data previous frame Schaar derivatives @@ -67,6 +69,29 @@ inline int hal_ni_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_st #define cv_hal_LKOpticalFlowLevel hal_ni_LKOpticalFlowLevel //! @endcond +/** +@brief Computes Schaar derivatives with inteleaved layout xyxy... +@note OpenCV builds pyramid levels with `win_size` padding. Out-of-bound access to source +image data is legal within `+-win_size` range. +@param src_data source image data +@param src_step source image step +@param dst_data destination buffer data +@param dst_step destination buffer step +@param width image width +@param height image height +@param cn source image channels +**/ +inline int hal_ni_ScharrDeriv(const uchar* src_data, size_t src_step, + short* dst_data, size_t dst_step, + int width, int height, int cn) +{ + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + +//! @cond IGNORED +#define cv_hal_ScharrDeriv hal_ni_ScharrDeriv +//! @endcond + //! 
@} #if defined(__clang__) diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index 03de93ee08a8..25fdc35c5b71 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -62,6 +62,9 @@ static void calcScharrDeriv(const cv::Mat& src, cv::Mat& dst) int rows = src.rows, cols = src.cols, cn = src.channels(), depth = src.depth(); CV_Assert(depth == CV_8U); dst.create(rows, cols, CV_MAKETYPE(DataType::depth, cn*2)); + + CALL_HAL(ScharrDeriv, cv_hal_ScharrDeriv, src.data, src.step, (short*)dst.data, dst.step, cols, rows, cn); + parallel_for_(Range(0, rows), cv::detail::ScharrDerivInvoker(src, dst), cv::getNumThreads()); } diff --git a/modules/videoio/src/precomp.hpp b/modules/videoio/src/precomp.hpp index 6a9546a6f332..8ebeec4a9eb0 100644 --- a/modules/videoio/src/precomp.hpp +++ b/modules/videoio/src/precomp.hpp @@ -53,7 +53,7 @@ #include // -D_FORTIFY_SOURCE=2 workaround: https://github.com/opencv/opencv/issues/15020 #endif - +#include "opencv2/core/cvdef.h" #include "opencv2/videoio.hpp" #include "opencv2/core/utility.hpp" diff --git a/samples/cpp/filestorage.cpp b/samples/cpp/filestorage.cpp deleted file mode 100644 index e0b462bba6b6..000000000000 --- a/samples/cpp/filestorage.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/* - * filestorage_sample demonstrate the usage of the opencv serialization functionality - */ - -#include "opencv2/core.hpp" -#include -#include - -using std::string; -using std::cout; -using std::endl; -using std::cerr; -using std::ostream; -using namespace cv; - -static void help(char** av) -{ - cout << "\nfilestorage_sample demonstrate the usage of the opencv serialization functionality.\n" - << "usage:\n" - << av[0] << " outputfile.yml.gz\n" - << "\n outputfile above can have many different extensions, see below." 
- << "\nThis program demonstrates the use of FileStorage for serialization, that is in use << and >> in OpenCV\n" - << "For example, how to create a class and have it serialize, but also how to use it to read and write matrices.\n" - << "FileStorage allows you to serialize to various formats specified by the file end type." - << "\nYou should try using different file extensions.(e.g. yaml yml xml xml.gz yaml.gz etc...)\n" << endl; -} - -struct MyData -{ - MyData() : - A(0), X(0), id() - { - } - explicit MyData(int) : - A(97), X(CV_PI), id("mydata1234") - { - } - int A; - double X; - string id; - void write(FileStorage& fs) const //Write serialization for this class - { - fs << "{" << "A" << A << "X" << X << "id" << id << "}"; - } - void read(const FileNode& node) //Read serialization for this class - { - - A = (int)node["A"]; - X = (double)node["X"]; - id = (string)node["id"]; - } -}; - -//These write and read functions must exist as per the inline functions in operations.hpp -static void write(FileStorage& fs, const std::string&, const MyData& x){ - x.write(fs); -} -static void read(const FileNode& node, MyData& x, const MyData& default_value = MyData()){ - if(node.empty()) - x = default_value; - else - x.read(node); -} - -static ostream& operator<<(ostream& out, const MyData& m){ - out << "{ id = " << m.id << ", "; - out << "X = " << m.X << ", "; - out << "A = " << m.A << "}"; - return out; -} -int main(int ac, char** av) -{ - cv::CommandLineParser parser(ac, av, - "{@input||}{help h ||}" - ); - if (parser.has("help")) - { - help(av); - return 0; - } - string filename = parser.get("@input"); - if (filename.empty()) - { - help(av); - return 1; - } - - //write - { - FileStorage fs(filename, FileStorage::WRITE); - - cout << "writing images\n"; - fs << "images" << "["; - - fs << "image1.jpg" << "myfi.png" << "baboon.jpg"; - cout << "image1.jpg" << " myfi.png" << " baboon.jpg" << endl; - - fs << "]"; - - cout << "writing mats\n"; - Mat R =Mat_::eye(3, 3),T = 
Mat_::zeros(3, 1); - cout << "R = " << R << "\n"; - cout << "T = " << T << "\n"; - fs << "R" << R; - fs << "T" << T; - - cout << "writing MyData struct\n"; - MyData m(1); - fs << "mdata" << m; - cout << m << endl; - } - - //read - { - FileStorage fs(filename, FileStorage::READ); - - if (!fs.isOpened()) - { - cerr << "failed to open " << filename << endl; - help(av); - return 1; - } - - FileNode n = fs["images"]; - if (n.type() != FileNode::SEQ) - { - cerr << "images is not a sequence! FAIL" << endl; - return 1; - } - - cout << "reading images\n"; - FileNodeIterator it = n.begin(), it_end = n.end(); - for (; it != it_end; ++it) - { - cout << (string)*it << "\n"; - } - - Mat R, T; - cout << "reading R and T" << endl; - - fs["R"] >> R; - fs["T"] >> T; - - cout << "R = " << R << "\n"; - cout << "T = " << T << endl; - - MyData m; - fs["mdata"] >> m; - - cout << "read mdata\n"; - cout << m << endl; - - cout << "attempting to read mdata_b\n"; //Show default behavior for empty matrix - fs["mdata_b"] >> m; - cout << "read mdata_b\n"; - cout << m << endl; - - } - - cout << "Try opening " << filename << " to see the serialized data." 
<< endl << endl; - - //read from string - { - cout << "Read data from string\n"; - string dataString = - "%YAML:1.0\n" - "mdata:\n" - " A: 97\n" - " X: 3.1415926535897931e+00\n" - " id: mydata1234\n"; - MyData m; - FileStorage fs(dataString, FileStorage::READ | FileStorage::MEMORY); - cout << "attempting to read mdata_b from string\n"; //Show default behavior for empty matrix - fs["mdata"] >> m; - cout << "read mdata\n"; - cout << m << endl; - } - - //write to string - { - cout << "Write data to string\n"; - FileStorage fs(filename, FileStorage::WRITE | FileStorage::MEMORY | FileStorage::FORMAT_YAML); - - cout << "writing MyData struct\n"; - MyData m(1); - fs << "mdata" << m; - cout << m << endl; - string createdString = fs.releaseAndGetString(); - cout << "Created string:\n" << createdString << "\n"; - } - - return 0; -} diff --git a/samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp b/samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp index b17e24eaf475..8a04f6f84454 100644 --- a/samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp +++ b/samples/cpp/tutorial_code/core/file_input_output/file_input_output.cpp @@ -8,14 +8,14 @@ using namespace std; static void help(char** av) { cout << endl - << av[0] << " shows the usage of the OpenCV serialization functionality." << endl + << av[0] << " shows the usage of the OpenCV serialization functionality." << endl << endl << "usage: " << endl - << av[0] << " outputfile.yml.gz" << endl - << "The output file may be either XML (xml) or YAML (yml/yaml). You can even compress it by " - << "specifying this in its extension like xml.gz yaml.gz etc... " << endl + << av[0] << " [output file name] (default outputfile.yml.gz)" << endl << endl + << "The output file may be XML (xml), YAML (yml/yaml), or JSON (json)." << endl + << "You can even compress it by specifying this in its extension like xml.gz yaml.gz etc... 
" << endl << "With FileStorage you can serialize objects in OpenCV by using the << and >> operators" << endl << "For example: - create a class and have it serialized" << endl - << " - use it to read and write matrices." << endl; + << " - use it to read and write matrices." << endl << endl; } class MyData @@ -68,13 +68,16 @@ static ostream& operator<<(ostream& out, const MyData& m) int main(int ac, char** av) { + string filename; + if (ac != 2) { help(av); - return 1; + filename = "outputfile.yml.gz"; } + else + filename = av[1]; - string filename = av[1]; { //write //! [iomati] Mat R = Mat_::eye(3, 3), @@ -118,7 +121,7 @@ int main(int ac, char** av) //! [close] fs.release(); // explicit close //! [close] - cout << "Write Done." << endl; + cout << "Write operation to file:" << filename << " completed successfully." << endl; } {//read diff --git a/samples/cpp/tutorial_code/snippets/core_various.cpp b/samples/cpp/tutorial_code/snippets/core_various.cpp index 2be97f989da3..b3d590100dec 100644 --- a/samples/cpp/tutorial_code/snippets/core_various.cpp +++ b/samples/cpp/tutorial_code/snippets/core_various.cpp @@ -78,6 +78,7 @@ int main() tm.start(); // do something ... tm.stop(); + cout << "Last iteration: " << tm.getLastTimeSec() << endl; } cout << "Average time per iteration in seconds: " << tm.getAvgTimeSec() << endl; cout << "Average FPS: " << tm.getFPS() << endl; diff --git a/samples/python/tutorial_code/core/file_input_output/file_input_output.py b/samples/python/tutorial_code/core/file_input_output/file_input_output.py index 66b3108dbaf3..95eb1afcc94d 100644 --- a/samples/python/tutorial_code/core/file_input_output/file_input_output.py +++ b/samples/python/tutorial_code/core/file_input_output/file_input_output.py @@ -9,10 +9,10 @@ def help(filename): ''' {0} shows the usage of the OpenCV serialization functionality. \n\n usage:\n - python3 {0} outputfile.yml.gz\n\n - The output file may be either in XML, YAML or JSON. 
You can even compress it\n - by specifying this in its extension like xml.gz yaml.gz etc... With\n - FileStorage you can serialize objects in OpenCV.\n\n + python3 {0} [output file name] (default outputfile.yml.gz)\n\n + The output file may be XML (xml), YAML (yml/yaml), or JSON (json).\n + You can even compress it by specifying this in its extension like xml.gz yaml.gz etc...\n + With FileStorage you can serialize objects in OpenCV.\n\n For example: - create a class and have it serialized\n - use it to read and write matrices.\n '''.format(filename) @@ -49,7 +49,9 @@ def read(self, node): def main(argv): if len(argv) != 2: help(argv[0]) - exit(1) + filename = 'outputfile.yml.gz' + else : + filename = argv[1] # write ## [iomati] @@ -60,8 +62,6 @@ def main(argv): m = MyData() ## [customIOi] - filename = argv[1] - ## [open] s = cv.FileStorage(filename, cv.FileStorage_WRITE) # or: @@ -98,7 +98,7 @@ def main(argv): ## [close] s.release() ## [close] - print ('Write Done.') + print ('Write operation to file:', filename, 'completed successfully.') # read print ('\nReading: ')