Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
6ef357f
build: fix AVX2/AVX512 builds failed due to intrinsics operator usage
mshabunin Sep 20, 2024
b2e118e
Merge pull request #26166 from mshabunin:fix-intrin-ops
asmorkalov Sep 20, 2024
50eebbd
Fixed minor typos in js tutorials
Sep 22, 2024
a6ec12f
Merge pull request #26163 from asmorkalov:as/HAL_schaar_deriv
asmorkalov Sep 23, 2024
450e741
Merge pull request #26176 from najasnake12:fixed_minor_typos_in_js_tu…
asmorkalov Sep 23, 2024
679931d
Enhance cv::TickMeter to be able to get the last ellapsed time
jamacias Sep 28, 2024
8a36f11
Add the HAL implementation for the merge function on RISC-V Vector
hanliutong Sep 29, 2024
086b999
SQPnP solver updates
mlourakis Sep 30, 2024
658336b
Merge pull request #26219 from mlourakis:4.x
asmorkalov Oct 1, 2024
305b57e
C-API cleanup: backport videoio changes from 5.x
mshabunin Oct 1, 2024
7202395
C-API cleanup: use AutoBuffer in MSER
mshabunin Oct 1, 2024
807170d
C-API cleanup: inpaint algorithms in photo
mshabunin Oct 1, 2024
93a882d
Fix fillPoly drawing over boundaries
inayd Oct 1, 2024
b8eed54
Merge pull request #26228 from mshabunin:cpp-features2d-4x
asmorkalov Oct 2, 2024
292ee28
Merge pull request #26230 from mshabunin:cpp-photo-4x
asmorkalov Oct 2, 2024
1aa325a
Added HAL documentation note for out-of-bound hack in optical flow LK.
asmorkalov Oct 2, 2024
73b3b24
Merge pull request #26236 from asmorkalov:as/HAL_pyrlk_hack_documenta…
asmorkalov Oct 2, 2024
783fe72
Resolve Compilation Error for v_func Function in SIMD Emulator (#25891)
WanliZhong Oct 2, 2024
ae1fb8c
Merge pull request #26224 from mshabunin:cpp-videoio-backport
asmorkalov Oct 3, 2024
3901426
Merge pull request #26241 from asmorkalov:as/kelidicv-0.2
asmorkalov Oct 3, 2024
e375d57
cuda - update npp calls to use the new NppStreamContext API if available
cudawarped Oct 2, 2024
fa6d652
inversion checks
mlourakis Oct 6, 2024
73d68f3
RISC-V: fix build with RVV 0.7.1
mshabunin Oct 7, 2024
cda9f41
Merge pull request #26266 from mshabunin:fix-rvv071-build
asmorkalov Oct 7, 2024
28efc21
Merge pull request #26187 from inayd:26130-fixFillPolyBoundaries
asmorkalov Oct 7, 2024
40428d9
Merge pull request #26259 from Kumataro:fix26258
Kumataro Oct 8, 2024
7d9014e
Merge pull request #26263 from mlourakis:4.x
asmorkalov Oct 8, 2024
cefde84
Merge pull request #25909 from gblikas:patch-1
gblikas Oct 9, 2024
e72efd0
Merge pull request #26260 from sturkmen72:upd_doc_4_x
sturkmen72 Oct 9, 2024
e1b0637
Added buffer-based model loading to FaceRecognizerSF
Quantizs Oct 9, 2024
69803e7
Merge pull request #26216 from hanliutong:rvv-hal-merge
asmorkalov Oct 9, 2024
dceeb47
rewrote clgl device discovery
Oct 9, 2024
885bbc6
renaming
Oct 10, 2024
8ba7389
properly size the devices array
Oct 10, 2024
63b5dee
fixed bug: variable shadowing
Oct 10, 2024
2a681bb
C++26 Deprecated Arithmetic Conversion: Fix core/mat.inl.hpp
Oct 10, 2024
687e37e
Merge pull request #25892 from WanliZhong:v_sincos
WanliZhong Oct 10, 2024
50f6d54
renaming
Oct 10, 2024
4cbb96b
use new instead of malloc and guard it
Oct 10, 2024
0f23420
Merge pull request #26278 from Quantizs:feature-create-face-recognize…
asmorkalov Oct 10, 2024
3edcf41
more guarding
kallaballa Oct 11, 2024
08f7f13
Merge pull request #26234 from zachlowry:apply-gcc6-fix-on-each-direc…
zachlowry Oct 11, 2024
1909ac8
Merge pull request #26212 from jamacias:feature/TickMeter-lasttime
asmorkalov Oct 14, 2024
8e5dbc0
Merge pull request #26298 from sturkmen72:avif
sturkmen72 Oct 14, 2024
8ba76e6
build: set cmake policy for if(IN_LIST) support
mshabunin Oct 16, 2024
d20c456
Merge pull request #26320 from mshabunin:fix-cmake-in-list
asmorkalov Oct 17, 2024
489df18
Merge pull request #26313 from FantasqueX:ipp-warp-affine-border-value
FantasqueX Oct 17, 2024
3919f33
Merge pull request #26293 from SeptimiuIoachimNeagaIntel:EISW-140103_…
SeptimiuIoachimNeagaIntel Oct 17, 2024
888469a
fix: performance typo
migueldaipre Oct 18, 2024
35dbf32
Merge pull request #26211 from Kumataro:fix26207
Kumataro Oct 18, 2024
c79b72a
Merge pull request #26335 from migueldaipre:4.x
asmorkalov Oct 18, 2024
e026a5a
Merge pull request #26281 from kallaballa:clgl_device_discovery
asmorkalov Oct 18, 2024
94d5ad0
Merge pull request #26284 from fzuuzf:enum_arithmetic_fixes_for_c++26
asmorkalov Oct 21, 2024
4398e0b
Merge pull request #26340 from Kumataro:wa26339
Kumataro Oct 22, 2024
57ccbee
Merge pull request #26245 from cudawarped:cuda_update_to_npp_stream_ctx
asmorkalov Oct 22, 2024
9830864
ADE update to 0.1.2e
asmorkalov Oct 22, 2024
898a2a3
Merge pull request #26353 from asmorkalov:as/ade_1.2e
asmorkalov Oct 23, 2024
8e55659
Merge branch 4.x
asmorkalov Oct 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions 3rdparty/carotene/hal/tegra_hal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1962,4 +1962,20 @@ inline int TEGRA_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_ste
#define cv_hal_LKOpticalFlowLevel TEGRA_LKOpticalFlowLevel
#endif // __ARM_ARCH=7

#if 0 // OpenCV provides a faster parallel implementation
// Forwards a Scharr-derivative request to CAROTENE_NS::ScharrDeriv.
//
// src_data/src_step and dst_data/dst_step are passed through unchanged;
// width and height are packed into a CAROTENE_NS::Size2D and cn is forwarded
// as the second argument.
//
// Returns CV_HAL_ERROR_OK after the call, or CV_HAL_ERROR_NOT_IMPLEMENTED
// (without calling into Carotene) when
// CAROTENE_NS::isSupportedConfiguration() reports false.
inline int TEGRA_ScharrDeriv(const uchar* src_data, size_t src_step,
                             short* dst_data, size_t dst_step,
                             int width, int height, int cn)
{
    if (CAROTENE_NS::isSupportedConfiguration())
    {
        CAROTENE_NS::ScharrDeriv(CAROTENE_NS::Size2D(width, height), cn,
                                 src_data, src_step,
                                 dst_data, dst_step);
        return CV_HAL_ERROR_OK;
    }

    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}

#undef cv_hal_ScharrDeriv
#define cv_hal_ScharrDeriv TEGRA_ScharrDeriv
#endif

#endif
6 changes: 5 additions & 1 deletion 3rdparty/hal_rvv/hal_rvv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,8 @@
#include "version/hal_rvv_071.hpp"
#endif

#endif
#if defined(__riscv_v) && __riscv_v == 1000000
#include "hal_rvv_1p0/merge.hpp" // core
#endif

#endif
363 changes: 363 additions & 0 deletions 3rdparty/hal_rvv/hal_rvv_1p0/merge.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,363 @@
#ifndef OPENCV_HAL_RVV_MERGE_HPP_INCLUDED
#define OPENCV_HAL_RVV_MERGE_HPP_INCLUDED

#include <riscv_vector.h>

namespace cv { namespace cv_hal_rvv {

// Point the cv_hal_merge8u/16u/32s/64s macros at the functions defined in
// this namespace. Each macro is #undef'ed first so that any earlier
// definition is dropped rather than triggering a redefinition.
#undef cv_hal_merge8u
#define cv_hal_merge8u cv::cv_hal_rvv::merge8u
#undef cv_hal_merge16u
#define cv_hal_merge16u cv::cv_hal_rvv::merge16u
#undef cv_hal_merge32s
#define cv_hal_merge32s cv::cv_hal_rvv::merge32s
#undef cv_hal_merge64s
#define cv_hal_merge64s cv::cv_hal_rvv::merge64s

// Interleaves `cn` planar 8-bit channels into one packed buffer:
//     dst[i*cn + c] = src[c][i]   for 0 <= i < len, 0 <= c < cn.
//
// src : array of `cn` pointers, each addressing `len` elements.
// dst : output buffer holding len*cn elements.
// Returns CV_HAL_ERROR_OK unconditionally.
//
// The first k channels (k = cn % 4, or 4 when cn is a multiple of 4) are
// written with RVV strided stores for as long as a full vector of `vl`
// elements is available; the leftover elements and all remaining channels
// (taken four at a time) are written by scalar loops.
//
// Auto-vectorization is switched off (GCC function attribute, clang loop
// pragmas) - presumably so the compiler does not re-vectorize the scalar
// loops; NOTE(review): confirm the original motivation.
#if defined __GNUC__
__attribute__((optimize("no-tree-vectorize")))
#endif
static int merge8u(const uchar** src, uchar* dst, int len, int cn ) {
    int k = cn % 4 ? cn % 4 : 4;
    int i = 0, j;
    const int vl = static_cast<int>(__riscv_vsetvlmax_e8m1());
    // Byte distance between two consecutive output pixels. It must be the
    // full pixel size (cn elements), not the number of channels handled by
    // the current branch: a hard-coded 2/3/4 is only right when cn == k and
    // misplaces every element for cn = 1, 5, 6, 7, 8, ...
    const auto dst_stride = sizeof(uchar) * cn;
    if( k == 1 )
    {
        const uchar* src0 = src[0];
        for( ; i <= len - vl; i += vl)
        {
            auto a = __riscv_vle8_v_u8m1(src0 + i, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn, dst_stride, a, vl);
        }
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( ; i < len; i++)
            dst[i*cn] = src0[i];
    }
    else if( k == 2 )
    {
        const uchar *src0 = src[0], *src1 = src[1];
        for( ; i <= len - vl; i += vl)
        {
            auto a = __riscv_vle8_v_u8m1(src0 + i, vl);
            auto b = __riscv_vle8_v_u8m1(src1 + i, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn, dst_stride, a, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn + 1, dst_stride, b, vl);
        }
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( ; i < len; i++ )
        {
            dst[i*cn] = src0[i];
            dst[i*cn+1] = src1[i];
        }
    }
    else if( k == 3 )
    {
        const uchar *src0 = src[0], *src1 = src[1], *src2 = src[2];
        for( ; i <= len - vl; i += vl)
        {
            auto a = __riscv_vle8_v_u8m1(src0 + i, vl);
            auto b = __riscv_vle8_v_u8m1(src1 + i, vl);
            auto c = __riscv_vle8_v_u8m1(src2 + i, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn, dst_stride, a, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn + 1, dst_stride, b, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn + 2, dst_stride, c, vl);
        }
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( ; i < len; i++ )
        {
            dst[i*cn] = src0[i];
            dst[i*cn+1] = src1[i];
            dst[i*cn+2] = src2[i];
        }
    }
    else
    {
        const uchar *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
        for( ; i <= len - vl; i += vl)
        {
            auto a = __riscv_vle8_v_u8m1(src0 + i, vl);
            auto b = __riscv_vle8_v_u8m1(src1 + i, vl);
            auto c = __riscv_vle8_v_u8m1(src2 + i, vl);
            auto d = __riscv_vle8_v_u8m1(src3 + i, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn, dst_stride, a, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn + 1, dst_stride, b, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn + 2, dst_stride, c, vl);
            __riscv_vsse8_v_u8m1(dst + i*cn + 3, dst_stride, d, vl);
        }
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( ; i < len; i++ )
        {
            dst[i*cn] = src0[i];
            dst[i*cn+1] = src1[i];
            dst[i*cn+2] = src2[i];
            dst[i*cn+3] = src3[i];
        }
    }
    // Remaining channels: cn - k is a multiple of 4, so take them in groups
    // of four with plain scalar stores.
    #if defined(__clang__)
    #pragma clang loop vectorize(disable)
    #endif
    for( ; k < cn; k += 4 )
    {
        const uchar *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3];
        for( i = 0, j = k; i < len; i++, j += cn )
        {
            dst[j] = src0[i]; dst[j+1] = src1[i];
            dst[j+2] = src2[i]; dst[j+3] = src3[i];
        }
    }
    return CV_HAL_ERROR_OK;
}

// Interleaves `cn` planar 16-bit channels into one packed buffer:
//     dst[i*cn + c] = src[c][i]   for 0 <= i < len, 0 <= c < cn.
//
// src : array of `cn` pointers, each addressing `len` elements.
// dst : output buffer holding len*cn elements.
// Returns CV_HAL_ERROR_OK unconditionally.
//
// The first k channels (k = cn % 4, or 4 when cn is a multiple of 4) are
// written with RVV strided stores for as long as a full vector of `vl`
// elements is available; the leftover elements and all remaining channels
// (taken four at a time) are written by scalar loops.
//
// Auto-vectorization is switched off (GCC function attribute, clang loop
// pragmas) - presumably so the compiler does not re-vectorize the scalar
// loops; NOTE(review): confirm the original motivation.
#if defined __GNUC__
__attribute__((optimize("no-tree-vectorize")))
#endif
static int merge16u(const ushort** src, ushort* dst, int len, int cn ) {
    int k = cn % 4 ? cn % 4 : 4;
    int i = 0, j;
    const int vl = static_cast<int>(__riscv_vsetvlmax_e16m1());
    // Byte distance between two consecutive output pixels. It must be the
    // full pixel size (cn elements), not the number of channels handled by
    // the current branch: a hard-coded 2/3/4 is only right when cn == k and
    // misplaces every element for cn = 1, 5, 6, 7, 8, ...
    const auto dst_stride = sizeof(ushort) * cn;
    if( k == 1 )
    {
        const ushort* src0 = src[0];
        for( ; i <= len - vl; i += vl)
        {
            auto a = __riscv_vle16_v_u16m1(src0 + i, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn, dst_stride, a, vl);
        }
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( ; i < len; i++)
            dst[i*cn] = src0[i];
    }
    else if( k == 2 )
    {
        const ushort *src0 = src[0], *src1 = src[1];
        for( ; i <= len - vl; i += vl)
        {
            auto a = __riscv_vle16_v_u16m1(src0 + i, vl);
            auto b = __riscv_vle16_v_u16m1(src1 + i, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn, dst_stride, a, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn + 1, dst_stride, b, vl);
        }
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( ; i < len; i++ )
        {
            dst[i*cn] = src0[i];
            dst[i*cn+1] = src1[i];
        }
    }
    else if( k == 3 )
    {
        const ushort *src0 = src[0], *src1 = src[1], *src2 = src[2];
        for( ; i <= len - vl; i += vl)
        {
            auto a = __riscv_vle16_v_u16m1(src0 + i, vl);
            auto b = __riscv_vle16_v_u16m1(src1 + i, vl);
            auto c = __riscv_vle16_v_u16m1(src2 + i, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn, dst_stride, a, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn + 1, dst_stride, b, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn + 2, dst_stride, c, vl);
        }
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( ; i < len; i++ )
        {
            dst[i*cn] = src0[i];
            dst[i*cn+1] = src1[i];
            dst[i*cn+2] = src2[i];
        }
    }
    else
    {
        const ushort *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
        for( ; i <= len - vl; i += vl)
        {
            auto a = __riscv_vle16_v_u16m1(src0 + i, vl);
            auto b = __riscv_vle16_v_u16m1(src1 + i, vl);
            auto c = __riscv_vle16_v_u16m1(src2 + i, vl);
            auto d = __riscv_vle16_v_u16m1(src3 + i, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn, dst_stride, a, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn + 1, dst_stride, b, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn + 2, dst_stride, c, vl);
            __riscv_vsse16_v_u16m1(dst + i*cn + 3, dst_stride, d, vl);
        }
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( ; i < len; i++ )
        {
            dst[i*cn] = src0[i];
            dst[i*cn+1] = src1[i];
            dst[i*cn+2] = src2[i];
            dst[i*cn+3] = src3[i];
        }
    }
    // Remaining channels: cn - k is a multiple of 4, so take them in groups
    // of four with plain scalar stores.
    #if defined(__clang__)
    #pragma clang loop vectorize(disable)
    #endif
    for( ; k < cn; k += 4 )
    {
        const ushort *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3];
        for( i = 0, j = k; i < len; i++, j += cn )
        {
            dst[j] = src0[i]; dst[j+1] = src1[i];
            dst[j+2] = src2[i]; dst[j+3] = src3[i];
        }
    }
    return CV_HAL_ERROR_OK;
}

// Interleaves `cn` planar 32-bit channels into one packed buffer:
//     dst[i*cn + c] = src[c][i]   for 0 <= i < len, 0 <= c < cn.
//
// The first group holds cn % 4 channels (or 4 when cn is a multiple of 4);
// every following group holds exactly four channels. All copies are plain
// scalar loops with compiler auto-vectorization switched off (GCC function
// attribute, clang loop pragmas).
// Returns CV_HAL_ERROR_OK unconditionally.
#if defined __GNUC__
__attribute__((optimize("no-tree-vectorize")))
#endif
static int merge32s(const int** src, int* dst, int len, int cn ) {
    const int remainder = cn % 4;
    int ch = remainder ? remainder : 4;

    // First (possibly partial) group of channels.
    switch( ch )
    {
    case 1:
    {
        const int* plane0 = src[0];
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( int x = 0, out = 0; x < len; x++, out += cn )
            dst[out] = plane0[x];
        break;
    }
    case 2:
    {
        const int* plane0 = src[0];
        const int* plane1 = src[1];
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( int x = 0, out = 0; x < len; x++, out += cn )
        {
            dst[out] = plane0[x];
            dst[out+1] = plane1[x];
        }
        break;
    }
    case 3:
    {
        const int* plane0 = src[0];
        const int* plane1 = src[1];
        const int* plane2 = src[2];
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( int x = 0, out = 0; x < len; x++, out += cn )
        {
            dst[out] = plane0[x];
            dst[out+1] = plane1[x];
            dst[out+2] = plane2[x];
        }
        break;
    }
    default:
    {
        const int* plane0 = src[0];
        const int* plane1 = src[1];
        const int* plane2 = src[2];
        const int* plane3 = src[3];
        #if defined(__clang__)
        #pragma clang loop vectorize(disable)
        #endif
        for( int x = 0, out = 0; x < len; x++, out += cn )
        {
            dst[out] = plane0[x];
            dst[out+1] = plane1[x];
            dst[out+2] = plane2[x];
            dst[out+3] = plane3[x];
        }
        break;
    }
    }

    // Remaining channels, four per pass.
    #if defined(__clang__)
    #pragma clang loop vectorize(disable)
    #endif
    for( ; ch < cn; ch += 4 )
    {
        const int* plane0 = src[ch];
        const int* plane1 = src[ch+1];
        const int* plane2 = src[ch+2];
        const int* plane3 = src[ch+3];
        for( int x = 0, out = ch; x < len; x++, out += cn )
        {
            dst[out] = plane0[x];
            dst[out+1] = plane1[x];
            dst[out+2] = plane2[x];
            dst[out+3] = plane3[x];
        }
    }
    return CV_HAL_ERROR_OK;
}

#if defined __GNUC__
__attribute__((optimize("no-tree-vectorize")))
#endif
static int merge64s(const int64** src, int64* dst, int len, int cn ) {
int k = cn % 4 ? cn % 4 : 4;
int i, j;
if( k == 1 )
{
const int64* src0 = src[0];
#if defined(__clang__)
#pragma clang loop vectorize(disable)
#endif
for( i = j = 0; i < len; i++, j += cn )
dst[j] = src0[i];
}
else if( k == 2 )
{
const int64 *src0 = src[0], *src1 = src[1];
i = j = 0;
#if defined(__clang__)
#pragma clang loop vectorize(disable)
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i];
dst[j+1] = src1[i];
}
}
else if( k == 3 )
{
const int64 *src0 = src[0], *src1 = src[1], *src2 = src[2];
i = j = 0;
#if defined(__clang__)
#pragma clang loop vectorize(disable)
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i];
dst[j+1] = src1[i];
dst[j+2] = src2[i];
}
}
else
{
const int64 *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
i = j = 0;
#if defined(__clang__)
#pragma clang loop vectorize(disable)
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i]; dst[j+1] = src1[i];
dst[j+2] = src2[i]; dst[j+3] = src3[i];
}
}
#if defined(__clang__)
#pragma clang loop vectorize(disable)
#endif
for( ; k < cn; k += 4 )
{
const int64 *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3];
for( i = 0, j = k; i < len; i++, j += cn )
{
dst[j] = src0[i]; dst[j+1] = src1[i];
dst[j+2] = src2[i]; dst[j+3] = src3[i];
}
}
return CV_HAL_ERROR_OK;
}

}}

#endif
Loading
Loading