Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions 3rdparty/ndsrvp/include/imgproc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#ifndef OPENCV_NDSRVP_IMGPROC_HPP
#define OPENCV_NDSRVP_IMGPROC_HPP

struct cvhalFilter2D;

namespace cv {

namespace ndsrvp {
Expand Down Expand Up @@ -71,6 +73,34 @@ int threshold(const uchar* src_data, size_t src_step,
#undef cv_hal_threshold
#define cv_hal_threshold (cv::ndsrvp::threshold)

// ################ filter ################

// Declaration of the NDSRVP 2D-filter setup routine. It takes a pointer to a
// cvhalFilter2D* (context), the kernel buffer with its step/type/size, maximum
// image dimensions, source and destination types, border mode, delta, anchor
// position and two capability flags.
// NOTE(review): the implementation is not visible here. Parameter meaning and
// the int return value presumably follow OpenCV's cv_hal_filterInit HAL
// contract (context as out-parameter, HAL status code returned) — confirm
// against the HAL interface header.
int filterInit(cvhalFilter2D **context,
uchar *kernel_data, size_t kernel_step,
int kernel_type, int kernel_width,
int kernel_height, int max_width, int max_height,
int src_type, int dst_type, int borderType,
double delta, int anchor_x, int anchor_y,
bool allowSubmatrix, bool allowInplace);

// Drop any previous cv_hal_filterInit definition and route it to the function above.
#undef cv_hal_filterInit
#define cv_hal_filterInit (cv::ndsrvp::filterInit)

// Declaration of the NDSRVP 2D-filter execution routine. It takes the filter
// context, a read-only source buffer and a writable destination buffer (each
// with its step), the region size (width, height), the full image size and an
// offset (offset_x, offset_y).
// NOTE(review): the implementation is not visible here. Presumably the offset
// locates the region inside the full image, as in OpenCV's cv_hal_filter HAL
// contract — confirm against the HAL interface header.
int filter(cvhalFilter2D *context,
const uchar *src_data, size_t src_step,
uchar *dst_data, size_t dst_step,
int width, int height,
int full_width, int full_height,
int offset_x, int offset_y);

// Drop any previous cv_hal_filter definition and route it to the function above.
#undef cv_hal_filter
#define cv_hal_filter (cv::ndsrvp::filter)

// Declaration of the NDSRVP 2D-filter teardown routine; takes the filter context.
// NOTE(review): presumably releases whatever filterInit allocated — the
// implementation is not visible here, confirm there.
int filterFree(cvhalFilter2D *context);

// Drop any previous cv_hal_filterFree definition and route it to the function above.
#undef cv_hal_filterFree
#define cv_hal_filterFree (cv::ndsrvp::filterFree)

} // namespace ndsrvp

} // namespace cv
Expand Down
34 changes: 34 additions & 0 deletions 3rdparty/ndsrvp/src/cvutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,40 @@ int borderInterpolate(int p, int len, int borderType)
return p;
}

// Four-lane border interpolation: remaps each 16-bit coordinate in vp for an
// axis of length len according to borderType and returns the remapped vector.
// Presumably the SIMD counterpart of the scalar borderInterpolate() — TODO confirm.
//
// NOTE(review): every branch relies on __nds__bpick(a, b, mask) taking the
// bits of `a` where `mask` is set and the bits of `b` elsewhere, and on vector
// comparisons yielding all-ones / all-zero lanes. Confirm against the Andes
// intrinsic reference and the compiler's vector-extension documentation.
int16x4_t borderInterpolate_vector(int16x4_t vp, short len, int borderType)
{
// Broadcast constants used by the branches below.
int16x4_t vzero = (int16x4_t){0, 0, 0, 0};
int16x4_t vone = (int16x4_t){1, 1, 1, 1};
int16x4_t vlen = (int16x4_t){len, len, len, len};
if(borderType == CV_HAL_BORDER_REPLICATE)
// Lanes below 0 select 0, lanes >= len select len - 1, the rest keep vp.
vp = (int16x4_t)__nds__bpick(0, __nds__bpick((long)(vlen - 1), (long)vp, (long)(vp >= vlen)), (long)(vp < 0));
else if(borderType == CV_HAL_BORDER_REFLECT || borderType == CV_HAL_BORDER_REFLECT_101)
{
// vdelta is 1 for REFLECT_101 and 0 for REFLECT.
int16x4_t vdelta = (borderType == CV_HAL_BORDER_REFLECT_101) ? vone : vzero;
// With a single-element axis every lane maps to index 0.
if(len == 1)
return vzero;
do
{
// Candidate for lanes below 0 (vneg) and for lanes >= len (vpos).
int16x4_t vneg = -vp - 1 + vdelta;
int16x4_t vpos = vlen - 1 - (vp - vlen) - vdelta;
vp = (int16x4_t)__nds__bpick((long)vneg, __nds__bpick((long)vpos, (long)vp, (long)(vp >= vlen)), (long)(vp < 0));
}
// The masks are viewed as one integer: repeat while any lane is still out of range.
while( (long)(vp >= vlen) || (long)(vp < 0) );
}
else if(borderType == CV_HAL_BORDER_WRAP)
{
ndsrvp_assert(len > 0);
// vneg is the candidate for lanes below 0, vpos (vp modulo len) for lanes >= len.
int16x4_t vneg = vp - ((vp - vlen + 1) / vlen) * vlen;
int16x4_t vpos = vp % vlen;
vp = (int16x4_t)__nds__bpick((long)vneg, __nds__bpick((long)vpos, (long)vp, (long)(vp >= vlen)), (long)(vp < 0));
}
else if(borderType == CV_HAL_BORDER_CONSTANT)
// Out-of-range lanes select -1; in-range lanes keep vp.
vp = (int16x4_t)__nds__bpick((long)-vone, (long)vp, (long)(vp < 0 || vp >= vlen));
else
// Any other border type is reported through ndsrvp_error.
ndsrvp_error(Error::StsBadArg, "borderInterpolate_vector(): Unknown/unsupported border type");
return vp;
}

} // namespace ndsrvp

} // namespace cv
160 changes: 160 additions & 0 deletions 3rdparty/ndsrvp/src/cvutils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <iostream>
#include <string>
#include <array>
#include <vector>
#include <climits>
#include <algorithm>

Expand All @@ -26,16 +27,26 @@ namespace ndsrvp {
// Shared helper declarations; the definitions live in the matching source file.
// NOTE(review): fastMalloc/fastFree are presumably an aligned allocate/release
// pair — their bodies are not visible here, confirm before relying on it.
void* fastMalloc(size_t size);
void fastFree(void* ptr);
// Scalar border interpolation of coordinate p for an axis of length len.
int borderInterpolate(int p, int len, int borderType);
// Same operation applied to four 16-bit coordinates at once.
int16x4_t borderInterpolate_vector(int16x4_t vp, short len, int borderType);

// Fallback MAX/MIN macros, defined only when no earlier header provided them.
// Each argument appears twice in the expansion, so arguments with side
// effects (e.g. i++) are evaluated more than once.
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif

#ifndef MIN
# define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif

// Bit mask of the channel-count field inside a type/flags word.
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
// Channel count: the masked field stores (channels - 1), hence the + 1.
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)

// Per-channel element size, looked up from a nibble-packed table: nibble k of
// 0x28442211 is the size for depth k (1,1,2,2,4,4,8,2 from the least
// significant nibble upwards).
#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15)
// Whole element size: channel count times per-channel size.
#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type))

// Alignment constant. NOTE(review): presumably the byte alignment used by
// fastMalloc — its use is not visible here, confirm.
#define CV_MALLOC_ALIGN 64

// Element size for the given type code (channel count times per-channel
// size), as computed by the CV_ELEM_SIZE macro, returned as size_t.
inline size_t getElemSize(int type)
{
    const size_t elem_size = static_cast<size_t>(CV_ELEM_SIZE(type));
    return elem_size;
}

// error codes

enum Error{
Expand Down Expand Up @@ -69,6 +80,135 @@ inline int32x2_t vclip(int32x2_t x, int32x2_t a, int32x2_t b)
return (int32x2_t)__nds__bpick((long)a, __nds__bpick((long)(b - 1), (long)x, (long)(x < b)), (long)(x >= a));
}

// expand

/*
[0] [1] [2] [3] [4] [5] [6] [7]
810 [ 0 ] [ 1 ] [ 4 ] [ 5 ]
832 [ 2 ] [ 3 ] [ 6 ] [ 7 ]
bb [ 0 ] [ 1 ] [ 2 ] [ 3 ]
tt [ 4 ] [ 5 ] [ 6 ] [ 7 ]
*/

// Widens the eight bytes packed in vs to eight 16-bit values and stores them
// to dst[0..7]; per the lane diagram above, the original byte order is kept.
inline void ndsrvp_u8_u16_expand8(const unsigned long vs, ushort* dst)
{
    // Bytes {0,1,4,5} and {2,3,6,7}, each zero-extended to 16 bits.
    const unsigned long lanes_0145 = __nds__zunpkd810(vs);
    const unsigned long lanes_2367 = __nds__zunpkd832(vs);

    unsigned long* const out_lo = (unsigned long*)dst;
    unsigned long* const out_hi = (unsigned long*)(dst + 4);

    // Bottom 32-bit halves give lanes 0..3, top halves give lanes 4..7.
    *out_lo = __nds__pkbb32(lanes_2367, lanes_0145);
    *out_hi = __nds__pktt32(lanes_2367, lanes_0145);
}

/*
[0] [1] [2] [3] [4] [5] [6] [7]
820 [ 0 ] [ 2 ] [ 4 ] [ 6 ]
831 [ 1 ] [ 3 ] [ 5 ] [ 7 ]
bb [ 0 ] [ 2 ] [ 1 ] [ 3 ]
tt [ 4 ] [ 6 ] [ 5 ] [ 7 ]
*/

// Widens the eight bytes packed in vs to eight 16-bit values and stores them
// to dst[0..7] in the swapped order shown in the lane diagram above:
// 0, 2, 1, 3 followed by 4, 6, 5, 7.
inline void ndsrvp_u8_u16_eswap8(const unsigned long vs, ushort* dst)
{
    // Even bytes {0,2,4,6} and odd bytes {1,3,5,7}, each zero-extended to 16 bits.
    const unsigned long even_lanes = __nds__zunpkd820(vs);
    const unsigned long odd_lanes = __nds__zunpkd831(vs);

    unsigned long* const out_lo = (unsigned long*)dst;
    unsigned long* const out_hi = (unsigned long*)(dst + 4);

    *out_lo = __nds__pkbb32(odd_lanes, even_lanes);
    *out_hi = __nds__pktt32(odd_lanes, even_lanes);
}

/*
[0] [1] [2] [3] [4] [5] [6] [7]
820 [ 0 ] [ 2 ] [ 4 ] [ 6 ]
831 [ 1 ] [ 3 ] [ 5 ] [ 7 ]
bb [ 0 ] [ 2 ] [ 1 ] [ 3 ]
tt [ 4 ] [ 6 ] [ 5 ] [ 7 ]
bbbb[ 0 ] [ 1 ]
bbtt[ 2 ] [ 3 ]
ttbb[ 4 ] [ 5 ]
tttt[ 6 ] [ 7 ]
*/


// Widens the eight bytes packed in vs to eight 32-bit values and stores them
// to dst[0..7]; per the lane diagram above, the original byte order is kept.
inline void ndsrvp_u8_u32_expand8(const unsigned long vs, uint* dst)
{
    // Stage 1: bytes -> 16-bit lanes, split into even and odd bytes.
    const unsigned long even_lanes = __nds__zunpkd820(vs);
    const unsigned long odd_lanes = __nds__zunpkd831(vs);

    // Stage 2: regroup as {0,2,1,3} and {4,6,5,7}.
    const unsigned long group_lo = __nds__pkbb32(odd_lanes, even_lanes);
    const unsigned long group_hi = __nds__pktt32(odd_lanes, even_lanes);

    // Stage 3: pack each 16-bit lane with a zero upper half, two outputs per store.
    unsigned long* const out01 = (unsigned long*)dst;
    unsigned long* const out23 = (unsigned long*)(dst + 2);
    unsigned long* const out45 = (unsigned long*)(dst + 4);
    unsigned long* const out67 = (unsigned long*)(dst + 6);

    *out01 = __nds__pkbb16(0, group_lo);
    *out23 = __nds__pktt16(0, group_lo);
    *out45 = __nds__pkbb16(0, group_hi);
    *out67 = __nds__pktt16(0, group_hi);
}

// float replacement

// Element-wise sum of two 8-float arrays: c[i] = a[i] + b[i] for i = 0..7.
// Elements are processed in ascending index order.
inline void ndsrvp_f32_add8(const float* a, const float* b, float* c)
{
    for (int lane = 0; lane < 8; ++lane)
        c[lane] = a[lane] + b[lane];
}

/*
[1] [8] [23]
[24] [8]
*/

// Multiplies the eight floats a[0..7] by the eight unsigned bytes packed in b
// and writes eight floats to c[0..7], working on the raw 32-bit patterns with
// integer SIMD instead of floating-point instructions. The author marks it as
// experimental and not bit exact.
// No special handling of zero, subnormal, infinite or NaN inputs is visible here.
// NOTE(review): the exact behaviour of __nds__kmmwb2_u / __nds__kmmwt2_u and
// __nds__v_clz32 is assumed from their use — confirm with the Andes intrinsic
// reference.
inline void ndsrvp_f32_u8_mul8(const float* a, const unsigned long b, float* c) // experimental, not bit exact
{
const int mask_frac = 0x007FFFFF; // low 23 bits: the stored fraction field
const int mask_sign = 0x7FFFFFFF; // everything except bit 31
const int mask_lead = 0x40000000; // bit 30: where the implicit leading one is placed
const int ofs_exp = 23; // bit offset of the exponent field

// Load the inputs as pairs of raw 32-bit patterns.
uint32x2_t va01 = *(uint32x2_t*)a;
uint32x2_t va23 = *(uint32x2_t*)(a + 2);
uint32x2_t va45 = *(uint32x2_t*)(a + 4);
uint32x2_t va67 = *(uint32x2_t*)(a + 6);

// Upper 9 bits of each pattern (sign bit together with the 8 exponent bits).
uint32x2_t vaexp01 = va01 >> ofs_exp;
uint32x2_t vaexp23 = va23 >> ofs_exp;
uint32x2_t vaexp45 = va45 >> ofs_exp;
uint32x2_t vaexp67 = va67 >> ofs_exp;

// Fraction moved up to bits 7..29, bit 31 cleared, bit 30 forced to one.
uint32x2_t vafrac01 = ((va01 << 7) & mask_sign) | mask_lead;
uint32x2_t vafrac23 = ((va23 << 7) & mask_sign) | mask_lead;
uint32x2_t vafrac45 = ((va45 << 7) & mask_sign) | mask_lead;
uint32x2_t vafrac67 = ((va67 << 7) & mask_sign) | mask_lead;

// Widen the bytes of b to 16-bit lanes in eswap order (0,2,1,3 / 4,6,5,7).
int16x4_t vb[2]; // fake signed for signed multiply
ndsrvp_u8_u16_eswap8(b, (ushort*)vb);

// Multiply each 32-bit fraction by a 16-bit factor; the b/t variants
// presumably pick the bottom/top halfword of each 32-bit lane of vb — TODO confirm.
vafrac01 = (uint32x2_t)__nds__kmmwb2_u((long)vafrac01, (unsigned long)vb[0]);
vafrac23 = (uint32x2_t)__nds__kmmwt2_u((long)vafrac23, (unsigned long)vb[0]);
vafrac45 = (uint32x2_t)__nds__kmmwb2_u((long)vafrac45, (unsigned long)vb[1]);
vafrac67 = (uint32x2_t)__nds__kmmwt2_u((long)vafrac67, (unsigned long)vb[1]);

// Shift needed to bring the leading one of each product to bit 23
// (assuming v_clz32 counts leading zeros per 32-bit lane).
uint32x2_t vaclz01 = __nds__v_clz32(vafrac01) - 8;
uint32x2_t vaclz23 = __nds__v_clz32(vafrac23) - 8;
uint32x2_t vaclz45 = __nds__v_clz32(vafrac45) - 8;
uint32x2_t vaclz67 = __nds__v_clz32(vafrac67) - 8;

// Adjust the exponent by the amount the product grew.
vaexp01 += 8 - vaclz01;
vaexp23 += 8 - vaclz23;
vaexp45 += 8 - vaclz45;
vaexp67 += 8 - vaclz67;

// Normalise the product by the shift computed above.
vafrac01 <<= vaclz01;
vafrac23 <<= vaclz23;
vafrac45 <<= vaclz45;
vafrac67 <<= vaclz67;

// Reassemble: sign/exponent back at bit 23 and above, low 23 bits as fraction
// (the mask drops the leading one).
*(uint32x2_t*)c = (vaexp01 << ofs_exp) | (vafrac01 & mask_frac);
*(uint32x2_t*)(c + 2) = (vaexp23 << ofs_exp) | (vafrac23 & mask_frac);
*(uint32x2_t*)(c + 4) = (vaexp45 << ofs_exp) | (vafrac45 & mask_frac);
*(uint32x2_t*)(c + 6) = (vaexp67 << ofs_exp) | (vafrac67 & mask_frac);
}

// saturate

// Generic int overload: converts v to _Tp with a plain cast; no clamping is
// done in this overload.
template<typename _Tp>
static inline _Tp saturate_cast(int v)
{
    return static_cast<_Tp>(v);
}
Expand All @@ -94,6 +234,26 @@ template<> inline short saturate_cast<short>(double v) { return saturate_cas
// int specialisations for floating-point input: round with lrintf / lrint
// and narrow the result to int.
template<> inline int saturate_cast<int>(float v)
{
    return static_cast<int>(lrintf(v));
}
template<> inline int saturate_cast<int>(double v)
{
    return static_cast<int>(lrint(v));
}

inline double cast_ptr_to_double(const uchar* v, int depth) {
switch (depth) {
case CV_8U: return (double)*(uchar*)v;
case CV_8S: return (double)*(char*)v;
case CV_16U: return (double)*(ushort*)v;
case CV_16S: return (double)*(short*)v;
case CV_32S: return (double)*(int*)v;
case CV_32F: return (double)*(float*)v;
case CV_64F: return (double)*(double*)v;
case CV_16F: return (double)*(float*)v;
default: return 0;
}
}

template <typename _Tp>
inline _Tp data_at(const uchar* data, int step, int y, int x, int cn)
{
return ((_Tp*)(data + y * step))[x * cn];
}

// align

inline long align(size_t v, int n)
Expand Down
Loading