Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 0 additions & 44 deletions modules/imgproc/src/filter.simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ Ptr<BaseFilter> getLinearFilter(

#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY

typedef int CV_DECL_ALIGNED(1) unaligned_int;
#define VEC_ALIGN CV_MALLOC_ALIGN

int FilterEngine__start(FilterEngine& this_, const Size &_wholeSize, const Size &sz, const Point &ofs)
Expand Down Expand Up @@ -1083,21 +1082,6 @@ struct SymmColumnVec_32s8u
v_pack_u_store(dst + i, v_pack(v_round(s0), v_round(s1)));
i += VTraits<v_uint16>::vlanes();
}
#if CV_SIMD_WIDTH > 16
while( i <= width - 4 /*VTraits<v_int32x4>::vlanes()*/ )
#else
if( i <= width - VTraits<v_int32>::vlanes() )
#endif
{
v_float32 s0 = v_muladd(v_cvt_f32(vx_load(src[0] + i)), vx_setall_f32(ky[0]), vx_setall_f32(delta));
s0 = v_muladd(v_cvt_f32(v_add(vx_load(src[1] + i), vx_load(src[-1] + i))), vx_setall_f32(ky[1]), s0);
for( k = 2; k <= ksize2; k++ )
s0 = v_muladd(v_cvt_f32(v_add(vx_load(src[k] + i), vx_load(src[-k] + i))), vx_setall_f32(ky[k]), s0);
v_int32 s32 = v_round(s0);
v_int16 s16 = v_pack(s32, s32);
*(unaligned_int*)(dst + i) = v_get0(v_reinterpret_as_s32(v_pack_u(s16, s16)));
i += 4 /*v_int32x4::nlanes*/ ;
}
}
else
{
Expand Down Expand Up @@ -1139,20 +1123,6 @@ struct SymmColumnVec_32s8u
v_pack_u_store(dst + i, v_pack(v_round(s0), v_round(s1)));
i += VTraits<v_uint16>::vlanes();
}
#if CV_SIMD_WIDTH > 16
while( i <= width - 4 /*VTraits<v_int32x4>::vlanes()*/ )
#else
if( i <= width - VTraits<v_int32>::vlanes() )
#endif
{
v_float32 s0 = v_muladd(v_cvt_f32(v_sub(vx_load(src[1] + i), vx_load(src[-1] + i))), vx_setall_f32(ky[1]), vx_setall_f32(delta));
for (k = 2; k <= ksize2; k++)
s0 = v_muladd(v_cvt_f32(v_sub(vx_load(src[k] + i), vx_load(src[-k] + i))), vx_setall_f32(ky[k]), s0);
v_int32 s32 = v_round(s0);
v_int16 s16 = v_pack(s32, s32);
*(unaligned_int*)(dst + i) = v_get0(v_reinterpret_as_s32(v_pack_u(s16, s16)));
i += 4 /*v_int32x4::nlanes*/ ;
}
}
return i;
}
Expand Down Expand Up @@ -2236,20 +2206,6 @@ struct FilterVec_8u
v_pack_u_store(dst + i, v_pack(v_round(s0), v_round(s1)));
i += VTraits<v_uint16>::vlanes();
}
#if CV_SIMD_WIDTH > 16
while( i <= width - 4 /*VTraits<v_int32x4>::vlanes()*/ )
#else
if( i <= width - VTraits<v_int32>::vlanes() )
#endif
{
v_float32 s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src[0] + i))), vx_setall_f32(kf[0]), vx_setall_f32(delta));
for( k = 1; k < nz; k++ )
s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src[k] + i))), vx_setall_f32(kf[k]), s0);
v_int32 s32 = v_round(s0);
v_int16 s16 = v_pack(s32, s32);
*(unaligned_int*)(dst + i) = v_get0(v_reinterpret_as_s32(v_pack_u(s16, s16)));
i += 4 /*VTraits<v_int32x4>::vlanes()*/ ;
}
return i;
}

Expand Down
2 changes: 1 addition & 1 deletion modules/imgproc/src/fixedpoint.inl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ class ufixedpoint16
static CV_ALWAYS_INLINE ufixedpoint16 one() { return ufixedpoint16((uint16_t)(1 << fixedShift)); }

static CV_ALWAYS_INLINE ufixedpoint16 fromRaw(uint16_t v) { return ufixedpoint16(v); }
CV_ALWAYS_INLINE uint16_t raw() { return val; }
CV_ALWAYS_INLINE uint16_t raw() const { return val; }
};

}
Expand Down
13 changes: 11 additions & 2 deletions modules/imgproc/src/smooth.simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1634,6 +1634,15 @@ void vlineSmooth(const FT* const * src, const FT* m, int n, ET* dst, int len)
dst[i] = val;
}
}

// Loads two consecutive ufixedpoint16 values from `mem` and returns them
// packed into a single 32-bit word, preserving their in-memory byte order.
// This replaces the previous direct `*(uint32_t*)m` dereference (and the
// CV_DECL_ALIGNED(1) unaligned_int workaround removed elsewhere in this
// change), which cast a ufixedpoint16* to an unrelated pointer type.
// Going through raw() keeps the access within the type's own accessors.
// NOTE(review): writing v16 then reading v32 is classic union type punning —
// well-defined in C, technically unspecified in ISO C++, but supported as an
// extension by all major compilers (this matches existing OpenCV practice).
inline uint32_t read_pair_as_u32(const ufixedpoint16 * mem)
{
    union Cv32sufX2 { uint32_t v32; int16_t v16[2]; } res;
    res.v16[0] = mem->raw();        // element at the lower address
    res.v16[1] = (mem + 1)->raw();  // element at the higher address
    return res.v32;
}

template <>
void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int n, uint8_t* dst, int len)
{
Expand All @@ -1655,7 +1664,7 @@ void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const
v_int16 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
v_int16 v_tmp0, v_tmp1;

v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)m)));
v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u32(read_pair_as_u32(m)));

const int16_t* src0 = (const int16_t*)src[0] + i;
const int16_t* src1 = (const int16_t*)src[1] + i;
Expand Down Expand Up @@ -1683,7 +1692,7 @@ void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const
int j = 2;
for (; j < n - 1; j+=2)
{
v_mul = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)(m+j))));
v_mul = v_reinterpret_as_s16(vx_setall_u32(read_pair_as_u32(m + j)));

const int16_t* srcj0 = (const int16_t*)src[j] + i;
const int16_t* srcj1 = (const int16_t*)src[j + 1] + i;
Expand Down