Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
edacd91a27
@ -534,7 +534,7 @@ void cv::fisheye::undistortImage(InputArray distorted, OutputArray undistorted,
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
Size size = new_size.area() != 0 ? new_size : distorted.size();
|
||||
Size size = !new_size.empty() ? new_size : distorted.size();
|
||||
|
||||
cv::Mat map1, map2;
|
||||
fisheye::initUndistortRectifyMap(K, D, cv::Matx33d::eye(), Knew, size, CV_16SC2, map1, map2 );
|
||||
@ -601,7 +601,7 @@ void cv::fisheye::estimateNewCameraMatrixForUndistortRectify(InputArray K, Input
|
||||
new_f[1] /= aspect_ratio;
|
||||
new_c[1] /= aspect_ratio;
|
||||
|
||||
if (new_size.area() > 0)
|
||||
if (!new_size.empty())
|
||||
{
|
||||
double rx = new_size.width /(double)image_size.width;
|
||||
double ry = new_size.height/(double)image_size.height;
|
||||
|
||||
@ -1226,8 +1226,8 @@ public:
|
||||
// Run PrefilterInvoker over Range(0, 2); the invoker is handed the address of `params`.
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);
|
||||
|
||||
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
|
||||
validDisparityRect = getValidDisparityROI(R1.area() > 0 ? R1 : validDisparityRect,
|
||||
R2.area() > 0 ? R2 : validDisparityRect,
|
||||
validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect,
|
||||
!R2.empty() ? R2 : validDisparityRect,
|
||||
params.minDisparity, params.numDisparities,
|
||||
params.SADWindowSize);
|
||||
|
||||
|
||||
@ -139,8 +139,14 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
|
||||
# undef CV_FP16
|
||||
#endif
|
||||
|
||||
#if CV_SSE2 || CV_NEON || CV_VSX
|
||||
#define CV__SIMD_FORWARD 128
|
||||
#include "opencv2/core/hal/intrin_forward.hpp"
|
||||
#endif
|
||||
|
||||
#if CV_SSE2
|
||||
|
||||
#include "opencv2/core/hal/intrin_sse_em.hpp"
|
||||
#include "opencv2/core/hal/intrin_sse.hpp"
|
||||
|
||||
#elif CV_NEON
|
||||
@ -168,6 +174,8 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
|
||||
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
|
||||
#if CV_AVX2
|
||||
|
||||
#define CV__SIMD_FORWARD 256
|
||||
#include "opencv2/core/hal/intrin_forward.hpp"
|
||||
#include "opencv2/core/hal/intrin_avx.hpp"
|
||||
|
||||
#endif
|
||||
|
||||
@ -82,6 +82,14 @@ inline __m128 _v256_extract_low(const __m256& v)
|
||||
// Returns the result of _mm256_castpd256_pd128 applied to `v`
// (the 128-bit double-precision view used as the "low" part of the register).
inline __m128d _v256_extract_low(const __m256d& v)
{
    return _mm256_castpd256_pd128(v);
}
|
||||
|
||||
// Packs the unsigned 32-bit lanes of `a` and `b` into 16-bit lanes.
// Every lane is first limited to 65535 with an unsigned minimum, and the
// limited values are then handed to _mm256_packus_epi32.
inline __m256i _v256_packs_epu32(const __m256i& a, const __m256i& b)
{
    const __m256i u16_max = _mm256_set1_epi32(65535);
    return _mm256_packus_epi32(_mm256_min_epu32(a, u16_max),
                               _mm256_min_epu32(b, u16_max));
}
|
||||
|
||||
///////// Types ////////////
|
||||
|
||||
struct v_uint8x32
|
||||
@ -626,10 +634,8 @@ OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int8x32, _mm256_adds_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int8x32, _mm256_subs_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint16x16, _mm256_adds_epu16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint16x16, _mm256_subs_epu16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_uint16x16, _mm256_mullo_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int16x16, _mm256_adds_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int16x16, _mm256_subs_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_int16x16, _mm256_mullo_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint32x8, _mm256_add_epi32)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint32x8, _mm256_sub_epi32)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_uint32x8, _mm256_mullo_epi32)
|
||||
@ -650,13 +656,103 @@ OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_float64x4, _mm256_sub_pd)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_float64x4, _mm256_mul_pd)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_OP(/, v_float64x4, _mm256_div_pd)
|
||||
|
||||
// saturating multiply 8-bit, 16-bit
|
||||
// Saturating multiply of unsigned 8-bit lanes.
// The operands are multiplied into unsigned 16-bit products with v_mul_expand
// and narrowed back to 8 bits.
// v_pack_u takes *signed* 16-bit input, so a product of 0x8000 or more would be
// read as negative and packed to 0 instead of 255. The products are therefore
// clamped to 255 with an unsigned minimum before they are packed.
inline v_uint8x32 operator * (const v_uint8x32& a, const v_uint8x32& b)
{
    v_uint16x16 c, d;
    v_mul_expand(a, b, c, d);

    const __m256i max8 = _mm256_set1_epi16(255);
    const v_int16x16 c_clamped(_mm256_min_epu16(c.val, max8));
    const v_int16x16 d_clamped(_mm256_min_epu16(d.val, max8));
    return v_pack_u(c_clamped, d_clamped);
}
|
||||
// Multiply of signed 8-bit lanes: the operands are multiplied into 16-bit
// products by v_mul_expand and narrowed back to 8 bits by v_pack.
inline v_int8x32 operator * (const v_int8x32& a, const v_int8x32& b)
{
    v_int16x16 prod_lo, prod_hi;
    v_mul_expand(a, b, prod_lo, prod_hi);
    return v_pack(prod_lo, prod_hi);
}
|
||||
// Multiply of unsigned 16-bit lanes with the result narrowed by _v256_packs_epu32.
// The low and high 16 bits of every product are computed separately, interleaved
// into 32-bit values, and then packed back to 16 bits.
inline v_uint16x16 operator * (const v_uint16x16& a, const v_uint16x16& b)
{
    const __m256i lo16 = _mm256_mullo_epi16(a.val, b.val);
    const __m256i hi16 = _mm256_mulhi_epu16(a.val, b.val);
    return v_uint16x16(_v256_packs_epu32(_mm256_unpacklo_epi16(lo16, hi16),
                                         _mm256_unpackhi_epi16(lo16, hi16)));
}
|
||||
// Multiply of signed 16-bit lanes with the result narrowed by _mm256_packs_epi32.
// The low and high 16 bits of every product are computed separately, interleaved
// into 32-bit values, and then packed back to 16 bits.
inline v_int16x16 operator * (const v_int16x16& a, const v_int16x16& b)
{
    const __m256i lo16 = _mm256_mullo_epi16(a.val, b.val);
    const __m256i hi16 = _mm256_mulhi_epi16(a.val, b.val);
    return v_int16x16(_mm256_packs_epi32(_mm256_unpacklo_epi16(lo16, hi16),
                                         _mm256_unpackhi_epi16(lo16, hi16)));
}
|
||||
inline v_uint8x32& operator *= (v_uint8x32& a, const v_uint8x32& b)
|
||||
{ a = a * b; return a; }
|
||||
inline v_int8x32& operator *= (v_int8x32& a, const v_int8x32& b)
|
||||
{ a = a * b; return a; }
|
||||
inline v_uint16x16& operator *= (v_uint16x16& a, const v_uint16x16& b)
|
||||
{ a = a * b; return a; }
|
||||
inline v_int16x16& operator *= (v_int16x16& a, const v_int16x16& b)
|
||||
{ a = a * b; return a; }
|
||||
|
||||
/** Non-saturating arithmetics **/
|
||||
// Defines `func(a, b)` for the vector type _Tpvec as a thin wrapper: `intrin` is
// applied to the `.val` members of both operands and the result is wrapped in _Tpvec.
#define OPENCV_HAL_IMPL_AVX_BIN_FUNC(func, _Tpvec, intrin) \
|
||||
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(intrin(a.val, b.val)); }
|
||||
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint8x32, _mm256_add_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int8x32, _mm256_add_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint16x16, _mm256_add_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int16x16, _mm256_add_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint8x32, _mm256_sub_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int8x32, _mm256_sub_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint16x16, _mm256_sub_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int16x16, _mm256_sub_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_uint16x16, _mm256_mullo_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_int16x16, _mm256_mullo_epi16)
|
||||
|
||||
// Non-saturating multiply of 8-bit lanes, assembled from 16-bit multiplies.
// Even bytes come from multiplying the 16-bit lanes as they are; odd bytes come
// from multiplying the lanes shifted right by 8 and shifting that product back
// left by 8. The two results are merged with a per-byte blend mask.
inline v_uint8x32 v_mul_wrap(const v_uint8x32& a, const v_uint8x32& b)
{
    const __m256i even = _mm256_mullo_epi16(a.val, b.val);

    const __m256i a_odd = _mm256_srai_epi16(a.val, 8);
    const __m256i b_odd = _mm256_srai_epi16(b.val, 8);
    const __m256i odd = _mm256_slli_epi16(_mm256_mullo_epi16(a_odd, b_odd), 8);

    const __m256i odd_mask = _mm256_set1_epi32(0xFF00FF00);
    return v_uint8x32(_mm256_blendv_epi8(even, odd, odd_mask));
}
|
||||
// Signed 8-bit non-saturating multiply: both operands are reinterpreted as
// unsigned, multiplied with the unsigned v_mul_wrap, and the result is
// reinterpreted back as signed.
inline v_int8x32 v_mul_wrap(const v_int8x32& a, const v_int8x32& b)
{
    const v_uint8x32 ua = v_reinterpret_as_u8(a);
    const v_uint8x32 ub = v_reinterpret_as_u8(b);
    return v_reinterpret_as_s8(v_mul_wrap(ua, ub));
}
|
||||
|
||||
// Multiply and expand
|
||||
inline void v_mul_expand(const v_uint8x32& a, const v_uint8x32& b,
|
||||
v_uint16x16& c, v_uint16x16& d)
|
||||
{
|
||||
v_uint16x16 a0, a1, b0, b1;
|
||||
v_expand(a, a0, a1);
|
||||
v_expand(b, b0, b1);
|
||||
c = v_mul_wrap(a0, b0);
|
||||
d = v_mul_wrap(a1, b1);
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_int8x32& a, const v_int8x32& b,
|
||||
v_int16x16& c, v_int16x16& d)
|
||||
{
|
||||
v_int16x16 a0, a1, b0, b1;
|
||||
v_expand(a, a0, a1);
|
||||
v_expand(b, b0, b1);
|
||||
c = v_mul_wrap(a0, b0);
|
||||
d = v_mul_wrap(a1, b1);
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_int16x16& a, const v_int16x16& b,
|
||||
v_int32x8& c, v_int32x8& d)
|
||||
{
|
||||
v_int16x16 vhi = v_int16x16(_mm256_mulhi_epi16(a.val, b.val));
|
||||
|
||||
v_int16x16 v0, v1;
|
||||
v_zip(a * b, vhi, v0, v1);
|
||||
v_zip(v_mul_wrap(a, b), vhi, v0, v1);
|
||||
|
||||
c = v_reinterpret_as_s32(v0);
|
||||
d = v_reinterpret_as_s32(v1);
|
||||
@ -668,7 +764,7 @@ inline void v_mul_expand(const v_uint16x16& a, const v_uint16x16& b,
|
||||
v_uint16x16 vhi = v_uint16x16(_mm256_mulhi_epu16(a.val, b.val));
|
||||
|
||||
v_uint16x16 v0, v1;
|
||||
v_zip(a * b, vhi, v0, v1);
|
||||
v_zip(v_mul_wrap(a, b), vhi, v0, v1);
|
||||
|
||||
c = v_reinterpret_as_u32(v0);
|
||||
d = v_reinterpret_as_u32(v1);
|
||||
@ -685,20 +781,6 @@ inline void v_mul_expand(const v_uint32x8& a, const v_uint32x8& b,
|
||||
inline v_int16x16 v_mul_hi(const v_int16x16& a, const v_int16x16& b) { return v_int16x16(_mm256_mulhi_epi16(a.val, b.val)); }
|
||||
inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b) { return v_uint16x16(_mm256_mulhi_epu16(a.val, b.val)); }
|
||||
|
||||
/** Non-saturating arithmetics **/
|
||||
#define OPENCV_HAL_IMPL_AVX_BIN_FUNC(func, _Tpvec, intrin) \
|
||||
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(intrin(a.val, b.val)); }
|
||||
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint8x32, _mm256_add_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int8x32, _mm256_add_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint16x16, _mm256_add_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int16x16, _mm256_add_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint8x32, _mm256_sub_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int8x32, _mm256_sub_epi8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint16x16, _mm256_sub_epi16)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int16x16, _mm256_sub_epi16)
|
||||
|
||||
/** Bitwise shifts **/
|
||||
#define OPENCV_HAL_IMPL_AVX_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai) \
|
||||
inline _Tpuvec operator << (const _Tpuvec& a, int imm) \
|
||||
@ -1385,6 +1467,10 @@ OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_float32x8, ps, _mm256_castps_si256, _mm256_ca
|
||||
b0.val = intrin(_v256_extract_low(a.val)); \
|
||||
b1.val = intrin(_v256_extract_high(a.val)); \
|
||||
} \
|
||||
inline _Tpwvec v_expand_low(const _Tpvec& a) \
|
||||
{ return _Tpwvec(intrin(_v256_extract_low(a.val))); } \
|
||||
inline _Tpwvec v_expand_high(const _Tpvec& a) \
|
||||
{ return _Tpwvec(intrin(_v256_extract_high(a.val))); } \
|
||||
inline _Tpwvec v256_load_expand(const _Tp* ptr) \
|
||||
{ \
|
||||
__m128i a = _mm_loadu_si128((const __m128i*)ptr); \
|
||||
@ -1430,7 +1516,12 @@ inline void v_pack_store(schar* ptr, const v_int16x16& a)
|
||||
{ v_store_low(ptr, v_pack(a, a)); }
|
||||
|
||||
inline void v_pack_store(uchar* ptr, const v_uint16x16& a)
|
||||
{ v_store_low(ptr, v_pack(a, a)); }
|
||||
{
|
||||
const __m256i m = _mm256_set1_epi16(255);
|
||||
__m256i am = _mm256_min_epu16(a.val, m);
|
||||
am = _v256_shuffle_odd_64(_mm256_packus_epi16(am, am));
|
||||
v_store_low(ptr, v_uint8x32(am));
|
||||
}
|
||||
|
||||
// Packs `a` with itself via v_pack_u and stores the low part of the result at `ptr`.
inline void v_pack_u_store(uchar* ptr, const v_int16x16& a)
{
    const v_uint8x32 packed = v_pack_u(a, a);
    v_store_low(ptr, packed);
}
|
||||
@ -1484,16 +1575,21 @@ inline v_int16x16 v_pack(const v_int32x8& a, const v_int32x8& b)
|
||||
{ return v_int16x16(_v256_shuffle_odd_64(_mm256_packs_epi32(a.val, b.val))); }
|
||||
|
||||
inline v_uint16x16 v_pack(const v_uint32x8& a, const v_uint32x8& b)
|
||||
{ return v_uint16x16(_v256_shuffle_odd_64(_mm256_packus_epi32(a.val, b.val))); }
|
||||
{ return v_uint16x16(_v256_shuffle_odd_64(_v256_packs_epu32(a.val, b.val))); }
|
||||
|
||||
inline v_uint16x16 v_pack_u(const v_int32x8& a, const v_int32x8& b)
|
||||
{ return v_pack(v_reinterpret_as_u32(a), v_reinterpret_as_u32(b)); }
|
||||
{ return v_uint16x16(_v256_shuffle_odd_64(_mm256_packus_epi32(a.val, b.val))); }
|
||||
|
||||
inline void v_pack_store(short* ptr, const v_int32x8& a)
|
||||
{ v_store_low(ptr, v_pack(a, a)); }
|
||||
|
||||
inline void v_pack_store(ushort* ptr, const v_uint32x8& a)
|
||||
{ v_store_low(ptr, v_pack(a, a)); }
|
||||
{
|
||||
const __m256i m = _mm256_set1_epi32(65535);
|
||||
__m256i am = _mm256_min_epu32(a.val, m);
|
||||
am = _v256_shuffle_odd_64(_mm256_packus_epi32(am, am));
|
||||
v_store_low(ptr, v_uint16x16(am));
|
||||
}
|
||||
|
||||
// Packs `a` with itself via v_pack_u and stores the low part of the result at `ptr`.
inline void v_pack_u_store(ushort* ptr, const v_int32x8& a)
{
    const v_uint16x16 packed = v_pack_u(a, a);
    v_store_low(ptr, packed);
}
|
||||
|
||||
@ -108,7 +108,7 @@ block and to save contents of the register to memory block.
|
||||
These operations allow reordering or recombining elements in one or multiple vectors.
|
||||
|
||||
- Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
|
||||
- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand
|
||||
- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand, @ref v_expand_low, @ref v_expand_high
|
||||
- Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u,
|
||||
@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
|
||||
- Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
|
||||
@ -185,11 +185,14 @@ Regular integers:
|
||||
|load, store | x | x | x | x | x | x |
|
||||
|interleave | x | x | x | x | x | x |
|
||||
|expand | x | x | x | x | x | x |
|
||||
|expand_low | x | x | x | x | x | x |
|
||||
|expand_high | x | x | x | x | x | x |
|
||||
|expand_q | x | x | | | | |
|
||||
|add, sub | x | x | x | x | x | x |
|
||||
|add_wrap, sub_wrap | x | x | x | x | | |
|
||||
|mul | | | x | x | x | x |
|
||||
|mul_expand | | | x | x | x | |
|
||||
|mul_wrap | x | x | x | x | | |
|
||||
|mul | x | x | x | x | x | x |
|
||||
|mul_expand | x | x | x | x | x | |
|
||||
|compare | x | x | x | x | x | x |
|
||||
|shift | | | x | x | x | x |
|
||||
|dotprod | | | | x | | |
|
||||
@ -680,7 +683,7 @@ OPENCV_HAL_IMPL_CMP_OP(!=)
|
||||
|
||||
//! @brief Helper macro
|
||||
//! @ingroup core_hal_intrin_impl
|
||||
#define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \
|
||||
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
|
||||
template<typename _Tp, int n> \
|
||||
inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
|
||||
{ \
|
||||
@ -694,12 +697,17 @@ inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
|
||||
/** @brief Add values without saturation
|
||||
|
||||
For 8- and 16-bit integer values. */
|
||||
OPENCV_HAL_IMPL_ADD_SUB_OP(v_add_wrap, +, (_Tp), _Tp)
|
||||
OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)
|
||||
|
||||
/** @brief Subtract values without saturation
|
||||
|
||||
For 8- and 16-bit integer values. */
|
||||
OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap, -, (_Tp), _Tp)
|
||||
OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)
|
||||
|
||||
/** @brief Multiply values without saturation
|
||||
|
||||
For 8- and 16-bit integer values. */
|
||||
OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)
|
||||
|
||||
//! @cond IGNORED
|
||||
template<typename T> inline T _absdiff(T a, T b)
|
||||
@ -1106,6 +1114,44 @@ template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Expand lower values to the wider pack type
|
||||
|
||||
Same as cv::v_expand, but return lower half of the vector.
|
||||
|
||||
Scheme:
|
||||
@code
|
||||
int32x4 int64x2
|
||||
{A B C D} ==> {A B}
|
||||
@endcode */
|
||||
template<typename _Tp, int n>
|
||||
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
|
||||
v_expand_low(const v_reg<_Tp, n>& a)
|
||||
{
|
||||
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
|
||||
for( int i = 0; i < (n/2); i++ )
|
||||
b.s[i] = a.s[i];
|
||||
return b;
|
||||
}
|
||||
|
||||
/** @brief Expand higher values to the wider pack type
|
||||
|
||||
Same as cv::v_expand_low, but expand higher half of the vector instead.
|
||||
|
||||
Scheme:
|
||||
@code
|
||||
int32x4 int64x2
|
||||
{A B C D} ==> {C D}
|
||||
@endcode */
|
||||
template<typename _Tp, int n>
|
||||
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
|
||||
v_expand_high(const v_reg<_Tp, n>& a)
|
||||
{
|
||||
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
|
||||
for( int i = 0; i < (n/2); i++ )
|
||||
b.s[i] = a.s[i+(n/2)];
|
||||
return b;
|
||||
}
|
||||
|
||||
//! @cond IGNORED
|
||||
template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
|
||||
v_reinterpret_as_int(const v_reg<_Tp, n>& a)
|
||||
|
||||
158
modules/core/include/opencv2/core/hal/intrin_forward.hpp
Normal file
158
modules/core/include/opencv2/core/hal/intrin_forward.hpp
Normal file
@ -0,0 +1,158 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
// CV__SIMD_FORWARD must be defined by the including file before this header is
// included. Its value selects which set of vector types is forward-declared
// below (256 -> the 256-bit types, 512 -> not implemented, anything else -> the
// 128-bit types). The macro is #undef'd again at the end of this file.
#ifndef CV__SIMD_FORWARD
|
||||
#error "Need to pre-define forward width"
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
|
||||
/** Types **/
|
||||
#if CV__SIMD_FORWARD == 512
|
||||
// [todo] 512
|
||||
#error "AVX512 Not implemented yet"
|
||||
#elif CV__SIMD_FORWARD == 256
|
||||
// 256
|
||||
#define __CV_VX(fun) v256_##fun
|
||||
#define __CV_V_UINT8 v_uint8x32
|
||||
#define __CV_V_INT8 v_int8x32
|
||||
#define __CV_V_UINT16 v_uint16x16
|
||||
#define __CV_V_INT16 v_int16x16
|
||||
#define __CV_V_UINT32 v_uint32x8
|
||||
#define __CV_V_INT32 v_int32x8
|
||||
#define __CV_V_UINT64 v_uint64x4
|
||||
#define __CV_V_INT64 v_int64x4
|
||||
#define __CV_V_FLOAT32 v_float32x8
|
||||
#define __CV_V_FLOAT64 v_float64x4
|
||||
struct v_uint8x32;
|
||||
struct v_int8x32;
|
||||
struct v_uint16x16;
|
||||
struct v_int16x16;
|
||||
struct v_uint32x8;
|
||||
struct v_int32x8;
|
||||
struct v_uint64x4;
|
||||
struct v_int64x4;
|
||||
struct v_float32x8;
|
||||
struct v_float64x4;
|
||||
#else
|
||||
// 128
|
||||
#define __CV_VX(fun) v_##fun
|
||||
#define __CV_V_UINT8 v_uint8x16
|
||||
#define __CV_V_INT8 v_int8x16
|
||||
#define __CV_V_UINT16 v_uint16x8
|
||||
#define __CV_V_INT16 v_int16x8
|
||||
#define __CV_V_UINT32 v_uint32x4
|
||||
#define __CV_V_INT32 v_int32x4
|
||||
#define __CV_V_UINT64 v_uint64x2
|
||||
#define __CV_V_INT64 v_int64x2
|
||||
#define __CV_V_FLOAT32 v_float32x4
|
||||
#define __CV_V_FLOAT64 v_float64x2
|
||||
struct v_uint8x16;
|
||||
struct v_int8x16;
|
||||
struct v_uint16x8;
|
||||
struct v_int16x8;
|
||||
struct v_uint32x4;
|
||||
struct v_int32x4;
|
||||
struct v_uint64x2;
|
||||
struct v_int64x2;
|
||||
struct v_float32x4;
|
||||
struct v_float64x2;
|
||||
#endif
|
||||
|
||||
/** Value reordering **/
|
||||
|
||||
// Expansion
|
||||
void v_expand(const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
|
||||
void v_expand(const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
|
||||
void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
|
||||
void v_expand(const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
|
||||
void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
|
||||
void v_expand(const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
|
||||
// Low Expansion
|
||||
__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&);
|
||||
__CV_V_INT16 v_expand_low(const __CV_V_INT8&);
|
||||
__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&);
|
||||
__CV_V_INT32 v_expand_low(const __CV_V_INT16&);
|
||||
__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&);
|
||||
__CV_V_INT64 v_expand_low(const __CV_V_INT32&);
|
||||
// High Expansion
|
||||
__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&);
|
||||
__CV_V_INT16 v_expand_high(const __CV_V_INT8&);
|
||||
__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&);
|
||||
__CV_V_INT32 v_expand_high(const __CV_V_INT16&);
|
||||
__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&);
|
||||
__CV_V_INT64 v_expand_high(const __CV_V_INT32&);
|
||||
// Load & Low Expansion
|
||||
__CV_V_UINT16 __CV_VX(load_expand)(const uchar*);
|
||||
__CV_V_INT16 __CV_VX(load_expand)(const schar*);
|
||||
__CV_V_UINT32 __CV_VX(load_expand)(const ushort*);
|
||||
__CV_V_INT32 __CV_VX(load_expand)(const short*);
|
||||
__CV_V_UINT64 __CV_VX(load_expand)(const uint*);
|
||||
__CV_V_INT64 __CV_VX(load_expand)(const int*);
|
||||
// Load lower 8-bit and expand into 32-bit
|
||||
__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*);
|
||||
__CV_V_INT32 __CV_VX(load_expand_q)(const schar*);
|
||||
|
||||
// Saturating Pack
|
||||
__CV_V_UINT8 v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&);
|
||||
__CV_V_INT8 v_pack(const __CV_V_INT16&, const __CV_V_INT16&);
|
||||
__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&);
|
||||
__CV_V_INT16 v_pack(const __CV_V_INT32&, const __CV_V_INT32&);
|
||||
// Non-saturating Pack
|
||||
__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&);
|
||||
__CV_V_INT32 v_pack(const __CV_V_INT64&, const __CV_V_INT64&);
|
||||
// Pack signed integers with unsigned saturation
|
||||
__CV_V_UINT8 v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&);
|
||||
__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&);
|
||||
|
||||
/** Arithmetic, bitwise and comparison operations **/
|
||||
|
||||
// Non-saturating multiply
|
||||
#if CV_VSX
|
||||
template<typename Tvec>
|
||||
Tvec v_mul_wrap(const Tvec& a, const Tvec& b);
|
||||
#else
|
||||
__CV_V_UINT8 v_mul_wrap(const __CV_V_UINT8&, const __CV_V_UINT8&);
|
||||
__CV_V_INT8 v_mul_wrap(const __CV_V_INT8&, const __CV_V_INT8&);
|
||||
__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&);
|
||||
__CV_V_INT16 v_mul_wrap(const __CV_V_INT16&, const __CV_V_INT16&);
|
||||
#endif
|
||||
|
||||
// Multiply and expand
|
||||
#if CV_VSX
|
||||
template<typename Tvec, typename Twvec>
|
||||
void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d);
|
||||
#else
|
||||
void v_mul_expand(const __CV_V_UINT8&, const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
|
||||
void v_mul_expand(const __CV_V_INT8&, const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
|
||||
void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
|
||||
void v_mul_expand(const __CV_V_INT16&, const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
|
||||
void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
|
||||
void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
|
||||
#endif
|
||||
|
||||
/** Cleanup **/
|
||||
#undef CV__SIMD_FORWARD
|
||||
#undef __CV_VX
|
||||
#undef __CV_V_UINT8
|
||||
#undef __CV_V_INT8
|
||||
#undef __CV_V_UINT16
|
||||
#undef __CV_V_INT16
|
||||
#undef __CV_V_UINT32
|
||||
#undef __CV_V_INT32
|
||||
#undef __CV_V_UINT64
|
||||
#undef __CV_V_INT64
|
||||
#undef __CV_V_FLOAT32
|
||||
#undef __CV_V_FLOAT64
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
|
||||
//! @endcond
|
||||
|
||||
} // cv::
|
||||
@ -435,10 +435,8 @@ OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int16x8, vmulq_s16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32)
|
||||
@ -476,6 +474,37 @@ inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
|
||||
}
|
||||
#endif
|
||||
|
||||
// saturating multiply 8-bit, 16-bit
|
||||
// Generates `operator *` and `operator *=` for the vector type _Tpvec.
// The multiply widens the operands into two _Tpwvec vectors with v_mul_expand
// and narrows them back to _Tpvec with v_pack; `*=` is defined in terms of `*`.
#define OPENCV_HAL_IMPL_NEON_MUL_SAT(_Tpvec, _Tpwvec) \
|
||||
inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
_Tpwvec c, d; \
|
||||
v_mul_expand(a, b, c, d); \
|
||||
return v_pack(c, d); \
|
||||
} \
|
||||
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
|
||||
{ a = a * b; return a; }
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int8x16, v_int16x8)
|
||||
OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint8x16, v_uint16x8)
|
||||
OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int16x8, v_int32x4)
|
||||
OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint16x8, v_uint32x4)
|
||||
|
||||
// Multiply and expand
|
||||
inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b,
|
||||
v_int16x8& c, v_int16x8& d)
|
||||
{
|
||||
c.val = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val));
|
||||
d.val = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val));
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
|
||||
v_uint16x8& c, v_uint16x8& d)
|
||||
{
|
||||
c.val = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val));
|
||||
d.val = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val));
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
|
||||
v_int32x4& c, v_int32x4& d)
|
||||
{
|
||||
@ -714,6 +743,10 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_mul_wrap, vmulq_u8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_mul_wrap, vmulq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mul_wrap, vmulq_u16)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mul_wrap, vmulq_s16)
|
||||
|
||||
// TODO: absdiff for signed integers
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
|
||||
@ -1056,6 +1089,14 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
|
||||
b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
|
||||
b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \
|
||||
} \
|
||||
inline _Tpwvec v_expand_low(const _Tpvec& a) \
|
||||
{ \
|
||||
return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \
|
||||
} \
|
||||
inline _Tpwvec v_expand_high(const _Tpvec& a) \
|
||||
{ \
|
||||
return _Tpwvec(vmovl_##suffix(vget_high_##suffix(a.val))); \
|
||||
} \
|
||||
inline _Tpwvec v_load_expand(const _Tp* ptr) \
|
||||
{ \
|
||||
return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
|
||||
|
||||
@ -59,6 +59,8 @@ namespace cv
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
|
||||
///////// Types ////////////
|
||||
|
||||
struct v_uint8x16
|
||||
{
|
||||
typedef uchar lane_type;
|
||||
@ -436,13 +438,7 @@ inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b)
|
||||
}
|
||||
|
||||
inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b)
|
||||
{
|
||||
__m128i z = _mm_setzero_si128(), maxval32 = _mm_set1_epi32(65535), delta32 = _mm_set1_epi32(32768);
|
||||
__m128i a1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, a.val), maxval32, a.val), delta32);
|
||||
__m128i b1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, b.val), maxval32, b.val), delta32);
|
||||
__m128i r = _mm_packs_epi32(a1, b1);
|
||||
return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768)));
|
||||
}
|
||||
{ return v_uint16x8(_v128_packs_epu32(a.val, b.val)); }
|
||||
|
||||
inline void v_pack_store(ushort* ptr, const v_uint32x4& a)
|
||||
{
|
||||
@ -678,14 +674,14 @@ OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int8x16, _mm_adds_epi8)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int8x16, _mm_subs_epi8)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint16x8, _mm_adds_epu16)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint16x8, _mm_subs_epu16)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint16x8, _mm_mullo_epi16)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int16x8, _mm_adds_epi16)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int16x8, _mm_subs_epi16)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int16x8, _mm_mullo_epi16)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint32x4, _mm_add_epi32)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint32x4, _mm_sub_epi32)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint32x4, _v128_mullo_epi32)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int32x4, _mm_add_epi32)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int32x4, _mm_sub_epi32)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int32x4, _v128_mullo_epi32)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float32x4, _mm_add_ps)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float32x4, _mm_sub_ps)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float32x4, _mm_mul_ps)
|
||||
@ -699,35 +695,49 @@ OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint64x2, _mm_sub_epi64)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int64x2, _mm_add_epi64)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int64x2, _mm_sub_epi64)
|
||||
|
||||
inline v_uint32x4 operator * (const v_uint32x4& a, const v_uint32x4& b)
|
||||
// saturating multiply 8-bit, 16-bit
|
||||
// Generates `operator *` and `operator *=` for the vector type _Tpvec.
// The multiply widens the operands into two _Tpwvec vectors with v_mul_expand
// and narrows them back to _Tpvec with v_pack; `*=` is defined in terms of `*`.
#define OPENCV_HAL_IMPL_SSE_MUL_SAT(_Tpvec, _Tpwvec) \
|
||||
inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
_Tpwvec c, d; \
|
||||
v_mul_expand(a, b, c, d); \
|
||||
return v_pack(c, d); \
|
||||
} \
|
||||
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
|
||||
{ a = a * b; return a; }
|
||||
|
||||
OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int8x16, v_int16x8)
|
||||
OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint16x8, v_uint32x4)
|
||||
OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int16x8, v_int32x4)
|
||||
|
||||
// Saturating multiply of unsigned 8-bit lanes.
// The operands are multiplied into unsigned 16-bit products with v_mul_expand
// and narrowed with the unsigned v_pack, the same way OPENCV_HAL_IMPL_SSE_MUL_SAT
// handles the other lane types.
// The products must not be reinterpreted as signed and fed to v_pack_u: a
// product of 0x8000 or more would then be read as negative and packed to 0
// instead of 255.
inline v_uint8x16 operator * (const v_uint8x16& a, const v_uint8x16& b)
{
    v_uint16x8 c, d;
    v_mul_expand(a, b, c, d);
    return v_pack(c, d);
}
|
||||
inline v_int32x4 operator * (const v_int32x4& a, const v_int32x4& b)
|
||||
inline v_uint8x16& operator *= (v_uint8x16& a, const v_uint8x16& b)
|
||||
{ a = a * b; return a; }
|
||||
|
||||
// Multiply and expand
|
||||
inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
|
||||
v_uint16x8& c, v_uint16x8& d)
|
||||
{
|
||||
#if CV_SSE4_1
|
||||
return v_int32x4(_mm_mullo_epi32(a.val, b.val));
|
||||
#else
|
||||
__m128i c0 = _mm_mul_epu32(a.val, b.val);
|
||||
__m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
|
||||
__m128i d0 = _mm_unpacklo_epi32(c0, c1);
|
||||
__m128i d1 = _mm_unpackhi_epi32(c0, c1);
|
||||
return v_int32x4(_mm_unpacklo_epi64(d0, d1));
|
||||
#endif
|
||||
v_uint16x8 a0, a1, b0, b1;
|
||||
v_expand(a, a0, a1);
|
||||
v_expand(b, b0, b1);
|
||||
c = v_mul_wrap(a0, b0);
|
||||
d = v_mul_wrap(a1, b1);
|
||||
}
|
||||
inline v_uint32x4& operator *= (v_uint32x4& a, const v_uint32x4& b)
|
||||
|
||||
inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b,
|
||||
v_int16x8& c, v_int16x8& d)
|
||||
{
|
||||
a = a * b;
|
||||
return a;
|
||||
}
|
||||
inline v_int32x4& operator *= (v_int32x4& a, const v_int32x4& b)
|
||||
{
|
||||
a = a * b;
|
||||
return a;
|
||||
v_int16x8 a0, a1, b0, b1;
|
||||
v_expand(a, a0, a1);
|
||||
v_expand(b, b0, b1);
|
||||
c = v_mul_wrap(a0, b0);
|
||||
d = v_mul_wrap(a1, b1);
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
|
||||
@ -1018,6 +1028,22 @@ OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_sub_wrap, _mm_sub_epi8)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_sub_wrap, _mm_sub_epi8)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_sub_wrap, _mm_sub_epi16)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_sub_wrap, _mm_sub_epi16)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_mul_wrap, _mm_mullo_epi16)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_mul_wrap, _mm_mullo_epi16)
|
||||
|
||||
// Non-saturating multiply of 8-bit lanes, assembled from 16-bit multiplies.
// Even bytes come from multiplying the 16-bit lanes as they are; odd bytes come
// from multiplying the lanes shifted right by 8 and shifting that product back
// left by 8. The two results are merged with a per-byte blend mask.
inline v_uint8x16 v_mul_wrap(const v_uint8x16& a, const v_uint8x16& b)
{
    const __m128i even = _mm_mullo_epi16(a.val, b.val);

    const __m128i a_odd = _mm_srai_epi16(a.val, 8);
    const __m128i b_odd = _mm_srai_epi16(b.val, 8);
    const __m128i odd = _mm_slli_epi16(_mm_mullo_epi16(a_odd, b_odd), 8);

    const __m128i odd_mask = _mm_set1_epi32(0xFF00FF00);
    return v_uint8x16(_v128_blendv_epi8(even, odd, odd_mask));
}
|
||||
// Signed 8-bit wrap-around multiply. Reinterprets both operands as unsigned,
// delegates to the v_uint8x16 overload and reinterprets the result back;
// this relies on the low 8 bits of a product being the same for signed and
// unsigned operands.
inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b)
|
||||
{
|
||||
return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b)));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(_Tpuvec, _Tpsvec, bits, smask32) \
|
||||
inline _Tpuvec v_absdiff(const _Tpuvec& a, const _Tpuvec& b) \
|
||||
@ -1502,70 +1528,39 @@ OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps)
|
||||
OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd)
|
||||
#endif
|
||||
|
||||
#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpuvec, _Tpwuvec, _Tpu, _Tpsvec, _Tpwsvec, _Tps, suffix, wsuffix, shift) \
|
||||
inline void v_expand(const _Tpuvec& a, _Tpwuvec& b0, _Tpwuvec& b1) \
|
||||
{ \
|
||||
__m128i z = _mm_setzero_si128(); \
|
||||
b0.val = _mm_unpacklo_##suffix(a.val, z); \
|
||||
b1.val = _mm_unpackhi_##suffix(a.val, z); \
|
||||
} \
|
||||
inline _Tpwuvec v_load_expand(const _Tpu* ptr) \
|
||||
{ \
|
||||
__m128i z = _mm_setzero_si128(); \
|
||||
return _Tpwuvec(_mm_unpacklo_##suffix(_mm_loadl_epi64((const __m128i*)ptr), z)); \
|
||||
} \
|
||||
inline void v_expand(const _Tpsvec& a, _Tpwsvec& b0, _Tpwsvec& b1) \
|
||||
{ \
|
||||
b0.val = _mm_srai_##wsuffix(_mm_unpacklo_##suffix(a.val, a.val), shift); \
|
||||
b1.val = _mm_srai_##wsuffix(_mm_unpackhi_##suffix(a.val, a.val), shift); \
|
||||
} \
|
||||
inline _Tpwsvec v_load_expand(const _Tps* ptr) \
|
||||
{ \
|
||||
__m128i a = _mm_loadl_epi64((const __m128i*)ptr); \
|
||||
return _Tpwsvec(_mm_srai_##wsuffix(_mm_unpacklo_##suffix(a, a), shift)); \
|
||||
}
|
||||
/* Expand */
|
||||
// Generates the widening helpers for one (narrow, wide) vector type pair:
//   v_expand(a, b0, b1) - b0 = intrin(a), b1 = intrin##_high(a)
//   v_expand_low(a)     - intrin(a) only
//   v_expand_high(a)    - intrin##_high(a) only
//   v_load_expand(ptr)  - loads 64 bits from ptr (_mm_loadl_epi64) and widens them with intrin
// _Tpvec is the narrow vector type, _Tpwvec the wide one, _Tp the narrow scalar
// type; intrin is a conversion helper name for which an "intrin##_high"
// counterpart must also exist.
#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \
|
||||
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
|
||||
{ \
|
||||
b0.val = intrin(a.val); \
|
||||
b1.val = __CV_CAT(intrin, _high)(a.val); \
|
||||
} \
|
||||
inline _Tpwvec v_expand_low(const _Tpvec& a) \
|
||||
{ return _Tpwvec(intrin(a.val)); } \
|
||||
inline _Tpwvec v_expand_high(const _Tpvec& a) \
|
||||
{ return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); } \
|
||||
inline _Tpwvec v_load_expand(const _Tp* ptr) \
|
||||
{ \
|
||||
__m128i a = _mm_loadl_epi64((const __m128i*)ptr); \
|
||||
return _Tpwvec(intrin(a)); \
|
||||
}
|
||||
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, v_int8x16, v_int16x8, schar, epi8, epi16, 8)
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, v_int16x8, v_int32x4, short, epi16, epi32, 16)
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, _v128_cvtepu8_epi16)
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND(v_int8x16, v_int16x8, schar, _v128_cvtepi8_epi16)
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, _v128_cvtepu16_epi32)
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND(v_int16x8, v_int32x4, short, _v128_cvtepi16_epi32)
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND(v_uint32x4, v_uint64x2, unsigned, _v128_cvtepu32_epi64)
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND(v_int32x4, v_int64x2, int, _v128_cvtepi32_epi64)
|
||||
|
||||
inline void v_expand(const v_uint32x4& a, v_uint64x2& b0, v_uint64x2& b1)
|
||||
{
|
||||
__m128i z = _mm_setzero_si128();
|
||||
b0.val = _mm_unpacklo_epi32(a.val, z);
|
||||
b1.val = _mm_unpackhi_epi32(a.val, z);
|
||||
}
|
||||
inline v_uint64x2 v_load_expand(const unsigned* ptr)
|
||||
{
|
||||
__m128i z = _mm_setzero_si128();
|
||||
return v_uint64x2(_mm_unpacklo_epi32(_mm_loadl_epi64((const __m128i*)ptr), z));
|
||||
}
|
||||
inline void v_expand(const v_int32x4& a, v_int64x2& b0, v_int64x2& b1)
|
||||
{
|
||||
__m128i s = _mm_srai_epi32(a.val, 31);
|
||||
b0.val = _mm_unpacklo_epi32(a.val, s);
|
||||
b1.val = _mm_unpackhi_epi32(a.val, s);
|
||||
}
|
||||
inline v_int64x2 v_load_expand(const int* ptr)
|
||||
{
|
||||
__m128i a = _mm_loadl_epi64((const __m128i*)ptr);
|
||||
__m128i s = _mm_srai_epi32(a, 31);
|
||||
return v_int64x2(_mm_unpacklo_epi32(a, s));
|
||||
}
|
||||
// Generates v_load_expand_q(ptr): reads 32 bits from ptr into the low lane of
// a register (_mm_cvtsi32_si128) and widens them with intrin.
// NOTE(review): the load dereferences ptr through a `const int*` cast, so it
// relies on the target tolerating an unaligned, type-punned 4-byte read - confirm.
#define OPENCV_HAL_IMPL_SSE_EXPAND_Q(_Tpvec, _Tp, intrin) \
|
||||
inline _Tpvec v_load_expand_q(const _Tp* ptr) \
|
||||
{ \
|
||||
__m128i a = _mm_cvtsi32_si128(*(const int*)ptr); \
|
||||
return _Tpvec(intrin(a)); \
|
||||
}
|
||||
|
||||
inline v_uint32x4 v_load_expand_q(const uchar* ptr)
|
||||
{
|
||||
__m128i z = _mm_setzero_si128();
|
||||
__m128i a = _mm_cvtsi32_si128(*(const int*)ptr);
|
||||
return v_uint32x4(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z));
|
||||
}
|
||||
|
||||
inline v_int32x4 v_load_expand_q(const schar* ptr)
|
||||
{
|
||||
__m128i a = _mm_cvtsi32_si128(*(const int*)ptr);
|
||||
a = _mm_unpacklo_epi8(a, a);
|
||||
a = _mm_unpacklo_epi8(a, a);
|
||||
return v_int32x4(_mm_srai_epi32(a, 24));
|
||||
}
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_uint32x4, uchar, _v128_cvtepu8_epi32)
|
||||
OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_int32x4, schar, _v128_cvtepi8_epi32)
|
||||
|
||||
#define OPENCV_HAL_IMPL_SSE_UNPACKS(_Tpvec, suffix, cast_from, cast_to) \
|
||||
inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
|
||||
|
||||
167
modules/core/include/opencv2/core/hal/intrin_sse_em.hpp
Normal file
167
modules/core/include/opencv2/core/hal/intrin_sse_em.hpp
Normal file
@ -0,0 +1,167 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP
|
||||
#define OPENCV_HAL_INTRIN_SSE_EM_HPP
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
|
||||
// Forwarding wrappers: OPENCV_HAL_SSE_WRAP_N(fun, tp) defines an inline
// function named _v128_<fun> taking N arguments of type tp and returning the
// result of the native intrinsic _mm_<fun>. Both prefixes are added by the
// macro, so `fun` must be the bare intrinsic suffix (e.g. cvtepu8_epi16).
#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \
|
||||
inline tp _v128_##fun(const tp& a) \
|
||||
{ return _mm_##fun(a); }
|
||||
|
||||
// two-argument form
#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \
|
||||
inline tp _v128_##fun(const tp& a, const tp& b) \
|
||||
{ return _mm_##fun(a, b); }
|
||||
|
||||
// three-argument form
#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \
|
||||
inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \
|
||||
{ return _mm_##fun(a, b, c); }
|
||||
|
||||
///////////////////////////// XOP /////////////////////////////
|
||||
|
||||
// [todo] define CV_XOP
|
||||
#if 1 // CV_XOP
|
||||
inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b)
|
||||
{
|
||||
const __m128i delta = _mm_set1_epi32((int)0x80000000);
|
||||
return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta));
|
||||
}
|
||||
// wrapping XOP
|
||||
#else
|
||||
OPENCV_HAL_SSE_WRAP_2(_v128_comgt_epu32, __m128i)
|
||||
#endif // !CV_XOP
|
||||
|
||||
///////////////////////////// SSE4.1 /////////////////////////////
|
||||
|
||||
#if !CV_SSE4_1
|
||||
|
||||
/** Swizzle **/
|
||||
// Bitwise select emulating _mm_blendv_epi8 without SSE4.1: every result bit is
// taken from b where the corresponding mask bit is 1 and from a where it is 0
// (a ^ ((b ^ a) & mask)).
// NOTE(review): the native SSE4.1 instruction looks only at the top bit of each
// mask byte, so the two agree only when every mask byte is 0x00 or 0xFF.
inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask)
|
||||
{ return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(b, a), mask)); }
|
||||
|
||||
/** Convert **/
|
||||
// 8 >> 16
|
||||
// Zero-extends the low 8 bytes of a to eight 16-bit lanes by interleaving
// them with zero bytes.
inline __m128i _v128_cvtepu8_epi16(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi8(a, z);
|
||||
}
|
||||
// Sign-extends the low 8 bytes of a to eight 16-bit lanes: each byte is
// duplicated into both halves of a 16-bit lane, then an arithmetic right shift
// by 8 leaves the byte value with its sign replicated in the upper half.
inline __m128i _v128_cvtepi8_epi16(const __m128i& a)
|
||||
{ return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); }
|
||||
// 8 >> 32
|
||||
// Zero-extends the low 4 bytes of a to four 32-bit lanes: bytes are first
// widened to 16 bits with zeros, then the low four 16-bit values to 32 bits.
inline __m128i _v128_cvtepu8_epi32(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z);
|
||||
}
|
||||
// Sign-extends the low 4 bytes of a to four 32-bit lanes.
inline __m128i _v128_cvtepi8_epi32(const __m128i& a)
|
||||
{
|
||||
// two rounds of self-interleaving replicate each of the low 4 bytes
// four times, filling one 32-bit lane per source byte
__m128i r = _mm_unpacklo_epi8(a, a);
|
||||
r = _mm_unpacklo_epi8(r, r);
|
||||
// arithmetic shift keeps the top copy and fills the rest with its sign
return _mm_srai_epi32(r, 24);
|
||||
}
|
||||
// 16 >> 32
|
||||
// Zero-extends the low four 16-bit lanes of a to four 32-bit lanes by
// interleaving them with zero words.
inline __m128i _v128_cvtepu16_epi32(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi16(a, z);
|
||||
}
|
||||
// Sign-extends the low four 16-bit lanes of a to four 32-bit lanes: each word
// is duplicated into both halves of a 32-bit lane, then shifted right
// arithmetically by 16.
inline __m128i _v128_cvtepi16_epi32(const __m128i& a)
|
||||
{ return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); }
|
||||
// 32 >> 64
|
||||
// Zero-extends the low two 32-bit lanes of a to two 64-bit lanes by
// interleaving them with zero dwords.
inline __m128i _v128_cvtepu32_epi64(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpacklo_epi32(a, z);
|
||||
}
|
||||
// Sign-extends the low two 32-bit lanes of a to two 64-bit lanes: each value
// is interleaved with its own sign mask (value >> 31, arithmetic), which
// becomes the upper 32 bits of the 64-bit lane.
inline __m128i _v128_cvtepi32_epi64(const __m128i& a)
|
||||
{ return _mm_unpacklo_epi32(a, _mm_srai_epi32(a, 31)); }
|
||||
|
||||
/** Arithmetic **/
|
||||
// Per-lane 32-bit multiply returning the low 32 bits of each product, built
// from the 32x32->64 unsigned multiply _mm_mul_epu32.
inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b)
|
||||
{
|
||||
// 64-bit products of lanes 0 and 2
__m128i c0 = _mm_mul_epu32(a, b);
|
||||
// 64-bit products of lanes 1 and 3 (shifted down into the even positions first)
__m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32));
|
||||
// d0 = { lo(p0), lo(p1), hi(p0), hi(p1) }
__m128i d0 = _mm_unpacklo_epi32(c0, c1);
|
||||
// d1 = { lo(p2), lo(p3), hi(p2), hi(p3) }
__m128i d1 = _mm_unpackhi_epi32(c0, c1);
|
||||
// keep the low halves only: { lo(p0), lo(p1), lo(p2), lo(p3) }
return _mm_unpacklo_epi64(d0, d1);
|
||||
}
|
||||
|
||||
/** Math **/
|
||||
// Per-lane unsigned 32-bit minimum: lanes where a > b (unsigned compare) take
// the value from b, all other lanes keep a.
inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b)
|
||||
{ return _v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); }
|
||||
|
||||
// wrapping SSE4.1
|
||||
#else
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i)
|
||||
OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i)
|
||||
#endif // !CV_SSE4_1
|
||||
|
||||
///////////////////////////// Revolutionary /////////////////////////////
|
||||
|
||||
/** Convert **/
|
||||
// 16 << 8
|
||||
// Zero-extends the high 8 bytes of a to eight 16-bit lanes (upper-half
// counterpart of _v128_cvtepu8_epi16).
inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpackhi_epi8(a, z);
|
||||
}
|
||||
// Sign-extends the high 8 bytes of a to eight 16-bit lanes: duplicate each
// byte into a 16-bit lane, then shift right arithmetically by 8.
inline __m128i _v128_cvtepi8_epi16_high(const __m128i& a)
|
||||
{ return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8); }
|
||||
// 32 << 16
|
||||
// Zero-extends the high four 16-bit lanes of a to four 32-bit lanes.
inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpackhi_epi16(a, z);
|
||||
}
|
||||
// Sign-extends the high four 16-bit lanes of a to four 32-bit lanes: duplicate
// each word into a 32-bit lane, then shift right arithmetically by 16.
inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a)
|
||||
{ return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16); }
|
||||
// 64 << 32
|
||||
// Zero-extends the high two 32-bit lanes of a to two 64-bit lanes.
inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a)
|
||||
{
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
return _mm_unpackhi_epi32(a, z);
|
||||
}
|
||||
// Sign-extends the high two 32-bit lanes of a to two 64-bit lanes by
// interleaving each value with its sign mask (value >> 31, arithmetic).
inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a)
|
||||
{ return _mm_unpackhi_epi32(a, _mm_srai_epi32(a, 31)); }
|
||||
|
||||
/** Miscellaneous **/
|
||||
// Narrows two vectors of unsigned 32-bit lanes to one vector of unsigned
// 16-bit lanes with saturation to 65535 (lanes of a are packed first, then b).
inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b)
|
||||
{
|
||||
// clamp every lane to the largest value representable in 16 bits
const __m128i m = _mm_set1_epi32(65535);
|
||||
__m128i am = _v128_min_epu32(a, m);
|
||||
__m128i bm = _v128_min_epu32(b, m);
|
||||
#if CV_SSE4_1
|
||||
// after clamping, all lanes are in [0, 65535], so the pack does not alter them
return _mm_packus_epi32(am, bm);
|
||||
#else
|
||||
// SSE2 only has a signed-saturating 32->16 pack: bias the clamped values by
// -32768 so they fit the signed 16-bit range, pack, then undo the bias
const __m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768);
|
||||
am = _mm_sub_epi32(am, d);
|
||||
bm = _mm_sub_epi32(bm, d);
|
||||
am = _mm_packs_epi32(am, bm);
|
||||
// subtracting -32768 adds the bias back (modulo 2^16)
return _mm_sub_epi16(am, nd);
|
||||
#endif
|
||||
}
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
|
||||
//! @endcond
|
||||
|
||||
} // cv::
|
||||
|
||||
#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP
|
||||
@ -315,6 +315,10 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
|
||||
b0.val = fh(a.val); \
|
||||
b1.val = fl(a.val); \
|
||||
} \
|
||||
inline _Tpwvec v_expand_low(const _Tpvec& a) \
|
||||
{ return _Tpwvec(fh(a.val)); } \
|
||||
inline _Tpwvec v_expand_high(const _Tpvec& a) \
|
||||
{ return _Tpwvec(fl(a.val)); } \
|
||||
inline _Tpwvec v_load_expand(const _Tp* ptr) \
|
||||
{ return _Tpwvec(fh(vec_ld_l8(ptr))); }
|
||||
|
||||
@ -418,10 +422,8 @@ OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint16x8, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int16x8, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul)
|
||||
@ -441,16 +443,30 @@ OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub)
|
||||
|
||||
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, v_int32x4& c, v_int32x4& d)
|
||||
// saturating multiply
|
||||
// Defines operator* and operator*= for _Tpvec: the operands are multiplied
// into two vectors of the wider type _Tpwvec via v_mul_expand, and the pair is
// narrowed back to _Tpvec with v_pack.
// NOTE(review): the saturation presumably comes from v_pack - confirm against
// its definition, which is not visible here.
#define OPENCV_HAL_IMPL_VSX_MUL_SAT(_Tpvec, _Tpwvec) \
|
||||
inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
_Tpwvec c, d; \
|
||||
v_mul_expand(a, b, c, d); \
|
||||
return v_pack(c, d); \
|
||||
} \
|
||||
inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
|
||||
{ a = a * b; return a; }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int8x16, v_int16x8)
|
||||
OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint8x16, v_uint16x8)
|
||||
OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int16x8, v_int32x4)
|
||||
OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint16x8, v_uint32x4)
|
||||
|
||||
template<typename Tvec, typename Twvec>
|
||||
inline void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d)
|
||||
{
|
||||
c.val = vec_mul(vec_unpackh(a.val), vec_unpackh(b.val));
|
||||
d.val = vec_mul(vec_unpackl(a.val), vec_unpackl(b.val));
|
||||
}
|
||||
inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, v_uint32x4& c, v_uint32x4& d)
|
||||
{
|
||||
c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
|
||||
d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
|
||||
Twvec p0 = Twvec(vec_mule(a.val, b.val));
|
||||
Twvec p1 = Twvec(vec_mulo(a.val, b.val));
|
||||
v_zip(p0, p1, c, d);
|
||||
}
|
||||
|
||||
inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c, v_uint64x2& d)
|
||||
{
|
||||
c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
|
||||
@ -459,17 +475,17 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c
|
||||
|
||||
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
return v_int16x8(vec_packs(
|
||||
vec_sra(vec_mul(vec_unpackh(a.val), vec_unpackh(b.val)), vec_uint4_sp(16)),
|
||||
vec_sra(vec_mul(vec_unpackl(a.val), vec_unpackl(b.val)), vec_uint4_sp(16))
|
||||
));
|
||||
vec_int4 p0 = vec_mule(a.val, b.val);
|
||||
vec_int4 p1 = vec_mulo(a.val, b.val);
|
||||
static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
|
||||
return v_int16x8(vec_perm(vec_short8_c(p0), vec_short8_c(p1), perm));
|
||||
}
|
||||
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
return v_uint16x8(vec_packs(
|
||||
vec_sr(vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val)), vec_uint4_sp(16)),
|
||||
vec_sr(vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val)), vec_uint4_sp(16))
|
||||
));
|
||||
vec_uint4 p0 = vec_mule(a.val, b.val);
|
||||
vec_uint4 p1 = vec_mulo(a.val, b.val);
|
||||
static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
|
||||
return v_uint16x8(vec_perm(vec_ushort8_c(p0), vec_ushort8_c(p1), perm));
|
||||
}
|
||||
|
||||
/** Non-saturating arithmetics **/
|
||||
@ -480,6 +496,7 @@ inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_mul_wrap, vec_mul)
|
||||
|
||||
/** Bitwise shifts **/
|
||||
#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc) \
|
||||
|
||||
@ -37,6 +37,10 @@ namespace trace {
|
||||
//! @cond IGNORED
|
||||
#define CV_TRACE_NS cv::utils::trace
|
||||
|
||||
#if !defined(OPENCV_DISABLE_TRACE) && defined(__EMSCRIPTEN__)
|
||||
#define OPENCV_DISABLE_TRACE 1
|
||||
#endif
|
||||
|
||||
namespace details {
|
||||
|
||||
#ifndef __OPENCV_TRACE
|
||||
|
||||
@ -130,19 +130,21 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b) \
|
||||
# undef vec_mul
|
||||
# endif
|
||||
/*
|
||||
* there's no a direct instruction for supporting 16-bit multiplication in ISA 2.07,
|
||||
* there is no direct instruction for 8-bit or 16-bit multiplication in ISA 2.07,
|
||||
* XLC implements it using the "multiply even", "multiply odd" and "permute" instructions
|
||||
* todo: Do I need to support 8-bit ?
|
||||
**/
|
||||
# define VSX_IMPL_MULH(Tvec, Tcast) \
|
||||
VSX_FINLINE(Tvec) vec_mul(const Tvec& a, const Tvec& b) \
|
||||
{ \
|
||||
static const vec_uchar16 even_perm = {0, 1, 16, 17, 4, 5, 20, 21, \
|
||||
8, 9, 24, 25, 12, 13, 28, 29}; \
|
||||
return vec_perm(Tcast(vec_mule(a, b)), Tcast(vec_mulo(a, b)), even_perm); \
|
||||
# define VSX_IMPL_MULH(Tvec, cperm) \
|
||||
VSX_FINLINE(Tvec) vec_mul(const Tvec& a, const Tvec& b) \
|
||||
{ \
|
||||
static const vec_uchar16 ev_od = {cperm}; \
|
||||
return vec_perm((Tvec)vec_mule(a, b), (Tvec)vec_mulo(a, b), ev_od); \
|
||||
}
|
||||
VSX_IMPL_MULH(vec_short8, vec_short8_c)
|
||||
VSX_IMPL_MULH(vec_ushort8, vec_ushort8_c)
|
||||
#define VSX_IMPL_MULH_P16 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
|
||||
VSX_IMPL_MULH(vec_char16, VSX_IMPL_MULH_P16)
|
||||
VSX_IMPL_MULH(vec_uchar16, VSX_IMPL_MULH_P16)
|
||||
#define VSX_IMPL_MULH_P8 0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29
|
||||
VSX_IMPL_MULH(vec_short8, VSX_IMPL_MULH_P8)
|
||||
VSX_IMPL_MULH(vec_ushort8, VSX_IMPL_MULH_P8)
|
||||
// vmuluwm can be used for unsigned or signed integers, that's what they said
|
||||
VSX_IMPL_2VRG(vec_int4, vec_int4, vmuluwm, vec_mul)
|
||||
VSX_IMPL_2VRG(vec_uint4, vec_uint4, vmuluwm, vec_mul)
|
||||
|
||||
@ -90,20 +90,21 @@ copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mste
|
||||
const uchar* src = (const uchar*)_src;
|
||||
uchar* dst = (uchar*)_dst;
|
||||
int x = 0;
|
||||
#if CV_SIMD128
|
||||
#if CV_SIMD
|
||||
{
|
||||
v_uint8x16 v_zero = v_setzero_u8();
|
||||
v_uint8 v_zero = vx_setzero_u8();
|
||||
|
||||
for( ; x <= size.width - 16; x += 16 )
|
||||
for( ; x <= size.width - v_uint8::nlanes; x += v_uint8::nlanes )
|
||||
{
|
||||
v_uint8x16 v_src = v_load(src + x),
|
||||
v_dst = v_load(dst + x),
|
||||
v_nmask = v_load(mask + x) == v_zero;
|
||||
v_uint8 v_src = vx_load(src + x),
|
||||
v_dst = vx_load(dst + x),
|
||||
v_nmask = vx_load(mask + x) == v_zero;
|
||||
|
||||
v_dst = v_select(v_nmask, v_dst, v_src);
|
||||
v_store(dst + x, v_dst);
|
||||
}
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
for( ; x < size.width; x++ )
|
||||
if( mask[x] )
|
||||
@ -121,25 +122,26 @@ copyMask_<ushort>(const uchar* _src, size_t sstep, const uchar* mask, size_t mst
|
||||
const ushort* src = (const ushort*)_src;
|
||||
ushort* dst = (ushort*)_dst;
|
||||
int x = 0;
|
||||
#if CV_SIMD128
|
||||
#if CV_SIMD
|
||||
{
|
||||
v_uint8x16 v_zero = v_setzero_u8();
|
||||
v_uint8 v_zero = vx_setzero_u8();
|
||||
|
||||
for( ; x <= size.width - 16; x += 16 )
|
||||
for( ; x <= size.width - v_uint8::nlanes; x += v_uint8::nlanes )
|
||||
{
|
||||
v_uint16x8 v_src1 = v_load(src + x), v_src2 = v_load(src + x + 8),
|
||||
v_dst1 = v_load(dst + x), v_dst2 = v_load(dst + x + 8);
|
||||
v_uint16 v_src1 = vx_load(src + x), v_src2 = vx_load(src + x + v_uint16::nlanes),
|
||||
v_dst1 = vx_load(dst + x), v_dst2 = vx_load(dst + x + v_uint16::nlanes);
|
||||
|
||||
v_uint8x16 v_nmask1, v_nmask2;
|
||||
v_uint8x16 v_nmask = v_load(mask + x) == v_zero;
|
||||
v_uint8 v_nmask1, v_nmask2;
|
||||
v_uint8 v_nmask = vx_load(mask + x) == v_zero;
|
||||
v_zip(v_nmask, v_nmask, v_nmask1, v_nmask2);
|
||||
|
||||
v_dst1 = v_select(v_reinterpret_as_u16(v_nmask1), v_dst1, v_src1);
|
||||
v_dst2 = v_select(v_reinterpret_as_u16(v_nmask2), v_dst2, v_src2);
|
||||
v_store(dst + x, v_dst1);
|
||||
v_store(dst + x + 8, v_dst2);
|
||||
v_store(dst + x + v_uint16::nlanes, v_dst2);
|
||||
}
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
for( ; x < size.width; x++ )
|
||||
if( mask[x] )
|
||||
|
||||
@ -277,40 +277,42 @@ template<typename T> struct VBLAS
|
||||
int givensx(T*, T*, int, T, T, T*, T*) const { return 0; }
|
||||
};
|
||||
|
||||
#if CV_SIMD128
|
||||
#if CV_SIMD
|
||||
template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const
|
||||
{
|
||||
if( n < 8 )
|
||||
if( n < 2*v_float32::nlanes )
|
||||
return 0;
|
||||
int k = 0;
|
||||
v_float32x4 s0 = v_setzero_f32();
|
||||
for( ; k <= n - v_float32x4::nlanes; k += v_float32x4::nlanes )
|
||||
v_float32 s0 = vx_setzero_f32();
|
||||
for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes )
|
||||
{
|
||||
v_float32x4 a0 = v_load(a + k);
|
||||
v_float32x4 b0 = v_load(b + k);
|
||||
v_float32 a0 = vx_load(a + k);
|
||||
v_float32 b0 = vx_load(b + k);
|
||||
|
||||
s0 += a0 * b0;
|
||||
}
|
||||
*result = v_reduce_sum(s0);
|
||||
vx_cleanup();
|
||||
return k;
|
||||
}
|
||||
|
||||
|
||||
template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, float s) const
|
||||
{
|
||||
if( n < 4 )
|
||||
if( n < v_float32::nlanes)
|
||||
return 0;
|
||||
int k = 0;
|
||||
v_float32x4 c4 = v_setall_f32(c), s4 = v_setall_f32(s);
|
||||
for( ; k <= n - v_float32x4::nlanes; k += v_float32x4::nlanes )
|
||||
v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s);
|
||||
for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes )
|
||||
{
|
||||
v_float32x4 a0 = v_load(a + k);
|
||||
v_float32x4 b0 = v_load(b + k);
|
||||
v_float32x4 t0 = (a0 * c4) + (b0 * s4);
|
||||
v_float32x4 t1 = (b0 * c4) - (a0 * s4);
|
||||
v_float32 a0 = vx_load(a + k);
|
||||
v_float32 b0 = vx_load(b + k);
|
||||
v_float32 t0 = (a0 * c4) + (b0 * s4);
|
||||
v_float32 t1 = (b0 * c4) - (a0 * s4);
|
||||
v_store(a + k, t0);
|
||||
v_store(b + k, t1);
|
||||
}
|
||||
vx_cleanup();
|
||||
return k;
|
||||
}
|
||||
|
||||
@ -318,17 +320,17 @@ template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, f
|
||||
template<> inline int VBLAS<float>::givensx(float* a, float* b, int n, float c, float s,
|
||||
float* anorm, float* bnorm) const
|
||||
{
|
||||
if( n < 4 )
|
||||
if( n < v_float32::nlanes)
|
||||
return 0;
|
||||
int k = 0;
|
||||
v_float32x4 c4 = v_setall_f32(c), s4 = v_setall_f32(s);
|
||||
v_float32x4 sa = v_setzero_f32(), sb = v_setzero_f32();
|
||||
for( ; k <= n - v_float32x4::nlanes; k += v_float32x4::nlanes )
|
||||
v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s);
|
||||
v_float32 sa = vx_setzero_f32(), sb = vx_setzero_f32();
|
||||
for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes )
|
||||
{
|
||||
v_float32x4 a0 = v_load(a + k);
|
||||
v_float32x4 b0 = v_load(b + k);
|
||||
v_float32x4 t0 = (a0 * c4) + (b0 * s4);
|
||||
v_float32x4 t1 = (b0 * c4) - (a0 * s4);
|
||||
v_float32 a0 = vx_load(a + k);
|
||||
v_float32 b0 = vx_load(b + k);
|
||||
v_float32 t0 = (a0 * c4) + (b0 * s4);
|
||||
v_float32 t1 = (b0 * c4) - (a0 * s4);
|
||||
v_store(a + k, t0);
|
||||
v_store(b + k, t1);
|
||||
sa += t0 + t0;
|
||||
@ -336,26 +338,28 @@ template<> inline int VBLAS<float>::givensx(float* a, float* b, int n, float c,
|
||||
}
|
||||
*anorm = v_reduce_sum(sa);
|
||||
*bnorm = v_reduce_sum(sb);
|
||||
vx_cleanup();
|
||||
return k;
|
||||
}
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
#if CV_SIMD_64F
|
||||
template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n, double* result) const
|
||||
{
|
||||
if( n < 4 )
|
||||
if( n < 2*v_float64::nlanes )
|
||||
return 0;
|
||||
int k = 0;
|
||||
v_float64x2 s0 = v_setzero_f64();
|
||||
for( ; k <= n - v_float64x2::nlanes; k += v_float64x2::nlanes )
|
||||
v_float64 s0 = vx_setzero_f64();
|
||||
for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes )
|
||||
{
|
||||
v_float64x2 a0 = v_load(a + k);
|
||||
v_float64x2 b0 = v_load(b + k);
|
||||
v_float64 a0 = vx_load(a + k);
|
||||
v_float64 b0 = vx_load(b + k);
|
||||
|
||||
s0 += a0 * b0;
|
||||
}
|
||||
double sbuf[2];
|
||||
v_store(sbuf, s0);
|
||||
*result = sbuf[0] + sbuf[1];
|
||||
vx_cleanup();
|
||||
return k;
|
||||
}
|
||||
|
||||
@ -363,16 +367,17 @@ template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n
|
||||
template<> inline int VBLAS<double>::givens(double* a, double* b, int n, double c, double s) const
|
||||
{
|
||||
int k = 0;
|
||||
v_float64x2 c2 = v_setall_f64(c), s2 = v_setall_f64(s);
|
||||
for( ; k <= n - v_float64x2::nlanes; k += v_float64x2::nlanes )
|
||||
v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s);
|
||||
for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes )
|
||||
{
|
||||
v_float64x2 a0 = v_load(a + k);
|
||||
v_float64x2 b0 = v_load(b + k);
|
||||
v_float64x2 t0 = (a0 * c2) + (b0 * s2);
|
||||
v_float64x2 t1 = (b0 * c2) - (a0 * s2);
|
||||
v_float64 a0 = vx_load(a + k);
|
||||
v_float64 b0 = vx_load(b + k);
|
||||
v_float64 t0 = (a0 * c2) + (b0 * s2);
|
||||
v_float64 t1 = (b0 * c2) - (a0 * s2);
|
||||
v_store(a + k, t0);
|
||||
v_store(b + k, t1);
|
||||
}
|
||||
vx_cleanup();
|
||||
return k;
|
||||
}
|
||||
|
||||
@ -381,14 +386,14 @@ template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double
|
||||
double* anorm, double* bnorm) const
|
||||
{
|
||||
int k = 0;
|
||||
v_float64x2 c2 = v_setall_f64(c), s2 = v_setall_f64(s);
|
||||
v_float64x2 sa = v_setzero_f64(), sb = v_setzero_f64();
|
||||
for( ; k <= n - v_float64x2::nlanes; k += v_float64x2::nlanes )
|
||||
v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s);
|
||||
v_float64 sa = vx_setzero_f64(), sb = vx_setzero_f64();
|
||||
for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes )
|
||||
{
|
||||
v_float64x2 a0 = v_load(a + k);
|
||||
v_float64x2 b0 = v_load(b + k);
|
||||
v_float64x2 t0 = (a0 * c2) + (b0 * s2);
|
||||
v_float64x2 t1 = (b0 * c2) - (a0 * s2);
|
||||
v_float64 a0 = vx_load(a + k);
|
||||
v_float64 b0 = vx_load(b + k);
|
||||
v_float64 t0 = (a0 * c2) + (b0 * s2);
|
||||
v_float64 t1 = (b0 * c2) - (a0 * s2);
|
||||
v_store(a + k, t0);
|
||||
v_store(b + k, t1);
|
||||
sa += t0 * t0;
|
||||
@ -401,8 +406,8 @@ template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double
|
||||
*bnorm = bbuf[0] + bbuf[1];
|
||||
return k;
|
||||
}
|
||||
#endif //CV_SIMD128_64F
|
||||
#endif //CV_SIMD128
|
||||
#endif //CV_SIMD_64F
|
||||
#endif //CV_SIMD
|
||||
|
||||
template<typename _Tp> void
|
||||
JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
|
||||
|
||||
@ -607,17 +607,15 @@ void polarToCart( InputArray src1, InputArray src2,
|
||||
{
|
||||
k = 0;
|
||||
|
||||
#if CV_SIMD128
|
||||
if( hasSIMD128() )
|
||||
#if CV_SIMD
|
||||
int cWidth = v_float32::nlanes;
|
||||
for( ; k <= len - cWidth; k += cWidth )
|
||||
{
|
||||
int cWidth = v_float32x4::nlanes;
|
||||
for( ; k <= len - cWidth; k += cWidth )
|
||||
{
|
||||
v_float32x4 v_m = v_load(mag + k);
|
||||
v_store(x + k, v_load(x + k) * v_m);
|
||||
v_store(y + k, v_load(y + k) * v_m);
|
||||
}
|
||||
v_float32 v_m = vx_load(mag + k);
|
||||
v_store(x + k, vx_load(x + k) * v_m);
|
||||
v_store(y + k, vx_load(y + k) * v_m);
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
|
||||
for( ; k < len; k++ )
|
||||
@ -736,7 +734,7 @@ struct iPow_SIMD
|
||||
}
|
||||
};
|
||||
|
||||
#if CV_SIMD128
|
||||
#if CV_SIMD
|
||||
|
||||
template <>
|
||||
struct iPow_SIMD<uchar, int>
|
||||
@ -744,13 +742,13 @@ struct iPow_SIMD<uchar, int>
|
||||
int operator() ( const uchar * src, uchar * dst, int len, int power )
|
||||
{
|
||||
int i = 0;
|
||||
v_uint32x4 v_1 = v_setall_u32(1u);
|
||||
v_uint32 v_1 = vx_setall_u32(1u);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
for ( ; i <= len - v_uint16::nlanes; i += v_uint16::nlanes)
|
||||
{
|
||||
v_uint32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_uint16x8 v = v_load_expand(src + i);
|
||||
v_uint32x4 v_b1, v_b2;
|
||||
v_uint32 v_a1 = v_1, v_a2 = v_1;
|
||||
v_uint16 v = vx_load_expand(src + i);
|
||||
v_uint32 v_b1, v_b2;
|
||||
v_expand(v, v_b1, v_b2);
|
||||
int p = power;
|
||||
|
||||
@ -772,6 +770,7 @@ struct iPow_SIMD<uchar, int>
|
||||
v = v_pack(v_a1, v_a2);
|
||||
v_pack_store(dst + i, v);
|
||||
}
|
||||
vx_cleanup();
|
||||
|
||||
return i;
|
||||
}
|
||||
@ -783,13 +782,13 @@ struct iPow_SIMD<schar, int>
|
||||
int operator() ( const schar * src, schar * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
v_int32x4 v_1 = v_setall_s32(1);
|
||||
v_int32 v_1 = vx_setall_s32(1);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
for ( ; i <= len - v_int16::nlanes; i += v_int16::nlanes)
|
||||
{
|
||||
v_int32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_int16x8 v = v_load_expand(src + i);
|
||||
v_int32x4 v_b1, v_b2;
|
||||
v_int32 v_a1 = v_1, v_a2 = v_1;
|
||||
v_int16 v = vx_load_expand(src + i);
|
||||
v_int32 v_b1, v_b2;
|
||||
v_expand(v, v_b1, v_b2);
|
||||
int p = power;
|
||||
|
||||
@ -811,6 +810,7 @@ struct iPow_SIMD<schar, int>
|
||||
v = v_pack(v_a1, v_a2);
|
||||
v_pack_store(dst + i, v);
|
||||
}
|
||||
vx_cleanup();
|
||||
|
||||
return i;
|
||||
}
|
||||
@ -822,13 +822,13 @@ struct iPow_SIMD<ushort, int>
|
||||
int operator() ( const ushort * src, ushort * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
v_uint32x4 v_1 = v_setall_u32(1u);
|
||||
v_uint32 v_1 = vx_setall_u32(1u);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
for ( ; i <= len - v_uint16::nlanes; i += v_uint16::nlanes)
|
||||
{
|
||||
v_uint32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_uint16x8 v = v_load(src + i);
|
||||
v_uint32x4 v_b1, v_b2;
|
||||
v_uint32 v_a1 = v_1, v_a2 = v_1;
|
||||
v_uint16 v = vx_load(src + i);
|
||||
v_uint32 v_b1, v_b2;
|
||||
v_expand(v, v_b1, v_b2);
|
||||
int p = power;
|
||||
|
||||
@ -850,6 +850,7 @@ struct iPow_SIMD<ushort, int>
|
||||
v = v_pack(v_a1, v_a2);
|
||||
v_store(dst + i, v);
|
||||
}
|
||||
vx_cleanup();
|
||||
|
||||
return i;
|
||||
}
|
||||
@ -861,13 +862,13 @@ struct iPow_SIMD<short, int>
|
||||
int operator() ( const short * src, short * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
v_int32x4 v_1 = v_setall_s32(1);
|
||||
v_int32 v_1 = vx_setall_s32(1);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
for ( ; i <= len - v_int16::nlanes; i += v_int16::nlanes)
|
||||
{
|
||||
v_int32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_int16x8 v = v_load(src + i);
|
||||
v_int32x4 v_b1, v_b2;
|
||||
v_int32 v_a1 = v_1, v_a2 = v_1;
|
||||
v_int16 v = vx_load(src + i);
|
||||
v_int32 v_b1, v_b2;
|
||||
v_expand(v, v_b1, v_b2);
|
||||
int p = power;
|
||||
|
||||
@ -889,6 +890,7 @@ struct iPow_SIMD<short, int>
|
||||
v = v_pack(v_a1, v_a2);
|
||||
v_store(dst + i, v);
|
||||
}
|
||||
vx_cleanup();
|
||||
|
||||
return i;
|
||||
}
|
||||
@ -900,12 +902,12 @@ struct iPow_SIMD<int, int>
|
||||
int operator() ( const int * src, int * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
v_int32x4 v_1 = v_setall_s32(1);
|
||||
v_int32 v_1 = vx_setall_s32(1);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
for ( ; i <= len - v_int32::nlanes*2; i += v_int32::nlanes*2)
|
||||
{
|
||||
v_int32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_int32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
|
||||
v_int32 v_a1 = v_1, v_a2 = v_1;
|
||||
v_int32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_int32::nlanes);
|
||||
int p = power;
|
||||
|
||||
while( p > 1 )
|
||||
@ -924,8 +926,9 @@ struct iPow_SIMD<int, int>
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v_store(dst + i, v_a1);
|
||||
v_store(dst + i + 4, v_a2);
|
||||
v_store(dst + i + v_int32::nlanes, v_a2);
|
||||
}
|
||||
vx_cleanup();
|
||||
|
||||
return i;
|
||||
}
|
||||
@ -937,12 +940,12 @@ struct iPow_SIMD<float, float>
|
||||
int operator() ( const float * src, float * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
v_float32x4 v_1 = v_setall_f32(1.f);
|
||||
v_float32 v_1 = vx_setall_f32(1.f);
|
||||
|
||||
for ( ; i <= len - 8; i += 8)
|
||||
for ( ; i <= len - v_float32::nlanes*2; i += v_float32::nlanes*2)
|
||||
{
|
||||
v_float32x4 v_a1 = v_1, v_a2 = v_1;
|
||||
v_float32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
|
||||
v_float32 v_a1 = v_1, v_a2 = v_1;
|
||||
v_float32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_float32::nlanes);
|
||||
int p = std::abs(power);
|
||||
if( power < 0 )
|
||||
{
|
||||
@ -966,26 +969,27 @@ struct iPow_SIMD<float, float>
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v_store(dst + i, v_a1);
|
||||
v_store(dst + i + 4, v_a2);
|
||||
v_store(dst + i + v_float32::nlanes, v_a2);
|
||||
}
|
||||
vx_cleanup();
|
||||
|
||||
return i;
|
||||
}
|
||||
};
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
#if CV_SIMD_64F
|
||||
template <>
|
||||
struct iPow_SIMD<double, double>
|
||||
{
|
||||
int operator() ( const double * src, double * dst, int len, int power)
|
||||
{
|
||||
int i = 0;
|
||||
v_float64x2 v_1 = v_setall_f64(1.);
|
||||
v_float64 v_1 = vx_setall_f64(1.);
|
||||
|
||||
for ( ; i <= len - 4; i += 4)
|
||||
for ( ; i <= len - v_float64::nlanes*2; i += v_float64::nlanes*2)
|
||||
{
|
||||
v_float64x2 v_a1 = v_1, v_a2 = v_1;
|
||||
v_float64x2 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 2);
|
||||
v_float64 v_a1 = v_1, v_a2 = v_1;
|
||||
v_float64 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_float64::nlanes);
|
||||
int p = std::abs(power);
|
||||
if( power < 0 )
|
||||
{
|
||||
@ -1009,8 +1013,9 @@ struct iPow_SIMD<double, double>
|
||||
v_a2 *= v_b2;
|
||||
|
||||
v_store(dst + i, v_a1);
|
||||
v_store(dst + i + 2, v_a2);
|
||||
v_store(dst + i + v_float64::nlanes, v_a2);
|
||||
}
|
||||
vx_cleanup();
|
||||
|
||||
return i;
|
||||
}
|
||||
@ -1272,10 +1277,17 @@ void pow( InputArray _src, double power, OutputArray _dst )
|
||||
Cv64suf inf64, nan64;
|
||||
float* fbuf = 0;
|
||||
double* dbuf = 0;
|
||||
#ifndef __EMSCRIPTEN__
|
||||
inf32.i = 0x7f800000;
|
||||
nan32.i = 0x7fffffff;
|
||||
inf64.i = CV_BIG_INT(0x7FF0000000000000);
|
||||
nan64.i = CV_BIG_INT(0x7FFFFFFFFFFFFFFF);
|
||||
#else
|
||||
inf32.f = std::numeric_limits<float>::infinity();
|
||||
nan32.f = std::numeric_limits<float>::quiet_NaN();
|
||||
inf64.f = std::numeric_limits<double>::infinity();
|
||||
nan64.f = std::numeric_limits<double>::quiet_NaN();
|
||||
#endif
|
||||
|
||||
if( src.ptr() == dst.ptr() )
|
||||
{
|
||||
@ -1595,9 +1607,9 @@ void patchNaNs( InputOutputArray _a, double _val )
|
||||
Cv32suf val;
|
||||
val.f = (float)_val;
|
||||
|
||||
#if CV_SIMD128
|
||||
v_int32x4 v_mask1 = v_setall_s32(0x7fffffff), v_mask2 = v_setall_s32(0x7f800000);
|
||||
v_int32x4 v_val = v_setall_s32(val.i);
|
||||
#if CV_SIMD
|
||||
v_int32 v_mask1 = vx_setall_s32(0x7fffffff), v_mask2 = vx_setall_s32(0x7f800000);
|
||||
v_int32 v_val = vx_setall_s32(val.i);
|
||||
#endif
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
@ -1605,18 +1617,16 @@ void patchNaNs( InputOutputArray _a, double _val )
|
||||
int* tptr = ptrs[0];
|
||||
size_t j = 0;
|
||||
|
||||
#if CV_SIMD128
|
||||
if( hasSIMD128() )
|
||||
#if CV_SIMD
|
||||
size_t cWidth = (size_t)v_int32::nlanes;
|
||||
for ( ; j + cWidth <= len; j += cWidth)
|
||||
{
|
||||
size_t cWidth = (size_t)v_int32x4::nlanes;
|
||||
for ( ; j + cWidth <= len; j += cWidth)
|
||||
{
|
||||
v_int32x4 v_src = v_load(tptr + j);
|
||||
v_int32x4 v_cmp_mask = v_mask2 < (v_src & v_mask1);
|
||||
v_int32x4 v_dst = v_select(v_cmp_mask, v_val, v_src);
|
||||
v_store(tptr + j, v_dst);
|
||||
}
|
||||
v_int32 v_src = vx_load(tptr + j);
|
||||
v_int32 v_cmp_mask = v_mask2 < (v_src & v_mask1);
|
||||
v_int32 v_dst = v_select(v_cmp_mask, v_val, v_src);
|
||||
v_store(tptr + j, v_dst);
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
|
||||
for( ; j < len; j++ )
|
||||
|
||||
@ -27,16 +27,26 @@ float fastAtan2(float y, float x);
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
||||
namespace {
|
||||
|
||||
#ifdef __EMSCRIPTEN__
|
||||
// Variant of atan_f32 compiled only when __EMSCRIPTEN__ is defined.
// Returns the angle of the vector (x, y) in degrees, using the standard
// atan2 and converting its radian result with the factor 180 / CV_PI.
// @param y  vertical component of the vector
// @param x  horizontal component of the vector
// @return   angle in degrees, wrapped into [0; 360)
static inline float atan_f32(float y, float x)
|
||||
{
|
||||
// atan2 yields a signed angle; scale it from radians to degrees.
float a = atan2(y, x) * 180.0f / CV_PI;
|
||||
// Shift negative angles up by a full turn so the result is non-negative.
if (a < 0.0f)
|
||||
a += 360.0f;
|
||||
// NOTE(review): presumably guards against the addition above rounding to
// exactly 360.0f for tiny negative angles - confirm.
if (a >= 360.0f)
|
||||
a -= 360.0f;
|
||||
return a; // range [0; 360)
|
||||
}
|
||||
#else
|
||||
static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
|
||||
static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
|
||||
static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
|
||||
static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
|
||||
|
||||
using namespace cv;
|
||||
|
||||
static inline float atan_f32(float y, float x)
|
||||
{
|
||||
float ax = std::abs(x), ay = std::abs(y);
|
||||
@ -59,6 +69,7 @@ static inline float atan_f32(float y, float x)
|
||||
a = 360.f - a;
|
||||
return a;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CV_SIMD
|
||||
|
||||
@ -363,7 +374,7 @@ void sqrt64f(const double* src, double* dst, int len)
|
||||
// Workaround for ICE in MSVS 2015 update 3 (issue #7795)
|
||||
// CV_AVX is not used here, because generated code is faster in non-AVX mode.
|
||||
// (tested with disabled IPP on i5-6300U)
|
||||
#if (defined _MSC_VER && _MSC_VER >= 1900)
|
||||
#if (defined _MSC_VER && _MSC_VER >= 1900) || defined(__EMSCRIPTEN__)
|
||||
void exp32f(const float *src, float *dst, int n)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
@ -2310,18 +2310,12 @@ static void scaleAdd_32f(const float* src1, const float* src2, float* dst,
|
||||
{
|
||||
float alpha = *_alpha;
|
||||
int i = 0;
|
||||
#if CV_SIMD128
|
||||
if (hasSIMD128())
|
||||
{
|
||||
v_float32x4 v_alpha = v_setall_f32(alpha);
|
||||
const int cWidth = v_float32x4::nlanes;
|
||||
for (; i <= len - cWidth; i += cWidth)
|
||||
{
|
||||
v_float32x4 v_src1 = v_load(src1 + i);
|
||||
v_float32x4 v_src2 = v_load(src2 + i);
|
||||
v_store(dst + i, (v_src1 * v_alpha) + v_src2);
|
||||
}
|
||||
}
|
||||
#if CV_SIMD
|
||||
v_float32 v_alpha = vx_setall_f32(alpha);
|
||||
const int cWidth = v_float32::nlanes;
|
||||
for (; i <= len - cWidth; i += cWidth)
|
||||
v_store(dst + i, v_muladd(vx_load(src1 + i), v_alpha, vx_load(src2 + i)));
|
||||
vx_cleanup();
|
||||
#endif
|
||||
for (; i < len; i++)
|
||||
dst[i] = src1[i] * alpha + src2[i];
|
||||
@ -2333,22 +2327,12 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst,
|
||||
{
|
||||
double alpha = *_alpha;
|
||||
int i = 0;
|
||||
#if CV_SIMD128_64F
|
||||
if (hasSIMD128())
|
||||
{
|
||||
v_float64x2 a2 = v_setall_f64(alpha);
|
||||
const int cWidth = v_float64x2::nlanes;
|
||||
for (; i <= len - cWidth * 2; i += cWidth * 2)
|
||||
{
|
||||
v_float64x2 x0, x1, y0, y1, t0, t1;
|
||||
x0 = v_load(src1 + i); x1 = v_load(src1 + i + cWidth);
|
||||
y0 = v_load(src2 + i); y1 = v_load(src2 + i + cWidth);
|
||||
t0 = x0 * a2 + y0;
|
||||
t1 = x1 * a2 + y1;
|
||||
v_store(dst + i, t0);
|
||||
v_store(dst + i + cWidth, t1);
|
||||
}
|
||||
}
|
||||
#if CV_SIMD_64F
|
||||
v_float64 a2 = vx_setall_f64(alpha);
|
||||
const int cWidth = v_float64::nlanes;
|
||||
for (; i <= len - cWidth; i += cWidth)
|
||||
v_store(dst + i, v_muladd(vx_load(src1 + i), a2, vx_load(src2 + i)));
|
||||
vx_cleanup();
|
||||
#endif
|
||||
for (; i < len; i++)
|
||||
dst[i] = src1[i] * alpha + src2[i];
|
||||
@ -3025,42 +3009,40 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
|
||||
#endif
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD128
|
||||
if (hasSIMD128())
|
||||
#if CV_SIMD
|
||||
int len0 = len & -v_uint16::nlanes, blockSize0 = (1 << 15), blockSize;
|
||||
|
||||
while (i < len0)
|
||||
{
|
||||
int len0 = len & -8, blockSize0 = (1 << 15), blockSize;
|
||||
blockSize = std::min(len0 - i, blockSize0);
|
||||
v_int32 v_sum = vx_setzero_s32();
|
||||
const int cWidth = v_uint16::nlanes;
|
||||
|
||||
while (i < len0)
|
||||
int j = 0;
|
||||
for (; j <= blockSize - cWidth * 2; j += cWidth * 2)
|
||||
{
|
||||
blockSize = std::min(len0 - i, blockSize0);
|
||||
v_int32x4 v_sum = v_setzero_s32();
|
||||
const int cWidth = v_uint16x8::nlanes;
|
||||
v_uint16 v_src10, v_src20, v_src11, v_src21;
|
||||
v_expand(vx_load(src1 + j), v_src10, v_src11);
|
||||
v_expand(vx_load(src2 + j), v_src20, v_src21);
|
||||
|
||||
int j = 0;
|
||||
for (; j <= blockSize - cWidth * 2; j += cWidth * 2)
|
||||
{
|
||||
v_uint16x8 v_src10, v_src20, v_src11, v_src21;
|
||||
v_expand(v_load(src1 + j), v_src10, v_src11);
|
||||
v_expand(v_load(src2 + j), v_src20, v_src21);
|
||||
|
||||
v_sum += v_dotprod(v_reinterpret_as_s16(v_src10), v_reinterpret_as_s16(v_src20));
|
||||
v_sum += v_dotprod(v_reinterpret_as_s16(v_src11), v_reinterpret_as_s16(v_src21));
|
||||
}
|
||||
|
||||
for (; j <= blockSize - cWidth; j += cWidth)
|
||||
{
|
||||
v_int16x8 v_src10 = v_reinterpret_as_s16(v_load_expand(src1 + j));
|
||||
v_int16x8 v_src20 = v_reinterpret_as_s16(v_load_expand(src2 + j));
|
||||
|
||||
v_sum += v_dotprod(v_src10, v_src20);
|
||||
}
|
||||
r += (double)v_reduce_sum(v_sum);
|
||||
|
||||
src1 += blockSize;
|
||||
src2 += blockSize;
|
||||
i += blockSize;
|
||||
v_sum += v_dotprod(v_reinterpret_as_s16(v_src10), v_reinterpret_as_s16(v_src20));
|
||||
v_sum += v_dotprod(v_reinterpret_as_s16(v_src11), v_reinterpret_as_s16(v_src21));
|
||||
}
|
||||
|
||||
for (; j <= blockSize - cWidth; j += cWidth)
|
||||
{
|
||||
v_int16 v_src10 = v_reinterpret_as_s16(vx_load_expand(src1 + j));
|
||||
v_int16 v_src20 = v_reinterpret_as_s16(vx_load_expand(src2 + j));
|
||||
|
||||
v_sum += v_dotprod(v_src10, v_src20);
|
||||
}
|
||||
r += (double)v_reduce_sum(v_sum);
|
||||
|
||||
src1 += blockSize;
|
||||
src2 += blockSize;
|
||||
i += blockSize;
|
||||
}
|
||||
vx_cleanup();
|
||||
#elif CV_NEON
|
||||
if( cv::checkHardwareSupport(CV_CPU_NEON) )
|
||||
{
|
||||
@ -3113,42 +3095,40 @@ static double dotProd_8s(const schar* src1, const schar* src2, int len)
|
||||
double r = 0.0;
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD128
|
||||
if (hasSIMD128())
|
||||
#if CV_SIMD
|
||||
int len0 = len & -v_int16::nlanes, blockSize0 = (1 << 14), blockSize;
|
||||
|
||||
while (i < len0)
|
||||
{
|
||||
int len0 = len & -8, blockSize0 = (1 << 14), blockSize;
|
||||
blockSize = std::min(len0 - i, blockSize0);
|
||||
v_int32 v_sum = vx_setzero_s32();
|
||||
const int cWidth = v_int16::nlanes;
|
||||
|
||||
while (i < len0)
|
||||
int j = 0;
|
||||
for (; j <= blockSize - cWidth * 2; j += cWidth * 2)
|
||||
{
|
||||
blockSize = std::min(len0 - i, blockSize0);
|
||||
v_int32x4 v_sum = v_setzero_s32();
|
||||
const int cWidth = v_int16x8::nlanes;
|
||||
v_int16 v_src10, v_src20, v_src11, v_src21;
|
||||
v_expand(vx_load(src1 + j), v_src10, v_src11);
|
||||
v_expand(vx_load(src2 + j), v_src20, v_src21);
|
||||
|
||||
int j = 0;
|
||||
for (; j <= blockSize - cWidth * 2; j += cWidth * 2)
|
||||
{
|
||||
v_int16x8 v_src10, v_src20, v_src11, v_src21;
|
||||
v_expand(v_load(src1 + j), v_src10, v_src11);
|
||||
v_expand(v_load(src2 + j), v_src20, v_src21);
|
||||
|
||||
v_sum += v_dotprod(v_src10, v_src20);
|
||||
v_sum += v_dotprod(v_src11, v_src21);
|
||||
}
|
||||
|
||||
for (; j <= blockSize - cWidth; j += cWidth)
|
||||
{
|
||||
v_int16x8 v_src10 = v_load_expand(src1 + j);
|
||||
v_int16x8 v_src20 = v_load_expand(src2 + j);
|
||||
|
||||
v_sum += v_dotprod(v_src10, v_src20);
|
||||
}
|
||||
r += (double)v_reduce_sum(v_sum);
|
||||
|
||||
src1 += blockSize;
|
||||
src2 += blockSize;
|
||||
i += blockSize;
|
||||
v_sum += v_dotprod(v_src10, v_src20);
|
||||
v_sum += v_dotprod(v_src11, v_src21);
|
||||
}
|
||||
|
||||
for (; j <= blockSize - cWidth; j += cWidth)
|
||||
{
|
||||
v_int16 v_src10 = vx_load_expand(src1 + j);
|
||||
v_int16 v_src20 = vx_load_expand(src2 + j);
|
||||
|
||||
v_sum += v_dotprod(v_src10, v_src20);
|
||||
}
|
||||
r += (double)v_reduce_sum(v_sum);
|
||||
|
||||
src1 += blockSize;
|
||||
src2 += blockSize;
|
||||
i += blockSize;
|
||||
}
|
||||
vx_cleanup();
|
||||
#elif CV_NEON
|
||||
if( cv::checkHardwareSupport(CV_CPU_NEON) )
|
||||
{
|
||||
@ -3232,28 +3212,26 @@ static double dotProd_32f(const float* src1, const float* src2, int len)
|
||||
#endif
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD128
|
||||
if (hasSIMD128())
|
||||
#if CV_SIMD
|
||||
int len0 = len & -v_float32::nlanes, blockSize0 = (1 << 13), blockSize;
|
||||
|
||||
while (i < len0)
|
||||
{
|
||||
int len0 = len & -4, blockSize0 = (1 << 13), blockSize;
|
||||
blockSize = std::min(len0 - i, blockSize0);
|
||||
v_float32 v_sum = vx_setzero_f32();
|
||||
|
||||
while (i < len0)
|
||||
{
|
||||
blockSize = std::min(len0 - i, blockSize0);
|
||||
v_float32x4 v_sum = v_setzero_f32();
|
||||
int j = 0;
|
||||
int cWidth = v_float32::nlanes;
|
||||
for (; j <= blockSize - cWidth; j += cWidth)
|
||||
v_sum = v_muladd(vx_load(src1 + j), vx_load(src2 + j), v_sum);
|
||||
|
||||
int j = 0;
|
||||
int cWidth = v_float32x4::nlanes;
|
||||
for (; j <= blockSize - cWidth; j += cWidth)
|
||||
v_sum = v_muladd(v_load(src1 + j), v_load(src2 + j), v_sum);
|
||||
r += v_reduce_sum(v_sum);
|
||||
|
||||
r += v_reduce_sum(v_sum);
|
||||
|
||||
src1 += blockSize;
|
||||
src2 += blockSize;
|
||||
i += blockSize;
|
||||
}
|
||||
src1 += blockSize;
|
||||
src2 += blockSize;
|
||||
i += blockSize;
|
||||
}
|
||||
vx_cleanup();
|
||||
#endif
|
||||
return r + dotProd_(src1, src2, len - i);
|
||||
}
|
||||
|
||||
@ -407,10 +407,13 @@ template<typename R> struct TheTest
|
||||
|
||||
Data<Rx2> resB = vx_load_expand(dataA.d);
|
||||
|
||||
Rx2 c, d;
|
||||
Rx2 c, d, e, f;
|
||||
v_expand(a, c, d);
|
||||
|
||||
Data<Rx2> resC = c, resD = d;
|
||||
e = v_expand_low(a);
|
||||
f = v_expand_high(a);
|
||||
|
||||
Data<Rx2> resC = c, resD = d, resE = e, resF = f;
|
||||
const int n = Rx2::nlanes;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
@ -418,6 +421,8 @@ template<typename R> struct TheTest
|
||||
EXPECT_EQ(dataA[i], resB[i]);
|
||||
EXPECT_EQ(dataA[i], resC[i]);
|
||||
EXPECT_EQ(dataA[i + n], resD[i]);
|
||||
EXPECT_EQ(dataA[i], resE[i]);
|
||||
EXPECT_EQ(dataA[i + n], resF[i]);
|
||||
}
|
||||
|
||||
return *this;
|
||||
@ -455,19 +460,21 @@ template<typename R> struct TheTest
|
||||
return *this;
|
||||
}
|
||||
|
||||
TheTest & test_addsub_wrap()
|
||||
TheTest & test_arithm_wrap()
|
||||
{
|
||||
Data<R> dataA, dataB;
|
||||
dataB.reverse();
|
||||
R a = dataA, b = dataB;
|
||||
|
||||
Data<R> resC = v_add_wrap(a, b),
|
||||
resD = v_sub_wrap(a, b);
|
||||
resD = v_sub_wrap(a, b),
|
||||
resE = v_mul_wrap(a, b);
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((LaneType)(dataA[i] + dataB[i]), resC[i]);
|
||||
EXPECT_EQ((LaneType)(dataA[i] - dataB[i]), resD[i]);
|
||||
EXPECT_EQ((LaneType)(dataA[i] * dataB[i]), resE[i]);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
@ -475,6 +482,7 @@ template<typename R> struct TheTest
|
||||
TheTest & test_mul()
|
||||
{
|
||||
Data<R> dataA, dataB;
|
||||
dataA[1] = static_cast<LaneType>(std::numeric_limits<LaneType>::max());
|
||||
dataB.reverse();
|
||||
R a = dataA, b = dataB;
|
||||
|
||||
@ -482,7 +490,7 @@ template<typename R> struct TheTest
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(dataA[i] * dataB[i], resC[i]);
|
||||
EXPECT_EQ(saturate_cast<LaneType>(dataA[i] * dataB[i]), resC[i]);
|
||||
}
|
||||
|
||||
return *this;
|
||||
@ -1209,7 +1217,9 @@ void test_hal_intrin_uint8()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_arithm_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
@ -1242,7 +1252,9 @@ void test_hal_intrin_int8()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_arithm_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
@ -1267,7 +1279,7 @@ void test_hal_intrin_uint16()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_arithm_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
@ -1295,7 +1307,7 @@ void test_hal_intrin_int16()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_arithm_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
|
||||
@ -562,9 +562,9 @@ static inline void PrintTo(const ConvParamID& v, std::ostream* os)
|
||||
*os << ", S=" << ((Size)p.stride);
|
||||
if (((Size)p.dilation).area() != 1)
|
||||
*os << ", D=" << ((Size)p.dilation);
|
||||
if (((Size)p.pad).area() != 0)
|
||||
if (!((Size)p.pad).empty())
|
||||
*os << ", P=" << ((Size)p.pad);
|
||||
if (((Size)p.padAdjust).area() != 0)
|
||||
if (!((Size)p.padAdjust).empty())
|
||||
*os << ", PAdj=" << ((Size)p.padAdjust);
|
||||
if (!((std::string)p.padMode).empty())
|
||||
*os << ", PM=" << ((std::string)p.padMode);
|
||||
|
||||
@ -278,11 +278,13 @@ public:
|
||||
int li;
|
||||
for (li = 0; li != netBinary.layer_size(); li++)
|
||||
{
|
||||
if (netBinary.layer(li).name() == name)
|
||||
const caffe::LayerParameter& binLayer = netBinary.layer(li);
|
||||
// Break if the layer name is the same and the blobs are not cleared
|
||||
if (binLayer.name() == name && binLayer.blobs_size() != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (li == netBinary.layer_size() || netBinary.layer(li).blobs_size() == 0)
|
||||
if (li == netBinary.layer_size())
|
||||
return;
|
||||
|
||||
caffe::LayerParameter* binLayer = netBinary.mutable_layer(li);
|
||||
|
||||
@ -1511,10 +1511,10 @@ struct Net::Impl
|
||||
CV_Assert(!ieNode.empty());
|
||||
ieNode->net = net;
|
||||
|
||||
auto weightableLayer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(ieNode->layer);
|
||||
if ((preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD) && !fused)
|
||||
{
|
||||
ieNode->layer->precision = InferenceEngine::Precision::FP16;
|
||||
auto weightableLayer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(ieNode->layer);
|
||||
if (weightableLayer)
|
||||
{
|
||||
if (weightableLayer->_weights)
|
||||
@ -1532,7 +1532,13 @@ struct Net::Impl
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (weightableLayer)
|
||||
{
|
||||
if (weightableLayer->_weights)
|
||||
weightableLayer->blobs["weights"] = weightableLayer->_weights;
|
||||
if (weightableLayer->_biases)
|
||||
weightableLayer->blobs["biases"] = weightableLayer->_biases;
|
||||
}
|
||||
ieNode->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers);
|
||||
net->addBlobs(ld.inputBlobsWrappers);
|
||||
net->addBlobs(ld.outputBlobsWrappers);
|
||||
|
||||
@ -449,15 +449,28 @@ public:
|
||||
lp.precision = InferenceEngine::Precision::FP32;
|
||||
std::shared_ptr<InferenceEngine::ConvolutionLayer> ieLayer(new InferenceEngine::ConvolutionLayer(lp));
|
||||
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R3)
|
||||
ieLayer->_kernel.insert(InferenceEngine::X_AXIS, kernel.width);
|
||||
ieLayer->_kernel.insert(InferenceEngine::Y_AXIS, kernel.height);
|
||||
ieLayer->_stride.insert(InferenceEngine::X_AXIS, stride.width);
|
||||
ieLayer->_stride.insert(InferenceEngine::Y_AXIS, stride.height);
|
||||
ieLayer->_padding.insert(InferenceEngine::X_AXIS, pad.width);
|
||||
ieLayer->_padding.insert(InferenceEngine::Y_AXIS, pad.height);
|
||||
ieLayer->_pads_end.insert(InferenceEngine::X_AXIS, pad.width);
|
||||
ieLayer->_pads_end.insert(InferenceEngine::Y_AXIS, pad.height);
|
||||
ieLayer->_dilation.insert(InferenceEngine::X_AXIS, dilation.width);
|
||||
ieLayer->_dilation.insert(InferenceEngine::Y_AXIS, dilation.height);
|
||||
#else
|
||||
ieLayer->_kernel_x = kernel.width;
|
||||
ieLayer->_kernel_y = kernel.height;
|
||||
ieLayer->_stride_x = stride.width;
|
||||
ieLayer->_stride_y = stride.height;
|
||||
ieLayer->_out_depth = outCn;
|
||||
ieLayer->_padding_x = pad.width;
|
||||
ieLayer->_padding_y = pad.height;
|
||||
ieLayer->_dilation_x = dilation.width;
|
||||
ieLayer->_dilation_y = dilation.height;
|
||||
#endif
|
||||
ieLayer->_out_depth = outCn;
|
||||
ieLayer->_group = group;
|
||||
|
||||
ieLayer->_weights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
|
||||
@ -1659,15 +1672,28 @@ public:
|
||||
lp.precision = InferenceEngine::Precision::FP32;
|
||||
std::shared_ptr<InferenceEngine::DeconvolutionLayer> ieLayer(new InferenceEngine::DeconvolutionLayer(lp));
|
||||
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R3)
|
||||
ieLayer->_kernel.insert(InferenceEngine::X_AXIS, kernel.width);
|
||||
ieLayer->_kernel.insert(InferenceEngine::Y_AXIS, kernel.height);
|
||||
ieLayer->_stride.insert(InferenceEngine::X_AXIS, stride.width);
|
||||
ieLayer->_stride.insert(InferenceEngine::Y_AXIS, stride.height);
|
||||
ieLayer->_padding.insert(InferenceEngine::X_AXIS, pad.width);
|
||||
ieLayer->_padding.insert(InferenceEngine::Y_AXIS, pad.height);
|
||||
ieLayer->_pads_end.insert(InferenceEngine::X_AXIS, pad.width);
|
||||
ieLayer->_pads_end.insert(InferenceEngine::Y_AXIS, pad.height);
|
||||
ieLayer->_dilation.insert(InferenceEngine::X_AXIS, dilation.width);
|
||||
ieLayer->_dilation.insert(InferenceEngine::Y_AXIS, dilation.height);
|
||||
#else
|
||||
ieLayer->_kernel_x = kernel.width;
|
||||
ieLayer->_kernel_y = kernel.height;
|
||||
ieLayer->_stride_x = stride.width;
|
||||
ieLayer->_stride_y = stride.height;
|
||||
ieLayer->_out_depth = numOutput;
|
||||
ieLayer->_padding_x = pad.width;
|
||||
ieLayer->_padding_y = pad.height;
|
||||
ieLayer->_dilation_x = dilation.width;
|
||||
ieLayer->_dilation_y = dilation.height;
|
||||
#endif
|
||||
ieLayer->_out_depth = numOutput;
|
||||
ieLayer->_group = group;
|
||||
|
||||
ieLayer->_weights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
|
||||
|
||||
@ -268,6 +268,16 @@ public:
|
||||
{
|
||||
lp.type = "Pooling";
|
||||
InferenceEngine::PoolingLayer* poolLayer = new InferenceEngine::PoolingLayer(lp);
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R3)
|
||||
poolLayer->_kernel.insert(InferenceEngine::X_AXIS, kernel.width);
|
||||
poolLayer->_kernel.insert(InferenceEngine::Y_AXIS, kernel.height);
|
||||
poolLayer->_stride.insert(InferenceEngine::X_AXIS, stride.width);
|
||||
poolLayer->_stride.insert(InferenceEngine::Y_AXIS, stride.height);
|
||||
poolLayer->_padding.insert(InferenceEngine::X_AXIS, pad_l);
|
||||
poolLayer->_padding.insert(InferenceEngine::Y_AXIS, pad_t);
|
||||
poolLayer->_pads_end.insert(InferenceEngine::X_AXIS, pad_r);
|
||||
poolLayer->_pads_end.insert(InferenceEngine::Y_AXIS, pad_b);
|
||||
#else
|
||||
poolLayer->_kernel_x = kernel.width;
|
||||
poolLayer->_kernel_y = kernel.height;
|
||||
poolLayer->_stride_x = stride.width;
|
||||
@ -276,6 +286,7 @@ public:
|
||||
poolLayer->_padding_y = pad_t;
|
||||
poolLayer->params["pad-r"] = format("%d", pad_r);
|
||||
poolLayer->params["pad-b"] = format("%d", pad_b);
|
||||
#endif
|
||||
poolLayer->_exclude_pad = type == AVE && padMode == "SAME";
|
||||
poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";
|
||||
poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX :
|
||||
|
||||
@ -454,6 +454,28 @@ TEST(Test_Caffe, multiple_inputs)
|
||||
normAssert(out, first_image + second_image);
|
||||
}
|
||||
|
||||
// Loads the shared_weights prototxt/caffemodel pair, feeds two 2x2 float
// inputs through the named inputs "input_1" and "input_2", runs a forward
// pass and checks two elements of the output.
// NOTE(review): the expected values 12 and 16 presumably follow from the
// weights stored in the caffemodel, which is not visible here - confirm.
TEST(Test_Caffe, shared_weights)
|
||||
{
|
||||
// Locate the network description and its trained weights in the test data.
const string proto = findDataFile("dnn/layers/shared_weights.prototxt", false);
|
||||
const string model = findDataFile("dnn/layers/shared_weights.caffemodel", false);
|
||||
|
||||
Net net = readNetFromCaffe(proto, model);
|
||||
|
||||
// Two distinct 2x2 inputs: even values for input_1, odd values for input_2.
Mat input_1 = (Mat_<float>(2, 2) << 0., 2., 4., 6.);
|
||||
Mat input_2 = (Mat_<float>(2, 2) << 1., 3., 5., 7.);
|
||||
|
||||
// Convert each image into a blob before binding it to the network.
Mat blob_1 = blobFromImage(input_1);
|
||||
Mat blob_2 = blobFromImage(input_2);
|
||||
|
||||
// Bind each blob to its input by name.
net.setInput(blob_1, "input_1");
|
||||
net.setInput(blob_2, "input_2");
|
||||
|
||||
Mat sum = net.forward();
|
||||
|
||||
// Only elements (0,0) and (0,1) of the output are verified.
EXPECT_EQ(sum.at<float>(0,0), 12.);
|
||||
EXPECT_EQ(sum.at<float>(0,1), 16.);
|
||||
}
|
||||
|
||||
typedef testing::TestWithParam<tuple<std::string, Target> > opencv_face_detector;
|
||||
TEST_P(opencv_face_detector, Accuracy)
|
||||
{
|
||||
|
||||
@ -74,7 +74,6 @@ CV_IMPL int cvWaitKey (int maxWait) {return 0;}
|
||||
|
||||
#include <iostream>
|
||||
|
||||
const int TOP_BORDER = 7;
|
||||
const int MIN_SLIDER_WIDTH=200;
|
||||
|
||||
static NSApplication *application = nil;
|
||||
@ -82,10 +81,10 @@ static NSAutoreleasePool *pool = nil;
|
||||
static NSMutableDictionary *windows = nil;
|
||||
static bool wasInitialized = false;
|
||||
|
||||
@interface CVView : NSView {
|
||||
NSImage *image;
|
||||
}
|
||||
@interface CVView : NSView
|
||||
@property(retain) NSView *imageView;
|
||||
@property(retain) NSImage *image;
|
||||
@property int sliderHeight;
|
||||
- (void)setImageData:(CvArr *)arr;
|
||||
@end
|
||||
|
||||
@ -221,32 +220,38 @@ CV_IMPL void cvShowImage( const char* name, const CvArr* arr)
|
||||
if(window)
|
||||
{
|
||||
bool empty = [[window contentView] image] == nil;
|
||||
NSRect rect = [window frame];
|
||||
NSRect vrectOld = [[window contentView] frame];
|
||||
|
||||
NSSize oldImageSize = [[[window contentView] image] size];
|
||||
[[window contentView] setImageData:(CvArr *)arr];
|
||||
if([window autosize] || [window firstContent] || empty)
|
||||
{
|
||||
//Set new view size considering sliders (reserve height and min width)
|
||||
NSRect vrectNew = vrectOld;
|
||||
int slider_height = 0;
|
||||
if ([window respondsToSelector:@selector(sliders)]) {
|
||||
for(NSString *key in [window sliders]) {
|
||||
slider_height += [[[window sliders] valueForKey:key] frame].size.height;
|
||||
NSSize imageSize = [[[window contentView] image] size];
|
||||
// Only adjust the image size if the new image is a different size from the previous
|
||||
if (oldImageSize.height != imageSize.height || oldImageSize.width != imageSize.width)
|
||||
{
|
||||
//Set new view size considering sliders (reserve height and min width)
|
||||
NSSize scaledImageSize;
|
||||
if ([[window contentView] respondsToSelector:@selector(convertSizeFromBacking:)])
|
||||
{
|
||||
// Only resize for retina displays if the image is bigger than the screen
|
||||
NSSize screenSize = NSScreen.mainScreen.visibleFrame.size;
|
||||
CGFloat titleBarHeight = window.frame.size.height - [window contentRectForFrameRect:window.frame].size.height;
|
||||
screenSize.height -= titleBarHeight;
|
||||
if (imageSize.width > screenSize.width || imageSize.height > screenSize.height)
|
||||
{
|
||||
scaledImageSize = [[window contentView] convertSizeFromBacking:imageSize];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
scaledImageSize = imageSize;
|
||||
}
|
||||
NSSize contentSize = vrectOld.size;
|
||||
contentSize.height = scaledImageSize.height + [window contentView].sliderHeight;
|
||||
contentSize.width = std::max<int>(scaledImageSize.width, MIN_SLIDER_WIDTH);
|
||||
[window setContentSize:contentSize]; //adjust sliders to fit new window size
|
||||
}
|
||||
vrectNew.size.height = [[[window contentView] image] size].height + slider_height;
|
||||
vrectNew.size.width = std::max<int>([[[window contentView] image] size].width, MIN_SLIDER_WIDTH);
|
||||
[[window contentView] setFrameSize:vrectNew.size]; //adjust sliders to fit new window size
|
||||
|
||||
rect.size.width += vrectNew.size.width - vrectOld.size.width;
|
||||
rect.size.height += vrectNew.size.height - vrectOld.size.height;
|
||||
rect.origin.y -= vrectNew.size.height - vrectOld.size.height;
|
||||
|
||||
[window setFrame:rect display:YES];
|
||||
}
|
||||
else
|
||||
[window display];
|
||||
[window setFirstContent:NO];
|
||||
}
|
||||
[localpool drain];
|
||||
@ -259,10 +264,9 @@ CV_IMPL void cvResizeWindow( const char* name, int width, int height)
|
||||
NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];
|
||||
CVWindow *window = cvGetWindow(name);
|
||||
if(window && ![window autosize]) {
|
||||
NSRect frame = [window frame];
|
||||
frame.size.width = width;
|
||||
frame.size.height = height;
|
||||
[window setFrame:frame display:YES];
|
||||
height += [window contentView].sliderHeight;
|
||||
NSSize size = { width, height };
|
||||
[window setContentSize:size];
|
||||
}
|
||||
[localpool drain];
|
||||
}
|
||||
@ -532,7 +536,7 @@ CV_IMPL int cvNamedWindow( const char* name, int flags )
|
||||
NSScreen* mainDisplay = [NSScreen mainScreen];
|
||||
|
||||
NSString *windowName = [NSString stringWithFormat:@"%s", name];
|
||||
NSUInteger showResize = (flags == CV_WINDOW_AUTOSIZE) ? 0: NSResizableWindowMask ;
|
||||
NSUInteger showResize = NSResizableWindowMask;
|
||||
NSUInteger styleMask = NSTitledWindowMask|NSMiniaturizableWindowMask|showResize;
|
||||
CGFloat windowWidth = [NSWindow minFrameWidthWithTitle:windowName styleMask:styleMask];
|
||||
NSRect initContentRect = NSMakeRect(0, 0, windowWidth, 0);
|
||||
@ -728,6 +732,22 @@ void cv::setWindowTitle(const String& winname, const String& title)
|
||||
[localpool drain];
|
||||
}
|
||||
|
||||
// Returns a size with the same width/height ratio as `constraint`, derived
// from `base` by keeping one of its dimensions and recomputing the other.
// @param base        size to adjust
// @param constraint  size whose aspect ratio the result takes
// @return            `base` unchanged when both per-axis ratios are equal;
//                    otherwise a size sharing one dimension with `base`
// NOTE(review): both divisions below use constraint.height and
// constraint.width without a zero check - confirm callers never pass an
// empty constraint.
static NSSize constrainAspectRatio(NSSize base, NSSize constraint) {
|
||||
// Per-axis scale of base relative to constraint (ratios, despite the "Diff" names).
CGFloat heightDiff = (base.height / constraint.height);
|
||||
CGFloat widthDiff = (base.width / constraint.width);
|
||||
// Equal ratios: base already has the constraint's aspect ratio.
if (widthDiff == heightDiff) {
|
||||
return base;
|
||||
}
|
||||
// base is relatively wider: keep its height and recompute the width.
else if (widthDiff > heightDiff) {
|
||||
NSSize out = { constraint.width / constraint.height * base.height, base.height };
|
||||
return out;
|
||||
}
|
||||
// base is relatively taller: keep its width and recompute the height.
else {
|
||||
NSSize out = { base.width, constraint.height / constraint.width * base.width };
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
@implementation CVWindow
|
||||
|
||||
@synthesize mouseCallback;
|
||||
@ -743,22 +763,19 @@ void cv::setWindowTitle(const String& winname, const String& title)
|
||||
NSPoint mp = [NSEvent mouseLocation];
|
||||
//NSRect visible = [[self contentView] frame];
|
||||
mp = [self convertScreenToBase: mp];
|
||||
double viewHeight = [self contentView].frame.size.height;
|
||||
double viewWidth = [self contentView].frame.size.width;
|
||||
CVWindow *window = (CVWindow *)[[self contentView] window];
|
||||
if ([window respondsToSelector:@selector(sliders)]) {
|
||||
for(NSString *key in [window sliders]) {
|
||||
NSSlider *slider = [[window sliders] valueForKey:key];
|
||||
viewHeight = std::min(viewHeight, (double)([slider frame].origin.y));
|
||||
}
|
||||
CVView *contentView = [self contentView];
|
||||
NSSize viewSize = contentView.frame.size;
|
||||
if (contentView.imageView) {
|
||||
viewSize = contentView.imageView.frame.size;
|
||||
}
|
||||
viewHeight -= TOP_BORDER;
|
||||
mp.y = viewHeight - mp.y;
|
||||
else {
|
||||
viewSize.height -= contentView.sliderHeight;
|
||||
}
|
||||
mp.y = viewSize.height - mp.y;
|
||||
|
||||
NSImage* image = ((CVView*)[self contentView]).image;
|
||||
NSSize imageSize = [image size];
|
||||
mp.x = mp.x * imageSize.width / std::max(viewWidth, 1.);
|
||||
mp.y = mp.y * imageSize.height / std::max(viewHeight, 1.);
|
||||
NSSize imageSize = contentView.image.size;
|
||||
mp.y *= (imageSize.height / std::max(viewSize.height, 1.));
|
||||
mp.x *= (imageSize.width / std::max(viewSize.width, 1.));
|
||||
|
||||
if( mp.x >= 0 && mp.y >= 0 && mp.x < imageSize.width && mp.y < imageSize.height )
|
||||
mouseCallback(type, mp.x, mp.y, flags, mouseParam);
|
||||
@ -862,17 +879,14 @@ void cv::setWindowTitle(const String& winname, const String& title)
|
||||
viewSize.width = std::max<int>(viewSize.width, MIN_SLIDER_WIDTH);
|
||||
|
||||
// Update slider sizes
|
||||
[[self contentView] setFrameSize:viewSize];
|
||||
[[self contentView] setNeedsDisplay:YES];
|
||||
[self contentView].sliderHeight += sliderSize.height;
|
||||
|
||||
if ([[self contentView] image] && ![[self contentView] imageView]) {
|
||||
[[self contentView] setNeedsDisplay:YES];
|
||||
}
|
||||
|
||||
//update window size to contain sliders
|
||||
NSRect rect = [self frame];
|
||||
rect.size.height += [slider frame].size.height;
|
||||
rect.size.width = std::max<int>(rect.size.width, MIN_SLIDER_WIDTH);
|
||||
[self setFrame:rect display:YES];
|
||||
|
||||
|
||||
|
||||
[self setContentSize: viewSize];
|
||||
}
|
||||
|
||||
- (CVView *)contentView {
|
||||
@ -888,20 +902,15 @@ void cv::setWindowTitle(const String& winname, const String& title)
|
||||
- (id)init {
|
||||
//cout << "CVView init" << endl;
|
||||
[super init];
|
||||
image = [[NSImage alloc] init];
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)setImageData:(CvArr *)arr {
|
||||
//cout << "setImageData" << endl;
|
||||
NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];
|
||||
CvMat *arrMat, *cvimage, stub;
|
||||
CvMat *arrMat, dst, stub;
|
||||
|
||||
arrMat = cvGetMat(arr, &stub);
|
||||
|
||||
cvimage = cvCreateMat(arrMat->rows, arrMat->cols, CV_8UC3);
|
||||
cvConvertImage(arrMat, cvimage, CV_CVTIMG_SWAP_RB);
|
||||
|
||||
/*CGColorSpaceRef colorspace = NULL;
|
||||
CGDataProviderRef provider = NULL;
|
||||
int width = cvimage->width;
|
||||
@ -922,42 +931,77 @@ void cv::setWindowTitle(const String& winname, const String& title)
|
||||
}*/
|
||||
|
||||
NSBitmapImageRep *bitmap = [[NSBitmapImageRep alloc] initWithBitmapDataPlanes:NULL
|
||||
pixelsWide:cvimage->width
|
||||
pixelsHigh:cvimage->height
|
||||
pixelsWide:arrMat->cols
|
||||
pixelsHigh:arrMat->rows
|
||||
bitsPerSample:8
|
||||
samplesPerPixel:3
|
||||
hasAlpha:NO
|
||||
isPlanar:NO
|
||||
colorSpaceName:NSDeviceRGBColorSpace
|
||||
bytesPerRow:(cvimage->width * 4)
|
||||
bitsPerPixel:32];
|
||||
bitmapFormat: kCGImageAlphaNone
|
||||
bytesPerRow:((arrMat->cols * 3 + 3) & -4)
|
||||
bitsPerPixel:24];
|
||||
|
||||
int pixelCount = cvimage->width * cvimage->height;
|
||||
unsigned char *src = cvimage->data.ptr;
|
||||
unsigned char *dst = [bitmap bitmapData];
|
||||
|
||||
for( int i = 0; i < pixelCount; i++ )
|
||||
{
|
||||
dst[i * 4 + 0] = src[i * 3 + 0];
|
||||
dst[i * 4 + 1] = src[i * 3 + 1];
|
||||
dst[i * 4 + 2] = src[i * 3 + 2];
|
||||
if (bitmap) {
|
||||
cvInitMatHeader(&dst, arrMat->rows, arrMat->cols, CV_8UC3, [bitmap bitmapData], [bitmap bytesPerRow]);
|
||||
cvConvertImage(arrMat, &dst, CV_CVTIMG_SWAP_RB);
|
||||
}
|
||||
else {
|
||||
// It's not guaranteed to like the bitsPerPixel:24, but this is a lot slower so we'd rather not do it
|
||||
bitmap = [[NSBitmapImageRep alloc] initWithBitmapDataPlanes:NULL
|
||||
pixelsWide:arrMat->cols
|
||||
pixelsHigh:arrMat->rows
|
||||
bitsPerSample:8
|
||||
samplesPerPixel:3
|
||||
hasAlpha:NO
|
||||
isPlanar:NO
|
||||
colorSpaceName:NSDeviceRGBColorSpace
|
||||
bytesPerRow:(arrMat->cols * 4)
|
||||
bitsPerPixel:32];
|
||||
uint8_t *data = [bitmap bitmapData];
|
||||
cvInitMatHeader(&dst, arrMat->rows, arrMat->cols, CV_8UC3, data, (arrMat->cols * 3));
|
||||
cvConvertImage(arrMat, &dst, CV_CVTIMG_SWAP_RB);
|
||||
for (int i = (arrMat->rows * arrMat->cols) - 1; i >= 0; i--) {
|
||||
memmove(data + i * 4, data + i * 3, 3);
|
||||
data[i * 4 + 3] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if( image )
|
||||
if( image ) {
|
||||
[image release];
|
||||
}
|
||||
|
||||
image = [[NSImage alloc] init];
|
||||
[image addRepresentation:bitmap];
|
||||
[bitmap release];
|
||||
|
||||
// This isn't supported on older versions of macOS
|
||||
// The performance issues this solves are mainly on newer versions of macOS, so that's fine
|
||||
if( floor(NSAppKitVersionNumber) > NSAppKitVersionNumber10_5 ) {
|
||||
if (![self imageView]) {
|
||||
[self setImageView:[[NSView alloc] init]];
|
||||
[[self imageView] setWantsLayer:true];
|
||||
[self addSubview:[self imageView]];
|
||||
}
|
||||
|
||||
[[[self imageView] layer] setContents:image];
|
||||
|
||||
NSRect imageViewFrame = [self frame];
|
||||
imageViewFrame.size.height -= [self sliderHeight];
|
||||
NSRect constrainedFrame = { imageViewFrame.origin, constrainAspectRatio(imageViewFrame.size, [image size]) };
|
||||
[[self imageView] setFrame:constrainedFrame];
|
||||
}
|
||||
else {
|
||||
NSRect redisplayRect = [self frame];
|
||||
redisplayRect.size.height -= [self sliderHeight];
|
||||
[self setNeedsDisplayInRect:redisplayRect];
|
||||
}
|
||||
|
||||
/*CGColorSpaceRelease(colorspace);
|
||||
CGDataProviderRelease(provider);
|
||||
CGImageRelease(imageRef);*/
|
||||
cvReleaseMat(&cvimage);
|
||||
|
||||
[localpool drain];
|
||||
|
||||
[self setNeedsDisplay:YES];
|
||||
|
||||
}
|
||||
|
||||
- (void)setFrameSize:(NSSize)size {
|
||||
@ -970,41 +1014,49 @@ void cv::setWindowTitle(const String& winname, const String& title)
|
||||
CVWindow *cvwindow = (CVWindow *)[self window];
|
||||
if ([cvwindow respondsToSelector:@selector(sliders)]) {
|
||||
for(NSString *key in [cvwindow sliders]) {
|
||||
NSSlider *slider = [[cvwindow sliders] valueForKey:key];
|
||||
CVSlider *slider = [[cvwindow sliders] valueForKey:key];
|
||||
NSRect r = [slider frame];
|
||||
r.origin.y = height - r.size.height;
|
||||
r.size.width = [[cvwindow contentView] frame].size.width;
|
||||
|
||||
CGRect sliderRect = slider.slider.frame;
|
||||
CGFloat targetWidth = r.size.width - (sliderRect.origin.x + 10);
|
||||
sliderRect.size.width = targetWidth < 0 ? 0 : targetWidth;
|
||||
slider.slider.frame = sliderRect;
|
||||
|
||||
[slider setFrame:r];
|
||||
height -= r.size.height;
|
||||
}
|
||||
}
|
||||
NSRect frame = self.frame;
|
||||
if (frame.size.height < self.sliderHeight) {
|
||||
frame.size.height = self.sliderHeight;
|
||||
self.frame = frame;
|
||||
}
|
||||
if ([self imageView]) {
|
||||
NSRect imageViewFrame = frame;
|
||||
imageViewFrame.size.height -= [self sliderHeight];
|
||||
NSRect constrainedFrame = { imageViewFrame.origin, constrainAspectRatio(imageViewFrame.size, [image size]) };
|
||||
[[self imageView] setFrame:constrainedFrame];
|
||||
}
|
||||
[localpool drain];
|
||||
}
|
||||
|
||||
- (void)drawRect:(NSRect)rect {
|
||||
//cout << "drawRect" << endl;
|
||||
[super drawRect:rect];
|
||||
// If imageView exists, all drawing will be done by it and nothing needs to happen here
|
||||
if ([self image] && ![self imageView]) {
|
||||
NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];
|
||||
|
||||
NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init];
|
||||
CVWindow *cvwindow = (CVWindow *)[self window];
|
||||
int height = 0;
|
||||
if ([cvwindow respondsToSelector:@selector(sliders)]) {
|
||||
for(NSString *key in [cvwindow sliders]) {
|
||||
height += [[[cvwindow sliders] valueForKey:key] frame].size.height;
|
||||
if(image != nil) {
|
||||
[image drawInRect: [self frame]
|
||||
fromRect: NSZeroRect
|
||||
operation: NSCompositeSourceOver
|
||||
fraction: 1.0];
|
||||
}
|
||||
[localpool release];
|
||||
}
|
||||
|
||||
|
||||
NSRect imageRect = {{0,0}, {[image size].width, [image size].height}};
|
||||
|
||||
if(image != nil) {
|
||||
[image drawInRect: imageRect
|
||||
fromRect: NSZeroRect
|
||||
operation: NSCompositeSourceOver
|
||||
fraction: 1.0];
|
||||
}
|
||||
[localpool release];
|
||||
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
set(the_description "Image Processing")
|
||||
ocv_add_dispatched_file(accum SSE2 AVX NEON)
|
||||
ocv_add_dispatched_file(accum SSE4_1 AVX AVX2)
|
||||
ocv_define_module(imgproc opencv_core WRAP java python js)
|
||||
|
||||
@ -5,94 +5,102 @@
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
#ifdef HAVE_OPENVX
|
||||
PERF_TEST_P(Size_MatType, Accumulate,
|
||||
testing::Combine(
|
||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
||||
testing::Values(CV_16SC1, CV_32FC1)
|
||||
)
|
||||
)
|
||||
#else
|
||||
PERF_TEST_P( Size_MatType, Accumulate,
|
||||
testing::Combine(
|
||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
||||
testing::Values(CV_32FC1)
|
||||
)
|
||||
)
|
||||
#endif
|
||||
{
|
||||
Size sz = get<0>(GetParam());
|
||||
int dstType = get<1>(GetParam());
|
||||
typedef Size_MatType Accumulate;
|
||||
|
||||
Mat src(sz, CV_8UC1);
|
||||
Mat dst(sz, dstType);
|
||||
#define MAT_TYPES_ACCUMLATE CV_8UC1, CV_16UC1, CV_32FC1
|
||||
#define MAT_TYPES_ACCUMLATE_C MAT_TYPES_ACCUMLATE, CV_8UC3, CV_16UC3, CV_32FC3
|
||||
#define MAT_TYPES_ACCUMLATE_D MAT_TYPES_ACCUMLATE, CV_64FC1
|
||||
#define MAT_TYPES_ACCUMLATE_D_C MAT_TYPES_ACCUMLATE_C, CV_64FC1, CV_64FC1
|
||||
|
||||
declare.time(100);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
#define PERF_ACCUMULATE_INIT(_FLTC) \
|
||||
const Size srcSize = get<0>(GetParam()); \
|
||||
const int srcType = get<1>(GetParam()); \
|
||||
const int dstType = _FLTC(CV_MAT_CN(srcType)); \
|
||||
Mat src1(srcSize, srcType), dst(srcSize, dstType); \
|
||||
declare.in(src1, dst, WARMUP_RNG).out(dst);
|
||||
|
||||
TEST_CYCLE() accumulate(src, dst);
|
||||
#define PERF_ACCUMULATE_MASK_INIT(_FLTC) \
|
||||
PERF_ACCUMULATE_INIT(_FLTC) \
|
||||
Mat mask(srcSize, CV_8UC1); \
|
||||
declare.in(mask, WARMUP_RNG);
|
||||
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
#define PERF_TEST_P_ACCUMULATE(_NAME, _TYPES, _INIT, _FUN) \
|
||||
PERF_TEST_P(Accumulate, _NAME, \
|
||||
testing::Combine( \
|
||||
testing::Values(sz1080p, sz720p, szVGA, szQVGA, szODD), \
|
||||
testing::Values(_TYPES) \
|
||||
) \
|
||||
) \
|
||||
{ \
|
||||
_INIT \
|
||||
TEST_CYCLE() _FUN; \
|
||||
SANITY_CHECK_NOTHING(); \
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENVX
|
||||
PERF_TEST_P(Size_MatType, AccumulateSquare,
|
||||
testing::Combine(
|
||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
||||
testing::Values(CV_16SC1, CV_32FC1)
|
||||
)
|
||||
)
|
||||
#else
|
||||
PERF_TEST_P( Size_MatType, AccumulateSquare,
|
||||
testing::Combine(
|
||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
||||
testing::Values(CV_32FC1)
|
||||
)
|
||||
)
|
||||
#endif
|
||||
{
|
||||
Size sz = get<0>(GetParam());
|
||||
int dstType = get<1>(GetParam());
|
||||
/////////////////////////////////// Accumulate ///////////////////////////////////
|
||||
|
||||
Mat src(sz, CV_8UC1);
|
||||
Mat dst(sz, dstType);
|
||||
PERF_TEST_P_ACCUMULATE(Accumulate, MAT_TYPES_ACCUMLATE,
|
||||
PERF_ACCUMULATE_INIT(CV_32FC), accumulate(src1, dst))
|
||||
|
||||
declare.time(100);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
PERF_TEST_P_ACCUMULATE(AccumulateMask, MAT_TYPES_ACCUMLATE_C,
|
||||
PERF_ACCUMULATE_MASK_INIT(CV_32FC), accumulate(src1, dst, mask))
|
||||
|
||||
TEST_CYCLE() accumulateSquare(src, dst);
|
||||
PERF_TEST_P_ACCUMULATE(AccumulateDouble, MAT_TYPES_ACCUMLATE_D,
|
||||
PERF_ACCUMULATE_INIT(CV_64FC), accumulate(src1, dst))
|
||||
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
PERF_TEST_P_ACCUMULATE(AccumulateDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
|
||||
PERF_ACCUMULATE_MASK_INIT(CV_64FC), accumulate(src1, dst, mask))
|
||||
|
||||
#ifdef HAVE_OPENVX
|
||||
PERF_TEST_P(Size_MatType, AccumulateWeighted,
|
||||
testing::Combine(
|
||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
||||
testing::Values(CV_8UC1, CV_32FC1)
|
||||
)
|
||||
)
|
||||
#else
|
||||
PERF_TEST_P( Size_MatType, AccumulateWeighted,
|
||||
testing::Combine(
|
||||
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
|
||||
testing::Values(CV_32FC1)
|
||||
)
|
||||
)
|
||||
#endif
|
||||
{
|
||||
Size sz = get<0>(GetParam());
|
||||
int dstType = get<1>(GetParam());
|
||||
///////////////////////////// AccumulateSquare ///////////////////////////////////
|
||||
|
||||
Mat src(sz, CV_8UC1);
|
||||
Mat dst(sz, dstType);
|
||||
PERF_TEST_P_ACCUMULATE(Square, MAT_TYPES_ACCUMLATE,
|
||||
PERF_ACCUMULATE_INIT(CV_32FC), accumulateSquare(src1, dst))
|
||||
|
||||
declare.time(100);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
PERF_TEST_P_ACCUMULATE(SquareMask, MAT_TYPES_ACCUMLATE_C,
|
||||
PERF_ACCUMULATE_MASK_INIT(CV_32FC), accumulateSquare(src1, dst, mask))
|
||||
|
||||
TEST_CYCLE() accumulateWeighted(src, dst, 0.314);
|
||||
PERF_TEST_P_ACCUMULATE(SquareDouble, MAT_TYPES_ACCUMLATE_D,
|
||||
PERF_ACCUMULATE_INIT(CV_64FC), accumulateSquare(src1, dst))
|
||||
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
PERF_TEST_P_ACCUMULATE(SquareDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
|
||||
PERF_ACCUMULATE_MASK_INIT(CV_64FC), accumulateSquare(src1, dst, mask))
|
||||
|
||||
///////////////////////////// AccumulateProduct ///////////////////////////////////
|
||||
|
||||
#define PERF_ACCUMULATE_INIT_2(_FLTC) \
|
||||
PERF_ACCUMULATE_INIT(_FLTC) \
|
||||
Mat src2(srcSize, srcType); \
|
||||
declare.in(src2);
|
||||
|
||||
#define PERF_ACCUMULATE_MASK_INIT_2(_FLTC) \
|
||||
PERF_ACCUMULATE_MASK_INIT(_FLTC) \
|
||||
Mat src2(srcSize, srcType); \
|
||||
declare.in(src2);
|
||||
|
||||
PERF_TEST_P_ACCUMULATE(Product, MAT_TYPES_ACCUMLATE,
|
||||
PERF_ACCUMULATE_INIT_2(CV_32FC), accumulateProduct(src1, src2, dst))
|
||||
|
||||
PERF_TEST_P_ACCUMULATE(ProductMask, MAT_TYPES_ACCUMLATE_C,
|
||||
PERF_ACCUMULATE_MASK_INIT_2(CV_32FC), accumulateProduct(src1, src2, dst, mask))
|
||||
|
||||
PERF_TEST_P_ACCUMULATE(ProductDouble, MAT_TYPES_ACCUMLATE_D,
|
||||
PERF_ACCUMULATE_INIT_2(CV_64FC), accumulateProduct(src1, src2, dst))
|
||||
|
||||
PERF_TEST_P_ACCUMULATE(ProductDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
|
||||
PERF_ACCUMULATE_MASK_INIT_2(CV_64FC), accumulateProduct(src1, src2, dst, mask))
|
||||
|
||||
///////////////////////////// AccumulateWeighted ///////////////////////////////////
|
||||
|
||||
PERF_TEST_P_ACCUMULATE(Weighted, MAT_TYPES_ACCUMLATE,
|
||||
PERF_ACCUMULATE_INIT(CV_32FC), accumulateWeighted(src1, dst, 0.123))
|
||||
|
||||
PERF_TEST_P_ACCUMULATE(WeightedMask, MAT_TYPES_ACCUMLATE_C,
|
||||
PERF_ACCUMULATE_MASK_INIT(CV_32FC), accumulateWeighted(src1, dst, 0.123, mask))
|
||||
|
||||
PERF_TEST_P_ACCUMULATE(WeightedDouble, MAT_TYPES_ACCUMLATE_D,
|
||||
PERF_ACCUMULATE_INIT(CV_64FC), accumulateWeighted(src1, dst, 0.123456))
|
||||
|
||||
PERF_TEST_P_ACCUMULATE(WeightedDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
|
||||
PERF_ACCUMULATE_MASK_INIT(CV_64FC), accumulateWeighted(src1, dst, 0.123456, mask))
|
||||
|
||||
} // namespace
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1875,7 +1875,7 @@ void rectangle( InputOutputArray img, Rect rec,
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
if( rec.area() > 0 )
|
||||
if( !rec.empty() )
|
||||
rectangle( img, rec.tl(), rec.br() - Point(1<<shift,1<<shift),
|
||||
color, thickness, lineType, shift );
|
||||
}
|
||||
|
||||
@ -663,7 +663,7 @@ static void remapBilinear( const Mat& _src, Mat& _dst, const Mat& _xy,
|
||||
cval[k] = saturate_cast<T>(_borderValue[k & 3]);
|
||||
|
||||
unsigned width1 = std::max(ssize.width-1, 0), height1 = std::max(ssize.height-1, 0);
|
||||
CV_Assert( ssize.area() > 0 );
|
||||
CV_Assert( !ssize.empty() );
|
||||
#if CV_SIMD128
|
||||
if( _src.type() == CV_8UC3 )
|
||||
width1 = std::max(ssize.width-2, 0);
|
||||
@ -1705,7 +1705,7 @@ void cv::remap( InputArray _src, OutputArray _dst,
|
||||
remapLanczos4<Cast<double, double>, float, 1>, 0
|
||||
};
|
||||
|
||||
CV_Assert( _map1.size().area() > 0 );
|
||||
CV_Assert( !_map1.empty() );
|
||||
CV_Assert( _map2.empty() || (_map2.size() == _map1.size()));
|
||||
|
||||
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
|
||||
@ -2410,7 +2410,7 @@ static bool ocl_warpTransform_cols4(InputArray _src, OutputArray _dst, InputArra
|
||||
scalarToRawData(borderValue, borderBuf, sctype);
|
||||
|
||||
UMat src = _src.getUMat(), M0;
|
||||
_dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
|
||||
_dst.create( dsize.empty() ? src.size() : dsize, src.type() );
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
float M[9] = {0};
|
||||
@ -2514,7 +2514,7 @@ static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
|
||||
scalarToRawData(borderValue, borderBuf, sctype);
|
||||
|
||||
UMat src = _src.getUMat(), M0;
|
||||
_dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
|
||||
_dst.create( dsize.empty() ? src.size() : dsize, src.type() );
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
double M[9] = {0};
|
||||
@ -2606,7 +2606,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
|
||||
borderValue, OCL_OP_AFFINE))
|
||||
|
||||
Mat src = _src.getMat(), M0 = _M0.getMat();
|
||||
_dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
|
||||
_dst.create( dsize.empty() ? src.size() : dsize, src.type() );
|
||||
Mat dst = _dst.getMat();
|
||||
CV_Assert( src.cols > 0 && src.rows > 0 );
|
||||
if( dst.data == src.data )
|
||||
@ -2912,7 +2912,7 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
|
||||
OCL_OP_PERSPECTIVE))
|
||||
|
||||
Mat src = _src.getMat(), M0 = _M0.getMat();
|
||||
_dst.create( dsize.area() == 0 ? src.size() : dsize, src.type() );
|
||||
_dst.create( dsize.empty() ? src.size() : dsize, src.type() );
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
if( dst.data == src.data )
|
||||
|
||||
@ -1166,7 +1166,7 @@ int LineSegmentDetectorImpl::compareSegments(const Size& size, InputArray lines1
|
||||
|
||||
Size sz = size;
|
||||
if (_image.needed() && _image.size() != size) sz = _image.size();
|
||||
CV_Assert(sz.area());
|
||||
CV_Assert(!sz.empty());
|
||||
|
||||
Mat_<uchar> I1 = Mat_<uchar>::zeros(sz);
|
||||
Mat_<uchar> I2 = Mat_<uchar>::zeros(sz);
|
||||
|
||||
@ -631,7 +631,7 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
|
||||
return false;
|
||||
|
||||
Size ssize = _src.size();
|
||||
Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
|
||||
Size dsize = _dsz.empty() ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
|
||||
if (dsize.height < 2 || dsize.width < 2)
|
||||
return false;
|
||||
|
||||
@ -683,7 +683,7 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int
|
||||
return false;
|
||||
|
||||
Size ssize = _src.size();
|
||||
if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
|
||||
if (!_dsz.empty() && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
@ -742,7 +742,7 @@ static bool ipp_pyrdown( InputArray _src, OutputArray _dst, const Size& _dsz, in
|
||||
CV_INSTRUMENT_REGION_IPP();
|
||||
|
||||
#if IPP_VERSION_X100 >= 810 && !IPP_DISABLE_PYRAMIDS_DOWN
|
||||
Size dsz = _dsz.area() == 0 ? Size((_src.cols() + 1)/2, (_src.rows() + 1)/2) : _dsz;
|
||||
Size dsz = _dsz.empty() ? Size((_src.cols() + 1)/2, (_src.rows() + 1)/2) : _dsz;
|
||||
bool isolated = (borderType & BORDER_ISOLATED) != 0;
|
||||
int borderTypeNI = borderType & ~BORDER_ISOLATED;
|
||||
|
||||
@ -817,7 +817,7 @@ static bool openvx_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz,
|
||||
// OpenVX limitations
|
||||
if((srcMat.type() != CV_8U) ||
|
||||
(borderType != BORDER_REPLICATE) ||
|
||||
(_dsz != acceptableSize && _dsz.area() != 0))
|
||||
(_dsz != acceptableSize && !_dsz.empty()))
|
||||
return false;
|
||||
|
||||
// The only border mode which is supported by both cv::pyrDown() and OpenVX
|
||||
@ -889,7 +889,7 @@ void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borde
|
||||
openvx_pyrDown(_src, _dst, _dsz, borderType))
|
||||
|
||||
Mat src = _src.getMat();
|
||||
Size dsz = _dsz.area() == 0 ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
|
||||
Size dsz = _dsz.empty() ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
|
||||
_dst.create( dsz, src.type() );
|
||||
Mat dst = _dst.getMat();
|
||||
int depth = src.depth();
|
||||
@ -931,7 +931,7 @@ static bool ipp_pyrup( InputArray _src, OutputArray _dst, const Size& _dsz, int
|
||||
|
||||
#if IPP_VERSION_X100 >= 810 && !IPP_DISABLE_PYRAMIDS_UP
|
||||
Size sz = _src.dims() <= 2 ? _src.size() : Size();
|
||||
Size dsz = _dsz.area() == 0 ? Size(_src.cols()*2, _src.rows()*2) : _dsz;
|
||||
Size dsz = _dsz.empty() ? Size(_src.cols()*2, _src.rows()*2) : _dsz;
|
||||
|
||||
Mat src = _src.getMat();
|
||||
_dst.create( dsz, src.type() );
|
||||
@ -994,7 +994,7 @@ void cv::pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderT
|
||||
|
||||
|
||||
Mat src = _src.getMat();
|
||||
Size dsz = _dsz.area() == 0 ? Size(src.cols*2, src.rows*2) : _dsz;
|
||||
Size dsz = _dsz.empty() ? Size(src.cols*2, src.rows*2) : _dsz;
|
||||
_dst.create( dsz, src.type() );
|
||||
Mat dst = _dst.getMat();
|
||||
int depth = src.depth();
|
||||
|
||||
@ -1825,7 +1825,7 @@ void hlineSmooth1N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufi
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_mul = vx_setall_u16(*((uint16_t*)m));
|
||||
for (; i <= lencn - VECSZ; i += VECSZ)
|
||||
v_store((uint16_t*)dst + i, v_mul*vx_load_expand(src + i));
|
||||
v_store((uint16_t*)dst + i, v_mul_wrap(v_mul, vx_load_expand(src + i)));
|
||||
#endif
|
||||
for (; i < lencn; i++)
|
||||
dst[i] = m[0] * src[i];
|
||||
@ -1915,7 +1915,9 @@ void hlineSmooth3N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufi
|
||||
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
|
||||
v_uint16 v_mul2 = vx_setall_u16(_m[2]);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, vx_load_expand(src - cn) * v_mul0 + vx_load_expand(src) * v_mul1 + vx_load_expand(src + cn) * v_mul2);
|
||||
v_store((uint16_t*)dst, v_mul_wrap(vx_load_expand(src - cn), v_mul0) +
|
||||
v_mul_wrap(vx_load_expand(src), v_mul1) +
|
||||
v_mul_wrap(vx_load_expand(src + cn), v_mul2));
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*dst = m[0] * src[-cn] + m[1] * src[0] + m[2] * src[cn];
|
||||
@ -2089,7 +2091,8 @@ void hlineSmooth3Naba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const
|
||||
v_uint16 v_mul0 = vx_setall_u16(_m[0]);
|
||||
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, (vx_load_expand(src - cn) + vx_load_expand(src + cn)) * v_mul0 + vx_load_expand(src) * v_mul1);
|
||||
v_store((uint16_t*)dst, v_mul_wrap(vx_load_expand(src - cn) + vx_load_expand(src + cn), v_mul0) +
|
||||
v_mul_wrap(vx_load_expand(src), v_mul1));
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*((uint16_t*)dst) = ((uint16_t*)m)[1] * src[0] + ((uint16_t*)m)[0] * ((uint16_t)(src[-cn]) + (uint16_t)(src[cn]));
|
||||
@ -2285,7 +2288,11 @@ void hlineSmooth5N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufi
|
||||
v_uint16 v_mul3 = vx_setall_u16(_m[3]);
|
||||
v_uint16 v_mul4 = vx_setall_u16(_m[4]);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, vx_load_expand(src - 2 * cn) * v_mul0 + vx_load_expand(src - cn) * v_mul1 + vx_load_expand(src) * v_mul2 + vx_load_expand(src + cn) * v_mul3 + vx_load_expand(src + 2 * cn) * v_mul4);
|
||||
v_store((uint16_t*)dst, v_mul_wrap(vx_load_expand(src - 2 * cn), v_mul0) +
|
||||
v_mul_wrap(vx_load_expand(src - cn), v_mul1) +
|
||||
v_mul_wrap(vx_load_expand(src), v_mul2) +
|
||||
v_mul_wrap(vx_load_expand(src + cn), v_mul3) +
|
||||
v_mul_wrap(vx_load_expand(src + 2 * cn), v_mul4));
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*dst = m[0] * src[-2*cn] + m[1] * src[-cn] + m[2] * src[0] + m[3] * src[cn] + m[4] * src[2*cn];
|
||||
@ -2488,7 +2495,7 @@ void hlineSmooth5N14641<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
v_uint16 v_6 = vx_setall_u16(6);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, (vx_load_expand(src) * v_6 + ((vx_load_expand(src - cn) + vx_load_expand(src + cn)) << 2) + vx_load_expand(src - 2 * cn) + vx_load_expand(src + 2 * cn)) << 4);
|
||||
v_store((uint16_t*)dst, (v_mul_wrap(vx_load_expand(src), v_6) + ((vx_load_expand(src - cn) + vx_load_expand(src + cn)) << 2) + vx_load_expand(src - 2 * cn) + vx_load_expand(src + 2 * cn)) << 4);
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*((uint16_t*)dst) = (uint16_t(src[0]) * 6 + ((uint16_t(src[-cn]) + uint16_t(src[cn])) << 2) + uint16_t(src[-2 * cn]) + uint16_t(src[2 * cn])) << 4;
|
||||
@ -2689,7 +2696,9 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
|
||||
v_uint16 v_mul1 = vx_setall_u16(_m[1]);
|
||||
v_uint16 v_mul2 = vx_setall_u16(_m[2]);
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
v_store((uint16_t*)dst, (vx_load_expand(src - 2 * cn) + vx_load_expand(src + 2 * cn)) * v_mul0 + (vx_load_expand(src - cn) + vx_load_expand(src + cn))* v_mul1 + vx_load_expand(src) * v_mul2);
|
||||
v_store((uint16_t*)dst, v_mul_wrap(vx_load_expand(src - 2 * cn) + vx_load_expand(src + 2 * cn), v_mul0) +
|
||||
v_mul_wrap(vx_load_expand(src - cn) + vx_load_expand(src + cn), v_mul1) +
|
||||
v_mul_wrap(vx_load_expand(src), v_mul2));
|
||||
#endif
|
||||
for (; i < lencn; i++, src++, dst++)
|
||||
*((uint16_t*)dst) = ((uint16_t*)m)[0] * ((uint16_t)(src[-2 * cn]) + (uint16_t)(src[2 * cn])) + ((uint16_t*)m)[1] * ((uint16_t)(src[-cn]) + (uint16_t)(src[cn])) + ((uint16_t*)m)[2] * src[0];
|
||||
@ -2804,9 +2813,9 @@ void hlineSmooth<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufixe
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
for (; i <= lencn - VECSZ; i+=VECSZ, src+=VECSZ, dst+=VECSZ)
|
||||
{
|
||||
v_uint16 v_res0 = vx_load_expand(src) * vx_setall_u16(*((uint16_t*)m));
|
||||
v_uint16 v_res0 = v_mul_wrap(vx_load_expand(src), vx_setall_u16(*((uint16_t*)m)));
|
||||
for (int j = 1; j < n; j++)
|
||||
v_res0 += vx_load_expand(src + j * cn) * vx_setall_u16(*((uint16_t*)(m + j)));
|
||||
v_res0 += v_mul_wrap(vx_load_expand(src + j * cn), vx_setall_u16(*((uint16_t*)(m + j))));
|
||||
v_store((uint16_t*)dst, v_res0);
|
||||
}
|
||||
#endif
|
||||
@ -2923,9 +2932,9 @@ void hlineSmoothONa_yzy_a<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, co
|
||||
const int VECSZ = v_uint16::nlanes;
|
||||
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
|
||||
{
|
||||
v_uint16 v_res0 = vx_load_expand(src + pre_shift * cn) * vx_setall_u16(*((uint16_t*)(m + pre_shift)));
|
||||
v_uint16 v_res0 = v_mul_wrap(vx_load_expand(src + pre_shift * cn), vx_setall_u16(*((uint16_t*)(m + pre_shift))));
|
||||
for (int j = 0; j < pre_shift; j ++)
|
||||
v_res0 += (vx_load_expand(src + j * cn) + vx_load_expand(src + (n - 1 - j)*cn)) * vx_setall_u16(*((uint16_t*)(m + j)));
|
||||
v_res0 += v_mul_wrap(vx_load_expand(src + j * cn) + vx_load_expand(src + (n - 1 - j)*cn), vx_setall_u16(*((uint16_t*)(m + j))));
|
||||
v_store((uint16_t*)dst, v_res0);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -97,7 +97,7 @@ OCL_TEST_P(PyrDown, Mat)
|
||||
Size src_roiSize = randomSize(1, MAX_VALUE);
|
||||
Size dst_roiSize = Size(randomInt((src_roiSize.width - 1) / 2, (src_roiSize.width + 3) / 2),
|
||||
randomInt((src_roiSize.height - 1) / 2, (src_roiSize.height + 3) / 2));
|
||||
dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
|
||||
dst_roiSize = dst_roiSize.empty() ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
|
||||
generateTestData(src_roiSize, dst_roiSize);
|
||||
|
||||
OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize, borderType));
|
||||
|
||||
@ -289,7 +289,7 @@ PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool, int)
|
||||
dstRoiSize.width = cvRound(srcRoiSize.width * fx);
|
||||
dstRoiSize.height = cvRound(srcRoiSize.height * fy);
|
||||
|
||||
if (dstRoiSize.area() == 0)
|
||||
if (dstRoiSize.empty())
|
||||
{
|
||||
random_roi();
|
||||
return;
|
||||
|
||||
@ -527,7 +527,7 @@ double CV_Resize_Test::getWeight(double a, double b, int x)
|
||||
void CV_Resize_Test::resize_area()
|
||||
{
|
||||
Size ssize = src.size(), dsize = reference_dst.size();
|
||||
CV_Assert(ssize.area() > 0 && dsize.area() > 0);
|
||||
CV_Assert(!ssize.empty() && !dsize.empty());
|
||||
int cn = src.channels();
|
||||
|
||||
CV_Assert(scale_x >= 1.0 && scale_y >= 1.0);
|
||||
@ -654,7 +654,7 @@ void CV_Resize_Test::generate_buffer(double scale, dim& _dim)
|
||||
void CV_Resize_Test::resize_generic()
|
||||
{
|
||||
Size dsize = reference_dst.size(), ssize = src.size();
|
||||
CV_Assert(dsize.area() > 0 && ssize.area() > 0);
|
||||
CV_Assert(!dsize.empty() && !ssize.empty());
|
||||
|
||||
dim dims[] = { dim(dsize.width), dim(dsize.height) };
|
||||
if (interpolation == INTER_NEAREST)
|
||||
@ -884,7 +884,7 @@ void CV_Remap_Test::remap_nearest(const Mat& _src, Mat& _dst)
|
||||
CV_Assert(mapx.type() == CV_16SC2 && mapy.empty());
|
||||
|
||||
Size ssize = _src.size(), dsize = _dst.size();
|
||||
CV_Assert(ssize.area() > 0 && dsize.area() > 0);
|
||||
CV_Assert(!ssize.empty() && !dsize.empty());
|
||||
int cn = _src.channels();
|
||||
|
||||
for (int dy = 0; dy < dsize.height; ++dy)
|
||||
@ -1106,8 +1106,8 @@ void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
|
||||
{
|
||||
Size dsize = _dst.size();
|
||||
|
||||
CV_Assert(_src.size().area() > 0);
|
||||
CV_Assert(dsize.area() > 0);
|
||||
CV_Assert(!_src.empty());
|
||||
CV_Assert(!dsize.empty());
|
||||
CV_Assert(_src.type() == _dst.type());
|
||||
|
||||
Mat tM;
|
||||
@ -1228,8 +1228,8 @@ void CV_WarpPerspective_Test::warpPerspective(const Mat& _src, Mat& _dst)
|
||||
{
|
||||
Size ssize = _src.size(), dsize = _dst.size();
|
||||
|
||||
CV_Assert(ssize.area() > 0);
|
||||
CV_Assert(dsize.area() > 0);
|
||||
CV_Assert(!ssize.empty());
|
||||
CV_Assert(!dsize.empty());
|
||||
CV_Assert(_src.type() == _dst.type());
|
||||
|
||||
if (M.depth() != CV_64F)
|
||||
|
||||
@ -91,7 +91,11 @@ endif()
|
||||
if(COMPILE_FLAGS)
|
||||
set_target_properties(${the_module} PROPERTIES COMPILE_FLAGS ${COMPILE_FLAGS})
|
||||
endif()
|
||||
set_target_properties(${the_module} PROPERTIES LINK_FLAGS "--memory-init-file 0 -s TOTAL_MEMORY=134217728 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME=\"'cv'\" -s DEMANGLE_SUPPORT=1 -s FORCE_FILESYSTEM=1 --use-preload-plugins --bind --post-js ${JS_HELPER} ${COMPILE_FLAGS}")
|
||||
set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} --memory-init-file 0 -s TOTAL_MEMORY=134217728 -s ALLOW_MEMORY_GROWTH=1")
|
||||
set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s MODULARIZE=1 -s SINGLE_FILE=1")
|
||||
set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s EXPORT_NAME=\"'cv'\" -s DEMANGLE_SUPPORT=1")
|
||||
set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s FORCE_FILESYSTEM=1 --use-preload-plugins --bind --post-js ${JS_HELPER} ${COMPILE_FLAGS}")
|
||||
set_target_properties(${the_module} PROPERTIES LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS}")
|
||||
|
||||
# add UMD wrapper
|
||||
set(MODULE_JS_PATH "${OpenCV_BINARY_DIR}/bin/${the_module}.js")
|
||||
|
||||
@ -502,7 +502,7 @@ bool FeatureEvaluator::setImage( InputArray _image, const std::vector<float>& _s
|
||||
copyVectorToUMat(*scaleData, uscaleData);
|
||||
}
|
||||
|
||||
if (_image.isUMat() && localSize.area() > 0)
|
||||
if (_image.isUMat() && !localSize.empty())
|
||||
{
|
||||
usbuf.create(sbufSize.height*nchannels, sbufSize.width, CV_32S);
|
||||
urbuf.create(sz0, CV_8U);
|
||||
@ -1072,7 +1072,7 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
std::vector<UMat> bufs;
|
||||
featureEvaluator->getUMats(bufs);
|
||||
Size localsz = featureEvaluator->getLocalSize();
|
||||
if( localsz.area() == 0 )
|
||||
if( localsz.empty() )
|
||||
return false;
|
||||
Size lbufSize = featureEvaluator->getLocalBufSize();
|
||||
size_t localsize[] = { (size_t)localsz.width, (size_t)localsz.height };
|
||||
@ -1108,7 +1108,7 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
if( haarKernel.empty() )
|
||||
{
|
||||
String opts;
|
||||
if (lbufSize.area())
|
||||
if ( !lbufSize.empty() )
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D HAAR",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, data.maxNodesPerTree, splitstage_ocl, nstages, MAX_FACES);
|
||||
else
|
||||
@ -1148,7 +1148,7 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
if( lbpKernel.empty() )
|
||||
{
|
||||
String opts;
|
||||
if (lbufSize.area())
|
||||
if ( !lbufSize.empty() )
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D LBP",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, splitstage_ocl, nstages, MAX_FACES);
|
||||
else
|
||||
@ -1304,7 +1304,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
|
||||
#ifdef HAVE_OPENCL
|
||||
bool use_ocl = tryOpenCL && ocl::isOpenCLActivated() &&
|
||||
OCL_FORCE_CHECK(_image.isUMat()) &&
|
||||
featureEvaluator->getLocalSize().area() > 0 &&
|
||||
!featureEvaluator->getLocalSize().empty() &&
|
||||
(data.minNodesPerTree == data.maxNodesPerTree) &&
|
||||
!isOldFormatCascade() &&
|
||||
maskGenerator.empty() &&
|
||||
|
||||
@ -510,7 +510,7 @@ void DetectionBasedTracker::process(const Mat& imageGray)
|
||||
CV_Assert(n > 0);
|
||||
|
||||
Rect r = trackedObjects[i].lastPositions[n-1];
|
||||
if(r.area() == 0) {
|
||||
if(r.empty()) {
|
||||
LOGE("DetectionBasedTracker::process: ERROR: ATTENTION: strange algorithm's behavior: trackedObjects[i].rect() is empty");
|
||||
continue;
|
||||
}
|
||||
@ -550,7 +550,7 @@ void cv::DetectionBasedTracker::getObjects(std::vector<cv::Rect>& result) const
|
||||
|
||||
for(size_t i=0; i < trackedObjects.size(); i++) {
|
||||
Rect r=calcTrackedObjectPositionToShow((int)i);
|
||||
if (r.area()==0) {
|
||||
if (r.empty()) {
|
||||
continue;
|
||||
}
|
||||
result.push_back(r);
|
||||
@ -564,7 +564,7 @@ void cv::DetectionBasedTracker::getObjects(std::vector<Object>& result) const
|
||||
|
||||
for(size_t i=0; i < trackedObjects.size(); i++) {
|
||||
Rect r=calcTrackedObjectPositionToShow((int)i);
|
||||
if (r.area()==0) {
|
||||
if (r.empty()) {
|
||||
continue;
|
||||
}
|
||||
result.push_back(Object(r, trackedObjects[i].id));
|
||||
|
||||
@ -1427,7 +1427,7 @@ cvHaarDetectObjectsForROC( const CvArr* _img,
|
||||
+ equRect.x + equRect.width;
|
||||
}
|
||||
|
||||
if( scanROI.area() > 0 )
|
||||
if( !scanROI.empty() )
|
||||
{
|
||||
//adjust start_height and stop_height
|
||||
startY = cvRound(scanROI.y / ystep);
|
||||
@ -1442,7 +1442,7 @@ cvHaarDetectObjectsForROC( const CvArr* _img,
|
||||
ystep, sum->step, (const int**)p,
|
||||
(const int**)pq, allCandidates, &mtx ));
|
||||
|
||||
if( findBiggestObject && !allCandidates.empty() && scanROI.area() == 0 )
|
||||
if( findBiggestObject && !allCandidates.empty() && scanROI.empty() )
|
||||
{
|
||||
rectList.resize(allCandidates.size());
|
||||
std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
|
||||
|
||||
@ -332,7 +332,7 @@ bool QRDetect::localization()
|
||||
const int width = cvRound(bin_barcode.size().width / coeff_expansion);
|
||||
const int height = cvRound(bin_barcode.size().height / coeff_expansion);
|
||||
Size new_size(width, height);
|
||||
Mat intermediate = Mat::zeros(new_size, CV_8UC1);
|
||||
Mat intermediate;
|
||||
resize(bin_barcode, intermediate, new_size, 0, 0, INTER_LINEAR);
|
||||
bin_barcode = intermediate.clone();
|
||||
for (size_t i = 0; i < localization_points.size(); i++)
|
||||
@ -833,26 +833,29 @@ void QRDecode::init(const Mat &src, const vector<Point2f> &points)
|
||||
|
||||
bool QRDecode::updatePerspective()
|
||||
{
|
||||
const Point2f centerPt = QRDetect::intersectionLines(original_points[0], original_points[2],
|
||||
original_points[1], original_points[3]);
|
||||
if (cvIsNaN(centerPt.x) || cvIsNaN(centerPt.y))
|
||||
return false;
|
||||
|
||||
const Size temporary_size(cvRound(test_perspective_size), cvRound(test_perspective_size));
|
||||
|
||||
vector<Point2f> perspective_points;
|
||||
perspective_points.push_back(Point2f(0.f, 0.f));
|
||||
perspective_points.push_back(Point2f(test_perspective_size, 0.f));
|
||||
|
||||
perspective_points.push_back(Point2f(static_cast<float>(test_perspective_size * 0.5),
|
||||
static_cast<float>(test_perspective_size * 0.5)));
|
||||
original_points.insert(original_points.begin() + 2,
|
||||
QRDetect::intersectionLines(
|
||||
original_points[0], original_points[2],
|
||||
original_points[1], original_points[3]));
|
||||
|
||||
perspective_points.push_back(Point2f(test_perspective_size, test_perspective_size));
|
||||
perspective_points.push_back(Point2f(0.f, test_perspective_size));
|
||||
|
||||
Mat H = findHomography(original_points, perspective_points);
|
||||
Mat bin_original = Mat::zeros(original.size(), CV_8UC1);
|
||||
perspective_points.push_back(Point2f(test_perspective_size * 0.5f, test_perspective_size * 0.5f));
|
||||
|
||||
vector<Point2f> pts = original_points;
|
||||
pts.push_back(centerPt);
|
||||
|
||||
Mat H = findHomography(pts, perspective_points);
|
||||
Mat bin_original;
|
||||
adaptiveThreshold(original, bin_original, 255, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY, 83, 2);
|
||||
Mat temp_intermediate = Mat::zeros(temporary_size, CV_8UC1);
|
||||
Mat temp_intermediate;
|
||||
warpPerspective(bin_original, temp_intermediate, H, temporary_size, INTER_NEAREST);
|
||||
no_border_intermediate = temp_intermediate(Range(1, temp_intermediate.rows), Range(1, temp_intermediate.cols));
|
||||
|
||||
@ -1054,6 +1057,7 @@ CV_EXPORTS bool decodeQRCode(InputArray in, InputArray points, std::string &deco
|
||||
vector<Point2f> src_points;
|
||||
points.copyTo(src_points);
|
||||
CV_Assert(src_points.size() == 4);
|
||||
CV_CheckGT(contourArea(src_points), 0.0, "Invalid QR code source points");
|
||||
|
||||
QRDecode qrdec;
|
||||
qrdec.init(inarr, src_points);
|
||||
@ -1061,7 +1065,7 @@ CV_EXPORTS bool decodeQRCode(InputArray in, InputArray points, std::string &deco
|
||||
|
||||
decoded_info = qrdec.getDecodeInformation();
|
||||
|
||||
if (straight_qrcode.needed())
|
||||
if (exit_flag && straight_qrcode.needed())
|
||||
{
|
||||
qrdec.getStraightBarcode().convertTo(straight_qrcode,
|
||||
straight_qrcode.fixedType() ?
|
||||
|
||||
@ -121,7 +121,7 @@ TEST(Objdetect_QRCode_basic, not_found_qrcode)
|
||||
EXPECT_FALSE(detectQRCode(zero_image, corners));
|
||||
#ifdef HAVE_QUIRC
|
||||
corners = std::vector<Point>(4);
|
||||
EXPECT_FALSE(decodeQRCode(zero_image, corners, decoded_info, straight_barcode));
|
||||
EXPECT_ANY_THROW(decodeQRCode(zero_image, corners, decoded_info, straight_barcode));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -91,7 +91,7 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
|
||||
v_int16x8 s2 = v_reinterpret_as_s16(v_load_expand(srow2 + x));
|
||||
|
||||
v_int16x8 t1 = s2 - s0;
|
||||
v_int16x8 t0 = (s0 + s2) * c3 + s1 * c10;
|
||||
v_int16x8 t0 = v_mul_wrap(s0 + s2, c3) + v_mul_wrap(s1, c10);
|
||||
|
||||
v_store(trow0 + x, t0);
|
||||
v_store(trow1 + x, t1);
|
||||
@ -129,7 +129,7 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
|
||||
v_int16x8 s4 = v_load(trow1 + x + cn);
|
||||
|
||||
v_int16x8 t0 = s1 - s0;
|
||||
v_int16x8 t1 = ((s2 + s4) * c3) + (s3 * c10);
|
||||
v_int16x8 t1 = v_mul_wrap(s2 + s4, c3) + v_mul_wrap(s3, c10);
|
||||
|
||||
v_store_interleave((drow + x*2), t0, t1);
|
||||
}
|
||||
@ -812,7 +812,7 @@ namespace
|
||||
double minEigThreshold_ = 1e-4) :
|
||||
winSize(winSize_), maxLevel(maxLevel_), criteria(criteria_), flags(flags_), minEigThreshold(minEigThreshold_)
|
||||
#ifdef HAVE_OPENCL
|
||||
, iters(criteria_.maxCount), derivLambda(criteria_.epsilon), useInitialFlow(0 != (flags_ & OPTFLOW_LK_GET_MIN_EIGENVALS)), waveSize(0)
|
||||
, iters(criteria_.maxCount), derivLambda(criteria_.epsilon), useInitialFlow(0 != (flags_ & OPTFLOW_LK_GET_MIN_EIGENVALS))
|
||||
#endif
|
||||
{
|
||||
}
|
||||
@ -854,8 +854,6 @@ namespace
|
||||
calcPatchSize();
|
||||
if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
|
||||
return false;
|
||||
if (!initWaveSize())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -924,19 +922,6 @@ namespace
|
||||
int iters;
|
||||
double derivLambda;
|
||||
bool useInitialFlow;
|
||||
int waveSize;
|
||||
bool initWaveSize()
|
||||
{
|
||||
waveSize = 1;
|
||||
if (isDeviceCPU())
|
||||
return true;
|
||||
|
||||
ocl::Kernel kernel;
|
||||
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
|
||||
return false;
|
||||
waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
|
||||
return true;
|
||||
}
|
||||
dim3 patch;
|
||||
void calcPatchSize()
|
||||
{
|
||||
@ -975,8 +960,8 @@ namespace
|
||||
if (isDeviceCPU())
|
||||
build_options = " -D CPU";
|
||||
else
|
||||
build_options = cv::format("-D WAVE_SIZE=%d -D WSX=%d -D WSY=%d",
|
||||
waveSize, wsx, wsy);
|
||||
build_options = cv::format("-D WSX=%d -D WSY=%d",
|
||||
wsx, wsy);
|
||||
|
||||
ocl::Kernel kernel;
|
||||
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
|
||||
@ -1062,7 +1047,9 @@ namespace
|
||||
_status.create((int)npoints, 1, CV_8UC1);
|
||||
UMat umatNextPts = _nextPts.getUMat();
|
||||
UMat umatStatus = _status.getUMat();
|
||||
return sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr);
|
||||
UMat umatPrevPts;
|
||||
_prevPts.getMat().copyTo(umatPrevPts);
|
||||
return sparse(_prevImg.getUMat(), _nextImg.getUMat(), umatPrevPts, umatNextPts, umatStatus, umatErr);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -53,9 +53,6 @@
|
||||
#define LM_H (LSy*GRIDSIZE+2)
|
||||
#define BUFFER (LSx*LSy)
|
||||
#define BUFFER2 BUFFER>>1
|
||||
#ifndef WAVE_SIZE
|
||||
#define WAVE_SIZE 1
|
||||
#endif
|
||||
|
||||
#ifdef CPU
|
||||
|
||||
@ -78,7 +75,7 @@ inline void reduce3(float val1, float val2, float val3, __local float* smem1,
|
||||
}
|
||||
}
|
||||
|
||||
inline void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
|
||||
inline void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
@ -95,7 +92,7 @@ inline void reduce2(float val1, float val2, volatile __local float* smem1, volat
|
||||
}
|
||||
}
|
||||
|
||||
inline void reduce1(float val1, volatile __local float* smem1, int tid)
|
||||
inline void reduce1(float val1, __local float* smem1, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -111,7 +108,7 @@ inline void reduce1(float val1, volatile __local float* smem1, int tid)
|
||||
}
|
||||
#else
|
||||
inline void reduce3(float val1, float val2, float val3,
|
||||
__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
|
||||
__local float* smem1, __local float* smem2, __local float* smem3, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
@ -123,38 +120,39 @@ inline void reduce3(float val1, float val2, float val3,
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
smem3[tid] += smem3[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16)
|
||||
{
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
smem3[tid] += smem3[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid<1)
|
||||
if (tid < 8)
|
||||
{
|
||||
#endif
|
||||
local float8* m1 = (local float8*)smem1;
|
||||
local float8* m2 = (local float8*)smem2;
|
||||
local float8* m3 = (local float8*)smem3;
|
||||
float8 t1 = m1[0]+m1[1];
|
||||
float8 t2 = m2[0]+m2[1];
|
||||
float8 t3 = m3[0]+m3[1];
|
||||
float4 t14 = t1.lo + t1.hi;
|
||||
float4 t24 = t2.lo + t2.hi;
|
||||
float4 t34 = t3.lo + t3.hi;
|
||||
smem1[0] = t14.x+t14.y+t14.z+t14.w;
|
||||
smem2[0] = t24.x+t24.y+t24.z+t24.w;
|
||||
smem3[0] = t34.x+t34.y+t34.z+t34.w;
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
smem3[tid] += smem3[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 4)
|
||||
{
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
smem3[tid] += smem3[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid == 0)
|
||||
{
|
||||
smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
|
||||
smem2[0] = (smem2[0] + smem2[1]) + (smem2[2] + smem2[3]);
|
||||
smem3[0] = (smem3[0] + smem3[1]) + (smem3[2] + smem3[3]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
inline void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
|
||||
inline void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
@ -164,33 +162,35 @@ inline void reduce2(float val1, float val2, __local volatile float* smem1, __loc
|
||||
{
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16)
|
||||
{
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid<1)
|
||||
if (tid < 8)
|
||||
{
|
||||
#endif
|
||||
local float8* m1 = (local float8*)smem1;
|
||||
local float8* m2 = (local float8*)smem2;
|
||||
float8 t1 = m1[0]+m1[1];
|
||||
float8 t2 = m2[0]+m2[1];
|
||||
float4 t14 = t1.lo + t1.hi;
|
||||
float4 t24 = t2.lo + t2.hi;
|
||||
smem1[0] = t14.x+t14.y+t14.z+t14.w;
|
||||
smem2[0] = t24.x+t24.y+t24.z+t24.w;
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 4)
|
||||
{
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid == 0)
|
||||
{
|
||||
smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
|
||||
smem2[0] = (smem2[0] + smem2[1]) + (smem2[2] + smem2[3]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
inline void reduce1(float val1, __local volatile float* smem1, int tid)
|
||||
inline void reduce1(float val1, __local float* smem1, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -198,23 +198,26 @@ inline void reduce1(float val1, __local volatile float* smem1, int tid)
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16)
|
||||
{
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid<1)
|
||||
if (tid < 8)
|
||||
{
|
||||
#endif
|
||||
local float8* m1 = (local float8*)smem1;
|
||||
float8 t1 = m1[0]+m1[1];
|
||||
float4 t14 = t1.lo + t1.hi;
|
||||
smem1[0] = t14.x+t14.y+t14.z+t14.w;
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 4)
|
||||
{
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid == 0)
|
||||
{
|
||||
smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
@ -1743,7 +1743,7 @@ static bool icvSetControl (CvCaptureCAM_V4L* capture,
|
||||
fprintf(stderr,
|
||||
"VIDEOIO ERROR: V4L2: setting property #%d is not supported\n",
|
||||
property_id);
|
||||
return -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* get the min/max values */
|
||||
|
||||
@ -14,7 +14,9 @@ class Fail(Exception):
|
||||
def execute(cmd, shell=False):
|
||||
try:
|
||||
log.info("Executing: %s" % cmd)
|
||||
retcode = subprocess.call(cmd, shell=shell)
|
||||
env = os.environ.copy()
|
||||
env['VERBOSE'] = '1'
|
||||
retcode = subprocess.call(cmd, shell=shell, env=env)
|
||||
if retcode < 0:
|
||||
raise Fail("Child was terminated by signal: %s" % -retcode)
|
||||
elif retcode > 0:
|
||||
@ -150,6 +152,8 @@ class Builder:
|
||||
flags = ""
|
||||
if self.options.build_wasm:
|
||||
flags += "-s WASM=1 "
|
||||
elif self.options.disable_wasm:
|
||||
flags += "-s WASM=0 "
|
||||
if self.options.enable_exception:
|
||||
flags += "-s DISABLE_EXCEPTION_CATCHING=0 "
|
||||
return flags
|
||||
@ -182,6 +186,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument('--opencv_dir', default=opencv_dir, help='Opencv source directory (default is "../.." relative to script location)')
|
||||
parser.add_argument('--emscripten_dir', default=emscripten_dir, help="Path to Emscripten to use for build")
|
||||
parser.add_argument('--build_wasm', action="store_true", help="Build OpenCV.js in WebAssembly format")
|
||||
parser.add_argument('--disable_wasm', action="store_true", help="Build OpenCV.js in Asm.js format")
|
||||
parser.add_argument('--build_test', action="store_true", help="Build tests")
|
||||
parser.add_argument('--build_doc', action="store_true", help="Build tutorials")
|
||||
parser.add_argument('--clean_build_dir', action="store_true", help="Clean build dir")
|
||||
@ -208,9 +213,11 @@ if __name__ == "__main__":
|
||||
builder.clean_build_dir()
|
||||
|
||||
if not args.skip_config:
|
||||
target = "asm.js"
|
||||
target = "default target"
|
||||
if args.build_wasm:
|
||||
target = "wasm"
|
||||
elif args.disable_wasm:
|
||||
target = "asm.js"
|
||||
log.info("=====")
|
||||
log.info("===== Config OpenCV.js build for %s" % target)
|
||||
log.info("=====")
|
||||
@ -220,7 +227,7 @@ if __name__ == "__main__":
|
||||
sys.exit(0)
|
||||
|
||||
log.info("=====")
|
||||
log.info("===== Building OpenCV.js in %s", "asm.js" if not args.build_wasm else "wasm")
|
||||
log.info("===== Building OpenCV.js")
|
||||
log.info("=====")
|
||||
builder.build_opencvjs()
|
||||
|
||||
|
||||
@ -559,7 +559,7 @@ int main(int argc, char** argv)
|
||||
{
|
||||
Rect r = extract3DBox(frame, shownFrame, selectedObjFrame,
|
||||
cameraMatrix, rvec, tvec, box, 4, true);
|
||||
if( r.area() )
|
||||
if( !r.empty() )
|
||||
{
|
||||
const int maxFrameIdx = 10000;
|
||||
char path[1000];
|
||||
|
||||
@ -36,7 +36,7 @@ static Mat getVisibleFlow(InputArray flow)
|
||||
|
||||
static Size fitSize(const Size & sz, const Size & bounds)
|
||||
{
|
||||
CV_Assert(sz.area() > 0);
|
||||
CV_Assert(!sz.empty());
|
||||
if (sz.width > bounds.width || sz.height > bounds.height)
|
||||
{
|
||||
double scale = std::min((double)bounds.width / sz.width, (double)bounds.height / sz.height);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user