Merge remote-tracking branch 'upstream/3.4' into merge-3.4

This commit is contained in:
Alexander Alekhin
2018-11-21 08:33:39 +00:00
31 changed files with 895 additions and 114 deletions
@@ -107,7 +107,7 @@
# include <arm_neon.h>
#endif
#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
#ifdef CV_CPU_COMPILE_VSX
# include <altivec.h>
# undef vector
# undef pixel
@@ -115,6 +115,10 @@
# define CV_VSX 1
#endif
#ifdef CV_CPU_COMPILE_VSX3
# define CV_VSX3 1
#endif
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
@@ -237,3 +241,7 @@ struct VZeroUpperGuard {
#ifndef CV_VSX
# define CV_VSX 0
#endif
#ifndef CV_VSX3
# define CV_VSX3 0
#endif
@@ -315,5 +315,26 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3
# define CV_TRY_VSX3 1
# define CV_CPU_FORCE_VSX3 1
# define CV_CPU_HAS_SUPPORT_VSX3 1
# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3
# define CV_TRY_VSX3 1
# define CV_CPU_FORCE_VSX3 0
# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3))
# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
#else
# define CV_TRY_VSX3 0
# define CV_CPU_FORCE_VSX3 0
# define CV_CPU_HAS_SUPPORT_VSX3 0
# define CV_CPU_CALL_VSX3(fn, args)
# define CV_CPU_CALL_VSX3_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
+4 -2
View File
@@ -226,9 +226,10 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#define CV_CPU_AVX_512VBMI 20
#define CV_CPU_AVX_512VL 21
#define CV_CPU_NEON 100
#define CV_CPU_NEON 100
#define CV_CPU_VSX 200
#define CV_CPU_VSX 200
#define CV_CPU_VSX3 201
// CPU features groups
#define CV_CPU_AVX512_SKX 256
@@ -266,6 +267,7 @@ enum CpuFeatures {
CPU_NEON = 100,
CPU_VSX = 200,
CPU_VSX3 = 201,
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
@@ -905,6 +905,11 @@ OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_int64x4)
OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float32x8, ps)
OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float64x4, pd)
inline v_float32x8 v_not_nan(const v_float32x8& a)
{ return v_float32x8(_mm256_cmp_ps(a.val, a.val, _CMP_ORD_Q)); }
inline v_float64x4 v_not_nan(const v_float64x4& a)
{ return v_float64x4(_mm256_cmp_pd(a.val, a.val, _CMP_ORD_Q)); }
/** min/max **/
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint8x32, _mm256_min_epu8)
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint8x32, _mm256_max_epu8)
@@ -683,6 +683,25 @@ OPENCV_HAL_IMPL_CMP_OP(==)
For all types except 64-bit integer values. */
OPENCV_HAL_IMPL_CMP_OP(!=)
template<int n>
inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
{
typedef typename V_TypeTraits<float>::int_type itype;
v_reg<float, n> c;
for (int i = 0; i < n; i++)
c.s[i] = V_TypeTraits<float>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
return c;
}
template<int n>
inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)
{
typedef typename V_TypeTraits<double>::int_type itype;
v_reg<double, n> c;
for (int i = 0; i < n; i++)
c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
return c;
}
//! @brief Helper macro
//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
@@ -764,6 +764,13 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64)
#endif
inline v_float32x4 v_not_nan(const v_float32x4& a)
{ return v_float32x4(vreinterpretq_f32_u32(vceqq_f32(a.val, a.val))); }
#if CV_SIMD128_64F
inline v_float64x2 v_not_nan(const v_float64x2& a)
{ return v_float64x2(vreinterpretq_f64_u64(vceqq_f64(a.val, a.val))); }
#endif
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_add_wrap, vaddq_u16)
@@ -1041,6 +1041,11 @@ inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
inline v_float32x4 v_not_nan(const v_float32x4& a)
{ return v_float32x4(_mm_cmpord_ps(a.val, a.val)); }
inline v_float64x2 v_not_nan(const v_float64x2& a)
{ return v_float64x2(_mm_cmpord_pd(a.val, a.val)); }
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16)
@@ -607,6 +607,11 @@ OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2)
inline v_float32x4 v_not_nan(const v_float32x4& a)
{ return v_float32x4(vec_cmpeq(a.val, a.val)); }
inline v_float64x2 v_not_nan(const v_float64x2& a)
{ return v_float64x2(vec_cmpeq(a.val, a.val)); }
/** min/max **/
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min)
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max)
+14 -2
View File
@@ -1941,8 +1941,11 @@ Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Size_<_Tp>& b )
template<typename _Tp> static inline
Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Size_<_Tp>& b )
{
a.width -= b.width;
a.height -= b.height;
const _Tp width = a.width - b.width;
const _Tp height = a.height - b.height;
CV_DbgAssert(width >= 0 && height >= 0);
a.width = width;
a.height = height;
return a;
}
@@ -2007,6 +2010,15 @@ Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Size_<_Tp>& b)
return Rect_<_Tp>( a.x, a.y, a.width + b.width, a.height + b.height );
}
template<typename _Tp> static inline
Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Size_<_Tp>& b)
{
const _Tp width = a.width - b.width;
const _Tp height = a.height - b.height;
CV_DbgAssert(width >= 0 && height >= 0);
return Rect_<_Tp>( a.x, a.y, width, height );
}
template<typename _Tp> static inline
Rect_<_Tp> operator & (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
{