Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
@@ -107,7 +107,7 @@
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
|
||||
#ifdef CV_CPU_COMPILE_VSX
|
||||
# include <altivec.h>
|
||||
# undef vector
|
||||
# undef pixel
|
||||
@@ -115,6 +115,10 @@
|
||||
# define CV_VSX 1
|
||||
#endif
|
||||
|
||||
// Set CV_VSX3 to 1 when the build defines CV_CPU_COMPILE_VSX3.
#ifdef CV_CPU_COMPILE_VSX3
|
||||
# define CV_VSX3 1
|
||||
#endif
|
||||
|
||||
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
|
||||
|
||||
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
|
||||
@@ -237,3 +241,7 @@ struct VZeroUpperGuard {
|
||||
// Fallback: give CV_VSX the value 0 when it has not been defined yet,
// so that it can always be tested with a plain `#if CV_VSX`.
#ifndef CV_VSX
|
||||
# define CV_VSX 0
|
||||
#endif
|
||||
|
||||
// Fallback: same treatment for CV_VSX3.
#ifndef CV_VSX3
|
||||
# define CV_VSX3 0
|
||||
#endif
|
||||
|
||||
@@ -315,5 +315,26 @@
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
// VSX3 dispatch helpers. Three configurations are distinguished:
//   1. CV_CPU_COMPILE_VSX3 is defined: VSX3 is forced on, support is a
//      compile-time constant 1 and the call macros return unconditionally.
//   2. CV_CPU_DISPATCH_COMPILE_VSX3 is defined: support is queried at run time
//      through cv::checkHardwareSupport(CV_CPU_VSX3) and the call macros only
//      return when that check succeeds.
//   3. Otherwise: everything is 0 and the call macros expand to nothing.
// Cases 1 and 2 additionally require that CV_DISABLE_OPTIMIZATION is not
// defined and CV_ENABLE_INTRINSICS is defined.
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3
|
||||
# define CV_TRY_VSX3 1
|
||||
# define CV_CPU_FORCE_VSX3 1
|
||||
# define CV_CPU_HAS_SUPPORT_VSX3 1
|
||||
// Note: this variant calls into cpu_baseline, the underscore variant into opt_VSX3.
# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3
|
||||
# define CV_TRY_VSX3 1
|
||||
# define CV_CPU_FORCE_VSX3 0
|
||||
# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3))
|
||||
// Both variants are guarded by the run-time support check and call opt_VSX3.
# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
|
||||
# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
|
||||
#else
|
||||
# define CV_TRY_VSX3 0
|
||||
# define CV_CPU_FORCE_VSX3 0
|
||||
# define CV_CPU_HAS_SUPPORT_VSX3 0
|
||||
// No VSX3 code path: the call macros expand to nothing.
# define CV_CPU_CALL_VSX3(fn, args)
|
||||
# define CV_CPU_CALL_VSX3_(fn, args)
|
||||
#endif
|
||||
// Dispatch-chain link for VSX3: emit the VSX3 call, then continue with the
// chain macro selected by `mode`, forwarding the remaining variadic arguments.
#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
// Baseline call: unconditionally return the result of cpu_baseline::fn applied to args.
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
|
||||
// Terminal link of the dispatch chain: `mode` and the variadic arguments are
// accepted but not used, so the chain stops here.
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
|
||||
|
||||
@@ -226,9 +226,10 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
|
||||
// Numeric identifiers of individual CPU features (AVX-512 sub-features here).
#define CV_CPU_AVX_512VBMI      20
#define CV_CPU_AVX_512VL        21

// ARM NEON.
#define CV_CPU_NEON             100

// POWER VSX and VSX3. Each identifier is defined exactly once; the repeated
// definitions of CV_CPU_NEON / CV_CPU_VSX were redundant.
#define CV_CPU_VSX              200
#define CV_CPU_VSX3             201

// CPU features groups
#define CV_CPU_AVX512_SKX       256
|
||||
@@ -266,6 +267,7 @@ enum CpuFeatures {
|
||||
CPU_NEON = 100,
|
||||
|
||||
CPU_VSX = 200,
|
||||
CPU_VSX3 = 201,
|
||||
|
||||
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
|
||||
|
||||
|
||||
@@ -905,6 +905,11 @@ OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_int64x4)
|
||||
OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float32x8, ps)
|
||||
OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float64x4, pd)
|
||||
|
||||
//! Per-lane "is not NaN" mask for packed floats: each lane is compared with
//! itself using the ordered (quiet) predicate.
inline v_float32x8 v_not_nan(const v_float32x8& a)
{
    const __m256 ordered = _mm256_cmp_ps(a.val, a.val, _CMP_ORD_Q);
    return v_float32x8(ordered);
}
|
||||
//! Per-lane "is not NaN" mask for packed doubles: each lane is compared with
//! itself using the ordered (quiet) predicate.
inline v_float64x4 v_not_nan(const v_float64x4& a)
{
    const __m256d ordered = _mm256_cmp_pd(a.val, a.val, _CMP_ORD_Q);
    return v_float64x4(ordered);
}
|
||||
|
||||
/** min/max **/
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint8x32, _mm256_min_epu8)
|
||||
OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint8x32, _mm256_max_epu8)
|
||||
|
||||
@@ -683,6 +683,25 @@ OPENCV_HAL_IMPL_CMP_OP(==)
|
||||
For all types except 64-bit integer values. */
|
||||
OPENCV_HAL_IMPL_CMP_OP(!=)
|
||||
|
||||
template<int n>
|
||||
inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
|
||||
{
|
||||
typedef typename V_TypeTraits<float>::int_type itype;
|
||||
v_reg<float, n> c;
|
||||
for (int i = 0; i < n; i++)
|
||||
c.s[i] = V_TypeTraits<float>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
|
||||
return c;
|
||||
}
|
||||
template<int n>
|
||||
inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)
|
||||
{
|
||||
typedef typename V_TypeTraits<double>::int_type itype;
|
||||
v_reg<double, n> c;
|
||||
for (int i = 0; i < n; i++)
|
||||
c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
|
||||
return c;
|
||||
}
|
||||
|
||||
//! @brief Helper macro
|
||||
//! @ingroup core_hal_intrin_impl
|
||||
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
|
||||
|
||||
@@ -764,6 +764,13 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
|
||||
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64)
|
||||
#endif
|
||||
|
||||
//! Per-lane "is not NaN" mask: compares every lane with itself and
//! reinterprets the resulting unsigned mask as a float vector.
inline v_float32x4 v_not_nan(const v_float32x4& a)
{
    const uint32x4_t equalMask = vceqq_f32(a.val, a.val);
    return v_float32x4(vreinterpretq_f32_u32(equalMask));
}
|
||||
#if CV_SIMD128_64F
//! Double-precision variant, only compiled when CV_SIMD128_64F is non-zero:
//! compares every lane with itself and reinterprets the unsigned mask as f64.
inline v_float64x2 v_not_nan(const v_float64x2& a)
{
    const uint64x2_t equalMask = vceqq_f64(a.val, a.val);
    return v_float64x2(vreinterpretq_f64_u64(equalMask));
}
#endif
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8)
|
||||
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_add_wrap, vaddq_u16)
|
||||
|
||||
@@ -1041,6 +1041,11 @@ inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
|
||||
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
|
||||
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
|
||||
|
||||
//! Per-lane "is not NaN" mask for packed floats, obtained from an ordered
//! comparison of the vector with itself.
inline v_float32x4 v_not_nan(const v_float32x4& a)
{
    const __m128 ordered = _mm_cmpord_ps(a.val, a.val);
    return v_float32x4(ordered);
}
|
||||
//! Per-lane "is not NaN" mask for packed doubles, obtained from an ordered
//! comparison of the vector with itself.
inline v_float64x2 v_not_nan(const v_float64x2& a)
{
    const __m128d ordered = _mm_cmpord_pd(a.val, a.val);
    return v_float64x2(ordered);
}
|
||||
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
|
||||
OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16)
|
||||
|
||||
@@ -607,6 +607,11 @@ OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2)
|
||||
|
||||
//! Per-lane "is not NaN" mask: the vector is compared for equality with itself.
inline v_float32x4 v_not_nan(const v_float32x4& a)
{
    return v_float32x4(
        vec_cmpeq(a.val, a.val)
    );
}
|
||||
//! Double-precision variant: the vector is compared for equality with itself.
inline v_float64x2 v_not_nan(const v_float64x2& a)
{
    return v_float64x2(
        vec_cmpeq(a.val, a.val)
    );
}
|
||||
|
||||
/** min/max **/
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max)
|
||||
|
||||
@@ -1941,8 +1941,11 @@ Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Size_<_Tp>& b )
|
||||
template<typename _Tp> static inline
|
||||
Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Size_<_Tp>& b )
|
||||
{
|
||||
a.width -= b.width;
|
||||
a.height -= b.height;
|
||||
const _Tp width = a.width - b.width;
|
||||
const _Tp height = a.height - b.height;
|
||||
CV_DbgAssert(width >= 0 && height >= 0);
|
||||
a.width = width;
|
||||
a.height = height;
|
||||
return a;
|
||||
}
|
||||
|
||||
@@ -2007,6 +2010,15 @@ Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Size_<_Tp>& b)
|
||||
return Rect_<_Tp>( a.x, a.y, a.width + b.width, a.height + b.height );
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Size_<_Tp>& b)
|
||||
{
|
||||
const _Tp width = a.width - b.width;
|
||||
const _Tp height = a.height - b.height;
|
||||
CV_DbgAssert(width >= 0 && height >= 0);
|
||||
return Rect_<_Tp>( a.x, a.y, width, height );
|
||||
}
|
||||
|
||||
template<typename _Tp> static inline
|
||||
Rect_<_Tp> operator & (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user