Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
@@ -754,5 +754,6 @@ CV_EXPORTS_W void setUseIPP_NE(bool flag);
|
||||
|
||||
#include "opencv2/core/neon_utils.hpp"
|
||||
#include "opencv2/core/vsx_utils.hpp"
|
||||
#include "opencv2/core/check.hpp"
|
||||
|
||||
#endif //OPENCV_CORE_BASE_HPP
|
||||
|
||||
@@ -0,0 +1,135 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_CORE_CHECK_HPP
|
||||
#define OPENCV_CORE_CHECK_HPP
|
||||
|
||||
#include <opencv2/core/base.hpp>
|
||||
|
||||
namespace cv {
|
||||
|
||||
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */
|
||||
CV_EXPORTS const char* depthToString(int depth);
|
||||
|
||||
/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */
|
||||
CV_EXPORTS const String typeToString(int type);
|
||||
|
||||
|
||||
//! @cond IGNORED
|
||||
namespace detail {
|
||||
|
||||
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */
|
||||
CV_EXPORTS const char* depthToString_(int depth);
|
||||
|
||||
/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or cv::String() */
|
||||
CV_EXPORTS const cv::String typeToString_(int type);
|
||||
|
||||
// Identifies which comparison a failed check performed. CV__CHECK stores cv::detail::TEST_<op>
// in the CheckContext, where <op> is the suffix of the CV__TEST_<op> macro that was evaluated.
enum TestOp {
|
||||
TEST_CUSTOM = 0, // arbitrary test expression (set by CV__CHECK_CUSTOM_TEST)
|
||||
TEST_EQ = 1, // CV__TEST_EQ: (v1) == (v2)
|
||||
TEST_NE = 2, // CV__TEST_NE: (v1) != (v2)
|
||||
TEST_LE = 3, // CV__TEST_LE: (v1) <= (v2)
|
||||
TEST_LT = 4, // CV__TEST_LT: (v1) < (v2)
|
||||
TEST_GE = 5, // CV__TEST_GE: (v1) >= (v2)
|
||||
TEST_GT = 6, // CV__TEST_GT: (v1) > (v2)
|
||||
CV__LAST_TEST_OP // not a test operation; its value (7) is the number of operations listed above
|
||||
};
|
||||
|
||||
// Static description of one check call site. Instances are aggregate-initialized by
// CV__DEFINE_CHECK_CONTEXT (field order matters) and passed to the check_failed_*() handlers.
struct CheckContext {
|
||||
const char* func; // CV__CHECK_FUNCTION at the call site
|
||||
const char* file; // CV__CHECK_FILENAME (defaults to __FILE__)
|
||||
int line; // __LINE__ of the check
|
||||
enum TestOp testOp; // comparison that was evaluated, or TEST_CUSTOM
|
||||
const char* message; // user-supplied message argument of the CV_Check* macro
|
||||
const char* p1_str; // stringified first operand (#v1), or the checked value for custom tests
|
||||
const char* p2_str; // stringified second operand (#v2), or the test expression for custom tests
|
||||
};
|
||||
|
||||
#ifndef CV__CHECK_FILENAME
|
||||
# define CV__CHECK_FILENAME __FILE__
|
||||
#endif
|
||||
|
||||
#ifndef CV__CHECK_FUNCTION
|
||||
# if defined _MSC_VER
|
||||
# define CV__CHECK_FUNCTION __FUNCSIG__
|
||||
# elif defined __GNUC__
|
||||
# define CV__CHECK_FUNCTION __PRETTY_FUNCTION__
|
||||
# else
|
||||
# define CV__CHECK_FUNCTION "<unknown>"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__)
|
||||
#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
|
||||
static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
|
||||
{ CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str }
|
||||
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
|
||||
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
|
||||
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
|
||||
|
||||
|
||||
#define CV__TEST_EQ(v1, v2) ((v1) == (v2))
|
||||
#define CV__TEST_NE(v1, v2) ((v1) != (v2))
|
||||
#define CV__TEST_LE(v1, v2) ((v1) <= (v2))
|
||||
#define CV__TEST_LT(v1, v2) ((v1) < (v2))
|
||||
#define CV__TEST_GE(v1, v2) ((v1) >= (v2))
|
||||
#define CV__TEST_GT(v1, v2) ((v1) > (v2))
|
||||
|
||||
// Two-operand check: evaluates CV__TEST_<op>(v1, v2). When the test is false, defines a static
// CheckContext for this call site and passes both values to cv::detail::check_failed_<type>(),
// which is declared CV_NORETURN. Note that (v1) and (v2) are expanded twice, so on the failure
// path the operand expressions are evaluated a second time.
#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \
|
||||
if(CV__TEST_##op((v1), (v2))) ; else { \
|
||||
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \
|
||||
cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// Single-value check with an arbitrary boolean test expression. When test_expr is false, defines
// a static CheckContext (testOp = TEST_CUSTOM, p1_str = v_str, p2_str = test_expr_str) and passes
// the value to the one-value overload of cv::detail::check_failed_<type>(). (v) itself is only
// evaluated on the failure path.
#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \
|
||||
if(!!(test_expr)) ; else { \
|
||||
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \
|
||||
cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
} // namespace
|
||||
//! @endcond
|
||||
|
||||
|
||||
/// Supported values of these types: int, float, double
|
||||
#define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
|
||||
#define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
|
||||
|
||||
/// Check with additional "decoding" of type values in error message
|
||||
#define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg)
|
||||
/// Check with additional "decoding" of depth values in error message
|
||||
#define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg)
|
||||
|
||||
#define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg)
|
||||
|
||||
|
||||
/// Example: type == CV_8UC1 || type == CV_8UC3
|
||||
#define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)
|
||||
|
||||
/// Example: depth == CV_32F || depth == CV_64F
|
||||
#define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)
|
||||
|
||||
/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
|
||||
// TODO define pretty-printers: #define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // OPENCV_CORE_CHECK_HPP
|
||||
@@ -795,7 +795,7 @@ inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>
|
||||
/** @brief Multiply and add
|
||||
|
||||
Returns \f$ a*b + c \f$
|
||||
For floating point types only. */
|
||||
For floating point types and signed 32bit int only. */
|
||||
template<typename _Tp, int n>
|
||||
inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
|
||||
const v_reg<_Tp, n>& c)
|
||||
@@ -828,6 +828,29 @@ template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n
|
||||
return c;
|
||||
}
|
||||
|
||||
/** @brief Dot product of elements
|
||||
|
||||
Same as cv::v_dotprod, but add a third element to the sum of adjacent pairs.
|
||||
Scheme:
|
||||
@code
|
||||
{A1 A2 ...} // 16-bit
|
||||
x {B1 B2 ...} // 16-bit
|
||||
-------------
|
||||
{A1B1+A2B2+C1 ...} // 32-bit
|
||||
|
||||
@endcode
|
||||
Implemented only for 16-bit signed source type (v_int16x8).
|
||||
*/
|
||||
// Scalar reference implementation: output lane i is a[2i]*b[2i] + a[2i+1]*b[2i+1] + c[i].
// The result register has n/2 lanes of the wider type w_type.
template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
|
||||
v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
|
||||
{
|
||||
typedef typename V_TypeTraits<_Tp>::w_type w_type;
|
||||
v_reg<w_type, n/2> s;
|
||||
for( int i = 0; i < (n/2); i++ )
|
||||
// the first factor of each product is cast to w_type before multiplying
s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
|
||||
return s;
|
||||
}
|
||||
|
||||
/** @brief Multiply and expand
|
||||
|
||||
Multiply the values of two registers and store the results in two registers with a wider pack type.
|
||||
|
||||
@@ -506,6 +506,12 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
|
||||
}
|
||||
|
||||
// NEON: dot product of adjacent 16-bit pairs plus a 32-bit addend.
// Reuses the two-argument v_dotprod(a, b) and adds c lane-wise with vaddq_s32.
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
||||
{
|
||||
v_int32x4 s = v_dotprod(a, b);
|
||||
return v_int32x4(vaddq_s32(s.val , c.val));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
|
||||
@@ -730,6 +736,11 @@ inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_
|
||||
return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
|
||||
}
|
||||
|
||||
// NEON: signed 32-bit multiply-add, a*b + c per lane.
// vmlaq_s32 takes the accumulator first: vmlaq_s32(acc, x, y) yields acc + x*y.
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
|
||||
{
|
||||
return v_int32x4(vmlaq_s32(c.val, a.val, b.val));
|
||||
}
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
|
||||
{
|
||||
@@ -1095,6 +1106,18 @@ OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
|
||||
OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
|
||||
#endif
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
// Variant compiled when CV_SIMD128_64F is set. Converts each float lane to a signed 32-bit
// integer with the FCVTNS instruction (per the Arm ISA: round to nearest, ties to even),
// emitted through inline assembly on the 4 x 32-bit vector arrangement (.4s).
inline v_int32x4 v_round(const v_float32x4& a)
|
||||
{
|
||||
float32x4_t a_ = a.val;
|
||||
int32x4_t result;
|
||||
__asm__ ("fcvtns %0.4s, %1.4s"
|
||||
: "=w"(result) // output: SIMD register, write-only
|
||||
: "w"(a_) // input: SIMD register
|
||||
: /* No clobbers */);
|
||||
return v_int32x4(result);
|
||||
}
|
||||
#else
|
||||
inline v_int32x4 v_round(const v_float32x4& a)
|
||||
{
|
||||
static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
|
||||
@@ -1103,7 +1126,7 @@ inline v_int32x4 v_round(const v_float32x4& a)
|
||||
int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val)));
|
||||
return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition))));
|
||||
}
|
||||
|
||||
#endif
|
||||
inline v_int32x4 v_floor(const v_float32x4& a)
|
||||
{
|
||||
int32x4_t a1 = vcvtq_s32_f32(a.val);
|
||||
|
||||
@@ -710,6 +710,11 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
return v_int32x4(_mm_madd_epi16(a.val, b.val));
|
||||
}
|
||||
|
||||
// SSE2: dot product of adjacent 16-bit pairs plus a 32-bit addend.
// _mm_madd_epi16 produces the pairwise sums of products; c is then added with _mm_add_epi32.
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
||||
{
|
||||
return v_int32x4(_mm_add_epi32(_mm_madd_epi16(a.val, b.val), c.val));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \
|
||||
OPENCV_HAL_IMPL_SSE_BIN_OP(|, _Tpvec, _mm_or_##suffix) \
|
||||
@@ -954,6 +959,10 @@ inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
|
||||
__m128i m = _mm_cmpgt_epi32(b.val, a.val);
|
||||
return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m));
|
||||
}
|
||||
// SSE: signed 32-bit multiply-add, written with the v_int32x4 operator* and operator+
// overloads (those operators are defined outside the lines visible here).
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
|
||||
{
|
||||
return a * b + c;
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
|
||||
inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
|
||||
@@ -1632,7 +1641,7 @@ inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2&
|
||||
c = v_reinterpret_as_f64(t2);
|
||||
}
|
||||
|
||||
// 2-channel, float only
|
||||
// 2-channel
|
||||
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
|
||||
{
|
||||
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
|
||||
@@ -1644,7 +1653,29 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
|
||||
b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 ab b3
|
||||
}
|
||||
|
||||
inline void v_store_interleave( short* ptr, const v_int16x8& a, const v_int16x8& b )
|
||||
// Loads 16 interleaved 16-bit values (a0 b0 a1 b1 ...) from ptr with two unaligned loads and
// splits them into a (the even-indexed elements) and b (the odd-indexed elements) using three
// rounds of 16-bit unpacklo/unpackhi. The lane comments show the register contents after each step.
inline void v_load_deinterleave(const short* ptr, v_int16x8& a, v_int16x8& b)
|
||||
{
|
||||
__m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1 a2 b2 a3 b3
|
||||
__m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7
|
||||
|
||||
__m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
|
||||
__m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
|
||||
__m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
|
||||
__m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7
|
||||
|
||||
a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
|
||||
b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
|
||||
}
|
||||
|
||||
// Unsigned 16-bit variant: delegates to the signed (const short*) overload and reinterprets
// the two results as unsigned vectors with v_reinterpret_as_u16.
inline void v_load_deinterleave(const ushort*ptr, v_uint16x8& a, v_uint16x8& b)
|
||||
{
|
||||
v_int16x8 sa, sb;
|
||||
v_load_deinterleave((const short*)ptr, sa, sb);
|
||||
a = v_reinterpret_as_u16(sa);
|
||||
b = v_reinterpret_as_u16(sb);
|
||||
}
|
||||
|
||||
inline void v_store_interleave(short* ptr, const v_int16x8& a, const v_int16x8& b)
|
||||
{
|
||||
__m128i t0, t1;
|
||||
t0 = _mm_unpacklo_epi16(a.val, b.val);
|
||||
|
||||
@@ -1,46 +1,6 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#ifndef OPENCV_HAL_VSX_HPP
|
||||
#define OPENCV_HAL_VSX_HPP
|
||||
@@ -276,34 +236,38 @@ OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a) \
|
||||
inline _Tpvec v_load(const _Tp* ptr) \
|
||||
{ return _Tpvec(ld_func(0, ptr)); } \
|
||||
inline _Tpvec v_load_aligned(const _Tp* ptr) \
|
||||
{ return _Tpvec(ld_func(0, ptr)); } \
|
||||
{ return _Tpvec(ld(0, ptr)); } \
|
||||
inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr)) \
|
||||
{ return _Tpvec(ld_a(0, ptr)); } \
|
||||
inline _Tpvec v_load_low(const _Tp* ptr) \
|
||||
{ return _Tpvec(vec_ld_l8(ptr)); } \
|
||||
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
|
||||
{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \
|
||||
inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
||||
{ st_func(a.val, 0, ptr); } \
|
||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||
{ st_func(a.val, 0, ptr); } \
|
||||
{ st(a.val, 0, ptr); } \
|
||||
inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
|
||||
{ st_a(a.val, 0, ptr); } \
|
||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vec_st_l8(a.val, ptr); } \
|
||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vec_st_h8(a.val, ptr); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2)
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float)
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
|
||||
|
||||
//////////////// Value reordering ///////////////
|
||||
|
||||
@@ -343,7 +307,7 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
|
||||
b1.val = fl(a.val); \
|
||||
} \
|
||||
inline _Tpwvec v_load_expand(const _Tp* ptr) \
|
||||
{ return _Tpwvec(fh(vsx_ld(0, ptr))); }
|
||||
{ return _Tpwvec(fh(vec_ld_l8(ptr))); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
|
||||
@@ -353,10 +317,10 @@ OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpac
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
|
||||
|
||||
inline v_uint32x4 v_load_expand_q(const uchar* ptr)
|
||||
{ return v_uint32x4(vec_ld_buw(ptr)); }
|
||||
{ return v_uint32x4(vec_uint4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
|
||||
|
||||
inline v_int32x4 v_load_expand_q(const schar* ptr)
|
||||
{ return v_int32x4(vec_ld_bsw(ptr)); }
|
||||
{ return v_int32x4(vec_int4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
|
||||
|
||||
/* pack */
|
||||
#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \
|
||||
@@ -429,36 +393,6 @@ inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
|
||||
d.val = vec_mergesql(a.val, b.val);
|
||||
}
|
||||
|
||||
/* Extract */
|
||||
template<int s, typename _Tpvec>
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
|
||||
{
|
||||
const int w = sizeof(typename _Tpvec::lane_type);
|
||||
const int n = _Tpvec::nlanes;
|
||||
const unsigned int sf = ((w * n) - (s * w));
|
||||
if (s == 0)
|
||||
return _Tpvec(a.val);
|
||||
else if (sf > 15)
|
||||
return _Tpvec();
|
||||
// bitwise it just to make xlc happy
|
||||
return _Tpvec(vec_sld(b.val, a.val, sf & 15));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec) \
|
||||
template<int s> \
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
switch(s) { \
|
||||
case 0: return _Tpvec(a.val); \
|
||||
case 2: return _Tpvec(b.val); \
|
||||
case 1: return _Tpvec(vec_sldw(b.val, a.val, 2)); \
|
||||
default: return _Tpvec(); \
|
||||
} \
|
||||
}
|
||||
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2)
|
||||
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2)
|
||||
|
||||
|
||||
////////// Arithmetic, bitwise and comparison operations /////////
|
||||
|
||||
/* Element-wise binary and unary operations */
|
||||
@@ -669,6 +603,11 @@ OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, left, b, a)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)
|
||||
|
||||
/* Extract */
|
||||
// Generic v_extract for all vector types: forwards to the two-register v_rotate_right<s>(a, b).
template<int s, typename _Tpvec>
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
|
||||
{ return v_rotate_right<s>(a, b); }
|
||||
|
||||
////////// Reduce and mask /////////
|
||||
|
||||
/** Reduce **/
|
||||
@@ -821,6 +760,9 @@ inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
|
||||
OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
|
||||
OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
|
||||
|
||||
// VSX: signed 32-bit multiply-add, written with the v_int32x4 operator* and operator+
// overloads (those operators are defined outside the lines visible here).
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
|
||||
{ return a * b + c; }
|
||||
|
||||
// TODO: exp, log, sin, cos
|
||||
|
||||
/** Absolute values **/
|
||||
@@ -904,6 +846,9 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
|
||||
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
|
||||
{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
|
||||
|
||||
// VSX: dot product of adjacent 16-bit pairs plus a 32-bit addend. c is passed as the third
// (accumulator) argument of vec_msum, where the two-argument overload passes vec_int4_z.
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
|
||||
{ return v_int32x4(vec_msum(a.val, b.val, c.val)); }
|
||||
|
||||
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
|
||||
const v_float32x4& m1, const v_float32x4& m2,
|
||||
const v_float32x4& m3)
|
||||
|
||||
@@ -1,46 +1,6 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#ifndef OPENCV_HAL_VSX_UTILS_HPP
|
||||
#define OPENCV_HAL_VSX_UTILS_HPP
|
||||
@@ -64,106 +24,77 @@ typedef __vector unsigned char vec_uchar16;
|
||||
#define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__}
|
||||
#define vec_uchar16_sp(c) (__VSX_S16__(vec_uchar16, c))
|
||||
#define vec_uchar16_c(v) ((vec_uchar16)(v))
|
||||
#define vec_uchar16_mx vec_uchar16_sp(0xFF)
|
||||
#define vec_uchar16_mn vec_uchar16_sp(0)
|
||||
#define vec_uchar16_z vec_uchar16_mn
|
||||
#define vec_uchar16_z vec_uchar16_sp(0)
|
||||
|
||||
typedef __vector signed char vec_char16;
|
||||
#define vec_char16_set(...) (vec_char16){__VA_ARGS__}
|
||||
#define vec_char16_sp(c) (__VSX_S16__(vec_char16, c))
|
||||
#define vec_char16_c(v) ((vec_char16)(v))
|
||||
#define vec_char16_mx vec_char16_sp(0x7F)
|
||||
#define vec_char16_mn vec_char16_sp(-0x7F-1)
|
||||
#define vec_char16_z vec_char16_sp(0)
|
||||
|
||||
typedef __vector unsigned short vec_ushort8;
|
||||
#define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__}
|
||||
#define vec_ushort8_sp(c) (__VSX_S8__(vec_ushort8, c))
|
||||
#define vec_ushort8_c(v) ((vec_ushort8)(v))
|
||||
#define vec_ushort8_mx vec_ushort8_sp(0xFFFF)
|
||||
#define vec_ushort8_mn vec_ushort8_sp(0)
|
||||
#define vec_ushort8_z vec_ushort8_mn
|
||||
#define vec_ushort8_z vec_ushort8_sp(0)
|
||||
|
||||
typedef __vector signed short vec_short8;
|
||||
#define vec_short8_set(...) (vec_short8){__VA_ARGS__}
|
||||
#define vec_short8_sp(c) (__VSX_S8__(vec_short8, c))
|
||||
#define vec_short8_c(v) ((vec_short8)(v))
|
||||
#define vec_short8_mx vec_short8_sp(0x7FFF)
|
||||
#define vec_short8_mn vec_short8_sp(-0x7FFF-1)
|
||||
#define vec_short8_z vec_short8_sp(0)
|
||||
|
||||
typedef __vector unsigned int vec_uint4;
|
||||
#define vec_uint4_set(...) (vec_uint4){__VA_ARGS__}
|
||||
#define vec_uint4_sp(c) (__VSX_S4__(vec_uint4, c))
|
||||
#define vec_uint4_c(v) ((vec_uint4)(v))
|
||||
#define vec_uint4_mx vec_uint4_sp(0xFFFFFFFFU)
|
||||
#define vec_uint4_mn vec_uint4_sp(0)
|
||||
#define vec_uint4_z vec_uint4_mn
|
||||
#define vec_uint4_z vec_uint4_sp(0)
|
||||
|
||||
typedef __vector signed int vec_int4;
|
||||
#define vec_int4_set(...) (vec_int4){__VA_ARGS__}
|
||||
#define vec_int4_sp(c) (__VSX_S4__(vec_int4, c))
|
||||
#define vec_int4_c(v) ((vec_int4)(v))
|
||||
#define vec_int4_mx vec_int4_sp(0x7FFFFFFF)
|
||||
#define vec_int4_mn vec_int4_sp(-0x7FFFFFFF-1)
|
||||
#define vec_int4_z vec_int4_sp(0)
|
||||
|
||||
typedef __vector float vec_float4;
|
||||
#define vec_float4_set(...) (vec_float4){__VA_ARGS__}
|
||||
#define vec_float4_sp(c) (__VSX_S4__(vec_float4, c))
|
||||
#define vec_float4_c(v) ((vec_float4)(v))
|
||||
#define vec_float4_mx vec_float4_sp(3.40282347E+38F)
|
||||
#define vec_float4_mn vec_float4_sp(1.17549435E-38F)
|
||||
#define vec_float4_z vec_float4_sp(0)
|
||||
|
||||
typedef __vector unsigned long long vec_udword2;
|
||||
#define vec_udword2_set(...) (vec_udword2){__VA_ARGS__}
|
||||
#define vec_udword2_sp(c) (__VSX_S2__(vec_udword2, c))
|
||||
#define vec_udword2_c(v) ((vec_udword2)(v))
|
||||
#define vec_udword2_mx vec_udword2_sp(18446744073709551615ULL)
|
||||
#define vec_udword2_mn vec_udword2_sp(0)
|
||||
#define vec_udword2_z vec_udword2_mn
|
||||
#define vec_udword2_z vec_udword2_sp(0)
|
||||
|
||||
typedef __vector signed long long vec_dword2;
|
||||
#define vec_dword2_set(...) (vec_dword2){__VA_ARGS__}
|
||||
#define vec_dword2_sp(c) (__VSX_S2__(vec_dword2, c))
|
||||
#define vec_dword2_c(v) ((vec_dword2)(v))
|
||||
#define vec_dword2_mx vec_dword2_sp(9223372036854775807LL)
|
||||
#define vec_dword2_mn vec_dword2_sp(-9223372036854775807LL-1)
|
||||
#define vec_dword2_z vec_dword2_sp(0)
|
||||
|
||||
typedef __vector double vec_double2;
|
||||
#define vec_double2_set(...) (vec_double2){__VA_ARGS__}
|
||||
#define vec_double2_c(v) ((vec_double2)(v))
|
||||
#define vec_double2_sp(c) (__VSX_S2__(vec_double2, c))
|
||||
#define vec_double2_mx vec_double2_sp(1.7976931348623157E+308)
|
||||
#define vec_double2_mn vec_double2_sp(2.2250738585072014E-308)
|
||||
#define vec_double2_z vec_double2_sp(0)
|
||||
|
||||
#define vec_bchar16 __vector __bool char
|
||||
#define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__}
|
||||
#define vec_bchar16_c(v) ((vec_bchar16)(v))
|
||||
#define vec_bchar16_f (__VSX_S16__(vec_bchar16, 0))
|
||||
#define vec_bchar16_t (__VSX_S16__(vec_bchar16, 1))
|
||||
|
||||
#define vec_bshort8 __vector __bool short
|
||||
#define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__}
|
||||
#define vec_bshort8_c(v) ((vec_bshort8)(v))
|
||||
#define vec_bshort8_f (__VSX_S8__(vec_bshort8, 0))
|
||||
#define vec_bshort8_t (__VSX_S8__(vec_bshort8, 1))
|
||||
|
||||
#define vec_bint4 __vector __bool int
|
||||
#define vec_bint4_set(...) (vec_bint4){__VA_ARGS__}
|
||||
#define vec_bint4_c(v) ((vec_bint4)(v))
|
||||
#define vec_bint4_f (__VSX_S4__(vec_bint4, 0))
|
||||
#define vec_bint4_t (__VSX_S4__(vec_bint4, 1))
|
||||
|
||||
#define vec_bdword2 __vector __bool long long
|
||||
#define vec_bdword2_set(...) (vec_bdword2){__VA_ARGS__}
|
||||
#define vec_bdword2_c(v) ((vec_bdword2)(v))
|
||||
#define vec_bdword2_f (__VSX_S2__(vec_bdword2, 0))
|
||||
#define vec_bdword2_t (__VSX_S2__(vec_bdword2, 1))
|
||||
|
||||
|
||||
#define VSX_FINLINE(tp) extern inline tp __attribute__((always_inline))
|
||||
|
||||
@@ -688,34 +619,17 @@ VSX_IMPL_CONV_ODD_2_4(vec_uint4, vec_double2, vec_ctuo, vec_ctu)
|
||||
{ vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); }
|
||||
#endif
|
||||
|
||||
// load 4 unsigned bytes into uint4 vector
|
||||
#define vec_ld_buw(p) vec_uint4_set((p)[0], (p)[1], (p)[2], (p)[3])
|
||||
|
||||
// load 4 signed bytes into int4 vector
|
||||
#define vec_ld_bsw(p) vec_int4_set((p)[0], (p)[1], (p)[2], (p)[3])
|
||||
|
||||
// load 4 unsigned bytes into float vector
|
||||
#define vec_ld_bps(p) vec_ctf(vec_ld_buw(p), 0)
|
||||
|
||||
// Store lower 8 byte
|
||||
#define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0)
|
||||
|
||||
// Store higher 8 byte
|
||||
#define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1)
|
||||
|
||||
/*
|
||||
* vec_ld_l8(ptr) -> Load 64-bits of integer data to lower part
|
||||
* vec_ldz_l8(ptr) -> Load 64-bits of integer data to lower part and zero upper part
|
||||
**/
|
||||
#define VSX_IMPL_LOAD_L8(Tvec, Tp) \
|
||||
VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \
|
||||
{ return ((Tvec)vec_promote(*((uint64*)p), 0)); } \
|
||||
VSX_FINLINE(Tvec) vec_ldz_l8(const Tp *p) \
|
||||
{ \
|
||||
/* TODO: try (Tvec)(vec_udword2{*((uint64*)p), 0}) */ \
|
||||
static const vec_bdword2 mask = {0xFFFFFFFFFFFFFFFF, 0x0000000000000000}; \
|
||||
return vec_and(vec_ld_l8(p), (Tvec)mask); \
|
||||
}
|
||||
// Load 64-bits of integer data to lower part
|
||||
#define VSX_IMPL_LOAD_L8(Tvec, Tp) \
|
||||
VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \
|
||||
{ return ((Tvec)vec_promote(*((uint64*)p), 0)); }
|
||||
|
||||
VSX_IMPL_LOAD_L8(vec_uchar16, uchar)
|
||||
VSX_IMPL_LOAD_L8(vec_char16, schar)
|
||||
VSX_IMPL_LOAD_L8(vec_ushort8, ushort)
|
||||
@@ -745,11 +659,11 @@ VSX_IMPL_LOAD_L8(vec_double2, double)
|
||||
* Implement vec_unpacklu and vec_unpackhu
|
||||
* since vec_unpackl, vec_unpackh only support signed integers
|
||||
**/
|
||||
#define VSX_IMPL_UNPACKU(rt, rg, zero) \
|
||||
VSX_FINLINE(rt) vec_unpacklu(const rg& a) \
|
||||
{ return reinterpret_cast<rt>(vec_mergel(a, zero)); } \
|
||||
VSX_FINLINE(rt) vec_unpackhu(const rg& a) \
|
||||
{ return reinterpret_cast<rt>(vec_mergeh(a, zero)); }
|
||||
#define VSX_IMPL_UNPACKU(rt, rg, zero) \
|
||||
VSX_FINLINE(rt) vec_unpacklu(const rg& a) \
|
||||
{ return (rt)(vec_mergel(a, zero)); } \
|
||||
VSX_FINLINE(rt) vec_unpackhu(const rg& a) \
|
||||
{ return (rt)(vec_mergeh(a, zero)); }
|
||||
|
||||
VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z)
|
||||
VSX_IMPL_UNPACKU(vec_uint4, vec_ushort8, vec_ushort8_z)
|
||||
|
||||
Reference in New Issue
Block a user