Merge remote-tracking branch 'upstream/3.4' into merge-3.4

This commit is contained in:
Alexander Alekhin
2020-02-10 19:40:29 +03:00
26 changed files with 1517 additions and 1313 deletions
@@ -326,6 +326,13 @@ enum CpuFeatures {
#include "cv_cpu_dispatch.h"
// CV_STRONG_ALIGNMENT: 1 when typed pointers must be properly aligned, 0 otherwise.
// A value supplied before this point (e.g. by the build system) is kept as is.
#ifndef CV_STRONG_ALIGNMENT
#  if defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64))
     // int*, int64* should be properly aligned pointers on ARMv7
#    define CV_STRONG_ALIGNMENT 1
#  else
#    define CV_STRONG_ALIGNMENT 0
#  endif
#endif
/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795
@@ -1458,16 +1458,23 @@ template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const
@return register object
@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.
@note Alignment requirement:
if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough).
Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`).
*/
template<typename _Tp>
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr)
{
    // register type deduced from the pointer's element type (nlanes128 lanes of _Tp)
    typedef v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> vreg_type;
#if CV_STRONG_ALIGNMENT
    // with strong alignment enabled the source must be aligned to the lane size
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    return vreg_type(ptr);
}
/** @brief Load register contents from memory (aligned)
similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary)
similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary in case of SIMD128, 32-byte - SIMD256, etc)
*/
template<typename _Tp>
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr)
@@ -1488,6 +1495,9 @@ v_int32x4 r = v_load_low(lo);
template<typename _Tp>
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
for( int i = 0; i < c.nlanes/2; i++ )
{
@@ -1509,6 +1519,10 @@ v_int32x4 r = v_load_halves(lo, hi);
template<typename _Tp>
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(loptr));
CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
#endif
v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
for( int i = 0; i < c.nlanes/2; i++ )
{
@@ -1531,6 +1545,9 @@ template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_TypeTraits<_Tp>::nlanes128 / 2>
v_load_expand(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
typedef typename V_TypeTraits<_Tp>::w_type w_type;
v_reg<w_type, V_TypeTraits<w_type>::nlanes128> c;
for( int i = 0; i < c.nlanes; i++ )
@@ -1552,6 +1569,9 @@ template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_TypeTraits<_Tp>::nlanes128 / 4>
v_load_expand_q(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
typedef typename V_TypeTraits<_Tp>::q_type q_type;
v_reg<q_type, V_TypeTraits<q_type>::nlanes128> c;
for( int i = 0; i < c.nlanes; i++ )
@@ -1572,6 +1592,9 @@ For all types except 64-bit. */
template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
v_reg<_Tp, n>& b)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i2;
for( i = i2 = 0; i < n; i++, i2 += 2 )
{
@@ -1591,6 +1614,9 @@ For all types except 64-bit. */
template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
v_reg<_Tp, n>& b, v_reg<_Tp, n>& c)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i3;
for( i = i3 = 0; i < n; i++, i3 += 3 )
{
@@ -1613,6 +1639,9 @@ inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
v_reg<_Tp, n>& b, v_reg<_Tp, n>& c,
v_reg<_Tp, n>& d)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i4;
for( i = i4 = 0; i < n; i++, i4 += 4 )
{
@@ -1636,6 +1665,9 @@ inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b,
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i2;
for( i = i2 = 0; i < n; i++, i2 += 2 )
{
@@ -1657,6 +1689,9 @@ inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i3;
for( i = i3 = 0; i < n; i++, i3 += 3 )
{
@@ -1679,6 +1714,9 @@ template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_
const v_reg<_Tp, n>& d,
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i4;
for( i = i4 = 0; i < n; i++, i4 += 4 )
{
@@ -1700,6 +1738,9 @@ Pointer can be unaligned. */
template<typename _Tp, int n>
inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
{
#if CV_STRONG_ALIGNMENT
    // with strong alignment enabled the destination must be aligned to the lane size
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    // copy every lane of the register to consecutive elements starting at ptr
    int lane = 0;
    while (lane < n)
    {
        ptr[lane] = a.s[lane];
        ++lane;
    }
}
@@ -1707,6 +1748,9 @@ inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
template<typename _Tp, int n>
inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
{
// Overload accepting a store-mode argument; the argument is unnamed and unused here,
// the actual store is delegated to the two-argument v_store below.
#if CV_STRONG_ALIGNMENT
// with strong alignment enabled the destination must be aligned to sizeof(_Tp)
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
v_store(ptr, a);
}
@@ -1720,6 +1764,9 @@ Scheme:
template<typename _Tp, int n>
inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
{
#if CV_STRONG_ALIGNMENT
    // with strong alignment enabled the destination must be aligned to the lane size
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    // only the first n/2 lanes are written
    const int half = n / 2;
    for (int lane = 0; lane < half; ++lane)
    {
        ptr[lane] = a.s[lane];
    }
}
@@ -1734,6 +1781,9 @@ Scheme:
template<typename _Tp, int n>
inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
{
#if CV_STRONG_ALIGNMENT
    // with strong alignment enabled the destination must be aligned to the lane size
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    // lanes [n/2, n/2 + n/2) of the register go to ptr[0 .. n/2)
    const int half = n / 2;
    for (int k = 0; k < half; ++k)
    {
        ptr[k] = a.s[half + k];
    }
}
@@ -449,7 +449,7 @@ Returned value is a string containing space separated list of CPU features with
Example: `SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX?`
*/
CV_EXPORTS std::string getCPUFeaturesLine();
CV_EXPORTS_W std::string getCPUFeaturesLine();
/** @brief Returns the number of logical CPUs available for the process.
*/
@@ -0,0 +1,103 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_UTILS_BUFFER_AREA_HPP
#define OPENCV_UTILS_BUFFER_AREA_HPP
#include <opencv2/core/base.hpp>
#include <opencv2/core/private.hpp>
#include <opencv2/core/utility.hpp>
#include <vector>
namespace cv { namespace utils {
//! @addtogroup core_utils
//! @{
/** @brief Manages memory block shared by multiple buffers.
This class allows allocating one large memory block and splitting it into several smaller
non-overlapping buffers. In safe mode each buffer allocation will be performed independently;
this mode allows dynamic memory access instrumentation using valgrind or memory sanitizer.
Safe mode can be explicitly switched ON in constructor. It will also be enabled when compiling with
memory sanitizer support or in runtime with the environment variable `OPENCV_BUFFER_AREA_ALWAYS_SAFE`.
Example of usage:
@code
int * buf1 = 0;
double * buf2 = 0;
cv::utils::BufferArea area;
area.allocate(buf1, 200); // buf1 = new int[200];
area.allocate(buf2, 1000, 64); // buf2 = new double[1000]; - aligned by 64
area.commit();
@endcode
@note This class is considered private and should be used only in OpenCV itself. API can be changed.
*/
class CV_EXPORTS BufferArea
{
public:
/** @brief Class constructor.
@param safe Enable _safe_ operation mode, each allocation will be performed independently.
*/
BufferArea(bool safe = false);
/** @brief Class destructor
All allocated memory will be freed. Each bound pointer will be reset to NULL.
*/
~BufferArea();
/** @brief Bind a pointer to local area.
BufferArea will store reference to the pointer and allocation parameters effectively owning the
pointer and allocated memory. This operation has the same parameters and does the same job
as the operator `new`, except allocation can be performed later during the BufferArea::commit call.
@param ptr Reference to a pointer of type T. Must be NULL
@param count Count of objects to be allocated, it has the same meaning as in the operator `new`.
Must be greater than zero.
@param alignment Alignment of allocated memory. same meaning as in the operator `new` (C++17).
Must be divisible by sizeof(T). Must be power of two.
@note In safe mode allocation will be performed immediately.
*/
template <typename T>
void allocate(T*&ptr, size_t count, ushort alignment = sizeof(T))
{
// Enforce the documented preconditions before the request is handed over.
CV_Assert(ptr == NULL);
CV_Assert(count > 0);
CV_Assert(alignment > 0);
CV_Assert(alignment % sizeof(T) == 0);
// a power of two has a single bit set, so clearing its lowest set bit must give zero
CV_Assert((alignment & (alignment - 1)) == 0);
// NOTE(review): sizeof(T) is narrowed to ushort here; a T whose size does not fit
// into ushort would be silently truncated - confirm no caller uses such a type.
allocate_((void**)(&ptr), static_cast<ushort>(sizeof(T)), count, alignment);
}
/** @brief Allocate memory and initialize all bound pointers
Each pointer bound to the area with the BufferArea::allocate will be initialized and will be set
to point to a memory block with requested size and alignment.
@note Does nothing in safe mode as all allocations will be performed by BufferArea::allocate
*/
void commit();
private:
// Copying is disabled: both operations are declared private (pre-C++11 stand-in for `= delete`).
BufferArea(const BufferArea &); // = delete
BufferArea &operator=(const BufferArea &); // = delete
// Type-erased worker called by allocate(); receives the element size explicitly.
// Declared only here - its definition is not visible in this header.
void allocate_(void **ptr, ushort type_size, size_t count, ushort alignment);
private:
// Forward declaration only; Block is defined outside this header.
class Block;
// presumably one entry per allocate() request - confirm against the implementation
std::vector<Block> blocks;
// presumably the single shared memory block described in the class notes - confirm
void * oneBuf;
// presumably the accumulated size required for oneBuf - confirm
size_t totalSize;
// safe-mode flag; const, so it stays fixed for the lifetime of the object
const bool safe;
};
//! @}
}} // cv::utils::
#endif