Merge pull request #7462 from alalek:cpu_multi_target
This commit is contained in:
@@ -0,0 +1,166 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#if defined __OPENCV_BUILD \
|
||||
|
||||
#include "cv_cpu_config.h"
|
||||
#include "cv_cpu_helper.h"
|
||||
|
||||
#if defined CV_ENABLE_INTRINSICS \
|
||||
&& !defined CV_DISABLE_OPTIMIZATION \
|
||||
&& !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
|
||||
|
||||
#ifdef CV_CPU_COMPILE_SSE2
|
||||
# include <emmintrin.h>
|
||||
# define CV_MMX 1
|
||||
# define CV_SSE 1
|
||||
# define CV_SSE2 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_SSE3
|
||||
# include <pmmintrin.h>
|
||||
# define CV_SSE3 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_SSSE3
|
||||
# include <tmmintrin.h>
|
||||
# define CV_SSSE3 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_SSE4_1
|
||||
# include <smmintrin.h>
|
||||
# define CV_SSE4_1 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_SSE4_2
|
||||
# include <nmmintrin.h>
|
||||
# define CV_SSE4_2 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_POPCNT
|
||||
# ifdef _MSC_VER
|
||||
# include <nmmintrin.h>
|
||||
# if defined(_M_X64)
|
||||
# define CV_POPCNT_U64 _mm_popcnt_u64
|
||||
# endif
|
||||
# define CV_POPCNT_U32 _mm_popcnt_u32
|
||||
# else
|
||||
# include <popcntintrin.h>
|
||||
# if defined(__x86_64__)
|
||||
# define CV_POPCNT_U64 __builtin_popcountll
|
||||
# endif
|
||||
# define CV_POPCNT_U32 __builtin_popcount
|
||||
# endif
|
||||
# define CV_POPCNT 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_AVX
|
||||
# include <immintrin.h>
|
||||
# define CV_AVX 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_AVX2
|
||||
# include <immintrin.h>
|
||||
# define CV_AVX2 1
|
||||
#endif
|
||||
#ifdef CV_CPU_COMPILE_FMA3
|
||||
# define CV_FMA3 1
|
||||
#endif
|
||||
|
||||
#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
|
||||
# include <Intrin.h>
|
||||
# include <arm_neon.h>
|
||||
# define CV_NEON 1
|
||||
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
|
||||
# include <arm_neon.h>
|
||||
# define CV_NEON 1
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_NEON__) || defined(__aarch64__)
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
|
||||
|
||||
#endif // __OPENCV_BUILD
|
||||
|
||||
|
||||
|
||||
#if !defined __OPENCV_BUILD // Compatibility code
|
||||
|
||||
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
|
||||
# include <emmintrin.h>
|
||||
# define CV_MMX 1
|
||||
# define CV_SSE 1
|
||||
# define CV_SSE2 1
|
||||
#elif (defined WIN32 || defined _WIN32) && defined(_M_ARM)
|
||||
# include <Intrin.h>
|
||||
# include <arm_neon.h>
|
||||
# define CV_NEON 1
|
||||
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
|
||||
# include <arm_neon.h>
|
||||
# define CV_NEON 1
|
||||
#endif
|
||||
|
||||
#endif // !__OPENCV_BUILD (Compatibility code)
|
||||
|
||||
|
||||
|
||||
#ifndef CV_MMX
|
||||
# define CV_MMX 0
|
||||
#endif
|
||||
#ifndef CV_SSE
|
||||
# define CV_SSE 0
|
||||
#endif
|
||||
#ifndef CV_SSE2
|
||||
# define CV_SSE2 0
|
||||
#endif
|
||||
#ifndef CV_SSE3
|
||||
# define CV_SSE3 0
|
||||
#endif
|
||||
#ifndef CV_SSSE3
|
||||
# define CV_SSSE3 0
|
||||
#endif
|
||||
#ifndef CV_SSE4_1
|
||||
# define CV_SSE4_1 0
|
||||
#endif
|
||||
#ifndef CV_SSE4_2
|
||||
# define CV_SSE4_2 0
|
||||
#endif
|
||||
#ifndef CV_POPCNT
|
||||
# define CV_POPCNT 0
|
||||
#endif
|
||||
#ifndef CV_AVX
|
||||
# define CV_AVX 0
|
||||
#endif
|
||||
#ifndef CV_AVX2
|
||||
# define CV_AVX2 0
|
||||
#endif
|
||||
#ifndef CV_FMA3
|
||||
# define CV_FMA3 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512F
|
||||
# define CV_AVX_512F 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512BW
|
||||
# define CV_AVX_512BW 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512CD
|
||||
# define CV_AVX_512CD 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512DQ
|
||||
# define CV_AVX_512DQ 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512ER
|
||||
# define CV_AVX_512ER 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512IFMA512
|
||||
# define CV_AVX_512IFMA512 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512PF
|
||||
# define CV_AVX_512PF 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512VBMI
|
||||
# define CV_AVX_512VBMI 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512VL
|
||||
# define CV_AVX_512VL 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_NEON
|
||||
# define CV_NEON 0
|
||||
#endif
|
||||
@@ -0,0 +1,133 @@
|
||||
// AUTOGENERATED, DO NOT EDIT
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
|
||||
# define CV_CPU_HAS_SUPPORT_SSE 1
|
||||
# define CV_CPU_CALL_SSE(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
|
||||
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
|
||||
# define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE 0
|
||||
# define CV_CPU_CALL_SSE(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 1
|
||||
# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
|
||||
# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 0
|
||||
# define CV_CPU_CALL_SSE2(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 1
|
||||
# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
|
||||
# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 0
|
||||
# define CV_CPU_CALL_SSE3(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 1
|
||||
# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
|
||||
# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 0
|
||||
# define CV_CPU_CALL_SSSE3(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
|
||||
# define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
|
||||
# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
|
||||
# define CV_CPU_CALL_SSE4_1(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
|
||||
# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
|
||||
# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
|
||||
# define CV_CPU_CALL_SSE4_2(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT 1
|
||||
# define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
|
||||
# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT 0
|
||||
# define CV_CPU_CALL_POPCNT(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
|
||||
# define CV_CPU_HAS_SUPPORT_AVX 1
|
||||
# define CV_CPU_CALL_AVX(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
|
||||
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
|
||||
# define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_AVX 0
|
||||
# define CV_CPU_CALL_AVX(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 1
|
||||
# define CV_CPU_CALL_FP16(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
|
||||
# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 0
|
||||
# define CV_CPU_CALL_FP16(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 1
|
||||
# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
|
||||
# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 0
|
||||
# define CV_CPU_CALL_AVX2(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 1
|
||||
# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
|
||||
# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 0
|
||||
# define CV_CPU_CALL_FMA3(...)
|
||||
#endif
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
|
||||
# define CV_CPU_HAS_SUPPORT_NEON 1
|
||||
# define CV_CPU_CALL_NEON(...) return __VA_ARGS__
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
|
||||
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
|
||||
# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_NEON 0
|
||||
# define CV_CPU_CALL_NEON(...)
|
||||
#endif
|
||||
@@ -48,6 +48,10 @@
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
#ifdef __OPENCV_BUILD
|
||||
#include "cvconfig.h"
|
||||
#endif
|
||||
|
||||
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
|
||||
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
|
||||
#endif
|
||||
@@ -59,10 +63,6 @@
|
||||
#undef abs
|
||||
#undef Complex
|
||||
|
||||
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
|
||||
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#include "opencv2/core/hal/interface.h"
|
||||
|
||||
@@ -88,7 +88,7 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined CV_ICC && !defined CV_ENABLE_UNROLLED
|
||||
#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
|
||||
# define CV_ENABLE_UNROLLED 0
|
||||
#else
|
||||
# define CV_ENABLE_UNROLLED 1
|
||||
@@ -161,150 +161,9 @@ enum CpuFeatures {
|
||||
CPU_NEON = 100
|
||||
};
|
||||
|
||||
// do not include SSE/AVX/NEON headers for NVCC compiler
|
||||
#ifndef __CUDACC__
|
||||
|
||||
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
|
||||
# include <emmintrin.h>
|
||||
# define CV_MMX 1
|
||||
# define CV_SSE 1
|
||||
# define CV_SSE2 1
|
||||
# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
|
||||
# include <pmmintrin.h>
|
||||
# define CV_SSE3 1
|
||||
# endif
|
||||
# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
|
||||
# include <tmmintrin.h>
|
||||
# define CV_SSSE3 1
|
||||
# endif
|
||||
# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
|
||||
# include <smmintrin.h>
|
||||
# define CV_SSE4_1 1
|
||||
# endif
|
||||
# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
|
||||
# include <nmmintrin.h>
|
||||
# define CV_SSE4_2 1
|
||||
# endif
|
||||
# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
|
||||
# ifdef _MSC_VER
|
||||
# include <nmmintrin.h>
|
||||
# if defined(_M_X64)
|
||||
# define CV_POPCNT_U64 _mm_popcnt_u64
|
||||
# endif
|
||||
# define CV_POPCNT_U32 _mm_popcnt_u32
|
||||
# else
|
||||
# include <popcntintrin.h>
|
||||
# if defined(__x86_64__)
|
||||
# define CV_POPCNT_U64 __builtin_popcountll
|
||||
# endif
|
||||
# define CV_POPCNT_U32 __builtin_popcount
|
||||
# endif
|
||||
# define CV_POPCNT 1
|
||||
# endif
|
||||
# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
|
||||
// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
|
||||
// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
|
||||
# include <immintrin.h>
|
||||
# define CV_AVX 1
|
||||
# if defined(_XCR_XFEATURE_ENABLED_MASK)
|
||||
# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
|
||||
# else
|
||||
# define __xgetbv() 0
|
||||
# endif
|
||||
# endif
|
||||
# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0)
|
||||
# include <immintrin.h>
|
||||
# define CV_AVX2 1
|
||||
# if defined __FMA__
|
||||
# define CV_FMA3 1
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#include "cv_cpu_dispatch.h"
|
||||
|
||||
#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
|
||||
# include <Intrin.h>
|
||||
# include <arm_neon.h>
|
||||
# define CV_NEON 1
|
||||
# define CPU_HAS_NEON_FEATURE (true)
|
||||
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
|
||||
# include <arm_neon.h>
|
||||
# define CV_NEON 1
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
|
||||
# define CV_VFP 1
|
||||
#endif
|
||||
|
||||
#endif // __CUDACC__
|
||||
|
||||
#ifndef CV_POPCNT
|
||||
#define CV_POPCNT 0
|
||||
#endif
|
||||
#ifndef CV_MMX
|
||||
# define CV_MMX 0
|
||||
#endif
|
||||
#ifndef CV_SSE
|
||||
# define CV_SSE 0
|
||||
#endif
|
||||
#ifndef CV_SSE2
|
||||
# define CV_SSE2 0
|
||||
#endif
|
||||
#ifndef CV_SSE3
|
||||
# define CV_SSE3 0
|
||||
#endif
|
||||
#ifndef CV_SSSE3
|
||||
# define CV_SSSE3 0
|
||||
#endif
|
||||
#ifndef CV_SSE4_1
|
||||
# define CV_SSE4_1 0
|
||||
#endif
|
||||
#ifndef CV_SSE4_2
|
||||
# define CV_SSE4_2 0
|
||||
#endif
|
||||
#ifndef CV_AVX
|
||||
# define CV_AVX 0
|
||||
#endif
|
||||
#ifndef CV_AVX2
|
||||
# define CV_AVX2 0
|
||||
#endif
|
||||
#ifndef CV_FMA3
|
||||
# define CV_FMA3 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512F
|
||||
# define CV_AVX_512F 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512BW
|
||||
# define CV_AVX_512BW 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512CD
|
||||
# define CV_AVX_512CD 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512DQ
|
||||
# define CV_AVX_512DQ 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512ER
|
||||
# define CV_AVX_512ER 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512IFMA512
|
||||
# define CV_AVX_512IFMA512 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512PF
|
||||
# define CV_AVX_512PF 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512VBMI
|
||||
# define CV_AVX_512VBMI 0
|
||||
#endif
|
||||
#ifndef CV_AVX_512VL
|
||||
# define CV_AVX_512VL 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_NEON
|
||||
# define CV_NEON 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_VFP
|
||||
# define CV_VFP 0
|
||||
#endif
|
||||
|
||||
/* fundamental constants */
|
||||
#define CV_PI 3.1415926535897932384626433832795
|
||||
|
||||
@@ -47,6 +47,12 @@
|
||||
|
||||
#include "opencv2/core/cvdef.h"
|
||||
|
||||
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
|
||||
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
@@ -68,7 +74,7 @@
|
||||
# include "tegra_round.hpp"
|
||||
#endif
|
||||
|
||||
#if CV_VFP
|
||||
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
|
||||
// 1. general scheme
|
||||
#define ARM_ROUND(_value, _asm_string) \
|
||||
int res; \
|
||||
@@ -84,7 +90,7 @@
|
||||
#endif
|
||||
// 3. version for float
|
||||
#define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
|
||||
#endif // CV_VFP
|
||||
#endif
|
||||
|
||||
/** @brief Rounds floating-point number to the nearest integer
|
||||
|
||||
@@ -95,7 +101,7 @@ CV_INLINE int
|
||||
cvRound( double value )
|
||||
{
|
||||
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
|
||||
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
|
||||
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
|
||||
__m128d t = _mm_set_sd( value );
|
||||
return _mm_cvtsd_si32(t);
|
||||
#elif defined _MSC_VER && defined _M_IX86
|
||||
@@ -110,7 +116,7 @@ cvRound( double value )
|
||||
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
|
||||
TEGRA_ROUND_DBL(value);
|
||||
#elif defined CV_ICC || defined __GNUC__
|
||||
# if CV_VFP
|
||||
# if defined ARM_ROUND_DBL
|
||||
ARM_ROUND_DBL(value);
|
||||
# else
|
||||
return (int)lrint(value);
|
||||
@@ -132,18 +138,8 @@ cvRound( double value )
|
||||
*/
|
||||
CV_INLINE int cvFloor( double value )
|
||||
{
|
||||
#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
|
||||
__m128d t = _mm_set_sd( value );
|
||||
int i = _mm_cvtsd_si32(t);
|
||||
return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
|
||||
#elif defined __GNUC__
|
||||
int i = (int)value;
|
||||
return i - (i > value);
|
||||
#else
|
||||
int i = cvRound(value);
|
||||
float diff = (float)(value - i);
|
||||
return i - (diff < 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
|
||||
@@ -155,18 +151,8 @@ CV_INLINE int cvFloor( double value )
|
||||
*/
|
||||
CV_INLINE int cvCeil( double value )
|
||||
{
|
||||
#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
|
||||
__m128d t = _mm_set_sd( value );
|
||||
int i = _mm_cvtsd_si32(t);
|
||||
return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
|
||||
#elif defined __GNUC__
|
||||
int i = (int)value;
|
||||
return i + (i < value);
|
||||
#else
|
||||
int i = cvRound(value);
|
||||
float diff = (float)(i - value);
|
||||
return i + (diff < 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @brief Determines if the argument is Not A Number.
|
||||
@@ -202,8 +188,8 @@ CV_INLINE int cvIsInf( double value )
|
||||
/** @overload */
|
||||
CV_INLINE int cvRound(float value)
|
||||
{
|
||||
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \
|
||||
defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
|
||||
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
|
||||
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
|
||||
__m128 t = _mm_set_ss( value );
|
||||
return _mm_cvtss_si32(t);
|
||||
#elif defined _MSC_VER && defined _M_IX86
|
||||
@@ -218,7 +204,7 @@ CV_INLINE int cvRound(float value)
|
||||
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
|
||||
TEGRA_ROUND_FLT(value);
|
||||
#elif defined CV_ICC || defined __GNUC__
|
||||
# if CV_VFP
|
||||
# if defined ARM_ROUND_FLT
|
||||
ARM_ROUND_FLT(value);
|
||||
# else
|
||||
return (int)lrintf(value);
|
||||
@@ -239,18 +225,8 @@ CV_INLINE int cvRound( int value )
|
||||
/** @overload */
|
||||
CV_INLINE int cvFloor( float value )
|
||||
{
|
||||
#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
|
||||
__m128 t = _mm_set_ss( value );
|
||||
int i = _mm_cvtss_si32(t);
|
||||
return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i)));
|
||||
#elif defined __GNUC__
|
||||
int i = (int)value;
|
||||
return i - (i > value);
|
||||
#else
|
||||
int i = cvRound(value);
|
||||
float diff = (float)(value - i);
|
||||
return i - (diff < 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @overload */
|
||||
@@ -262,18 +238,8 @@ CV_INLINE int cvFloor( int value )
|
||||
/** @overload */
|
||||
CV_INLINE int cvCeil( float value )
|
||||
{
|
||||
#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
|
||||
__m128 t = _mm_set_ss( value );
|
||||
int i = _mm_cvtss_si32(t);
|
||||
return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t));
|
||||
#elif defined __GNUC__
|
||||
int i = (int)value;
|
||||
return i + (i < value);
|
||||
#else
|
||||
int i = cvRound(value);
|
||||
float diff = (float)(i - value);
|
||||
return i + (diff < 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @overload */
|
||||
|
||||
+259
-40
@@ -237,24 +237,81 @@ void Exception::formatMessage()
|
||||
msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str());
|
||||
}
|
||||
|
||||
static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL };
|
||||
|
||||
static const char* getHWFeatureName(int id)
|
||||
{
|
||||
return (id < CV_HARDWARE_MAX_FEATURE) ? g_hwFeatureNames[id] : NULL;
|
||||
}
|
||||
static const char* getHWFeatureNameSafe(int id)
|
||||
{
|
||||
const char* name = getHWFeatureName(id);
|
||||
return name ? name : "Unknown feature";
|
||||
}
|
||||
|
||||
struct HWFeatures
|
||||
{
|
||||
enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
|
||||
|
||||
HWFeatures(void)
|
||||
HWFeatures(bool run_initialize = false)
|
||||
{
|
||||
memset( have, 0, sizeof(have) );
|
||||
x86_family = 0;
|
||||
memset( have, 0, sizeof(have[0]) * MAX_FEATURE );
|
||||
if (run_initialize)
|
||||
initialize();
|
||||
}
|
||||
|
||||
static HWFeatures initialize(void)
|
||||
static void initializeNames()
|
||||
{
|
||||
HWFeatures f;
|
||||
for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
|
||||
{
|
||||
g_hwFeatureNames[i] = 0;
|
||||
}
|
||||
g_hwFeatureNames[CPU_MMX] = "MMX";
|
||||
g_hwFeatureNames[CPU_SSE] = "SSE";
|
||||
g_hwFeatureNames[CPU_SSE2] = "SSE2";
|
||||
g_hwFeatureNames[CPU_SSE3] = "SSE3";
|
||||
g_hwFeatureNames[CPU_SSSE3] = "SSSE3";
|
||||
g_hwFeatureNames[CPU_SSE4_1] = "SSE4.1";
|
||||
g_hwFeatureNames[CPU_SSE4_2] = "SSE4.2";
|
||||
g_hwFeatureNames[CPU_POPCNT] = "POPCNT";
|
||||
g_hwFeatureNames[CPU_FP16] = "FP16";
|
||||
g_hwFeatureNames[CPU_AVX] = "AVX";
|
||||
g_hwFeatureNames[CPU_AVX2] = "AVX2";
|
||||
g_hwFeatureNames[CPU_FMA3] = "FMA3";
|
||||
|
||||
g_hwFeatureNames[CPU_AVX_512F] = "AVX512F";
|
||||
g_hwFeatureNames[CPU_AVX_512BW] = "AVX512BW";
|
||||
g_hwFeatureNames[CPU_AVX_512CD] = "AVX512CD";
|
||||
g_hwFeatureNames[CPU_AVX_512DQ] = "AVX512DQ";
|
||||
g_hwFeatureNames[CPU_AVX_512ER] = "AVX512ER";
|
||||
g_hwFeatureNames[CPU_AVX_512IFMA512] = "AVX512IFMA";
|
||||
g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF";
|
||||
g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI";
|
||||
g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL";
|
||||
|
||||
g_hwFeatureNames[CPU_NEON] = "NEON";
|
||||
}
|
||||
|
||||
void initialize(void)
|
||||
{
|
||||
#ifndef WINRT
|
||||
if (getenv("OPENCV_DUMP_CONFIG"))
|
||||
{
|
||||
fprintf(stderr, "\nOpenCV build configuration is:\n%s\n",
|
||||
cv::getBuildInformation().c_str());
|
||||
}
|
||||
#endif
|
||||
|
||||
initializeNames();
|
||||
|
||||
int cpuid_data[4] = { 0, 0, 0, 0 };
|
||||
int cpuid_data_ex[4] = { 0, 0, 0, 0 };
|
||||
|
||||
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
|
||||
#define OPENCV_HAVE_X86_CPUID 1
|
||||
__cpuid(cpuid_data, 1);
|
||||
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
|
||||
#define OPENCV_HAVE_X86_CPUID 1
|
||||
#ifdef __x86_64__
|
||||
asm __volatile__
|
||||
(
|
||||
@@ -278,33 +335,36 @@ struct HWFeatures
|
||||
#endif
|
||||
#endif
|
||||
|
||||
f.x86_family = (cpuid_data[0] >> 8) & 15;
|
||||
if( f.x86_family >= 6 )
|
||||
#ifdef OPENCV_HAVE_X86_CPUID
|
||||
int x86_family = (cpuid_data[0] >> 8) & 15;
|
||||
if( x86_family >= 6 )
|
||||
{
|
||||
f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0;
|
||||
f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
|
||||
f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
|
||||
f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
|
||||
f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
|
||||
f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
|
||||
f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
|
||||
f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
|
||||
f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
|
||||
f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
|
||||
f.have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
|
||||
have[CV_CPU_MMX] = (cpuid_data[3] & (1<<23)) != 0;
|
||||
have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
|
||||
have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
|
||||
have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
|
||||
have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
|
||||
have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
|
||||
have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
|
||||
have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
|
||||
have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
|
||||
have[CV_CPU_AVX] = (cpuid_data[2] & (1<<28)) != 0;
|
||||
have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0;
|
||||
|
||||
// make the second call to the cpuid command in order to get
|
||||
// information about extended features like AVX2
|
||||
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
|
||||
__cpuidex(cpuid_data, 7, 0);
|
||||
#define OPENCV_HAVE_X86_CPUID_EX 1
|
||||
__cpuidex(cpuid_data_ex, 7, 0);
|
||||
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
|
||||
#define OPENCV_HAVE_X86_CPUID_EX 1
|
||||
#ifdef __x86_64__
|
||||
asm __volatile__
|
||||
(
|
||||
"movl $7, %%eax\n\t"
|
||||
"movl $0, %%ecx\n\t"
|
||||
"cpuid\n\t"
|
||||
:[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
|
||||
:[eax]"=a"(cpuid_data_ex[0]),[ebx]"=b"(cpuid_data_ex[1]),[ecx]"=c"(cpuid_data_ex[2]),[edx]"=d"(cpuid_data_ex[3])
|
||||
:
|
||||
: "cc"
|
||||
);
|
||||
@@ -317,29 +377,76 @@ struct HWFeatures
|
||||
"cpuid\n\t"
|
||||
"movl %%ebx, %0\n\t"
|
||||
"popl %%ebx\n\t"
|
||||
: "=r"(cpuid_data[1]), "=c"(cpuid_data[2])
|
||||
: "=r"(cpuid_data_ex[1]), "=c"(cpuid_data_ex[2])
|
||||
:
|
||||
: "cc"
|
||||
);
|
||||
#endif
|
||||
#endif
|
||||
f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
|
||||
|
||||
f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
|
||||
f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
|
||||
f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
|
||||
f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
|
||||
f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
|
||||
f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
|
||||
f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
|
||||
f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0;
|
||||
f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
|
||||
#ifdef OPENCV_HAVE_X86_CPUID_EX
|
||||
have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0;
|
||||
|
||||
have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0;
|
||||
have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0;
|
||||
have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0;
|
||||
have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0;
|
||||
have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0;
|
||||
have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0;
|
||||
have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0;
|
||||
have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0;
|
||||
have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0;
|
||||
#else
|
||||
CV_UNUSED(cpuid_data_ex);
|
||||
#endif
|
||||
|
||||
bool have_AVX_OS_support = true;
|
||||
bool have_AVX512_OS_support = true;
|
||||
if (!(cpuid_data[2] & (1<<27)))
|
||||
have_AVX_OS_support = false; // OS uses XSAVE_XRSTORE and CPU support AVX
|
||||
else
|
||||
{
|
||||
int xcr0 = 0;
|
||||
#ifdef _XCR_XFEATURE_ENABLED_MASK // requires immintrin.h
|
||||
xcr0 = (int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK);
|
||||
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
|
||||
__asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
|
||||
#endif
|
||||
if ((xcr0 & 0x6) != 0x6)
|
||||
have_AVX_OS_support = false; // YMM registers
|
||||
if ((xcr0 & 0xe6) != 0xe6)
|
||||
have_AVX512_OS_support = false; // ZMM registers
|
||||
}
|
||||
|
||||
if (!have_AVX_OS_support)
|
||||
{
|
||||
have[CV_CPU_AVX] = false;
|
||||
have[CV_CPU_FP16] = false;
|
||||
have[CV_CPU_AVX2] = false;
|
||||
have[CV_CPU_FMA3] = false;
|
||||
}
|
||||
if (!have_AVX_OS_support || !have_AVX512_OS_support)
|
||||
{
|
||||
have[CV_CPU_AVX_512F] = false;
|
||||
have[CV_CPU_AVX_512BW] = false;
|
||||
have[CV_CPU_AVX_512CD] = false;
|
||||
have[CV_CPU_AVX_512DQ] = false;
|
||||
have[CV_CPU_AVX_512ER] = false;
|
||||
have[CV_CPU_AVX_512IFMA512] = false;
|
||||
have[CV_CPU_AVX_512PF] = false;
|
||||
have[CV_CPU_AVX_512VBMI] = false;
|
||||
have[CV_CPU_AVX_512VL] = false;
|
||||
}
|
||||
}
|
||||
#else
|
||||
CV_UNUSED(cpuid_data);
|
||||
CV_UNUSED(cpuid_data_ex);
|
||||
#endif // OPENCV_HAVE_X86_CPUID
|
||||
|
||||
#if defined ANDROID || defined __linux__
|
||||
#ifdef __aarch64__
|
||||
f.have[CV_CPU_NEON] = true;
|
||||
f.have[CV_CPU_FP16] = true;
|
||||
have[CV_CPU_NEON] = true;
|
||||
have[CV_CPU_FP16] = true;
|
||||
#elif defined __arm__
|
||||
int cpufile = open("/proc/self/auxv", O_RDONLY);
|
||||
|
||||
@@ -352,8 +459,8 @@ struct HWFeatures
|
||||
{
|
||||
if (auxv.a_type == AT_HWCAP)
|
||||
{
|
||||
f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
|
||||
f.have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
|
||||
have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
|
||||
have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -363,21 +470,133 @@ struct HWFeatures
|
||||
#endif
|
||||
#elif (defined __clang__ || defined __APPLE__)
|
||||
#if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
|
||||
f.have[CV_CPU_NEON] = true;
|
||||
have[CV_CPU_NEON] = true;
|
||||
#endif
|
||||
#if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__))
|
||||
f.have[CV_CPU_FP16] = true;
|
||||
have[CV_CPU_FP16] = true;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return f;
|
||||
int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
|
||||
if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])))
|
||||
{
|
||||
fprintf(stderr, "\n"
|
||||
"******************************************************************\n"
|
||||
"* FATAL ERROR: *\n"
|
||||
"* This OpenCV build doesn't support current CPU/HW configuration *\n"
|
||||
"* *\n"
|
||||
"* Use OPENCV_DUMP_CONFIG=1 environment variable for details *\n"
|
||||
"******************************************************************\n");
|
||||
fprintf(stderr, "\nRequired baseline features:\n");
|
||||
checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]), true);
|
||||
CV_ErrorNoReturn(cv::Error::StsAssert, "Missing support for required CPU baseline features. Check OpenCV build configuration and required CPU/HW setup.");
|
||||
}
|
||||
|
||||
readSettings(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]));
|
||||
}
|
||||
|
||||
bool checkFeatures(const int* features, int count, bool dump = false)
|
||||
{
|
||||
bool result = true;
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
int feature = features[i];
|
||||
if (feature)
|
||||
{
|
||||
if (have[feature])
|
||||
{
|
||||
if (dump) fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(feature));
|
||||
}
|
||||
else
|
||||
{
|
||||
result = false;
|
||||
if (dump) fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(feature));
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline bool isSymbolSeparator(char c)
|
||||
{
|
||||
return c == ',' || c == ';' || c == '-';
|
||||
}
|
||||
|
||||
void readSettings(const int* baseline_features, int baseline_count)
|
||||
{
|
||||
bool dump = true;
|
||||
const char* disabled_features =
|
||||
#ifndef WINRT
|
||||
getenv("OPENCV_CPU_DISABLE");
|
||||
#else
|
||||
NULL;
|
||||
#endif
|
||||
if (disabled_features && disabled_features[0] != 0)
|
||||
{
|
||||
const char* start = disabled_features;
|
||||
for (;;)
|
||||
{
|
||||
while (start[0] != 0 && isSymbolSeparator(start[0]))
|
||||
{
|
||||
start++;
|
||||
}
|
||||
if (start[0] == 0)
|
||||
break;
|
||||
const char* end = start;
|
||||
while (end[0] != 0 && !isSymbolSeparator(end[0]))
|
||||
{
|
||||
end++;
|
||||
}
|
||||
if (end == start)
|
||||
continue;
|
||||
cv::String feature(start, end);
|
||||
start = end;
|
||||
|
||||
CV_Assert(feature.size() > 0);
|
||||
|
||||
bool found = false;
|
||||
for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++)
|
||||
{
|
||||
if (!g_hwFeatureNames[i]) continue;
|
||||
size_t len = strlen(g_hwFeatureNames[i]);
|
||||
if (len != feature.size()) continue;
|
||||
if (feature.compare(g_hwFeatureNames[i]) == 0)
|
||||
{
|
||||
bool isBaseline = false;
|
||||
for (int k = 0; k < baseline_count; k++)
|
||||
{
|
||||
if (baseline_features[k] == i)
|
||||
{
|
||||
isBaseline = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isBaseline)
|
||||
{
|
||||
if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i));
|
||||
}
|
||||
if (!have[i])
|
||||
{
|
||||
if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i));
|
||||
}
|
||||
have[i] = false;
|
||||
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
{
|
||||
if (dump) fprintf(stderr, "OPENCV: Trying to disable unknown CPU feature: '%s'.\n", feature.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int x86_family;
|
||||
bool have[MAX_FEATURE+1];
|
||||
};
|
||||
|
||||
static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures();
|
||||
static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false);
|
||||
static HWFeatures* currentFeatures = &featuresEnabled;
|
||||
|
||||
bool checkHardwareSupport(int feature)
|
||||
|
||||
Reference in New Issue
Block a user