cmake: support multiple CPU targets

This commit is contained in:
Alexander Alekhin
2016-09-07 18:02:36 +03:00
parent 47ae5f14f5
commit e16227b53c
28 changed files with 1439 additions and 393 deletions
@@ -0,0 +1,166 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// OpenCV-internal build: include the SIMD intrinsic headers and set the
// matching CV_<feature> macros to 1, one feature at a time, for every
// CV_CPU_COMPILE_<feature> macro that is defined.
// NOTE(review): CV_CPU_COMPILE_* are presumably supplied by cv_cpu_config.h or
// the build system - confirm.
#if defined __OPENCV_BUILD

#include "cv_cpu_config.h"
#include "cv_cpu_helper.h"

// The condition must end on its last line. A trailing backslash after it would
// splice the next directive into the #if expression and unbalance the #endif's.
#if defined CV_ENABLE_INTRINSICS \
    && !defined CV_DISABLE_OPTIMIZATION \
    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */

#ifdef CV_CPU_COMPILE_SSE2
#  include <emmintrin.h>
#  define CV_MMX 1
#  define CV_SSE 1
#  define CV_SSE2 1
#endif
#ifdef CV_CPU_COMPILE_SSE3
#  include <pmmintrin.h>
#  define CV_SSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSSE3
#  include <tmmintrin.h>
#  define CV_SSSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_1
#  include <smmintrin.h>
#  define CV_SSE4_1 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_2
#  include <nmmintrin.h>
#  define CV_SSE4_2 1
#endif
#ifdef CV_CPU_COMPILE_POPCNT
// CV_POPCNT_U64 is only defined for 64-bit targets; CV_POPCNT_U32 always is.
#  ifdef _MSC_VER
#    include <nmmintrin.h>
#    if defined(_M_X64)
#      define CV_POPCNT_U64 _mm_popcnt_u64
#    endif
#    define CV_POPCNT_U32 _mm_popcnt_u32
#  else
#    include <popcntintrin.h>
#    if defined(__x86_64__)
#      define CV_POPCNT_U64 __builtin_popcountll
#    endif
#    define CV_POPCNT_U32 __builtin_popcount
#  endif
#  define CV_POPCNT 1
#endif
#ifdef CV_CPU_COMPILE_AVX
#  include <immintrin.h>
#  define CV_AVX 1
#endif
#ifdef CV_CPU_COMPILE_AVX2
#  include <immintrin.h>
#  define CV_AVX2 1
#endif
#ifdef CV_CPU_COMPILE_FMA3
#  define CV_FMA3 1
#endif

// NEON is detected from compiler-predefined macros rather than from a
// CV_CPU_COMPILE_* switch.
#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
#  include <arm_neon.h>
#  define CV_NEON 1
#endif

// Also makes the NEON header available on aarch64 when __ARM_NEON is not
// defined; CV_NEON is left untouched in that case.
#if defined(__ARM_NEON__) || defined(__aarch64__)
#  include <arm_neon.h>
#endif

#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__

#endif // __OPENCV_BUILD
// Compatibility code for translation units built outside the OpenCV build
// (__OPENCV_BUILD not defined): derive CV_SSE2 / CV_NEON from the compiler's
// predefined macros. NVCC is excluded, as in the __OPENCV_BUILD branch, so that
// SSE/NEON intrinsic headers are never included into CUDA device compilation.
#if !defined __OPENCV_BUILD && !defined __CUDACC__ // Compatibility code

#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
#  include <emmintrin.h>
#  define CV_MMX 1
#  define CV_SSE 1
#  define CV_SSE2 1
#elif (defined WIN32 || defined _WIN32) && defined(_M_ARM)
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
#  include <arm_neon.h>
#  define CV_NEON 1
#endif

#endif // !__OPENCV_BUILD && !__CUDACC__ (Compatibility code)
// Every CV_<feature> macro that was not set above falls back to 0, so the
// macros can be used in plain `#if CV_<feature>` tests.

// SSE family
#if !defined(CV_MMX)
#  define CV_MMX 0
#endif
#if !defined(CV_SSE)
#  define CV_SSE 0
#endif
#if !defined(CV_SSE2)
#  define CV_SSE2 0
#endif
#if !defined(CV_SSE3)
#  define CV_SSE3 0
#endif
#if !defined(CV_SSSE3)
#  define CV_SSSE3 0
#endif
#if !defined(CV_SSE4_1)
#  define CV_SSE4_1 0
#endif
#if !defined(CV_SSE4_2)
#  define CV_SSE4_2 0
#endif
#if !defined(CV_POPCNT)
#  define CV_POPCNT 0
#endif

// AVX family
#if !defined(CV_AVX)
#  define CV_AVX 0
#endif
#if !defined(CV_AVX2)
#  define CV_AVX2 0
#endif
#if !defined(CV_FMA3)
#  define CV_FMA3 0
#endif

// AVX-512 subsets
#if !defined(CV_AVX_512F)
#  define CV_AVX_512F 0
#endif
#if !defined(CV_AVX_512BW)
#  define CV_AVX_512BW 0
#endif
#if !defined(CV_AVX_512CD)
#  define CV_AVX_512CD 0
#endif
#if !defined(CV_AVX_512DQ)
#  define CV_AVX_512DQ 0
#endif
#if !defined(CV_AVX_512ER)
#  define CV_AVX_512ER 0
#endif
#if !defined(CV_AVX_512IFMA512)
#  define CV_AVX_512IFMA512 0
#endif
#if !defined(CV_AVX_512PF)
#  define CV_AVX_512PF 0
#endif
#if !defined(CV_AVX_512VBMI)
#  define CV_AVX_512VBMI 0
#endif
#if !defined(CV_AVX_512VL)
#  define CV_AVX_512VL 0
#endif

// ARM
#if !defined(CV_NEON)
#  define CV_NEON 0
#endif
@@ -0,0 +1,133 @@
// AUTOGENERATED, DO NOT EDIT
// SSE: with CV_CPU_COMPILE_SSE the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_SSE it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_SSE)
#  define CV_CPU_CALL_SSE(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_SSE)
#  define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
#else
#  define CV_CPU_CALL_SSE(...)
#  define CV_CPU_HAS_SUPPORT_SSE 0
#endif
// SSE2: with CV_CPU_COMPILE_SSE2 the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_SSE2 it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_SSE2)
#  define CV_CPU_CALL_SSE2(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE2 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_SSE2)
#  define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
#else
#  define CV_CPU_CALL_SSE2(...)
#  define CV_CPU_HAS_SUPPORT_SSE2 0
#endif
// SSE3: with CV_CPU_COMPILE_SSE3 the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_SSE3 it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_SSE3)
#  define CV_CPU_CALL_SSE3(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE3 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_SSE3)
#  define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
#else
#  define CV_CPU_CALL_SSE3(...)
#  define CV_CPU_HAS_SUPPORT_SSE3 0
#endif
// SSSE3: with CV_CPU_COMPILE_SSSE3 the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_SSSE3 it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_SSSE3)
#  define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSSE3 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_SSSE3)
#  define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
#else
#  define CV_CPU_CALL_SSSE3(...)
#  define CV_CPU_HAS_SUPPORT_SSSE3 0
#endif
// SSE4.1: with CV_CPU_COMPILE_SSE4_1 the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_SSE4_1 it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_SSE4_1)
#  define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE4_1 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_SSE4_1)
#  define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
#else
#  define CV_CPU_CALL_SSE4_1(...)
#  define CV_CPU_HAS_SUPPORT_SSE4_1 0
#endif
// SSE4.2: with CV_CPU_COMPILE_SSE4_2 the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_SSE4_2 it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_SSE4_2)
#  define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE4_2 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_SSE4_2)
#  define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
#else
#  define CV_CPU_CALL_SSE4_2(...)
#  define CV_CPU_HAS_SUPPORT_SSE4_2 0
#endif
// POPCNT: with CV_CPU_COMPILE_POPCNT the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_POPCNT it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_POPCNT)
#  define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_POPCNT 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_POPCNT)
#  define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
#else
#  define CV_CPU_CALL_POPCNT(...)
#  define CV_CPU_HAS_SUPPORT_POPCNT 0
#endif
// AVX: with CV_CPU_COMPILE_AVX the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_AVX it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_AVX)
#  define CV_CPU_CALL_AVX(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_AVX 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_AVX)
#  define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
#else
#  define CV_CPU_CALL_AVX(...)
#  define CV_CPU_HAS_SUPPORT_AVX 0
#endif
// FP16: with CV_CPU_COMPILE_FP16 the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_FP16 it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_FP16)
#  define CV_CPU_CALL_FP16(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_FP16 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_FP16)
#  define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
#else
#  define CV_CPU_CALL_FP16(...)
#  define CV_CPU_HAS_SUPPORT_FP16 0
#endif
// AVX2: with CV_CPU_COMPILE_AVX2 the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_AVX2 it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_AVX2)
#  define CV_CPU_CALL_AVX2(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_AVX2 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_AVX2)
#  define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
#else
#  define CV_CPU_CALL_AVX2(...)
#  define CV_CPU_HAS_SUPPORT_AVX2 0
#endif
// FMA3: with CV_CPU_COMPILE_FMA3 the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_FMA3 it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_FMA3)
#  define CV_CPU_CALL_FMA3(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_FMA3 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_FMA3)
#  define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
#else
#  define CV_CPU_CALL_FMA3(...)
#  define CV_CPU_HAS_SUPPORT_FMA3 0
#endif
// NEON: with CV_CPU_COMPILE_NEON the call is unconditional; with
// CV_CPU_DISPATCH_COMPILE_NEON it is guarded by cv::checkHardwareSupport();
// otherwise (or when intrinsics/optimizations are off) it expands to nothing.
#if defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_COMPILE_NEON)
#  define CV_CPU_CALL_NEON(...) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_NEON 1
#elif defined(CV_ENABLE_INTRINSICS) && !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_CPU_DISPATCH_COMPILE_NEON)
#  define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__
#  define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
#else
#  define CV_CPU_CALL_NEON(...)
#  define CV_CPU_HAS_SUPPORT_NEON 0
#endif
+6 -147
View File
@@ -48,6 +48,10 @@
//! @addtogroup core_utils
//! @{
#ifdef __OPENCV_BUILD
#include "cvconfig.h"
#endif
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
#endif
@@ -59,10 +63,6 @@
#undef abs
#undef Complex
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
#endif
#include <limits.h>
#include "opencv2/core/hal/interface.h"
@@ -88,7 +88,7 @@
# endif
#endif
#if defined CV_ICC && !defined CV_ENABLE_UNROLLED
#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
# define CV_ENABLE_UNROLLED 0
#else
# define CV_ENABLE_UNROLLED 1
@@ -161,150 +161,9 @@ enum CpuFeatures {
CPU_NEON = 100
};
// do not include SSE/AVX/NEON headers for NVCC compiler
#ifndef __CUDACC__
// x86 SIMD detection from compiler-predefined macros. Everything is nested
// under the SSE2 test, so no higher level is enabled without SSE2.
#if defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#  include <emmintrin.h>
#  define CV_MMX 1
#  define CV_SSE 1
#  define CV_SSE2 1

#  if defined(__SSE3__) || (defined(_MSC_VER) && _MSC_VER >= 1500)
#    include <pmmintrin.h>
#    define CV_SSE3 1
#  endif

#  if defined(__SSSE3__) || (defined(_MSC_VER) && _MSC_VER >= 1500)
#    include <tmmintrin.h>
#    define CV_SSSE3 1
#  endif

#  if defined(__SSE4_1__) || (defined(_MSC_VER) && _MSC_VER >= 1500)
#    include <smmintrin.h>
#    define CV_SSE4_1 1
#  endif

#  if defined(__SSE4_2__) || (defined(_MSC_VER) && _MSC_VER >= 1500)
#    include <nmmintrin.h>
#    define CV_SSE4_2 1
#  endif

   // CV_POPCNT_U64 exists only on 64-bit targets; CV_POPCNT_U32 always does.
#  if defined(__POPCNT__) || (defined(_MSC_VER) && _MSC_VER >= 1500)
#    if defined(_MSC_VER)
#      include <nmmintrin.h>
#      if defined(_M_X64)
#        define CV_POPCNT_U64 _mm_popcnt_u64
#      endif
#      define CV_POPCNT_U32 _mm_popcnt_u32
#    else
#      include <popcntintrin.h>
#      if defined(__x86_64__)
#        define CV_POPCNT_U64 __builtin_popcountll
#      endif
#      define CV_POPCNT_U32 __builtin_popcount
#    endif
#    define CV_POPCNT 1
#  endif

   // The MSVC alternative is switched off with `&& 0`:
   // MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
   // See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
#  if defined(__AVX__) || (defined(_MSC_VER) && _MSC_VER >= 1600 && 0)
#    include <immintrin.h>
#    define CV_AVX 1
#    if defined(_XCR_XFEATURE_ENABLED_MASK)
#      define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
#    else
#      define __xgetbv() 0
#    endif
#  endif

   // FMA3 is only reported together with AVX2.
#  if defined(__AVX2__) || (defined(_MSC_VER) && _MSC_VER >= 1800 && 0)
#    include <immintrin.h>
#    define CV_AVX2 1
#    if defined(__FMA__)
#      define CV_FMA3 1
#    endif
#  endif
#endif
#include "cv_cpu_dispatch.h"
// ARM detection. NEON: Windows-on-ARM builds also include <Intrin.h> and
// define CPU_HAS_NEON_FEATURE; other compilers are recognised through
// __ARM_NEON__ (or __ARM_NEON on aarch64).
#if defined(_M_ARM) && (defined(WIN32) || defined(_WIN32))
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
#  define CPU_HAS_NEON_FEATURE (true)
#elif defined(__ARM_NEON__) || (defined(__ARM_NEON) && defined(__aarch64__))
#  include <arm_neon.h>
#  define CV_NEON 1
#endif

// VFP: GNU-compatible compiler targeting 32-bit ARM without soft-float, with
// at least one of the VFP/NEON indicator macros defined.
#if defined(__GNUC__) && defined(__arm__) && !defined(__SOFTFP__)
#  if defined(__ARM_PCS_VFP) || defined(__ARM_VFPV3__) || defined(__ARM_NEON__)
#    define CV_VFP 1
#  endif
#endif
#endif // __CUDACC__
// Any CV_<feature> macro left undefined by the detection code falls back to 0,
// so that `#if CV_<feature>` is always well-formed.
#if !defined(CV_POPCNT)
#  define CV_POPCNT 0
#endif

// SSE family
#if !defined(CV_MMX)
#  define CV_MMX 0
#endif
#if !defined(CV_SSE)
#  define CV_SSE 0
#endif
#if !defined(CV_SSE2)
#  define CV_SSE2 0
#endif
#if !defined(CV_SSE3)
#  define CV_SSE3 0
#endif
#if !defined(CV_SSSE3)
#  define CV_SSSE3 0
#endif
#if !defined(CV_SSE4_1)
#  define CV_SSE4_1 0
#endif
#if !defined(CV_SSE4_2)
#  define CV_SSE4_2 0
#endif

// AVX family
#if !defined(CV_AVX)
#  define CV_AVX 0
#endif
#if !defined(CV_AVX2)
#  define CV_AVX2 0
#endif
#if !defined(CV_FMA3)
#  define CV_FMA3 0
#endif

// AVX-512 subsets
#if !defined(CV_AVX_512F)
#  define CV_AVX_512F 0
#endif
#if !defined(CV_AVX_512BW)
#  define CV_AVX_512BW 0
#endif
#if !defined(CV_AVX_512CD)
#  define CV_AVX_512CD 0
#endif
#if !defined(CV_AVX_512DQ)
#  define CV_AVX_512DQ 0
#endif
#if !defined(CV_AVX_512ER)
#  define CV_AVX_512ER 0
#endif
#if !defined(CV_AVX_512IFMA512)
#  define CV_AVX_512IFMA512 0
#endif
#if !defined(CV_AVX_512PF)
#  define CV_AVX_512PF 0
#endif
#if !defined(CV_AVX_512VBMI)
#  define CV_AVX_512VBMI 0
#endif
#if !defined(CV_AVX_512VL)
#  define CV_AVX_512VL 0
#endif

// ARM
#if !defined(CV_NEON)
#  define CV_NEON 0
#endif
#if !defined(CV_VFP)
#  define CV_VFP 0
#endif
/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795
+13 -47
View File
@@ -47,6 +47,12 @@
#include "opencv2/core/cvdef.h"
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
#include <emmintrin.h>
#endif
//! @addtogroup core_utils
//! @{
@@ -66,7 +72,7 @@
# include "tegra_round.hpp"
#endif
#if CV_VFP
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
// 1. general scheme
#define ARM_ROUND(_value, _asm_string) \
int res; \
@@ -82,7 +88,7 @@
#endif
// 3. version for float
#define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
#endif // CV_VFP
#endif
/** @brief Rounds floating-point number to the nearest integer
@@ -93,7 +99,7 @@ CV_INLINE int
cvRound( double value )
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86
@@ -108,7 +114,7 @@ cvRound( double value )
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_DBL(value);
#elif defined CV_ICC || defined __GNUC__
# if CV_VFP
# if defined ARM_ROUND_DBL
ARM_ROUND_DBL(value);
# else
return (int)lrint(value);
@@ -130,18 +136,8 @@ cvRound( double value )
*/
CV_INLINE int cvFloor( double value )
{
    // Portable floor: truncate toward zero, then subtract one when truncation
    // moved the value up (negative non-integers).
    //
    // This replaces the compiler-specific branches:
    //  - the SSE2 branch was guarded by a bare __SSE2__ test although this
    //    header includes <emmintrin.h> only for x86_64 targets;
    //  - the generic fallback narrowed (value - i) to float, so a tiny negative
    //    fraction underflowed to -0.0f and the result was 0 instead of -1.
    // As with the previous code, values outside the int range are not handled.
    int i = (int)value;
    return i - (i > value);
}
/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
@@ -153,18 +149,8 @@ CV_INLINE int cvFloor( double value )
*/
CV_INLINE int cvCeil( double value )
{
    // Portable ceil: truncate toward zero, then add one when truncation moved
    // the value down (positive non-integers).
    //
    // This replaces the compiler-specific branches:
    //  - the SSE2 branch was guarded by a bare __SSE2__ test although this
    //    header includes <emmintrin.h> only for x86_64 targets;
    //  - the generic fallback narrowed (i - value) to float, so a tiny positive
    //    fraction underflowed to -0.0f and the result was 0 instead of 1.
    // As with the previous code, values outside the int range are not handled.
    int i = (int)value;
    return i + (i < value);
}
/** @brief Determines if the argument is Not A Number.
@@ -200,8 +186,8 @@ CV_INLINE int cvIsInf( double value )
/** @overload */
CV_INLINE int cvRound(float value)
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \
defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128 t = _mm_set_ss( value );
return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86
@@ -216,7 +202,7 @@ CV_INLINE int cvRound(float value)
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_FLT(value);
#elif defined CV_ICC || defined __GNUC__
# if CV_VFP
# if defined ARM_ROUND_FLT
ARM_ROUND_FLT(value);
# else
return (int)lrintf(value);
@@ -237,18 +223,8 @@ CV_INLINE int cvRound( int value )
/** @overload */
CV_INLINE int cvFloor( float value )
{
    // Portable floor: truncate toward zero, then subtract one when truncation
    // moved the value up (negative non-integers).
    //
    // The former SSE branch was guarded by a bare __SSE2__ test (mixing && and
    // || without parentheses) although this header includes <emmintrin.h> only
    // for x86_64 targets; the plain form needs no intrinsics header.
    // As with the previous code, values outside the int range are not handled.
    int i = (int)value;
    return i - (i > value);
}
/** @overload */
@@ -260,18 +236,8 @@ CV_INLINE int cvFloor( int value )
/** @overload */
CV_INLINE int cvCeil( float value )
{
    // Portable ceil: truncate toward zero, then add one when truncation moved
    // the value down (positive non-integers).
    //
    // The former SSE branch was guarded by a bare __SSE2__ test (mixing && and
    // || without parentheses) although this header includes <emmintrin.h> only
    // for x86_64 targets; the plain form needs no intrinsics header.
    // As with the previous code, values outside the int range are not handled.
    int i = (int)value;
    return i + (i < value);
}
/** @overload */