Merge pull request #7317 from tomoaki0705:fixIfdefFp16
This commit is contained in:
@@ -310,7 +310,7 @@ enum CpuFeatures {
|
||||
typedef union Cv16suf
|
||||
{
|
||||
short i;
|
||||
#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
|
||||
#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
|
||||
__fp16 h;
|
||||
#endif
|
||||
struct _fp16Format
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#include "precomp.hpp"
|
||||
|
||||
#include "opencl_kernels_core.hpp"
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
|
||||
#ifdef __APPLE__
|
||||
#undef CV_NEON
|
||||
@@ -4379,7 +4380,7 @@ struct Cvt_SIMD<float, int>
|
||||
|
||||
#endif
|
||||
|
||||
#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) )
|
||||
#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) ) )
|
||||
// const numbers for floating points format
|
||||
const unsigned int kShiftSignificand = 13;
|
||||
const unsigned int kMaskFp16Significand = 0x3ff;
|
||||
@@ -4387,7 +4388,7 @@ const unsigned int kBiasFp16Exponent = 15;
|
||||
const unsigned int kBiasFp32Exponent = 127;
|
||||
#endif
|
||||
|
||||
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
|
||||
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
|
||||
static float convertFp16SW(short fp16)
|
||||
{
|
||||
// Fp16 -> Fp32
|
||||
@@ -4449,7 +4450,7 @@ static float convertFp16SW(short fp16)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
|
||||
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
|
||||
static short convertFp16SW(float fp32)
|
||||
{
|
||||
// Fp32 -> Fp16
|
||||
@@ -4557,7 +4558,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t
|
||||
if ( ( (intptr_t)dst & 0xf ) == 0 )
|
||||
#endif
|
||||
{
|
||||
#if CV_FP16
|
||||
#if CV_FP16 && CV_SIMD128
|
||||
for ( ; x <= size.width - 4; x += 4)
|
||||
{
|
||||
v_float32x4 v_src = v_load(src + x);
|
||||
@@ -4603,7 +4604,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t
|
||||
if ( ( (intptr_t)src & 0xf ) == 0 )
|
||||
#endif
|
||||
{
|
||||
#if CV_FP16
|
||||
#if CV_FP16 && CV_SIMD128
|
||||
for ( ; x <= size.width - 4; x += 4)
|
||||
{
|
||||
v_float16x4 v_src = v_load_f16(src + x);
|
||||
|
||||
@@ -711,48 +711,56 @@ template<typename R> struct TheTest
|
||||
return *this;
|
||||
}
|
||||
|
||||
#if CV_FP16
|
||||
TheTest & test_loadstore_fp16()
|
||||
{
|
||||
#if CV_FP16
|
||||
AlignedData<R> data;
|
||||
AlignedData<R> out;
|
||||
|
||||
// check if addresses are aligned and unaligned respectively
|
||||
EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
|
||||
EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
|
||||
EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
|
||||
EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
|
||||
if(checkHardwareSupport(CV_CPU_FP16))
|
||||
{
|
||||
// check if addresses are aligned and unaligned respectively
|
||||
EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
|
||||
EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
|
||||
EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
|
||||
EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
|
||||
|
||||
// check some initialization methods
|
||||
R r1 = data.u;
|
||||
R r2 = v_load_f16(data.a.d);
|
||||
R r3(r2);
|
||||
EXPECT_EQ(data.u[0], r1.get0());
|
||||
EXPECT_EQ(data.a[0], r2.get0());
|
||||
EXPECT_EQ(data.a[0], r3.get0());
|
||||
// check some initialization methods
|
||||
R r1 = data.u;
|
||||
R r2 = v_load_f16(data.a.d);
|
||||
R r3(r2);
|
||||
EXPECT_EQ(data.u[0], r1.get0());
|
||||
EXPECT_EQ(data.a[0], r2.get0());
|
||||
EXPECT_EQ(data.a[0], r3.get0());
|
||||
|
||||
// check some store methods
|
||||
out.a.clear();
|
||||
v_store_f16(out.a.d, r1);
|
||||
EXPECT_EQ(data.a, out.a);
|
||||
// check some store methods
|
||||
out.a.clear();
|
||||
v_store_f16(out.a.d, r1);
|
||||
EXPECT_EQ(data.a, out.a);
|
||||
}
|
||||
|
||||
return *this;
|
||||
#endif
|
||||
}
|
||||
|
||||
TheTest & test_float_cvt_fp16()
|
||||
{
|
||||
#if CV_FP16
|
||||
AlignedData<v_float32x4> data;
|
||||
|
||||
// check conversion
|
||||
v_float32x4 r1 = v_load(data.a.d);
|
||||
v_float16x4 r2 = v_cvt_f16(r1);
|
||||
v_float32x4 r3 = v_cvt_f32(r2);
|
||||
EXPECT_EQ(0x3c00, r2.get0());
|
||||
EXPECT_EQ(r3.get0(), r1.get0());
|
||||
if(checkHardwareSupport(CV_CPU_FP16))
|
||||
{
|
||||
// check conversion
|
||||
v_float32x4 r1 = v_load(data.a.d);
|
||||
v_float16x4 r2 = v_cvt_f16(r1);
|
||||
v_float32x4 r3 = v_cvt_f32(r2);
|
||||
EXPECT_EQ(0x3c00, r2.get0());
|
||||
EXPECT_EQ(r3.get0(), r1.get0());
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user