Merge pull request #8900 from alalek:update_android_build

2017-06-23 10:58:52 +00:00
parent 259147e32c 3e3e2dd512
commit fa7e7e0ff9
24 changed files with 240 additions and 155 deletions
@@ -71,7 +71,11 @@
 #  define CV_AVX 1
 #endif
 #ifdef CV_CPU_COMPILE_FP16
-#  include <immintrin.h>
+#  if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
+#    include <arm_neon.h>
+#  else
+#    include <immintrin.h>
+#  endif
 #  define CV_FP16 1
 #endif
 #ifdef CV_CPU_COMPILE_AVX2
@@ -279,15 +279,27 @@ struct v_float64x2
 #endif

 #if CV_FP16
-// Workaround for old comiplers
+// Workaround for old compilers
 template <typename T> static inline int16x4_t vreinterpret_s16_f16(T a)
 { return (int16x4_t)a; }
 template <typename T> static inline float16x4_t vreinterpret_f16_s16(T a)
 { return (float16x4_t)a; }
-template <typename T> static inline float16x4_t vld1_f16(const T* ptr)
-{ return vreinterpret_f16_s16(vld1_s16((const short*)ptr)); }
-template <typename T> static inline void vst1_f16(T* ptr, float16x4_t a)
-{ vst1_s16((short*)ptr, vreinterpret_s16_f16(a)); }
+template <typename T> static inline float16x4_t cv_vld1_f16(const T* ptr)
+{
+#ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
+    return vreinterpret_f16_s16(vld1_s16((const short*)ptr));
+#else
+    return vld1_f16((const __fp16*)ptr);
+#endif
+}
+template <typename T> static inline void cv_vst1_f16(T* ptr, float16x4_t a)
+{
+#ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro
+    vst1_s16((short*)ptr, vreinterpret_s16_f16(a));
+#else
+    vst1_f16((__fp16*)ptr, a);
+#endif
+}

 struct v_float16x4
 {
@@ -299,7 +311,7 @@ struct v_float16x4
    v_float16x4(short v0, short v1, short v2, short v3)
    {
        short v[] = {v0, v1, v2, v3};
-        val = vld1_f16(v);
+        val = cv_vld1_f16(v);
    }
    short get0() const
    {
@@ -778,9 +790,9 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
 #if CV_FP16
-// Workaround for old comiplers
+// Workaround for old compilers
 inline v_float16x4 v_load_f16(const short* ptr)
-{ return v_float16x4(vld1_f16(ptr)); }
+{ return v_float16x4(cv_vld1_f16(ptr)); }
 inline void v_store_f16(short* ptr, v_float16x4& a)
-{ vst1_f16(ptr, a.val); }
+{ cv_vst1_f16(ptr, a.val); }
 #endif

 #define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \