diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index bcb8a3e203..4f5c353980 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -314,6 +314,10 @@ if(MSVC) set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} /FS") set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} /FS") endif() + + if(AARCH64 AND NOT MSVC_VERSION LESS 1930) + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /D _ARM64_DISTINCT_NEON_TYPES") + endif() endif() if(PROJECT_NAME STREQUAL "OpenCV") diff --git a/cmake/checks/cpu_neon.cpp b/cmake/checks/cpu_neon.cpp index c309e85049..bb103ec366 100644 --- a/cmake/checks/cpu_neon.cpp +++ b/cmake/checks/cpu_neon.cpp @@ -1,6 +1,7 @@ #include #if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) +# define _ARM64_DISTINCT_NEON_TYPES # include # include # define CV_NEON 1 diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index e17972a3fc..28cf813379 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -591,28 +591,26 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { +#if CV_NEON_AARCH64 + int32x4_t c = vmull_high_s16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + int32x4_t c = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); +#endif // #if CV_NEON_AARCH64 return v_int16x8(vcombine_s16( vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16), - vshrn_n_s32( -#if CV_NEON_AARCH64 - vmull_high_s16(a.val, b.val) -#else // #if CV_NEON_AARCH64 - vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)) -#endif // #if CV_NEON_AARCH64 - , 16) + vshrn_n_s32(c, 16) )); } inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { +#if CV_NEON_AARCH64 + uint32x4_t c = vmull_high_u16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + uint32x4_t c = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)); +#endif // #if CV_NEON_AARCH64 return v_uint16x8(vcombine_u16( vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16), - vshrn_n_u32( -#if CV_NEON_AARCH64 - vmull_high_u16(a.val, b.val) -#else // #if CV_NEON_AARCH64 - vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)) -#endif // #if CV_NEON_AARCH64 - , 16) + vshrn_n_u32(c, 16) )); } @@ -1937,10 +1935,14 @@ inline v_int32x4 v_round(const v_float32x4& a) { float32x4_t a_ = a.val; int32x4_t result; +#if defined _MSC_VER + result = vcvtnq_s32_f32(a_); +#else __asm__ ("fcvtns %0.4s, %1.4s" : "=w"(result) : "w"(a_) : /* No clobbers */); +#endif return v_int32x4(result); } #else diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index ebafee59e0..d2231fe952 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -615,6 +615,9 @@ struct HWFeatures #if defined _ARM_ && (defined(_WIN32_WCE) && _WIN32_WCE >= 0x800) have[CV_CPU_NEON] = true; #endif + #if defined _M_ARM64 + have[CV_CPU_NEON] = true; + #endif #ifdef __riscv_vector have[CV_CPU_RISCVV] = true; #endif