From d354ad1c34c3b83427ca67bedc89b6c7b8784ce7 Mon Sep 17 00:00:00 2001
From: Tatsuro Shibamura <me@shibayan.jp>
Date: Sun, 27 Feb 2022 02:35:03 +0900
Subject: [PATCH] Merge pull request #21630 from shibayan:arm64-msvc-neon

* Added NEON support in builds for Windows on ARM

* Fixed `HAVE_CPU_NEON_SUPPORT` display broken during compiler test

* Fixed a build error prior to Visual Studio 2022
---
 cmake/OpenCVCompilerOptions.cmake             |  4 +++
 cmake/checks/cpu_neon.cpp                     |  1 +
 .../include/opencv2/core/hal/intrin_neon.hpp  | 30 ++++++++++---------
 modules/core/src/system.cpp                   |  3 ++
 4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index bcb8a3e203..4f5c353980 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -314,6 +314,10 @@ if(MSVC)
     set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} /FS")
     set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} /FS")
   endif()
+
+  if(AARCH64 AND NOT MSVC_VERSION LESS 1930)
+    set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /D _ARM64_DISTINCT_NEON_TYPES")
+  endif()
 endif()
 
 if(PROJECT_NAME STREQUAL "OpenCV")
diff --git a/cmake/checks/cpu_neon.cpp b/cmake/checks/cpu_neon.cpp
index c309e85049..bb103ec366 100644
--- a/cmake/checks/cpu_neon.cpp
+++ b/cmake/checks/cpu_neon.cpp
@@ -1,6 +1,7 @@
 #include <stdio.h>
 
 #if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64))
+# define _ARM64_DISTINCT_NEON_TYPES
 # include <Intrin.h>
 # include <arm_neon.h>
 # define CV_NEON 1
diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
index e17972a3fc..28cf813379 100644
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@@ -591,28 +591,26 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
 
 inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
 {
+#if CV_NEON_AARCH64
+    int32x4_t c = vmull_high_s16(a.val, b.val);
+#else // #if CV_NEON_AARCH64
+    int32x4_t c = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
+#endif // #if CV_NEON_AARCH64
     return v_int16x8(vcombine_s16(
                                   vshrn_n_s32(vmull_s16( vget_low_s16(a.val),  vget_low_s16(b.val)), 16),
-                                  vshrn_n_s32(
-#if CV_NEON_AARCH64
-                                    vmull_high_s16(a.val, b.val)
-#else // #if CV_NEON_AARCH64
-                                    vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val))
-#endif // #if CV_NEON_AARCH64
-                                    , 16)
+                                  vshrn_n_s32(c, 16)
                                  ));
 }
 inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
 {
+#if CV_NEON_AARCH64
+    uint32x4_t c = vmull_high_u16(a.val, b.val);
+#else // #if CV_NEON_AARCH64
+    uint32x4_t c = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
+#endif // #if CV_NEON_AARCH64
     return v_uint16x8(vcombine_u16(
                                    vshrn_n_u32(vmull_u16( vget_low_u16(a.val),  vget_low_u16(b.val)), 16),
-                                   vshrn_n_u32(
-#if CV_NEON_AARCH64
-                                    vmull_high_u16(a.val, b.val)
-#else // #if CV_NEON_AARCH64
-                                    vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val))
-#endif // #if CV_NEON_AARCH64
-                                    , 16)
+                                   vshrn_n_u32(c, 16)
                                   ));
 }
 
@@ -1937,10 +1935,14 @@ inline v_int32x4 v_round(const v_float32x4& a)
 {
     float32x4_t a_ = a.val;
     int32x4_t result;
+#if defined _MSC_VER
+    result = vcvtnq_s32_f32(a_);
+#else
     __asm__ ("fcvtns %0.4s, %1.4s"
              : "=w"(result)
              : "w"(a_)
              : /* No clobbers */);
+#endif
     return v_int32x4(result);
 }
 #else
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index ebafee59e0..d2231fe952 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -615,6 +615,9 @@ struct HWFeatures
     #if defined _ARM_ && (defined(_WIN32_WCE) && _WIN32_WCE >= 0x800)
         have[CV_CPU_NEON] = true;
     #endif
+    #if defined _M_ARM64
+        have[CV_CPU_NEON] = true;
+    #endif
     #ifdef __riscv_vector
         have[CV_CPU_RISCVV] = true;
     #endif