diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index f82159ea3a..2a85123f48 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -4,6 +4,15 @@ # SSE4_1 / SSE4_2 / POPCNT # AVX / AVX2 / AVX_512F # FMA3 +# +# CPU features groups: +# AVX512_COMMON (Common instructions AVX-512F/CD for all CPUs that support AVX-512) +# AVX512_KNL (Knights Landing with AVX-512F/CD/ER/PF) +# AVX512_KNM (Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ) +# AVX512_SKX (Skylake-X with AVX-512F/CD/BW/DQ/VL) +# AVX512_CNL (Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI) +# AVX512_CEL (Cascade Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI) +# AVX512_ICL (Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ/VPCLMULQDQ*/GFNI*/VAES*) # ppc64le arch: # VSX (always available on Power8) @@ -33,7 +42,8 @@ # # CPU_{opt}_ENABLED_DEFAULT=ON/OFF - has compiler support without additional flag (CPU_BASELINE_DETECT=ON only) -set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F;AVX512_SKX") +set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F") +list(APPEND CPU_ALL_OPTIMIZATIONS "AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CEL;AVX512_ICL") list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3) list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS) @@ -152,9 +162,15 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") endif() if(X86 OR X86_64) - ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_SKX") + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CEL;AVX512_ICL") - ocv_update(CPU_AVX512_SKX_GROUP "AVX_512F;AVX_512CD;AVX_512BW;AVX_512DQ;AVX_512VL") + ocv_update(CPU_AVX512_COMMON_GROUP "AVX_512F;AVX_512CD") + ocv_update(CPU_AVX512_KNL_GROUP "AVX512_COMMON;AVX512_KNL_EXTRA") + ocv_update(CPU_AVX512_KNM_GROUP "AVX512_KNL;AVX512_KNM_EXTRA;AVX_512VPOPCNTDQ") + ocv_update(CPU_AVX512_SKX_GROUP "AVX512_COMMON;AVX_512VL;AVX_512BW;AVX_512DQ") + ocv_update(CPU_AVX512_CNL_GROUP "AVX512_SKX;AVX_512IFMA;AVX_512VBMI") + ocv_update(CPU_AVX512_CEL_GROUP "AVX512_CNL;AVX_512VNNI") + ocv_update(CPU_AVX512_ICL_GROUP "AVX512_CEL;AVX_512VBMI2;AVX_512BITALG;AVX_512VPOPCNTDQ") # ? VPCLMULQDQ, GFNI, VAES ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp") ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp") @@ -167,9 +183,22 @@ if(X86 OR X86_64) ocv_update(CPU_AVX2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx2.cpp") ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp") ocv_update(CPU_AVX_512F_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512.cpp") + ocv_update(CPU_AVX512_COMMON_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512common.cpp") + ocv_update(CPU_AVX512_KNL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512knl.cpp") + ocv_update(CPU_AVX512_KNM_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512knm.cpp") ocv_update(CPU_AVX512_SKX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512skx.cpp") + ocv_update(CPU_AVX512_CNL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512cnl.cpp") + ocv_update(CPU_AVX512_CEL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512cel.cpp") + ocv_update(CPU_AVX512_ICL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512icl.cpp") if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE) + ocv_update(CPU_AVX512_ICL_IMPLIES "AVX512_CEL") + ocv_update(CPU_AVX512_CEL_IMPLIES "AVX512_CNL") + ocv_update(CPU_AVX512_CNL_IMPLIES "AVX512_SKX") + ocv_update(CPU_AVX512_SKX_IMPLIES "AVX512_COMMON") + ocv_update(CPU_AVX512_KNM_IMPLIES "AVX512_KNL") + ocv_update(CPU_AVX512_KNL_IMPLIES "AVX512_COMMON") + ocv_update(CPU_AVX512_COMMON_IMPLIES "AVX_512F") ocv_update(CPU_AVX_512F_IMPLIES "AVX2") ocv_update(CPU_AVX_512F_FORCE "") # Don't force other optimizations ocv_update(CPU_AVX2_IMPLIES "AVX;FMA3;FP16") @@ -192,10 +221,10 @@ if(X86 OR X86_64) ocv_update(CPU_${name}_FLAGS_NAME "${name}") if(MSVC) set(enable_flags "${msvc_flags}") - set(flags_conflict "/arch:[^ ]+") + set(flags_conflict "/arch:[^ ]*|/Qx:[^ ]+") else() set(enable_flags "${unix_flags}") - set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]+") + set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]*|-x[^ ]+") endif() ocv_update(CPU_${name}_FLAGS_ON "${enable_flags}") if(flags_conflict) @@ -215,8 +244,14 @@ if(X86 OR X86_64) if(NOT X86_64) # x64 compiler doesn't support /arch:sse ocv_intel_compiler_optimization_option(SSE "-msse" "/arch:SSE") endif() - ocv_intel_compiler_optimization_option(AVX_512F "-march=common-avx512" "/arch:COMMON-AVX512") - ocv_intel_compiler_optimization_option(AVX512_SKX "-march=core-avx512" "/arch:CORE-AVX512") + ocv_intel_compiler_optimization_option(AVX_512F "-xCOMMON-AVX512" "/Qx:COMMON-AVX512") + ocv_intel_compiler_optimization_option(AVX512_COMMON "-xCOMMON-AVX512" "/Qx:COMMON-AVX512") + ocv_intel_compiler_optimization_option(AVX512_KNL "-xKNL" "/Qx:KNL") + ocv_intel_compiler_optimization_option(AVX512_KNM "-xKNM" "/Qx:KNM") + ocv_intel_compiler_optimization_option(AVX512_SKX "-xSKYLAKE-AVX512" "/Qx:SKYLAKE-AVX512") + ocv_intel_compiler_optimization_option(AVX512_CNL "-xCANNONLAKE" "/Qx:CANNONLAKE") + ocv_intel_compiler_optimization_option(AVX512_CEL "-xCASCADELAKE" "/Qx:CASCADELAKE") + ocv_intel_compiler_optimization_option(AVX512_ICL "-xICELAKE-CLIENT" "/Qx:ICELAKE-CLIENT") elseif(CV_GCC OR CV_CLANG) ocv_update(CPU_AVX2_FLAGS_ON "-mavx2") ocv_update(CPU_FP16_FLAGS_ON "-mf16c") @@ -230,12 +265,21 @@ if(X86 OR X86_64) ocv_update(CPU_SSE2_FLAGS_ON "-msse2") ocv_update(CPU_SSE_FLAGS_ON "-msse") if(NOT (CV_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")) # GCC >= 5.0 - # -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi ocv_update(CPU_AVX_512F_FLAGS_ON "-mavx512f") - ocv_update(CPU_AVX512_SKX_FLAGS_ON "-mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq") + ocv_update(CPU_AVX_512CD_FLAGS_ON "-mavx512cd") + ocv_update(CPU_AVX512_KNL_EXTRA_FLAGS_ON "-mavx512er -mavx512pf") + ocv_update(CPU_AVX512_KNM_EXTRA_FLAGS_ON "-mavx5124fmaps -mavx5124vnniw") + ocv_update(CPU_AVX_512BW_FLAGS_ON "-mavx512bw") + ocv_update(CPU_AVX_512DQ_FLAGS_ON "-mavx512dq") + ocv_update(CPU_AVX_512VL_FLAGS_ON "-mavx512vl") + ocv_update(CPU_AVX_512IFMA_FLAGS_ON "-mavx512ifma") + ocv_update(CPU_AVX_512VBMI_FLAGS_ON "-mavx512vbmi") + ocv_update(CPU_AVX_512VNNI_FLAGS_ON "-mavx512vnni") + ocv_update(CPU_AVX_512VBMI2_FLAGS_ON "-mavx512vbmi2") + ocv_update(CPU_AVX_512BITALG_FLAGS_ON "-mavx512bitalg") + ocv_update(CPU_AVX_512VPOPCNTDQ_FLAGS_ON "-mavx512vpopcntdq") else() ocv_update(CPU_AVX_512F_SUPPORTED OFF) - ocv_update(CPU_AVX512_SKX_SUPPORTED OFF) endif() elseif(MSVC) ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2") @@ -408,6 +452,7 @@ foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) if(NOT DEFINED CPU_${OPT}_FORCE) set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}") endif() + #message("${OPT}: CPU_${OPT}_FLAGS_ON=${CPU_${OPT}_FLAGS_ON}") endforeach() if(_add_native_flag) diff --git a/cmake/checks/cpu_avx512cel.cpp b/cmake/checks/cpu_avx512cel.cpp new file mode 100644 index 0000000000..e372cf9a45 --- /dev/null +++ b/cmake/checks/cpu_avx512cel.cpp @@ -0,0 +1,11 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512i a, b, c; + a = _mm512_dpwssd_epi32(a, b, c); +} +#else +#error "AVX512-CEL is not supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/cmake/checks/cpu_avx512cnl.cpp b/cmake/checks/cpu_avx512cnl.cpp new file mode 100644 index 0000000000..480a312fe5 --- /dev/null +++ b/cmake/checks/cpu_avx512cnl.cpp @@ -0,0 +1,12 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512i a, b, c; + a = _mm512_madd52hi_epu64(a, b, c); + a = _mm512_permutexvar_epi8(a, b); +} +#else +#error "AVX512-CNL is not supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/cmake/checks/cpu_avx512common.cpp b/cmake/checks/cpu_avx512common.cpp new file mode 100644 index 0000000000..1754a95399 --- /dev/null +++ b/cmake/checks/cpu_avx512common.cpp @@ -0,0 +1,14 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512i zmm = _mm512_setzero_si512(); + zmm = _mm512_lzcnt_epi32(zmm); +#if defined __GNUC__ && defined __x86_64__ + asm volatile ("" : : : "zmm16", "zmm17", "zmm18", "zmm19"); +#endif +} +#else +#error "AVX512-COMMON is not supported" +#endif +int main() { return 0; } diff --git a/cmake/checks/cpu_avx512icl.cpp b/cmake/checks/cpu_avx512icl.cpp new file mode 100644 index 0000000000..a67f5f35d4 --- /dev/null +++ b/cmake/checks/cpu_avx512icl.cpp @@ -0,0 +1,13 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512i a, b, c; + a = _mm512_popcnt_epi8(a); + a = _mm512_shrdv_epi64(a, b, c); + a = _mm512_popcnt_epi64(a); +} +#else +#error "AVX512-ICL is not supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/cmake/checks/cpu_avx512knl.cpp b/cmake/checks/cpu_avx512knl.cpp new file mode 100644 index 0000000000..f0eaa646e3 --- /dev/null +++ b/cmake/checks/cpu_avx512knl.cpp @@ -0,0 +1,16 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include + +void test() +{ + int* base; + __m512i idx; + __mmask16 m16; + __m512 f; + _mm512_mask_prefetch_i32gather_ps(idx, m16, base, 1, _MM_HINT_T1); + f = _mm512_rsqrt28_ps(f); +} +#else +#error "AVX512-KNL is not supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/cmake/checks/cpu_avx512knm.cpp b/cmake/checks/cpu_avx512knm.cpp new file mode 100644 index 0000000000..18b2bf53e8 --- /dev/null +++ b/cmake/checks/cpu_avx512knm.cpp @@ -0,0 +1,17 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512 a, b, c, d, e; + __m512i ai, bi, ci, di, ei, fi; + __m128 *mem; + __m128i *memi; + __mmask16 m; + a = _mm512_4fnmadd_ps(a, b, c, d, e, mem); + ai = _mm512_4dpwssd_epi32(ai, bi, ci, di, ei, memi); + ai = _mm512_popcnt_epi64(ai); +} +#else +#error "AVX512-KNM is not supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index 7f6d6b0fb9..483cc8f269 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -87,9 +87,41 @@ # include # define CV_AVX_512F 1 #endif +#ifdef CV_CPU_COMPILE_AVX512_COMMON +# define CV_AVX512_COMMON 1 +# define CV_AVX_512CD 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_KNL +# define CV_AVX512_KNL 1 +# define CV_AVX_512ER 1 +# define CV_AVX_512PF 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_KNM +# define CV_AVX512_KNM 1 +# define CV_AVX_5124FMAPS 1 +# define CV_AVX_5124VNNIW 1 +# define CV_AVX_512VPOPCNTDQ 1 +#endif #ifdef CV_CPU_COMPILE_AVX512_SKX -# include # define CV_AVX512_SKX 1 +# define CV_AVX_512VL 1 +# define CV_AVX_512BW 1 +# define CV_AVX_512DQ 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_CNL +# define CV_AVX512_CNL 1 +# define CV_AVX_512IFMA 1 +# define CV_AVX_512VBMI 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_CEL +# define CV_AVX512_CEL 1 +# define CV_AVX_512VNNI 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_ICL +# define CV_AVX512_ICL 1 +# define CV_AVX_512VBMI2 1 +# define CV_AVX_512BITALG 1 +# define CV_AVX_512VPOPCNTDQ 1 #endif #ifdef CV_CPU_COMPILE_FMA3 # define CV_FMA3 1 @@ -223,9 +255,10 @@ struct VZeroUpperGuard { #ifndef CV_AVX_512ER # define CV_AVX_512ER 0 #endif -#ifndef CV_AVX_512IFMA512 -# define CV_AVX_512IFMA512 0 +#ifndef CV_AVX_512IFMA +# define CV_AVX_512IFMA 0 #endif +#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated #ifndef CV_AVX_512PF # define CV_AVX_512PF 0 #endif @@ -235,9 +268,45 @@ struct VZeroUpperGuard { #ifndef CV_AVX_512VL # define CV_AVX_512VL 0 #endif +#ifndef CV_AVX_5124FMAPS +# define CV_AVX_5124FMAPS 0 +#endif +#ifndef CV_AVX_5124VNNIW +# define CV_AVX_5124VNNIW 0 +#endif +#ifndef CV_AVX_512VPOPCNTDQ +# define CV_AVX_512VPOPCNTDQ 0 +#endif +#ifndef CV_AVX_512VNNI +# define CV_AVX_512VNNI 0 +#endif +#ifndef CV_AVX_512VBMI2 +# define CV_AVX_512VBMI2 0 +#endif +#ifndef CV_AVX_512BITALG +# define CV_AVX_512BITALG 0 +#endif +#ifndef CV_AVX512_COMMON +# define CV_AVX512_COMMON 0 +#endif +#ifndef CV_AVX512_KNL +# define CV_AVX512_KNL 0 +#endif +#ifndef CV_AVX512_KNM +# define CV_AVX512_KNM 0 +#endif #ifndef CV_AVX512_SKX # define CV_AVX512_SKX 0 #endif +#ifndef CV_AVX512_CNL +# define CV_AVX512_CNL 0 +#endif +#ifndef CV_AVX512_CEL +# define CV_AVX512_CEL 0 +#endif +#ifndef CV_AVX512_ICL +# define CV_AVX512_ICL 0 +#endif #ifndef CV_NEON # define CV_NEON 0 diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h index ad1339796d..90e0e9b9e3 100644 --- a/modules/core/include/opencv2/core/cv_cpu_helper.h +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h @@ -252,6 +252,69 @@ #endif #define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 1 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON)) +# define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +#else +# define CV_TRY_AVX512_COMMON 0 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...) CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 1 +# define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL)) +# define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +#else +# define CV_TRY_AVX512_KNL 0 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 0 +# define CV_CPU_CALL_AVX512_KNL(fn, args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 1 +# define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM)) +# define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +#else +# define CV_TRY_AVX512_KNM 0 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 0 +# define CV_CPU_CALL_AVX512_KNM(fn, args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX # define CV_TRY_AVX512_SKX 1 # define CV_CPU_FORCE_AVX512_SKX 1 @@ -273,6 +336,69 @@ #endif #define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 1 +# define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL)) +# define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +#else +# define CV_TRY_AVX512_CNL 0 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 0 +# define CV_CPU_CALL_AVX512_CNL(fn, args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CEL +# define CV_TRY_AVX512_CEL 1 +# define CV_CPU_FORCE_AVX512_CEL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CEL 1 +# define CV_CPU_CALL_AVX512_CEL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CEL_(fn, args) return (opt_AVX512_CEL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CEL +# define CV_TRY_AVX512_CEL 1 +# define CV_CPU_FORCE_AVX512_CEL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CEL (cv::checkHardwareSupport(CV_CPU_AVX512_CEL)) +# define CV_CPU_CALL_AVX512_CEL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CEL) return (opt_AVX512_CEL::fn args) +# define CV_CPU_CALL_AVX512_CEL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CEL) return (opt_AVX512_CEL::fn args) +#else +# define CV_TRY_AVX512_CEL 0 +# define CV_CPU_FORCE_AVX512_CEL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CEL 0 +# define CV_CPU_CALL_AVX512_CEL(fn, args) +# define CV_CPU_CALL_AVX512_CEL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CEL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CEL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 1 +# define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL)) +# define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +#else +# define CV_TRY_AVX512_ICL 0 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 0 +# define CV_CPU_CALL_AVX512_ICL(fn, args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...) CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON # define CV_TRY_NEON 1 # define CV_CPU_FORCE_NEON 1 diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index be7da7a4f6..9805d96100 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -249,6 +249,12 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard #define CV_CPU_AVX_512PF 19 #define CV_CPU_AVX_512VBMI 20 #define CV_CPU_AVX_512VL 21 +#define CV_CPU_AVX_512VBMI2 22 +#define CV_CPU_AVX_512VNNI 23 +#define CV_CPU_AVX_512BITALG 24 +#define CV_CPU_AVX_512VPOPCNTDQ 25 +#define CV_CPU_AVX_5124VNNIW 26 +#define CV_CPU_AVX_5124FMAPS 27 #define CV_CPU_NEON 100 @@ -257,6 +263,12 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard // CPU features groups #define CV_CPU_AVX512_SKX 256 +#define CV_CPU_AVX512_COMMON 257 +#define CV_CPU_AVX512_KNL 258 +#define CV_CPU_AVX512_KNM 259 +#define CV_CPU_AVX512_CNL 260 +#define CV_CPU_AVX512_CEL 261 +#define CV_CPU_AVX512_ICL 262 // when adding to this list remember to update the following enum #define CV_HARDWARE_MAX_FEATURE 512 @@ -287,6 +299,12 @@ enum CpuFeatures { CPU_AVX_512PF = 19, CPU_AVX_512VBMI = 20, CPU_AVX_512VL = 21, + CPU_AVX_512VBMI2 = 22, + CPU_AVX_512VNNI = 23, + CPU_AVX_512BITALG = 24, + CPU_AVX_512VPOPCNTDQ= 25, + CPU_AVX_5124VNNIW = 26, + CPU_AVX_5124FMAPS = 27, CPU_NEON = 100, @@ -294,6 +312,12 @@ enum CpuFeatures { CPU_VSX3 = 201, CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL + CPU_AVX512_COMMON = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512 + CPU_AVX512_KNL = 258, //!< Knights Landing with AVX-512F/CD/ER/PF + CPU_AVX512_KNM = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ + CPU_AVX512_CNL = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI + CPU_AVX512_CEL = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI + CPU_AVX512_ICL = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE }; diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index d0ea0e311e..6fa792d6bd 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -356,6 +356,12 @@ struct HWFeatures g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF"; g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI"; g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL"; + g_hwFeatureNames[CPU_AVX_512VBMI2] = "AVX512VBMI2"; + g_hwFeatureNames[CPU_AVX_512VNNI] = "AVX512VNNI"; + g_hwFeatureNames[CPU_AVX_512BITALG] = "AVX512BITALG"; + g_hwFeatureNames[CPU_AVX_512VPOPCNTDQ] = "AVX512VPOPCNTDQ"; + g_hwFeatureNames[CPU_AVX_5124VNNIW] = "AVX5124VNNIW"; + g_hwFeatureNames[CPU_AVX_5124FMAPS] = "AVX5124FMAPS"; g_hwFeatureNames[CPU_NEON] = "NEON"; @@ -363,6 +369,11 @@ struct HWFeatures g_hwFeatureNames[CPU_VSX3] = "VSX3"; g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX"; + g_hwFeatureNames[CPU_AVX512_KNL] = "AVX512-KNL"; + g_hwFeatureNames[CPU_AVX512_KNM] = "AVX512-KNM"; + g_hwFeatureNames[CPU_AVX512_CNL] = "AVX512-CNL"; + g_hwFeatureNames[CPU_AVX512_CEL] = "AVX512-CEL"; + g_hwFeatureNames[CPU_AVX512_ICL] = "AVX512-ICL"; } void initialize(void) @@ -404,15 +415,21 @@ struct HWFeatures have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0; - have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0; - have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0; - have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0; - have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0; - have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0; - have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0; - have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0; - have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0; - have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0; + have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0; + have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0; + have[CV_CPU_AVX_512IFMA] = (cpuid_data_ex[1] & (1<<21)) != 0; + have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0; + have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0; + have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0; + have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0; + have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0; + have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0; + have[CV_CPU_AVX_512VBMI2] = (cpuid_data_ex[2] & (1<<6)) != 0; + have[CV_CPU_AVX_512VNNI] = (cpuid_data_ex[2] & (1<<11)) != 0; + have[CV_CPU_AVX_512BITALG] = (cpuid_data_ex[2] & (1<<12)) != 0; + have[CV_CPU_AVX_512VPOPCNTDQ] = (cpuid_data_ex[2] & (1<<14)) != 0; + have[CV_CPU_AVX_5124VNNIW] = (cpuid_data_ex[3] & (1<<2)) != 0; + have[CV_CPU_AVX_5124FMAPS] = (cpuid_data_ex[3] & (1<<3)) != 0; bool have_AVX_OS_support = true; bool have_AVX512_OS_support = true; @@ -446,15 +463,38 @@ struct HWFeatures have[CV_CPU_AVX_512CD] = false; have[CV_CPU_AVX_512DQ] = false; have[CV_CPU_AVX_512ER] = false; - have[CV_CPU_AVX_512IFMA512] = false; + have[CV_CPU_AVX_512IFMA] = false; have[CV_CPU_AVX_512PF] = false; have[CV_CPU_AVX_512VBMI] = false; have[CV_CPU_AVX_512VL] = false; + have[CV_CPU_AVX_512VBMI2] = false; + have[CV_CPU_AVX_512VNNI] = false; + have[CV_CPU_AVX_512BITALG] = false; + have[CV_CPU_AVX_512VPOPCNTDQ] = false; + have[CV_CPU_AVX_5124VNNIW] = false; + have[CV_CPU_AVX_5124FMAPS] = false; } - if (have[CV_CPU_AVX_512F]) + have[CV_CPU_AVX512_COMMON] = have[CV_CPU_AVX_512F] && have[CV_CPU_AVX_512CD]; + if (have[CV_CPU_AVX512_COMMON]) { - have[CV_CPU_AVX512_SKX] = have[CV_CPU_AVX_512F] & have[CV_CPU_AVX_512CD] & have[CV_CPU_AVX_512BW] & have[CV_CPU_AVX_512DQ] & have[CV_CPU_AVX_512VL]; + have[CV_CPU_AVX512_KNL] = have[CV_CPU_AVX_512ER] && have[CV_CPU_AVX_512PF]; + have[CV_CPU_AVX512_KNM] = have[CV_CPU_AVX512_KNL] && have[CV_CPU_AVX_5124FMAPS] && + have[CV_CPU_AVX_5124VNNIW] && have[CV_CPU_AVX_512VPOPCNTDQ]; + have[CV_CPU_AVX512_SKX] = have[CV_CPU_AVX_512BW] && have[CV_CPU_AVX_512DQ] && have[CV_CPU_AVX_512VL]; + have[CV_CPU_AVX512_CNL] = have[CV_CPU_AVX512_SKX] && have[CV_CPU_AVX_512IFMA] && have[CV_CPU_AVX_512VBMI]; + have[CV_CPU_AVX512_CEL] = have[CV_CPU_AVX512_CNL] && have[CV_CPU_AVX_512VNNI]; + have[CV_CPU_AVX512_ICL] = have[CV_CPU_AVX512_CEL] && have[CV_CPU_AVX_512VBMI2] && + have[CV_CPU_AVX_512BITALG] && have[CV_CPU_AVX_512VPOPCNTDQ]; + } + else + { + have[CV_CPU_AVX512_KNL] = false; + have[CV_CPU_AVX512_KNM] = false; + have[CV_CPU_AVX512_SKX] = false; + have[CV_CPU_AVX512_CNL] = false; + have[CV_CPU_AVX512_CEL] = false; + have[CV_CPU_AVX512_ICL] = false; } } #endif // CV_CPUID_X86 @@ -621,11 +661,14 @@ struct HWFeatures } if (isBaseline) { - if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i)); + if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'." + "This has very limited effect, because code optimizations for this feature are executed unconditionally " + "in the most cases.\n", getHWFeatureNameSafe(i)); } if (!have[i]) { - if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i)); + if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", + getHWFeatureNameSafe(i)); } have[i] = false;