From e397434cb689723eeee4ab0dffe4e8cebd0a710b Mon Sep 17 00:00:00 2001 From: maver1 Date: Wed, 24 Oct 2018 15:02:53 +0300 Subject: [PATCH] Merge pull request #12877 from maver1:3.4 * Updated ICV packages and IPP integration * core(test): minMaxIdx IPP regression test * core(ipp): workaround minMaxIdx problem * core(ipp): workaround meanStdDev() CV_32FC3 buffer overrun * Returned semicolon after CV_INSTRUMENT_REGION_IPP() --- 3rdparty/ippicv/ippicv.cmake | 26 +- cmake/OpenCVFindIPP.cmake | 7 +- cmake/OpenCVFindIPPIW.cmake | 115 ++++---- cmake/templates/cvconfig.h.in | 1 + modules/core/include/opencv2/core/base.hpp | 6 +- modules/core/include/opencv2/core/private.hpp | 4 +- modules/core/src/channels.cpp | 4 +- modules/core/src/copy.cpp | 6 +- modules/core/src/mean.cpp | 5 + modules/core/src/merge.cpp | 2 +- modules/core/src/minmax.cpp | 6 + modules/core/src/split.cpp | 2 +- modules/core/src/system.cpp | 25 +- modules/core/test/test_arithm.cpp | 26 ++ modules/imgproc/src/filter.cpp | 2 +- modules/imgproc/src/resize.cpp | 2 +- modules/objdetect/src/haar.cpp | 259 ++++-------------- 17 files changed, 201 insertions(+), 297 deletions(-) diff --git a/3rdparty/ippicv/ippicv.cmake b/3rdparty/ippicv/ippicv.cmake index a54d8f11ae..ae8748c283 100644 --- a/3rdparty/ippicv/ippicv.cmake +++ b/3rdparty/ippicv/ippicv.cmake @@ -2,37 +2,37 @@ function(download_ippicv root_var) set(${root_var} "" PARENT_SCOPE) # Commit SHA in the opencv_3rdparty repo - set(IPPICV_COMMIT "bdb7bb85f34a8cb0d35e40a81f58da431aa1557a") + set(IPPICV_COMMIT "32e315a5b106a7b89dbed51c28f8120a48b368b4") # Define actual ICV versions if(APPLE) set(OPENCV_ICV_PLATFORM "macosx") set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_mac") if(X86_64) - set(OPENCV_ICV_NAME "ippicv_2017u3_mac_intel64_general_20180518.tgz") - set(OPENCV_ICV_HASH "3ae52b9be0fe73dd45bc5e9429cd3732") + set(OPENCV_ICV_NAME "ippicv_2019_mac_intel64_general_20180723.tgz") + set(OPENCV_ICV_HASH "fe6b2bb75ae0e3f19ad3ae1a31dfa4a2") else() - set(OPENCV_ICV_NAME "ippicv_2017u3_mac_ia32_general_20180518.tgz") - set(OPENCV_ICV_HASH "698660b975b62bee3ef6c5af51e97544") + set(OPENCV_ICV_NAME "ippicv_2019_mac_ia32_general_20180723.tgz") + set(OPENCV_ICV_HASH "b5dfa78c87eb75c64470cbe5ec876f4f") endif() elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86")) set(OPENCV_ICV_PLATFORM "linux") set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx") if(X86_64) - set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_intel64_general_20180518.tgz") - set(OPENCV_ICV_HASH "b7cc351267db2d34b9efa1cd22ff0572") + set(OPENCV_ICV_NAME "ippicv_2019_lnx_intel64_general_20180723.tgz") + set(OPENCV_ICV_HASH "c0bd78adb4156bbf552c1dfe90599607") else() - set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_ia32_general_20180518.tgz") - set(OPENCV_ICV_HASH "ea72de74dae3c604eb6348395366e78e") + set(OPENCV_ICV_NAME "ippicv_2019_lnx_ia32_general_20180723.tgz") + set(OPENCV_ICV_HASH "4f38432c30bfd6423164b7a24bbc98a0") endif() elseif(WIN32 AND NOT ARM) set(OPENCV_ICV_PLATFORM "windows") set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win") if(X86_64) - set(OPENCV_ICV_NAME "ippicv_2017u3_win_intel64_general_20180518.zip") - set(OPENCV_ICV_HASH "915ff92958089ede8ea532d3c4fe7187") + set(OPENCV_ICV_NAME "ippicv_2019_win_intel64_20180723_general.zip") + set(OPENCV_ICV_HASH "1d222685246896fe089f88b8858e4b2f") else() - set(OPENCV_ICV_NAME "ippicv_2017u3_win_ia32_general_20180518.zip") - set(OPENCV_ICV_HASH "928168c2d99ab284047dfcfb7a821d91") + set(OPENCV_ICV_NAME "ippicv_2019_win_ia32_20180723_general.zip") + set(OPENCV_ICV_HASH "0157251a2eb9cd63a3ebc7eed0f3e59e") endif() else() return() diff --git a/cmake/OpenCVFindIPP.cmake b/cmake/OpenCVFindIPP.cmake index 52c8d50638..403d0494b5 100644 --- a/cmake/OpenCVFindIPP.cmake +++ b/cmake/OpenCVFindIPP.cmake @@ -240,11 +240,12 @@ endif() if(NOT DEFINED IPPROOT) include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake") - download_ippicv(IPPROOT) - if(NOT IPPROOT) + download_ippicv(ICV_PACKAGE_ROOT) + if(NOT ICV_PACKAGE_ROOT) return() endif() - ocv_install_3rdparty_licenses(ippicv "${IPPROOT}/readme.htm" "${IPPROOT}/license/ippEULA.txt") + set(IPPROOT "${ICV_PACKAGE_ROOT}/icv") + ocv_install_3rdparty_licenses(ippicv "${IPPROOT}/readme.htm" "${ICV_PACKAGE_ROOT}/EULA.txt") endif() file(TO_CMAKE_PATH "${IPPROOT}" __IPPROOT) diff --git a/cmake/OpenCVFindIPPIW.cmake b/cmake/OpenCVFindIPPIW.cmake index 3b63aa1a0d..79a9fb21be 100644 --- a/cmake/OpenCVFindIPPIW.cmake +++ b/cmake/OpenCVFindIPPIW.cmake @@ -1,17 +1,19 @@ # -# The script to detect Intel(R) Integrated Performance Primitives Integration Wrappers (IPP IW) +# The script to detect Intel(R) Integrated Performance Primitives Integration Wrappers (IPP Integration Wrappers) # installation/package # # # On return this will define: # -# HAVE_IPP_IW - True if Intel IPP found -# IPP_IW_PATH - Root of Intel IPP IW directory -# IPP_IW_LIBRARIES - Intel IPP IW libraries -# IPP_IW_INCLUDES - Intel IPP IW include folder +# HAVE_IPP_IW - True if Intel IPP Integration Wrappers found +# HAVE_IPP_IW_LL - True if Intel IPP Integration Wrappers found with Low Level API header +# IPP_IW_PATH - Root of Intel IPP Integration Wrappers directory +# IPP_IW_LIBRARIES - Intel IPP Integration Wrappers libraries +# IPP_IW_INCLUDES - Intel IPP Integration Wrappers include folder # unset(HAVE_IPP_IW CACHE) +unset(HAVE_IPP_IW_LL CACHE) unset(IPP_IW_PATH) unset(IPP_IW_LIBRARIES) unset(IPP_IW_INCLUDES) @@ -29,13 +31,16 @@ macro(ippiw_debugmsg MESSAGE) endmacro() file(TO_CMAKE_PATH "${IPPROOT}" IPPROOT) -# This function detects Intel IPP IW version by analyzing .h file +# This function detects Intel IPP Integration Wrappers version by analyzing .h file macro(ippiw_setup PATH BUILD) - set(FILE "${PATH}/include/iw/iw_ll.h") # check if Intel IPP IW is OpenCV specific - ippiw_debugmsg("Checking path: ${PATH}") + set(FILE "${PATH}/include/iw/iw_version.h") + if(${BUILD}) + ippiw_debugmsg("Checking sources: ${PATH}") + else() + ippiw_debugmsg("Checking binaries: ${PATH}") + endif() if(EXISTS "${FILE}") - set(FILE "${PATH}/include/iw/iw_version.h") - ippiw_debugmsg("vfile\tok") + ippiw_debugmsg("vfile\tfound") file(STRINGS "${FILE}" IW_VERSION_MAJOR REGEX "IW_VERSION_MAJOR") file(STRINGS "${FILE}" IW_VERSION_MINOR REGEX "IW_VERSION_MINOR") file(STRINGS "${FILE}" IW_VERSION_UPDATE REGEX "IW_VERSION_UPDATE") @@ -56,13 +61,13 @@ macro(ippiw_setup PATH BUILD) math(EXPR IW_MIN_COMPATIBLE_IPP_EXP "${IW_MIN_COMPATIBLE_IPP_MAJOR}*10000 + ${IW_MIN_COMPATIBLE_IPP_MINOR}*100 + ${IW_MIN_COMPATIBLE_IPP_UPDATE}") if((IPP_VERSION_EXP GREATER IW_MIN_COMPATIBLE_IPP_EXP) OR (IPP_VERSION_EXP EQUAL IW_MIN_COMPATIBLE_IPP_EXP)) - ippiw_debugmsg("version\tok") + ippiw_debugmsg("vcheck\tpassed") if(${BUILD}) # check sources if(EXISTS "${PATH}/src/iw_core.c") - ippiw_debugmsg("sources\tok") + ippiw_debugmsg("sources\tyes") set(IPP_IW_PATH "${PATH}") - message(STATUS "found Intel IPP IW sources: ${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE}") + message(STATUS "found Intel IPP Integration Wrappers sources: ${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE}") message(STATUS "at: ${IPP_IW_PATH}") set(IPP_IW_LIBRARY ippiw) @@ -72,7 +77,13 @@ macro(ippiw_setup PATH BUILD) add_subdirectory("${IPP_IW_PATH}/" ${OpenCV_BINARY_DIR}/3rdparty/ippiw) set(HAVE_IPP_IW 1) + set(FILE "${PATH}/include/iw/iw_ll.h") # check if Intel IPP Integration Wrappers is OpenCV specific + if(EXISTS "${FILE}") + set(HAVE_IPP_IW_LL 1) + endif() return() + else() + ippiw_debugmsg("sources\tno") endif() else() # check binaries @@ -82,9 +93,9 @@ macro(ippiw_setup PATH BUILD) set(FILE "${PATH}/lib/ia32/${CMAKE_STATIC_LIBRARY_PREFIX}ipp_iw${CMAKE_STATIC_LIBRARY_SUFFIX}") endif() if(EXISTS ${FILE}) - ippiw_debugmsg("binaries\tok (64=${IPP_X64})") + ippiw_debugmsg("binaries\tyes (64=${IPP_X64})") set(IPP_IW_PATH "${PATH}") - message(STATUS "found Intel IPP IW binaries: ${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE}") + message(STATUS "found Intel IPP Integration Wrappers binaries: ${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE}") message(STATUS "at: ${IPP_IW_PATH}") add_library(ippiw STATIC IMPORTED) @@ -105,81 +116,77 @@ macro(ippiw_setup PATH BUILD) set(HAVE_IPP_IW 1) set(BUILD_IPP_IW 0) + set(FILE "${PATH}/include/iw/iw_ll.h") # check if Intel IPP Integration Wrappers is OpenCV specific + if(EXISTS "${FILE}") + set(HAVE_IPP_IW_LL 1) + endif() return() + else() + ippiw_debugmsg("binaries\tno") endif() endif() + else() + ippiw_debugmsg("vcheck\tfailed") endif() + else() + ippiw_debugmsg("vfile\tnot found") endif() set(HAVE_IPP_IW 0) + set(HAVE_IPP_IW_LL 0) endmacro() -# check os and architecture -if(APPLE) - set(IW_PACKAGE_SUBDIR "ippiw_mac") -elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86")) - set(IW_PACKAGE_SUBDIR "ippiw_lnx") -elseif(WIN32 AND NOT ARM) - set(IW_PACKAGE_SUBDIR "ippiw_win") -else() - message(SEND_ERROR "Improper system for Intel IPP Integrations Wrappers. This message shouldn't appear. Check Intel IPP configurations steps") - return() -endif() - # check build options first if(BUILD_IPP_IW) # custom path if(DEFINED IPPIWROOT) ippiw_setup("${IPPIWROOT}/" 1) - message(STATUS "Can't find Intel IPP IW sources at: ${IPPIWROOT}") + message(STATUS "Can't find Intel IPP Integration Wrappers sources at: ${IPPIWROOT}") endif() # local sources ippiw_setup("${OpenCV_SOURCE_DIR}/3rdparty/ippiw" 1) - set(IPPIW_ROOT "${IPPROOT}/../${IW_PACKAGE_SUBDIR}") + set(IPPIW_ROOT "${IPPROOT}/../iw") ocv_install_3rdparty_licenses(ippiw - "${IPPIW_ROOT}/EULA.txt" - "${IPPIW_ROOT}/redist.txt" - "${IPPIW_ROOT}/support.txt" - "${IPPIW_ROOT}/third-party-programs.txt") + "${IPPIW_ROOT}/../EULA.txt" + "${IPPIW_ROOT}/../support.txt" + "${IPPIW_ROOT}/../third-party-programs.txt") # Package sources - get_filename_component(__PATH "${IPPROOT}/../${IW_PACKAGE_SUBDIR}/" ABSOLUTE) + get_filename_component(__PATH "${IPPROOT}/../iw/" ABSOLUTE) ippiw_setup("${__PATH}" 1) + + # take Intel IPP Integration Wrappers from ICV package + if(NOT HAVE_IPP_ICV) + message(STATUS "Cannot find Intel IPP Integration Wrappers. Checking \"Intel IPP for OpenCV\" package") + set(TEMP_ROOT 0) + include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake") + download_ippicv(TEMP_ROOT) + set(IPPIW_ROOT "${TEMP_ROOT}/iw/") + ocv_install_3rdparty_licenses(ippiw + "${IPPIW_ROOT}/../EULA.txt" + "${IPPIW_ROOT}/../support.txt" + "${IPPIW_ROOT}/../third-party-programs.txt") + + ippiw_setup("${IPPIW_ROOT}" 1) + endif() endif() # custom binaries if(DEFINED IPPIWROOT) ippiw_setup("${IPPIWROOT}/" 0) - message(STATUS "Can't find Intel IPP IW sources at: ${IPPIWROOT}") + message(STATUS "Can't find Intel IPP Integration Wrappers binaries at: ${IPPIWROOT}") endif() # check binaries in IPP folder ippiw_setup("${IPPROOT}/" 0) # check binaries near IPP folder -ippiw_setup("${IPPROOT}/../${IW_PACKAGE_SUBDIR}/" 0) - - -# take Intel IPP IW from ICV package -if(NOT HAVE_IPP_ICV AND BUILD_IPP_IW) - message(STATUS "Cannot find Intel IPP IW. Checking \"Intel IPP for OpenCV\" package") - set(TEMP_ROOT 0) - include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake") - download_ippicv(TEMP_ROOT) - set(IPPIW_ROOT "${TEMP_ROOT}/../${IW_PACKAGE_SUBDIR}") - ocv_install_3rdparty_licenses(ippiw - "${IPPIW_ROOT}/EULA.txt" - "${IPPIW_ROOT}/redist.txt" - "${IPPIW_ROOT}/support.txt" - "${IPPIW_ROOT}/third-party-programs.txt") - - # Package sources. Only sources are compatible with regular Intel IPP - ippiw_setup("${IPPIW_ROOT}" 1) -endif() +ippiw_setup("${IPPROOT}/../iw/" 0) set(HAVE_IPP_IW 0) +set(HAVE_IPP_IW_LL 0) message(STATUS "Cannot find Intel IPP Integration Wrappers, optimizations will be limited. Use IPPIWROOT to set custom location") return() diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 8ffac75862..0fcbeaa2fc 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -105,6 +105,7 @@ #cmakedefine HAVE_IPP #cmakedefine HAVE_IPP_ICV #cmakedefine HAVE_IPP_IW +#cmakedefine HAVE_IPP_IW_LL /* JPEG-2000 codec */ #cmakedefine HAVE_JASPER diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index 98683a2023..a68dca7d56 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -767,9 +767,13 @@ CV_EXPORTS_W void setUseIPP(bool flag); CV_EXPORTS_W String getIppVersion(); // IPP Not-Exact mode. This function may force use of IPP then both IPP and OpenCV provide proper results -// but have internal accuracy differences which have to much direct or indirect impact on accuracy tests. +// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests. +CV_EXPORTS_W bool useIPP_NotExact(); +CV_EXPORTS_W void setUseIPP_NotExact(bool flag); +#if OPENCV_ABI_COMPATIBILITY < 400 CV_EXPORTS_W bool useIPP_NE(); CV_EXPORTS_W void setUseIPP_NE(bool flag); +#endif } // ipp diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 869b34d8a7..26611f04ae 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -194,8 +194,8 @@ T* allocSingleton(size_t count = 1) { return static_cast(allocSingletonBuffe #define IPP_DISABLE_LAB_RGB 1 // breaks OCL accuracy tests #define IPP_DISABLE_RGB_XYZ 1 // big accuracy difference #define IPP_DISABLE_XYZ_RGB 1 // big accuracy difference -#define IPP_DISABLE_HAAR 1 // improper integration/results #define IPP_DISABLE_HOUGH 1 // improper integration/results +#define IPP_DISABLE_FILTER2D_BIG_MASK 1 // different results on masks > 7x7 #define IPP_DISABLE_GAUSSIANBLUR_PARALLEL 1 // not supported (2017u2 / 2017u3) @@ -229,7 +229,9 @@ T* allocSingleton(size_t count = 1) { return static_cast(allocSingletonBuffe # pragma GCC diagnostic ignored "-Wsuggest-override" # endif #include "iw++/iw.hpp" +# ifdef HAVE_IPP_IW_LL #include "iw/iw_ll.h" +# endif # if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5 # pragma GCC diagnostic pop # endif diff --git a/modules/core/src/channels.cpp b/modules/core/src/channels.cpp index 4e464d910c..87d38a5dd5 100644 --- a/modules/core/src/channels.cpp +++ b/modules/core/src/channels.cpp @@ -341,7 +341,7 @@ namespace cv { static bool ipp_extractChannel(const Mat &src, Mat &dst, int channel) { -#ifdef HAVE_IPP_IW +#ifdef HAVE_IPP_IW_LL CV_INSTRUMENT_REGION_IPP(); int srcChannels = src.channels(); @@ -379,7 +379,7 @@ static bool ipp_extractChannel(const Mat &src, Mat &dst, int channel) static bool ipp_insertChannel(const Mat &src, Mat &dst, int channel) { -#ifdef HAVE_IPP_IW +#ifdef HAVE_IPP_IW_LL CV_INSTRUMENT_REGION_IPP(); int srcChannels = src.channels(); diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 7fb850ee38..fc4f363c7b 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -328,7 +328,7 @@ void Mat::copyTo( OutputArray _dst ) const #ifdef HAVE_IPP static bool ipp_copyTo(const Mat &src, Mat &dst, const Mat &mask) { -#ifdef HAVE_IPP_IW +#ifdef HAVE_IPP_IW_LL CV_INSTRUMENT_REGION_IPP(); if(mask.channels() > 1 || mask.depth() != CV_8U) @@ -463,7 +463,7 @@ Mat& Mat::operator = (const Scalar& s) #ifdef HAVE_IPP static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask) { -#ifdef HAVE_IPP_IW +#ifdef HAVE_IPP_IW_LL CV_INSTRUMENT_REGION_IPP(); if(mask.empty()) @@ -1152,7 +1152,7 @@ namespace cv { static bool ipp_copyMakeBorder( Mat &_src, Mat &_dst, int top, int bottom, int left, int right, int _borderType, const Scalar& value ) { -#if defined HAVE_IPP_IW && !IPP_DISABLE_PERF_COPYMAKE +#if defined HAVE_IPP_IW_LL && !IPP_DISABLE_PERF_COPYMAKE CV_INSTRUMENT_REGION_IPP(); ::ipp::IwiBorderSize borderSize(left, top, right, bottom); diff --git a/modules/core/src/mean.cpp b/modules/core/src/mean.cpp index 8542381d49..b488ee230a 100644 --- a/modules/core/src/mean.cpp +++ b/modules/core/src/mean.cpp @@ -674,6 +674,11 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m if (cn > 1) return false; #endif +#if IPP_VERSION_X100 < 201901 + // IPP_DISABLE: 32f C3C functions can read outside of allocated memory + if (cn > 1 && src.depth() == CV_32F) + return false; +#endif size_t total_size = src.total(); int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0; diff --git a/modules/core/src/merge.cpp b/modules/core/src/merge.cpp index c701fd4658..b460d212d1 100644 --- a/modules/core/src/merge.cpp +++ b/modules/core/src/merge.cpp @@ -228,7 +228,7 @@ static MergeFunc getMergeFunc(int depth) namespace cv { static bool ipp_merge(const Mat* mv, Mat& dst, int channels) { -#ifdef HAVE_IPP_IW +#ifdef HAVE_IPP_IW_LL CV_INSTRUMENT_REGION_IPP(); if(channels != 3 && channels != 4) diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp index 7c5b318398..daad21038d 100644 --- a/modules/core/src/minmax.cpp +++ b/modules/core/src/minmax.cpp @@ -8,6 +8,8 @@ #include "opencv2/core/openvx/ovx_defs.hpp" #include "stat.hpp" +#define IPP_DISABLE_MINMAXIDX_MANY_ROWS 1 // see Core_MinMaxIdx.rows_overflow test + /****************************************************************************************\ * minMaxLoc * \****************************************************************************************/ @@ -624,6 +626,10 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI if(src.dims <= 2) { IppiSize size = ippiSize(src.size()); +#if defined(_WIN32) && !defined(_WIN64) && IPP_VERSION_X100 == 201900 && IPP_DISABLE_MINMAXIDX_MANY_ROWS + if (size.height > 65536) + return false; // test: Core_MinMaxIdx.rows_overflow +#endif size.width *= src.channels(); status = ippMinMaxFun(src.ptr(), (int)src.step, size, dataType, pMinVal, pMaxVal, pMinIdx, pMaxIdx, (Ipp8u*)mask.ptr(), (int)mask.step); diff --git a/modules/core/src/split.cpp b/modules/core/src/split.cpp index 5b56fa0c5a..ac1b7217bb 100644 --- a/modules/core/src/split.cpp +++ b/modules/core/src/split.cpp @@ -236,7 +236,7 @@ static SplitFunc getSplitFunc(int depth) namespace cv { static bool ipp_split(const Mat& src, Mat* mv, int channels) { -#ifdef HAVE_IPP_IW +#ifdef HAVE_IPP_IW_LL CV_INSTRUMENT_REGION_IPP(); if(channels != 3 && channels != 4) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index eb987f87a2..ad5a4e3cc0 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -2078,7 +2078,12 @@ public: cv::String env = pIppEnv; if(env.size()) { -#if IPP_VERSION_X100 >= 201703 +#if IPP_VERSION_X100 >= 201900 + const Ipp64u minorFeatures = ippCPUID_MOVBE|ippCPUID_AES|ippCPUID_CLMUL|ippCPUID_ABR|ippCPUID_RDRAND|ippCPUID_F16C| + ippCPUID_ADCOX|ippCPUID_RDSEED|ippCPUID_PREFETCHW|ippCPUID_SHA|ippCPUID_MPX|ippCPUID_AVX512CD|ippCPUID_AVX512ER| + ippCPUID_AVX512PF|ippCPUID_AVX512BW|ippCPUID_AVX512DQ|ippCPUID_AVX512VL|ippCPUID_AVX512VBMI|ippCPUID_AVX512_4FMADDPS| + ippCPUID_AVX512_4VNNIW|ippCPUID_AVX512IFMA; +#elif IPP_VERSION_X100 >= 201703 const Ipp64u minorFeatures = ippCPUID_MOVBE|ippCPUID_AES|ippCPUID_CLMUL|ippCPUID_ABR|ippCPUID_RDRAND|ippCPUID_F16C| ippCPUID_ADCOX|ippCPUID_RDSEED|ippCPUID_PREFETCHW|ippCPUID_SHA|ippCPUID_MPX|ippCPUID_AVX512CD|ippCPUID_AVX512ER| ippCPUID_AVX512PF|ippCPUID_AVX512BW|ippCPUID_AVX512DQ|ippCPUID_AVX512VL|ippCPUID_AVX512VBMI; @@ -2279,7 +2284,7 @@ void setUseIPP(bool flag) #endif } -bool useIPP_NE() +bool useIPP_NotExact() { #ifdef HAVE_IPP CoreTLSData* data = getCoreTlsData().get(); @@ -2293,17 +2298,29 @@ bool useIPP_NE() #endif } -void setUseIPP_NE(bool flag) +void setUseIPP_NotExact(bool flag) { CoreTLSData* data = getCoreTlsData().get(); #ifdef HAVE_IPP - data->useIPP_NE = (getIPPSingleton().useIPP_NE)?flag:false; + data->useIPP_NE = flag; #else CV_UNUSED(flag); data->useIPP_NE = false; #endif } +#if OPENCV_ABI_COMPATIBILITY < 400 +bool useIPP_NE() +{ + return useIPP_NotExact(); +} + +void setUseIPP_NE(bool flag) +{ + setUseIPP_NotExact(flag); +} +#endif + } // namespace ipp } // namespace cv diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index 256493c54b..0eb2318c76 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2298,4 +2298,30 @@ TEST(UMat_Core_DivideRules, type_32f) { testDivide(); } TEST(Core_DivideRules, type_64f) { testDivide(); } TEST(UMat_Core_DivideRules, type_64f) { testDivide(); } + +TEST(Core_MinMaxIdx, rows_overflow) +{ + const int N = 65536 + 1; + const int M = 1; + { + setRNGSeed(123); + Mat m(N, M, CV_32FC1); + randu(m, -100, 100); + double minVal = 0, maxVal = 0; + int minIdx[CV_MAX_DIM] = { 0 }, maxIdx[CV_MAX_DIM] = { 0 }; + cv::minMaxIdx(m, &minVal, &maxVal, minIdx, maxIdx); + + double minVal0 = 0, maxVal0 = 0; + int minIdx0[CV_MAX_DIM] = { 0 }, maxIdx0[CV_MAX_DIM] = { 0 }; + cv::ipp::setUseIPP(false); + cv::minMaxIdx(m, &minVal0, &maxVal0, minIdx0, maxIdx0); + cv::ipp::setUseIPP(true); + + EXPECT_FALSE(fabs(minVal0 - minVal) > 1e-6 || fabs(maxVal0 - maxVal) > 1e-6) << "NxM=" << N << "x" << M << + " min=" << minVal0 << " vs " << minVal << + " max=" << maxVal0 << " vs " << maxVal; + } +} + + }} // namespace diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 5fd3f3eb95..da2370e5a7 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -4585,7 +4585,7 @@ static bool ippFilter2D(int stype, int dtype, int kernel_type, return false; #endif -#if IPP_VERSION_X100 < 201801 +#if IPP_DISABLE_FILTER2D_BIG_MASK // Too big difference compared to OpenCV FFT-based convolution if(kernel_type == CV_32FC1 && (type == ipp16s || type == ipp16u) && (kernel_width > 7 || kernel_height > 7)) return false; diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 7eeefc7098..123ad4fd72 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3321,7 +3321,7 @@ static bool ipp_resize(const uchar * src_data, size_t src_step, int src_width, i return false; // Resize which doesn't match OpenCV exactly - if (!cv::ipp::useIPP_NE()) + if (!cv::ipp::useIPP_NotExact()) { if (ippInter == ippNearest || ippInter == ippSuper || (ippDataType == ipp8u && ippInter == ippLinear)) return false; diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp index 588ea2b46f..9e20111f43 100644 --- a/modules/objdetect/src/haar.cpp +++ b/modules/objdetect/src/haar.cpp @@ -94,7 +94,6 @@ typedef struct CvHidHaarClassifierCascade sqsumtype *pq0, *pq1, *pq2, *pq3; sumtype *p0, *p1, *p2, *p3; - void** ipp_stages; bool is_tree; bool isStumpBased; } CvHidHaarClassifierCascade; @@ -128,23 +127,6 @@ icvReleaseHidHaarClassifierCascade( CvHidHaarClassifierCascade** _cascade ) { if( _cascade && *_cascade ) { -#ifdef HAVE_IPP - CvHidHaarClassifierCascade* cascade = *_cascade; - if( CV_IPP_CHECK_COND && cascade->ipp_stages ) - { - int i; - for( i = 0; i < cascade->count; i++ ) - { - if( cascade->ipp_stages[i] ) -#if IPP_VERSION_X100 < 900 && !IPP_DISABLE_HAAR - ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)cascade->ipp_stages[i] ); -#else - cvFree(&cascade->ipp_stages[i]); -#endif - } - } - cvFree( &cascade->ipp_stages ); -#endif cvFree( _cascade ); } } @@ -153,10 +135,6 @@ icvReleaseHidHaarClassifierCascade( CvHidHaarClassifierCascade** _cascade ) static CvHidHaarClassifierCascade* icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade ) { - CvRect* ipp_features = 0; - float *ipp_weights = 0, *ipp_thresholds = 0, *ipp_val1 = 0, *ipp_val2 = 0; - int* ipp_counts = 0; - CvHidHaarClassifierCascade* out = 0; int i, j, k, l; @@ -312,72 +290,9 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade ) } } -#if defined HAVE_IPP && !IPP_DISABLE_HAAR - int can_use_ipp = CV_IPP_CHECK_COND && (!out->has_tilted_features && !out->is_tree && out->isStumpBased); - - if( can_use_ipp ) - { - int ipp_datasize = cascade->count*sizeof(out->ipp_stages[0]); - float ipp_weight_scale=(float)(1./((orig_window_size.width-icv_object_win_border*2)* - (orig_window_size.height-icv_object_win_border*2))); - - out->ipp_stages = (void**)cvAlloc( ipp_datasize ); - memset( out->ipp_stages, 0, ipp_datasize ); - - ipp_features = (CvRect*)cvAlloc( max_count*3*sizeof(ipp_features[0]) ); - ipp_weights = (float*)cvAlloc( max_count*3*sizeof(ipp_weights[0]) ); - ipp_thresholds = (float*)cvAlloc( max_count*sizeof(ipp_thresholds[0]) ); - ipp_val1 = (float*)cvAlloc( max_count*sizeof(ipp_val1[0]) ); - ipp_val2 = (float*)cvAlloc( max_count*sizeof(ipp_val2[0]) ); - ipp_counts = (int*)cvAlloc( max_count*sizeof(ipp_counts[0]) ); - - for( i = 0; i < cascade->count; i++ ) - { - CvHaarStageClassifier* stage_classifier = cascade->stage_classifier + i; - for( j = 0, k = 0; j < stage_classifier->count; j++ ) - { - CvHaarClassifier* classifier = stage_classifier->classifier + j; - int rect_count = 2 + (classifier->haar_feature->rect[2].r.width != 0); - - ipp_thresholds[j] = classifier->threshold[0]; - ipp_val1[j] = classifier->alpha[0]; - ipp_val2[j] = classifier->alpha[1]; - ipp_counts[j] = rect_count; - - for( l = 0; l < rect_count; l++, k++ ) - { - ipp_features[k] = classifier->haar_feature->rect[l].r; - //ipp_features[k].y = orig_window_size.height - ipp_features[k].y - ipp_features[k].height; - ipp_weights[k] = classifier->haar_feature->rect[l].weight*ipp_weight_scale; - } - } - - if( ippiHaarClassifierInitAlloc_32f( (IppiHaarClassifier_32f**)&out->ipp_stages[i], - (const IppiRect*)ipp_features, ipp_weights, ipp_thresholds, - ipp_val1, ipp_val2, ipp_counts, stage_classifier->count ) < 0 ) - break; - } - - if( i < cascade->count ) - { - for( j = 0; j < i; j++ ) - if( out->ipp_stages[i] ) - ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)out->ipp_stages[i] ); - cvFree( &out->ipp_stages ); - } - } -#endif - cascade->hid_cascade = out; assert( (char*)haar_node_ptr - (char*)out <= datasize ); - cvFree( &ipp_features ); - cvFree( &ipp_weights ); - cvFree( &ipp_thresholds ); - cvFree( &ipp_val1 ); - cvFree( &ipp_val2 ); - cvFree( &ipp_counts ); - return out; } @@ -975,120 +890,54 @@ public: std::vector rejectLevelsLocal; std::vector levelWeightsLocal; -#ifdef HAVE_IPP - if(CV_IPP_CHECK_COND && cascade->hid_cascade->ipp_stages ) - { - IppiRect iequRect = {equRect.x, equRect.y, equRect.width, equRect.height}; - CV_INSTRUMENT_FUN_IPP(ippiRectStdDev_32f_C1R, sum1.ptr(y1), (int)sum1.step, - sqsum1.ptr(y1), (int)sqsum1.step, - norm1->ptr(y1), (int)norm1->step, - ippiSize(ssz.width, ssz.height), iequRect); - - int positive = (ssz.width/ystep)*((ssz.height + ystep-1)/ystep); - - if( ystep == 1 ) - (*mask1) = Scalar::all(1); - else - for( y = y1; y < y2; y++ ) - { - uchar* mask1row = mask1->ptr(y); - memset( mask1row, 0, ssz.width ); - - if( y % ystep == 0 ) - for( x = 0; x < ssz.width; x += ystep ) - mask1row[x] = (uchar)1; - } - - for( int j = 0; j < cascade->count; j++ ) + for( y = y1; y < y2; y += ystep ) + for( x = 0; x < ssz.width; x += ystep ) { - if (CV_INSTRUMENT_FUN_IPP(ippiApplyHaarClassifier_32f_C1R, - sum1.ptr(y1), (int)sum1.step, - norm1->ptr(y1), (int)norm1->step, - mask1->ptr(y1), (int)mask1->step, - ippiSize(ssz.width, ssz.height), &positive, - cascade->hid_cascade->stage_classifier[j].threshold, - (IppiHaarClassifier_32f*)cascade->hid_cascade->ipp_stages[j]) < 0 ) - positive = 0; - if( positive <= 0 ) - break; + double gypWeight; + int result = cvRunHaarClassifierCascadeSum( cascade, cvPoint(x,y), gypWeight, 0 ); + if( rejectLevels ) + { + if( result == 1 ) + result = -1*cascade->count; + if( cascade->count + result < 4 ) + { + vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor), + winSize.width, winSize.height)); + rejectLevelsLocal.push_back(-result); + levelWeightsLocal.push_back(gypWeight); + + if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE) + { + mtx->lock(); + vec->insert(vec->end(), vecLocal.begin(), vecLocal.end()); + rejectLevels->insert(rejectLevels->end(), rejectLevelsLocal.begin(), rejectLevelsLocal.end()); + levelWeights->insert(levelWeights->end(), levelWeightsLocal.begin(), levelWeightsLocal.end()); + mtx->unlock(); + + vecLocal.clear(); + rejectLevelsLocal.clear(); + levelWeightsLocal.clear(); + } + } + } + else + { + if( result > 0 ) + { + vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor), + winSize.width, winSize.height)); + + if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE) + { + mtx->lock(); + vec->insert(vec->end(), vecLocal.begin(), vecLocal.end()); + mtx->unlock(); + + vecLocal.clear(); + } + } + } } - CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); - - if( positive > 0 ) - for( y = y1; y < y2; y += ystep ) - { - uchar* mask1row = mask1->ptr(y); - for( x = 0; x < ssz.width; x += ystep ) - if( mask1row[x] != 0 ) - { - vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor), - winSize.width, winSize.height)); - - if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE) - { - mtx->lock(); - vec->insert(vec->end(), vecLocal.begin(), vecLocal.end()); - mtx->unlock(); - - vecLocal.clear(); - } - if( --positive == 0 ) - break; - } - if( positive == 0 ) - break; - } - } - else -#endif // IPP - for( y = y1; y < y2; y += ystep ) - for( x = 0; x < ssz.width; x += ystep ) - { - double gypWeight; - int result = cvRunHaarClassifierCascadeSum( cascade, cvPoint(x,y), gypWeight, 0 ); - if( rejectLevels ) - { - if( result == 1 ) - result = -1*cascade->count; - if( cascade->count + result < 4 ) - { - vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor), - winSize.width, winSize.height)); - rejectLevelsLocal.push_back(-result); - levelWeightsLocal.push_back(gypWeight); - - if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE) - { - mtx->lock(); - vec->insert(vec->end(), vecLocal.begin(), vecLocal.end()); - rejectLevels->insert(rejectLevels->end(), rejectLevelsLocal.begin(), rejectLevelsLocal.end()); - levelWeights->insert(levelWeights->end(), levelWeightsLocal.begin(), levelWeightsLocal.end()); - mtx->unlock(); - - vecLocal.clear(); - rejectLevelsLocal.clear(); - levelWeightsLocal.clear(); - } - } - } - else - { - if( result > 0 ) - { - vecLocal.push_back(Rect(cvRound(x*factor), cvRound(y*factor), - winSize.width, winSize.height)); - - if (vecLocal.size() >= PARALLEL_LOOP_BATCH_SIZE) - { - mtx->lock(); - vec->insert(vec->end(), vecLocal.begin(), vecLocal.end()); - mtx->unlock(); - - vecLocal.clear(); - } - } - } - } if (rejectLevelsLocal.size()) { @@ -1283,12 +1132,6 @@ cvHaarDetectObjectsForROC( const CvArr* _img, if( flags & CV_HAAR_SCALE_IMAGE ) { CvSize winSize0 = cascade->orig_window_size; -#ifdef HAVE_IPP - int use_ipp = CV_IPP_CHECK_COND && (cascade->hid_cascade->ipp_stages != 0); - - if( use_ipp ) - normImg.reset(cvCreateMat( img->rows, img->cols, CV_32FC1)); -#endif imgSmall.reset(cvCreateMat( img->rows + 1, img->cols + 1, CV_8UC1 )); for( factor = 1; ; factor *= scaleFactor ) @@ -1330,15 +1173,7 @@ cvHaarDetectObjectsForROC( const CvArr* _img, int stripCount = ((sz1.width/ystep)*(sz1.height + ystep-1)/ystep + LOCS_PER_THREAD/2)/LOCS_PER_THREAD; stripCount = std::min(std::max(stripCount, 1), 100); -#ifdef HAVE_IPP - if( use_ipp ) - { - cv::Mat fsum(sum1.rows, sum1.cols, CV_32F, sum1.data.ptr, sum1.step); - cv::cvarrToMat(&sum1).convertTo(fsum, CV_32F, 1, -(1<<24)); - } - else -#endif - cvSetImagesForHaarClassifierCascade( cascade, &sum1, &sqsum1, _tilted, 1. ); + cvSetImagesForHaarClassifierCascade( cascade, &sum1, &sqsum1, _tilted, 1. ); cv::Mat _norm1 = cv::cvarrToMat(&norm1), _mask1 = cv::cvarrToMat(&mask1); cv::parallel_for_(cv::Range(0, stripCount),