diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 24f7fc69b8..4af6e3b443 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -208,8 +208,6 @@ T* allocSingletonNew() { return new(allocSingletonNewBuffer(sizeof(T))) T(); } #define IPP_DISABLE_HOUGH 1 // improper integration/results #define IPP_DISABLE_FILTER2D_BIG_MASK 1 // different results on masks > 7x7 -#define IPP_DISABLE_GAUSSIANBLUR_PARALLEL 1 // not supported (2017u2 / 2017u3) - // Temporary disabled named IPP region. Performance #define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations #define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653) diff --git a/modules/imgproc/CMakeLists.txt b/modules/imgproc/CMakeLists.txt index a74c883cd3..3b45482481 100644 --- a/modules/imgproc/CMakeLists.txt +++ b/modules/imgproc/CMakeLists.txt @@ -12,3 +12,9 @@ ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX) ocv_add_dispatched_file(undistort SSE2 AVX2) ocv_define_module(imgproc opencv_core WRAP java python js) + +ocv_check_environment_variables(OPENCV_IPP_GAUSSIAN_BLUR) +option(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OFF) +if(OPENCV_IPP_GAUSSIAN_BLUR) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1") +endif() diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp index 4ac7df8b4d..65122d20e2 100644 --- a/modules/imgproc/src/smooth.dispatch.cpp +++ b/modules/imgproc/src/smooth.dispatch.cpp @@ -470,9 +470,14 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize, #endif -#if 0 //defined HAVE_IPP +#if defined ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option + +#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1 +#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1 +#define IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH 1 + // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling -#if IPP_DISABLE_GAUSSIANBLUR_PARALLEL +#if IPP_VERSION_X100 < 201900 #define IPP_GAUSSIANBLUR_PARALLEL 0 #else #define IPP_GAUSSIANBLUR_PARALLEL 1 @@ -555,6 +560,14 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, return false; const int threads = ippiSuggestThreadsNum(iwDst, 2); + + if (IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH && (threads == 1 && ksize.width > 25)) + return false; + if (IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH && (threads == 1 && src.type() == CV_16SC4)) + return false; + if (IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH && (threads == 1 && src.type() == CV_32FC4)) + return false; + if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { bool ok; ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); @@ -655,8 +668,6 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, CV_OVX_RUN(true, openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) - //CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); - if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix())) { std::vector fkx, fky; @@ -681,6 +692,11 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, } } +#if defined ENABLE_IPP_GAUSSIAN_BLUR + // IPP is not bit-exact to OpenCV implementation + CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); +#endif + sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType); }