From 11c2ffaf1c4eef3508dafc2bf94c1473d26acb1a Mon Sep 17 00:00:00 2001 From: Pavel Vlasov Date: Fri, 21 Apr 2017 14:52:45 +0300 Subject: [PATCH] Update for IPP for OpenCV 2017u2 integration; Updated integrations for: cv::split cv::merge cv::insertChannel cv::extractChannel cv::Mat::convertTo - now with scaled conversions support cv::LUT - disabled due to performance issues Mat::copyTo Mat::setTo cv::flip cv::copyMakeBorder - currently disabled cv::polarToCart cv::pow - ipp pow function was removed due to performance issues cv::hal::magnitude32f/64f - disabled for <= SSE42, poor performance cv::countNonZero cv::minMaxIdx cv::norm cv::canny - new integration. Disabled for threaded; cv::cornerHarris cv::boxFilter cv::bilateralFilter cv::integral --- 3rdparty/ippicv/ippicv.cmake | 2 +- modules/core/include/opencv2/core/private.hpp | 2 - modules/core/src/convert.cpp | 333 +++++++-- modules/core/src/copy.cpp | 426 ++++------- modules/core/src/mathfuncs.cpp | 109 +-- modules/core/src/mathfuncs_core.dispatch.cpp | 4 +- modules/core/src/matmul.cpp | 87 +-- modules/core/src/stat.cpp | 704 ++++++++++-------- modules/imgproc/src/canny.cpp | 156 ++-- modules/imgproc/src/corner.cpp | 34 +- modules/imgproc/src/filter.cpp | 14 +- modules/imgproc/src/hough.cpp | 16 +- modules/imgproc/src/moments.cpp | 303 ++++---- modules/imgproc/src/pyramids.cpp | 12 +- modules/imgproc/src/smooth.cpp | 318 ++++---- modules/imgproc/src/sumpixels.cpp | 82 +- .../imgproc/test/test_bilateral_filter.cpp | 9 +- modules/imgproc/test/test_houghLines.cpp | 2 +- modules/objdetect/src/cascadedetect.cpp | 10 + modules/objdetect/src/cascadedetect.hpp | 8 + modules/objdetect/src/haar.cpp | 6 +- 21 files changed, 1392 insertions(+), 1245 deletions(-) diff --git a/3rdparty/ippicv/ippicv.cmake b/3rdparty/ippicv/ippicv.cmake index 7cec77c4ee..d601da4bb8 100644 --- a/3rdparty/ippicv/ippicv.cmake +++ b/3rdparty/ippicv/ippicv.cmake @@ -21,7 +21,7 @@ function(download_ippicv root_var) set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_intel64_20170418.tgz") set(OPENCV_ICV_HASH "87cbdeb627415d8e4bc811156289fa3a") else() - set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_ia32_20170406.tgz") + set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_ia32_20170418.tgz") set(OPENCV_ICV_HASH "f2cece00d802d4dea86df52ed095257e") endif() elseif(WIN32 AND NOT ARM) diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 50b5de2ac8..dbe8bb631f 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -217,8 +217,6 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un #define IPP_DISABLE_PERF_MAG_SSE42 1 // cv::magnitude optimizations problem #define IPP_DISABLE_PERF_BOX16S_SSE42 1 // cv::boxFilter optimizations problem -#define IPP_DISABLE_BLOCK 0 // legacy switch - #ifdef HAVE_IPP #include "ippversion.h" #ifndef IPP_VERSION_UPDATE // prior to 7.1 diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index dd3fa641b1..f431033c09 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -85,6 +85,66 @@ static MergeFunc getMergeFunc(int depth) return mergeTab[depth]; } +#ifdef HAVE_IPP +#ifdef HAVE_IPP_IW +extern "C" { +IW_DECL(IppStatus) llwiCopySplit(const void *pSrc, int srcStep, void* const pDstOrig[], int dstStep, + IppiSize size, int typeSize, int channels); +} +#endif + +namespace cv { +static bool ipp_split(const Mat& src, Mat* mv, int channels) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP() + + if(channels != 3 && channels != 4) + return false; + + if(src.dims <= 2) + { + IppiSize size = ippiSize(src.size()); + void *dstPtrs[4] = {NULL}; + size_t dstStep = mv[0].step; + for(int i = 0; i < channels; i++) + { + dstPtrs[i] = mv[i].ptr(); + if(dstStep != mv[i].step) + return false; + } + + return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels) >= 0; + } + else + { + const Mat *arrays[5] = {NULL}; + uchar *ptrs[5] = {NULL}; + arrays[0] = &src; + + for(int i = 1; i < channels; i++) + { + arrays[i] = &mv[i-1]; + } + + NAryMatIterator it(arrays, ptrs); + IppiSize size = { (int)it.size, 1 }; + + for( size_t i = 0; i < it.nplanes; i++, ++it ) + { + if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels) < 0) + return false; + } + return true; + } +#else + CV_UNUSED(src); CV_UNUSED(mv); CV_UNUSED(channels); + return false; +#endif +} +} +#endif + void cv::split(const Mat& src, Mat* mv) { CV_INSTRUMENT_REGION() @@ -96,6 +156,13 @@ void cv::split(const Mat& src, Mat* mv) return; } + for( k = 0; k < cn; k++ ) + { + mv[k].create(src.dims, src.size, depth); + } + + CV_IPP_RUN_FAST(ipp_split(src, mv, cn)); + SplitFunc func = getSplitFunc(depth); CV_Assert( func != 0 ); @@ -108,7 +175,6 @@ void cv::split(const Mat& src, Mat* mv) arrays[0] = &src; for( k = 0; k < cn; k++ ) { - mv[k].create(src.dims, src.size, depth); arrays[k+1] = &mv[k]; } @@ -206,6 +272,66 @@ void cv::split(InputArray _m, OutputArrayOfArrays _mv) split(m, &dst[0]); } +#ifdef HAVE_IPP +#ifdef HAVE_IPP_IW +extern "C" { +IW_DECL(IppStatus) llwiCopyMerge(const void* const pSrc[], int srcStep, void *pDst, int dstStep, + IppiSize size, int typeSize, int channels); +} +#endif + +namespace cv { +static bool ipp_merge(const Mat* mv, Mat& dst, int channels) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP() + + if(channels != 3 && channels != 4) + return false; + + if(mv[0].dims <= 2) + { + IppiSize size = ippiSize(mv[0].size()); + const void *srcPtrs[4] = {NULL}; + size_t srcStep = mv[0].step; + for(int i = 0; i < channels; i++) + { + srcPtrs[i] = mv[i].ptr(); + if(srcStep != mv[i].step) + return false; + } + + return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels) >= 0; + } + else + { + const Mat *arrays[5] = {NULL}; + uchar *ptrs[5] = {NULL}; + arrays[0] = &dst; + + for(int i = 1; i < channels; i++) + { + arrays[i] = &mv[i-1]; + } + + NAryMatIterator it(arrays, ptrs); + IppiSize size = { (int)it.size, 1 }; + + for( size_t i = 0; i < it.nplanes; i++, ++it ) + { + if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels) < 0) + return false; + } + return true; + } +#else + CV_UNUSED(dst); CV_UNUSED(mv); CV_UNUSED(channels); + return false; +#endif +} +} +#endif + void cv::merge(const Mat* mv, size_t n, OutputArray _dst) { CV_INSTRUMENT_REGION() @@ -234,6 +360,8 @@ void cv::merge(const Mat* mv, size_t n, OutputArray _dst) return; } + CV_IPP_RUN_FAST(ipp_merge(mv, dst, (int)n)); + if( !allch1 ) { AutoBuffer pairs(cn*2); @@ -691,6 +819,59 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst, mixChannels(&buf[0], nsrc, &buf[nsrc], ndst, &fromTo[0], fromTo.size()/2); } +#ifdef HAVE_IPP +#ifdef HAVE_IPP_IW +extern "C" { +IW_DECL(IppStatus) llwiCopyMixed(const void *pSrc, int srcStep, int srcChannels, void *pDst, int dstStep, int dstChannels, + IppiSize size, int typeSize, int channelsShift); +} +#endif + +namespace cv +{ +static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP() + + int srcChannels = src.channels(); + int dstChannels = dst.channels(); + + if(src.dims != dst.dims) + return false; + + if(srcChannels == dstChannels || (srcChannels != 1 && dstChannels != 1)) + return false; + + if(src.dims <= 2) + { + IppiSize size = ippiSize(src.size()); + + return CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, src.ptr(), (int)src.step, srcChannels, dst.ptr(), (int)dst.step, dstChannels, size, (int)src.elemSize1(), channel) >= 0; + } + else + { + const Mat *arrays[] = {&dst, NULL}; + uchar *ptrs[2] = {NULL}; + NAryMatIterator it(arrays, ptrs); + + IppiSize size = {(int)it.size, 1}; + + for( size_t i = 0; i < it.nplanes; i++, ++it ) + { + if(CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, ptrs[0], 0, srcChannels, ptrs[1], 0, dstChannels, size, (int)src.elemSize1(), channel) < 0) + return false; + } + return true; + } +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(channel); + return false; +#endif +} +} +#endif + void cv::extractChannel(InputArray _src, OutputArray _dst, int coi) { CV_INSTRUMENT_REGION() @@ -711,6 +892,9 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi) Mat src = _src.getMat(); _dst.create(src.dims, &src.size[0], depth); Mat dst = _dst.getMat(); + + CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi)) + mixChannels(&src, 1, &dst, 1, ch, 1); } @@ -732,6 +916,9 @@ void cv::insertChannel(InputArray _src, InputOutputArray _dst, int coi) } Mat src = _src.getMat(), dst = _dst.getMat(); + + CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi)) + mixChannels(&src, 1, &dst, 1, ch, 1); } @@ -5264,6 +5451,72 @@ void cv::convertFp16( InputArray _src, OutputArray _dst) } } +#ifdef HAVE_IPP +namespace cv +{ +static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP() + + IppDataType srcDepth = ippiGetDataType(src.depth()); + IppDataType dstDepth = ippiGetDataType(dst.depth()); + int channels = src.channels(); + + if(src.dims == 0) + return false; + + ::ipp::IwiImage iwSrc; + ::ipp::IwiImage iwDst; + + try + { + IppHintAlgorithm mode = ippAlgHintFast; + if(dstDepth == ipp64f || + (dstDepth == ipp32f && (srcDepth == ipp32s || srcDepth == ipp64f)) || + (dstDepth == ipp32s && (srcDepth == ipp32s || srcDepth == ipp64f))) + mode = ippAlgHintAccurate; + + if(src.dims <= 2) + { + Size sz = getContinuousSize(src, dst, channels); + + iwSrc.Init(ippiSize(sz), srcDepth, 1, NULL, (void*)src.ptr(), src.step); + iwDst.Init(ippiSize(sz), dstDepth, 1, NULL, (void*)dst.ptr(), dst.step); + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode); + } + else + { + const Mat *arrays[] = {&src, &dst, NULL}; + uchar *ptrs[2] = {NULL}; + NAryMatIterator it(arrays, ptrs); + + iwSrc.Init(ippiSize(it.size, 1), srcDepth, channels); + iwDst.Init(ippiSize(it.size, 1), dstDepth, channels); + + for(size_t i = 0; i < it.nplanes; i++, ++it) + { + iwSrc.m_ptr = ptrs[0]; + iwDst.m_ptr = ptrs[1]; + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode); + } + } + } + catch (::ipp::IwException) + { + return false; + } + return true; +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(alpha); CV_UNUSED(beta); + return false; +#endif +} +} +#endif + void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const { CV_INSTRUMENT_REGION() @@ -5283,6 +5536,13 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta) } Mat src = *this; + if( dims <= 2 ) + _dst.create( size(), _type ); + else + _dst.create( dims, size, _type ); + Mat dst = _dst.getMat(); + + CV_IPP_RUN_FAST(ipp_convertTo(src, dst, alpha, beta )); BinaryFunc func = noScale ? getConvertFunc(sdepth, ddepth) : getConvertScaleFunc(sdepth, ddepth); double scale[] = {alpha, beta}; @@ -5291,15 +5551,12 @@ void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta) if( dims <= 2 ) { - _dst.create( size(), _type ); - Mat dst = _dst.getMat(); Size sz = getContinuousSize(src, dst, cn); + func( src.data, src.step, 0, 0, dst.data, dst.step, sz, scale ); } else { - _dst.create( dims, size, _type ); - Mat dst = _dst.getMat(); const Mat* arrays[] = {&src, &dst, 0}; uchar* ptrs[2]; NAryMatIterator it(arrays, ptrs); @@ -5436,9 +5693,9 @@ static bool openvx_LUT(Mat src, Mat dst, Mat _lut) #endif #if defined(HAVE_IPP) +#if !IPP_DISABLE_PERF_LUT // there are no performance benefits (PR #2653) namespace ipp { -#if IPP_DISABLE_BLOCK // there are no performance benefits (PR #2653) class IppLUTParallelBody_LUTC1 : public ParallelLoopBody { public: @@ -5447,25 +5704,17 @@ public: const Mat& lut_; Mat& dst_; - typedef IppStatus (*IppFn)(const Ipp8u* pSrc, int srcStep, void* pDst, int dstStep, - IppiSize roiSize, const void* pTable, int nBitSize); - IppFn fn; - int width; + size_t elemSize1; IppLUTParallelBody_LUTC1(const Mat& src, const Mat& lut, Mat& dst, bool* _ok) : ok(_ok), src_(src), lut_(lut), dst_(dst) { width = dst.cols * dst.channels(); + elemSize1 = CV_ELEM_SIZE1(dst.depth()); - size_t elemSize1 = CV_ELEM_SIZE1(dst.depth()); - - fn = - elemSize1 == 1 ? (IppFn)ippiLUTPalette_8u_C1R : - elemSize1 == 4 ? (IppFn)ippiLUTPalette_8u32u_C1R : - NULL; - - *ok = (fn != NULL); + CV_DbgAssert(elemSize1 == 1 || elemSize1 == 4); + *ok = true; } void operator()( const cv::Range& range ) const @@ -5481,19 +5730,22 @@ public: IppiSize sz = { width, dst.rows }; - CV_DbgAssert(fn != NULL); - if (fn(src.data, (int)src.step[0], dst.data, (int)dst.step[0], sz, lut_.data, 8) < 0) + if (elemSize1 == 1) { - setIppErrorStatus(); - *ok = false; + if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C1R, (const Ipp8u*)src.data, (int)src.step[0], dst.data, (int)dst.step[0], sz, lut_.data, 8) >= 0) + return; } - CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); + else if (elemSize1 == 4) + { + if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u32u_C1R, (const Ipp8u*)src.data, (int)src.step[0], (Ipp32u*)dst.data, (int)dst.step[0], sz, (Ipp32u*)lut_.data, 8) >= 0) + return; + } + *ok = false; } private: IppLUTParallelBody_LUTC1(const IppLUTParallelBody_LUTC1&); IppLUTParallelBody_LUTC1& operator=(const IppLUTParallelBody_LUTC1&); }; -#endif class IppLUTParallelBody_LUTCN : public ParallelLoopBody { @@ -5527,21 +5779,13 @@ public: { IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C3P3R, lut.ptr(), (int)lut.step[0], lutTable, (int)lut.step[0], sz256); if (status < 0) - { - setIppErrorStatus(); return; - } - CV_IMPL_ADD(CV_IMPL_IPP); } else if (lutcn == 4) { IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C4P4R, lut.ptr(), (int)lut.step[0], lutTable, (int)lut.step[0], sz256); if (status < 0) - { - setIppErrorStatus(); return; - } - CV_IMPL_ADD(CV_IMPL_IPP); } *ok = true; @@ -5568,25 +5812,14 @@ public: if (lutcn == 3) { - if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C3R, - src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], - ippiSize(dst.size()), lutTable, 8) >= 0) - { - CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); + if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C3R, src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], ippiSize(dst.size()), lutTable, 8) >= 0) return; - } } else if (lutcn == 4) { - if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C4R, - src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], - ippiSize(dst.size()), lutTable, 8) >= 0) - { - CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); + if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C4R, src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], ippiSize(dst.size()), lutTable, 8) >= 0) return; - } } - setIppErrorStatus(); *ok = false; } private: @@ -5608,15 +5841,13 @@ static bool ipp_lut(Mat &src, Mat &lut, Mat &dst) Ptr body; size_t elemSize1 = CV_ELEM_SIZE1(dst.depth()); -#if IPP_DISABLE_BLOCK // there are no performance benefits (PR #2653) + if (lutcn == 1) { ParallelLoopBody* p = new ipp::IppLUTParallelBody_LUTC1(src, lut, dst, &ok); body.reset(p); } - else -#endif - if ((lutcn == 3 || lutcn == 4) && elemSize1 == 1) + else if ((lutcn == 3 || lutcn == 4) && elemSize1 == 1) { ParallelLoopBody* p = new ipp::IppLUTParallelBody_LUTCN(src, lut, dst, &ok); body.reset(p); @@ -5635,6 +5866,8 @@ static bool ipp_lut(Mat &src, Mat &lut, Mat &dst) return false; } + +#endif #endif // IPP class LUTParallelBody : public ParallelLoopBody @@ -5703,7 +5936,9 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst ) CV_OVX_RUN(true, openvx_LUT(src, dst, lut)) +#if !IPP_DISABLE_PERF_LUT CV_IPP_RUN(_src.dims() <= 2, ipp_lut(src, lut, dst)); +#endif if (_src.dims() <= 2) { diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index fb8da3bfac..b13687463d 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -49,6 +49,19 @@ #include "precomp.hpp" #include "opencl_kernels_core.hpp" +#ifdef HAVE_IPP_IW +extern "C" { +IW_DECL(IppStatus) llwiCopyMask(const void *pSrc, int srcStep, void *pDst, int dstStep, + IppiSize size, int typeSize, int channels, const Ipp8u *pMask, int maskStep); +IW_DECL(IppStatus) llwiSet(const double *pValue, void *pDst, int dstStep, + IppiSize size, IppDataType dataType, int channels); +IW_DECL(IppStatus) llwiSetMask(const double *pValue, void *pDst, int dstStep, + IppiSize size, IppDataType dataType, int channels, const Ipp8u *pMask, int maskStep); +IW_DECL(IppStatus) llwiCopyMakeBorder(const void *pSrc, IppSizeL srcStep, void *pDst, IppSizeL dstStep, + IppiSizeL size, IppDataType dataType, int channels, IppiBorderSize *pBorderSize, IppiBorderType border, const Ipp64f *pBorderVal); +} +#endif + namespace cv { @@ -326,6 +339,42 @@ void Mat::copyTo( OutputArray _dst ) const } } +#ifdef HAVE_IPP +static bool ipp_copyTo(const Mat &src, Mat &dst, const Mat &mask) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP() + + if(mask.channels() > 1 && mask.depth() != CV_8U) + return false; + + if (src.dims <= 2) + { + IppiSize size = ippiSize(src.size()); + return CV_INSTRUMENT_FUN_IPP(llwiCopyMask, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, size, (int)src.elemSize1(), src.channels(), mask.ptr(), (int)mask.step) >= 0; + } + else + { + const Mat *arrays[] = {&src, &dst, &mask, NULL}; + uchar *ptrs[3] = {NULL}; + NAryMatIterator it(arrays, ptrs); + + IppiSize size = ippiSize(it.size, 1); + + for (size_t i = 0; i < it.nplanes; i++, ++it) + { + if(CV_INSTRUMENT_FUN_IPP(llwiCopyMask, ptrs[0], 0, ptrs[1], 0, size, (int)src.elemSize1(), src.channels(), ptrs[2], 0) < 0) + return false; + } + return true; + } +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(mask); + return false; +#endif +} +#endif + void Mat::copyTo( OutputArray _dst, InputArray _mask ) const { CV_INSTRUMENT_REGION() @@ -340,9 +389,10 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const int cn = channels(), mcn = mask.channels(); CV_Assert( mask.depth() == CV_8U && (mcn == 1 || mcn == cn) ); bool colorMask = mcn > 1; - - size_t esz = colorMask ? elemSize1() : elemSize(); - BinaryFunc copymask = getCopyMaskFunc(esz); + if( dims <= 2 ) + { + CV_Assert( size() == mask.size() ); + } uchar* data0 = _dst.getMat().data; _dst.create( dims, size, type() ); @@ -351,9 +401,13 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const if( dst.data != data0 ) // do not leave dst uninitialized dst = Scalar(0); + CV_IPP_RUN_FAST(ipp_copyTo(*this, dst, mask)) + + size_t esz = colorMask ? elemSize1() : elemSize(); + BinaryFunc copymask = getCopyMaskFunc(esz); + if( dims <= 2 ) { - CV_Assert( size() == mask.size() ); Size sz = getContinuousSize(*this, dst, mask, mcn); copymask(data, step, mask.data, mask.step, dst.data, dst.step, sz, &esz); return; @@ -380,36 +434,6 @@ Mat& Mat::operator = (const Scalar& s) if( is[0] == 0 && is[1] == 0 && is[2] == 0 && is[3] == 0 ) { -#if defined HAVE_IPP && IPP_DISABLE_BLOCK - CV_IPP_CHECK() - { - if (dims <= 2 || isContinuous()) - { - IppiSize roisize = { cols, rows }; - if (isContinuous()) - { - roisize.width = (int)total(); - roisize.height = 1; - - if (ippsZero_8u(data, static_cast(roisize.width * elemSize())) >= 0) - { - CV_IMPL_ADD(CV_IMPL_IPP) - return *this; - } - setIppErrorStatus(); - } - roisize.width *= (int)elemSize(); - - if (ippiSet_8u_C1R(0, data, (int)step, roisize) >= 0) - { - CV_IMPL_ADD(CV_IMPL_IPP) - return *this; - } - setIppErrorStatus(); - } - } -#endif - for( size_t i = 0; i < it.nplanes; i++, ++it ) memset( dptr, 0, elsize ); } @@ -437,89 +461,55 @@ Mat& Mat::operator = (const Scalar& s) return *this; } -#if defined HAVE_IPP -static bool ipp_Mat_setTo(Mat *src, Mat &value, Mat &mask) +#ifdef HAVE_IPP +static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask) { +#ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP() - int cn = src->channels(), depth0 = src->depth(); + if(mask.empty()) + return false; - if (!mask.empty() && (src->dims <= 2 || (src->isContinuous() && mask.isContinuous())) && - (/*depth0 == CV_8U ||*/ depth0 == CV_16U || depth0 == CV_16S || depth0 == CV_32S || depth0 == CV_32F) && - (cn == 1 || cn == 3 || cn == 4)) + if(mask.depth() != CV_8U || mask.channels() > 1) + return false; + + if(dst.channels() > 4) + return false; + + if(dst.dims <= 2) { - uchar _buf[32]; - void * buf = _buf; - convertAndUnrollScalar( value, src->type(), _buf, 1 ); + IppiSize size = ippiSize(dst.size()); + IppDataType dataType = ippiGetDataType(dst.depth()); + ::ipp::IwValue s; + convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1); - IppStatus status = (IppStatus)-1; - IppiSize roisize = { src->cols, src->rows }; - int mstep = (int)mask.step[0], dstep = (int)src->step[0]; - - if (src->isContinuous() && mask.isContinuous()) - { - roisize.width = (int)src->total(); - roisize.height = 1; - } - - if (cn == 1) - { - /*if (depth0 == CV_8U) - status = ippiSet_8u_C1MR(*(Ipp8u *)buf, (Ipp8u *)data, dstep, roisize, mask.data, mstep); - else*/ if (depth0 == CV_16U) - status = CV_INSTRUMENT_FUN_IPP(ippiSet_16u_C1MR, *(Ipp16u *)buf, (Ipp16u *)src->data, dstep, roisize, mask.data, mstep); - else if (depth0 == CV_16S) - status = CV_INSTRUMENT_FUN_IPP(ippiSet_16s_C1MR, *(Ipp16s *)buf, (Ipp16s *)src->data, dstep, roisize, mask.data, mstep); - else if (depth0 == CV_32S) - status = CV_INSTRUMENT_FUN_IPP(ippiSet_32s_C1MR, *(Ipp32s *)buf, (Ipp32s *)src->data, dstep, roisize, mask.data, mstep); - else if (depth0 == CV_32F) - status = CV_INSTRUMENT_FUN_IPP(ippiSet_32f_C1MR, *(Ipp32f *)buf, (Ipp32f *)src->data, dstep, roisize, mask.data, mstep); - } - else if (cn == 3 || cn == 4) - { - -#define IPP_SET(ippfavor, ippcn) \ - do \ - { \ - typedef Ipp##ippfavor ipptype; \ - ipptype ippvalue[4] = { ((ipptype *)buf)[0], ((ipptype *)buf)[1], ((ipptype *)buf)[2], ((ipptype *)buf)[3] }; \ - status = CV_INSTRUMENT_FUN_IPP(ippiSet_##ippfavor##_C##ippcn##MR, ippvalue, (ipptype *)src->data, dstep, roisize, mask.data, mstep); \ - } while ((void)0, 0) - -#define IPP_SET_CN(ippcn) \ - do \ - { \ - if (cn == ippcn) \ - { \ - /*if (depth0 == CV_8U) \ - IPP_SET(8u, ippcn); \ - else*/ if (depth0 == CV_16U) \ - IPP_SET(16u, ippcn); \ - else if (depth0 == CV_16S) \ - IPP_SET(16s, ippcn); \ - else if (depth0 == CV_32S) \ - IPP_SET(32s, ippcn); \ - else if (depth0 == CV_32F) \ - IPP_SET(32f, ippcn); \ - } \ - } while ((void)0, 0) - - IPP_SET_CN(3); - IPP_SET_CN(4); - -#undef IPP_SET_CN -#undef IPP_SET - } - - if (status >= 0) - return true; + return CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, dst.ptr(), (int)dst.step, size, dataType, dst.channels(), mask.ptr(), (int)mask.step) >= 0; } + else + { + const Mat *arrays[] = {&dst, mask.empty()?NULL:&mask, NULL}; + uchar *ptrs[2] = {NULL}; + NAryMatIterator it(arrays, ptrs); + IppiSize size = {(int)it.size, 1}; + IppDataType dataType = ippiGetDataType(dst.depth()); + ::ipp::IwValue s; + convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1); + + for( size_t i = 0; i < it.nplanes; i++, ++it) + { + if(CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, ptrs[0], 0, size, dataType, dst.channels(), ptrs[1], 0) < 0) + return false; + } + return true; + } +#else + CV_UNUSED(dst); CV_UNUSED(_val); CV_UNUSED(mask); return false; +#endif } #endif - Mat& Mat::setTo(InputArray _value, InputArray _mask) { CV_INSTRUMENT_REGION() @@ -532,7 +522,7 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask) CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::MAT )); CV_Assert( mask.empty() || (mask.type() == CV_8U && size == mask.size) ); - CV_IPP_RUN_FAST(ipp_Mat_setTo((cv::Mat*)this, value, mask), *this) + CV_IPP_RUN_FAST(ipp_Mat_setTo_Mat(*this, value, mask), *this) size_t esz = elemSize(); BinaryFunc copymask = getCopyMaskFunc(esz); @@ -707,73 +697,36 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) #endif #if defined HAVE_IPP -static bool ipp_flip( Mat &src, Mat &dst, int flip_mode ) +static bool ipp_flip(Mat &src, Mat &dst, int flip_mode) { +#ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP() - int type = src.type(); - - typedef IppStatus (CV_STDCALL * IppiMirror)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize, IppiAxis flip); - typedef IppStatus (CV_STDCALL * IppiMirrorI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize, IppiAxis flip); - IppiMirror ippiMirror = 0; - IppiMirrorI ippiMirror_I = 0; - - if (src.data == dst.data) - { - CV_SUPPRESS_DEPRECATED_START - ippiMirror_I = - type == CV_8UC1 ? (IppiMirrorI)ippiMirror_8u_C1IR : - type == CV_8UC3 ? (IppiMirrorI)ippiMirror_8u_C3IR : - type == CV_8UC4 ? (IppiMirrorI)ippiMirror_8u_C4IR : - type == CV_16UC1 ? (IppiMirrorI)ippiMirror_16u_C1IR : - type == CV_16UC3 ? (IppiMirrorI)ippiMirror_16u_C3IR : - type == CV_16UC4 ? (IppiMirrorI)ippiMirror_16u_C4IR : - type == CV_16SC1 ? (IppiMirrorI)ippiMirror_16s_C1IR : - type == CV_16SC3 ? (IppiMirrorI)ippiMirror_16s_C3IR : - type == CV_16SC4 ? (IppiMirrorI)ippiMirror_16s_C4IR : - type == CV_32SC1 ? (IppiMirrorI)ippiMirror_32s_C1IR : - type == CV_32SC3 ? (IppiMirrorI)ippiMirror_32s_C3IR : - type == CV_32SC4 ? (IppiMirrorI)ippiMirror_32s_C4IR : - type == CV_32FC1 ? (IppiMirrorI)ippiMirror_32f_C1IR : - type == CV_32FC3 ? (IppiMirrorI)ippiMirror_32f_C3IR : - type == CV_32FC4 ? (IppiMirrorI)ippiMirror_32f_C4IR : 0; - CV_SUPPRESS_DEPRECATED_END - } + IppiAxis ippMode; + if(flip_mode < 0) + ippMode = ippAxsBoth; + else if(flip_mode == 0) + ippMode = ippAxsHorizontal; else - { - ippiMirror = - type == CV_8UC1 ? (IppiMirror)ippiMirror_8u_C1R : - type == CV_8UC3 ? (IppiMirror)ippiMirror_8u_C3R : - type == CV_8UC4 ? (IppiMirror)ippiMirror_8u_C4R : - type == CV_16UC1 ? (IppiMirror)ippiMirror_16u_C1R : - type == CV_16UC3 ? (IppiMirror)ippiMirror_16u_C3R : - type == CV_16UC4 ? (IppiMirror)ippiMirror_16u_C4R : - type == CV_16SC1 ? (IppiMirror)ippiMirror_16s_C1R : - type == CV_16SC3 ? (IppiMirror)ippiMirror_16s_C3R : - type == CV_16SC4 ? (IppiMirror)ippiMirror_16s_C4R : - type == CV_32SC1 ? (IppiMirror)ippiMirror_32s_C1R : - type == CV_32SC3 ? (IppiMirror)ippiMirror_32s_C3R : - type == CV_32SC4 ? (IppiMirror)ippiMirror_32s_C4R : - type == CV_32FC1 ? (IppiMirror)ippiMirror_32f_C1R : - type == CV_32FC3 ? (IppiMirror)ippiMirror_32f_C3R : - type == CV_32FC4 ? (IppiMirror)ippiMirror_32f_C4R : 0; - } - IppiAxis axis = flip_mode == 0 ? ippAxsHorizontal : - flip_mode > 0 ? ippAxsVertical : ippAxsBoth; - IppiSize roisize = { dst.cols, dst.rows }; + ippMode = ippAxsVertical; - if (ippiMirror != 0) + try { - if (CV_INSTRUMENT_FUN_IPP(ippiMirror, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, ippiSize(src.cols, src.rows), axis) >= 0) - return true; + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, &iwSrc, &iwDst, ippMode); } - else if (ippiMirror_I != 0) + catch(::ipp::IwException) { - if (CV_INSTRUMENT_FUN_IPP(ippiMirror_I, dst.ptr(), (int)dst.step, roisize, axis) >= 0) - return true; + return false; } + return true; +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode); return false; +#endif } #endif @@ -1178,7 +1131,41 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int } } +#endif +#ifdef HAVE_IPP +namespace cv { + +static bool ipp_copyMakeBorder( Mat &_src, Mat &_dst, int top, int bottom, + int left, int right, int _borderType, const Scalar& value ) +{ +#if defined HAVE_IPP_IW && !IPP_DISABLE_PERF_COPYMAKE + CV_INSTRUMENT_REGION_IPP() + + ::ipp::IwiBorderSize borderSize(left, top, right, bottom); + ::ipp::IwiSize size(_src.cols, _src.rows); + IppDataType dataType = ippiGetDataType(_src.depth()); + IppiBorderType borderType = ippiGetBorderType(_borderType); + if((int)borderType == -1) + return false; + + if(_src.dims > 2) + return false; + + Rect dstRect(borderSize.borderLeft, borderSize.borderTop, + _dst.cols - borderSize.borderRight - borderSize.borderLeft, + _dst.rows - borderSize.borderBottom - borderSize.borderTop); + Mat subDst = Mat(_dst, dstRect); + Mat *pSrc = &_src; + + return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), &borderSize, borderType, &value[0]) >= 0; +#else + CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(top); CV_UNUSED(bottom); CV_UNUSED(left); CV_UNUSED(right); + CV_UNUSED(_borderType); CV_UNUSED(value); + return false; +#endif +} +} #endif void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom, @@ -1222,120 +1209,7 @@ void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom, borderType &= ~BORDER_ISOLATED; -#if defined HAVE_IPP && IPP_DISABLE_BLOCK - CV_IPP_CHECK() - { - typedef IppStatus (CV_STDCALL * ippiCopyMakeBorder)(const void * pSrc, int srcStep, IppiSize srcRoiSize, void * pDst, - int dstStep, IppiSize dstRoiSize, int topBorderHeight, int leftBorderWidth); - typedef IppStatus (CV_STDCALL * ippiCopyMakeBorderI)(const void * pSrc, int srcDstStep, IppiSize srcRoiSize, IppiSize dstRoiSize, - int topBorderHeight, int leftborderwidth); - typedef IppStatus (CV_STDCALL * ippiCopyConstBorder)(const void * pSrc, int srcStep, IppiSize srcRoiSize, void * pDst, int dstStep, - IppiSize dstRoiSize, int topBorderHeight, int leftBorderWidth, void * value); - - IppiSize srcRoiSize = { src.cols, src.rows }, dstRoiSize = { dst.cols, dst.rows }; - ippiCopyMakeBorder ippFunc = 0; - ippiCopyMakeBorderI ippFuncI = 0; - ippiCopyConstBorder ippFuncConst = 0; - bool inplace = dst.datastart == src.datastart; - - if (borderType == BORDER_CONSTANT) - { - ippFuncConst = - // type == CV_8UC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_8u_C1R : bug in IPP 8.1 - type == CV_16UC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_16u_C1R : - // type == CV_16SC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_16s_C1R : bug in IPP 8.1 - // type == CV_32SC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_32s_C1R : bug in IPP 8.1 - // type == CV_32FC1 ? (ippiCopyConstBorder)ippiCopyConstBorder_32f_C1R : bug in IPP 8.1 - type == CV_8UC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_8u_C3R : - type == CV_16UC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_16u_C3R : - type == CV_16SC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_16s_C3R : - type == CV_32SC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_32s_C3R : - type == CV_32FC3 ? (ippiCopyConstBorder)ippiCopyConstBorder_32f_C3R : - type == CV_8UC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_8u_C4R : - type == CV_16UC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_16u_C4R : - type == CV_16SC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_16s_C4R : - type == CV_32SC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_32s_C4R : - type == CV_32FC4 ? (ippiCopyConstBorder)ippiCopyConstBorder_32f_C4R : 0; - } - else if (borderType == BORDER_WRAP) - { - if (inplace) - { - CV_SUPPRESS_DEPRECATED_START - ippFuncI = - type == CV_32SC1 ? (ippiCopyMakeBorderI)ippiCopyWrapBorder_32s_C1IR : - type == CV_32FC1 ? (ippiCopyMakeBorderI)ippiCopyWrapBorder_32s_C1IR : 0; - CV_SUPPRESS_DEPRECATED_END - } - else - { - ippFunc = - type == CV_32SC1 ? (ippiCopyMakeBorder)ippiCopyWrapBorder_32s_C1R : - type == CV_32FC1 ? (ippiCopyMakeBorder)ippiCopyWrapBorder_32s_C1R : 0; - } - } - else if (borderType == BORDER_REPLICATE) - { - if (inplace) - { - CV_SUPPRESS_DEPRECATED_START - ippFuncI = - type == CV_8UC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_8u_C1IR : - type == CV_16UC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16u_C1IR : - type == CV_16SC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16s_C1IR : - type == CV_32SC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32s_C1IR : - type == CV_32FC1 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32f_C1IR : - type == CV_8UC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_8u_C3IR : - type == CV_16UC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16u_C3IR : - type == CV_16SC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16s_C3IR : - type == CV_32SC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32s_C3IR : - type == CV_32FC3 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32f_C3IR : - type == CV_8UC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_8u_C4IR : - type == CV_16UC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16u_C4IR : - type == CV_16SC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_16s_C4IR : - type == CV_32SC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32s_C4IR : - type == CV_32FC4 ? (ippiCopyMakeBorderI)ippiCopyReplicateBorder_32f_C4IR : 0; - CV_SUPPRESS_DEPRECATED_END - } - else - { - ippFunc = - type == CV_8UC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_8u_C1R : - type == CV_16UC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16u_C1R : - type == CV_16SC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16s_C1R : - type == CV_32SC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32s_C1R : - type == CV_32FC1 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32f_C1R : - type == CV_8UC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_8u_C3R : - type == CV_16UC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16u_C3R : - type == CV_16SC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16s_C3R : - type == CV_32SC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32s_C3R : - type == CV_32FC3 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32f_C3R : - type == CV_8UC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_8u_C4R : - type == CV_16UC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16u_C4R : - type == CV_16SC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_16s_C4R : - type == CV_32SC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32s_C4R : - type == CV_32FC4 ? (ippiCopyMakeBorder)ippiCopyReplicateBorder_32f_C4R : 0; - } - } - - if (ippFunc || ippFuncI || ippFuncConst) - { - uchar scbuf[32]; - scalarToRawData(value, scbuf, type); - - if ( (ippFunc && ippFunc(src.data, (int)src.step, srcRoiSize, dst.data, (int)dst.step, dstRoiSize, top, left) >= 0) || - (ippFuncI && ippFuncI(src.data, (int)src.step, srcRoiSize, dstRoiSize, top, left) >= 0) || - (ippFuncConst && ippFuncConst(src.data, (int)src.step, srcRoiSize, dst.data, (int)dst.step, - dstRoiSize, top, left, scbuf) >= 0)) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return; - } - - setIppErrorStatus(); - } - } -#endif + CV_IPP_RUN_FAST(ipp_copyMakeBorder(src, dst, top, bottom, left, right, borderType, value)) if( borderType != BORDER_CONSTANT ) copyMakeBorder_8u( src.ptr(), src.step, src.size(), diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index b9bdf49eb3..a3dfbd18e8 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -497,6 +497,65 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle, #endif +#ifdef HAVE_IPP +static bool ipp_polarToCart(Mat &mag, Mat &angle, Mat &x, Mat &y) +{ + CV_INSTRUMENT_REGION_IPP() + + int depth = angle.depth(); + if(depth != CV_32F && depth != CV_64F) + return false; + + if(angle.dims <= 2) + { + int len = (int)(angle.cols*angle.channels()); + + if(depth == CV_32F) + { + for (int h = 0; h < angle.rows; h++) + { + if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_32f, (const float*)mag.ptr(h), (const float*)angle.ptr(h), (float*)x.ptr(h), (float*)y.ptr(h), len) < 0) + return false; + } + } + else + { + for (int h = 0; h < angle.rows; h++) + { + if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_64f, (const double*)mag.ptr(h), (const double*)angle.ptr(h), (double*)x.ptr(h), (double*)y.ptr(h), len) < 0) + return false; + } + } + return true; + } + else + { + const Mat *arrays[] = {&mag, &angle, &x, &y, NULL}; + uchar *ptrs[4] = {NULL}; + NAryMatIterator it(arrays, ptrs); + int len = (int)(it.size*angle.channels()); + + if(depth == CV_32F) + { + for (size_t i = 0; i < it.nplanes; i++, ++it) + { + if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_32f, (const float*)ptrs[0], (const float*)ptrs[1], (float*)ptrs[2], (float*)ptrs[3], len) < 0) + return false; + } + } + else + { + for (size_t i = 0; i < it.nplanes; i++, ++it) + { + if(CV_INSTRUMENT_FUN_IPP(ippsPolarToCart_64f, (const double*)ptrs[0], (const double*)ptrs[1], (double*)ptrs[2], (double*)ptrs[3], len) < 0) + return false; + } + } + return true; + } +} +#endif + void polarToCart( InputArray src1, InputArray src2, OutputArray dst1, OutputArray dst2, bool angleInDegrees ) { @@ -514,28 +573,7 @@ void polarToCart( InputArray src1, InputArray src2, dst2.create( Angle.dims, Angle.size, type ); Mat X = dst1.getMat(), Y = dst2.getMat(); -#if defined(HAVE_IPP) - CV_IPP_CHECK() - { - if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees) - { - typedef IppStatus (CV_STDCALL * IppsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase, - void * pDstRe, void * pDstIm, int len); - IppsPolarToCart ippsPolarToCart = - depth == CV_32F ? (IppsPolarToCart)ippsPolarToCart_32f : - depth == CV_64F ? (IppsPolarToCart)ippsPolarToCart_64f : 0; - CV_Assert(ippsPolarToCart != 0); - - IppStatus status = CV_INSTRUMENT_FUN_IPP(ippsPolarToCart, Mag.ptr(), Angle.ptr(), X.ptr(), Y.ptr(), static_cast(cn * X.total())); - if (status >= 0) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return; - } - setIppErrorStatus(); - } - } -#endif + CV_IPP_RUN(!angleInDegrees, ipp_polarToCart(Mag, Angle, X, Y)); const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0}; uchar* ptrs[4]; @@ -1167,11 +1205,6 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst, #endif -static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt32f(src, dst, n); } -static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt64f(src, dst, n); } -static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt32f(src, dst, n); } -static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt64f(src, dst, n); } - void pow( InputArray _src, double power, OutputArray _dst ) { CV_INSTRUMENT_REGION() @@ -1228,8 +1261,8 @@ void pow( InputArray _src, double power, OutputArray _dst ) else if( fabs(fabs(power) - 0.5) < DBL_EPSILON ) { MathFunc func = power < 0 ? - (depth == CV_32F ? (MathFunc)InvSqrt_32f : (MathFunc)InvSqrt_64f) : - (depth == CV_32F ? (MathFunc)Sqrt_32f : (MathFunc)Sqrt_64f); + (depth == CV_32F ? (MathFunc)hal::invSqrt32f : (MathFunc)hal::invSqrt64f) : + (depth == CV_32F ? (MathFunc)hal::sqrt32f : (MathFunc)hal::sqrt64f); for( size_t i = 0; i < it.nplanes; i++, ++it ) func( ptrs[0], ptrs[1], len ); @@ -1261,24 +1294,6 @@ void pow( InputArray _src, double power, OutputArray _dst ) { int bsz = std::min(len - j, blockSize); -#if defined(HAVE_IPP) - CV_IPP_CHECK() - { - IppStatus status = depth == CV_32F ? - CV_INSTRUMENT_FUN_IPP(ippsPowx_32f_A21, (const float*)ptrs[0], (float)power, (float*)ptrs[1], bsz) : - CV_INSTRUMENT_FUN_IPP(ippsPowx_64f_A50, (const double*)ptrs[0], (double)power, (double*)ptrs[1], bsz); - - if (status >= 0) - { - CV_IMPL_ADD(CV_IMPL_IPP); - ptrs[0] += bsz*esz1; - ptrs[1] += bsz*esz1; - continue; - } - setIppErrorStatus(); - } -#endif - if( depth == CV_32F ) { float* x0 = (float*)ptrs[0]; diff --git a/modules/core/src/mathfuncs_core.dispatch.cpp b/modules/core/src/mathfuncs_core.dispatch.cpp index 1a462a0635..b64ec4af42 100644 --- a/modules/core/src/mathfuncs_core.dispatch.cpp +++ b/modules/core/src/mathfuncs_core.dispatch.cpp @@ -44,7 +44,7 @@ void magnitude32f(const float* x, const float* y, float* mag, int len) CV_INSTRUMENT_REGION() CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len); - CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0); + CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0); CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len), CV_CPU_DISPATCH_MODES_ALL); @@ -55,7 +55,7 @@ void magnitude64f(const double* x, const double* y, double* mag, int len) CV_INSTRUMENT_REGION() CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len); - CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0); + CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0); CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len), CV_CPU_DISPATCH_MODES_ALL); diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 8b06193cf6..219089d157 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -3100,18 +3100,8 @@ dotProd_(const T* src1, const T* src2, int len) static double dotProd_8u(const uchar* src1, const uchar* src2, int len) { double r = 0; -#if ARITHM_USE_IPP && IPP_DISABLE_BLOCK - CV_IPP_CHECK() - { - if (0 <= CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, (src1, (int)(len*sizeof(src1[0])), - src2, (int)(len*sizeof(src2[0])), - ippiSize(len, 1), &r))) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return r; - } - setIppErrorStatus(); - } +#if ARITHM_USE_IPP + CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r); #endif int i = 0; @@ -3298,51 +3288,27 @@ static double dotProd_8s(const schar* src1, const schar* src2, int len) static double dotProd_16u(const ushort* src1, const ushort* src2, int len) { -#if (ARITHM_USE_IPP == 1) - CV_IPP_CHECK() - { - double r = 0; - if (0 <= CV_INSTRUMENT_FUN_IPP(ippiDotProd_16u64f_C1R, src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r)) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return r; - } - setIppErrorStatus(); - } +#if ARITHM_USE_IPP + double r = 0; + CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_16u64f_C1R, src1, len*sizeof(ushort), src2, len*sizeof(ushort), ippiSize(len, 1), &r) >= 0, r); #endif return dotProd_(src1, src2, len); } static double dotProd_16s(const short* src1, const short* src2, int len) { -#if (ARITHM_USE_IPP == 1) && (IPP_VERSION_X100 != 900) // bug in IPP 9.0.0 - CV_IPP_CHECK() - { - double r = 0; - if (0 <= CV_INSTRUMENT_FUN_IPP(ippiDotProd_16s64f_C1R, src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r)) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return r; - } - setIppErrorStatus(); - } +#if ARITHM_USE_IPP && (IPP_VERSION_X100 != 900) // bug in IPP 9.0.0 + double r = 0; + CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_16s64f_C1R, src1, len*sizeof(short), src2, len*sizeof(short), ippiSize(len, 1), &r) >= 0, r); #endif return dotProd_(src1, src2, len); } static double dotProd_32s(const int* src1, const int* src2, int len) { -#if (ARITHM_USE_IPP == 1) - CV_IPP_CHECK() - { - double r = 0; - if (0 <= CV_INSTRUMENT_FUN_IPP(ippiDotProd_32s64f_C1R, src1, (int)(len*sizeof(src1[0])), src2, (int)(len*sizeof(src2[0])), ippiSize(len, 1), &r)) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return r; - } - setIppErrorStatus(); - } +#if ARITHM_USE_IPP + double r = 0; + CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_32s64f_C1R, src1, len*sizeof(int), src2, len*sizeof(int), ippiSize(len, 1), &r) >= 0, r); #endif return dotProd_(src1, src2, len); } @@ -3350,19 +3316,13 @@ static double dotProd_32s(const int* src1, const int* src2, int len) static double dotProd_32f(const float* src1, const float* src2, int len) { double r = 0.0; + +#if ARITHM_USE_IPP + CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_32f64f_C1R, src1, len*sizeof(float), src2, len*sizeof(float), ippiSize(len, 1), &r, ippAlgHintFast) >= 0, r); +#endif int i = 0; -#if (ARITHM_USE_IPP == 1) - CV_IPP_CHECK() - { - if (0 <= CV_INSTRUMENT_FUN_IPP(ippsDotProd_32f64f, src1, src2, len, &r)) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return r; - } - setIppErrorStatus(); - } -#elif CV_NEON +#if CV_NEON int len0 = len & -4, blockSize0 = (1 << 13), blockSize; float32x4_t v_zero = vdupq_n_f32(0.0f); CV_DECL_ALIGNED(16) float buf[4]; @@ -3389,18 +3349,11 @@ static double dotProd_32f(const float* src1, const float* src2, int len) static double dotProd_64f(const double* src1, const double* src2, int len) { -#if (ARITHM_USE_IPP == 1) - CV_IPP_CHECK() - { - double r = 0; - if (0 <= CV_INSTRUMENT_FUN_IPP(ippsDotProd_64f, src1, src2, len, &r)) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return r; - } - setIppErrorStatus(); - } +#if ARITHM_USE_IPP + double r = 0; + CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsDotProd_64f, src1, src2, len, &r) >= 0, r); #endif + return dotProd_(src1, src2, len); } diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 3c5fb73de4..587d06d03c 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1309,30 +1309,51 @@ static bool ipp_countNonZero( Mat &src, int &res ) { CV_INSTRUMENT_REGION_IPP() - Ipp32s count = 0; - IppStatus status = ippStsNoErr; + Ipp32s count = 0; + int depth = src.depth(); - int type = src.type(), depth = CV_MAT_DEPTH(type); - IppiSize roiSize = { src.cols, src.rows }; - Ipp32s srcstep = (Ipp32s)src.step; - if (src.isContinuous()) + if(src.dims <= 2) { - roiSize.width = (Ipp32s)src.total(); - roiSize.height = 1; - srcstep = (Ipp32s)src.total() * CV_ELEM_SIZE(type); + IppStatus status; + IppiSize size = {src.cols*src.channels(), src.rows}; + + if(depth == CV_8U) + status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.ptr(), (int)src.step, size, &count, 0, 0); + else if(depth == CV_32F) + status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.ptr(), (int)src.step, size, &count, 0, 0); + else + return false; + + if(status < 0) + return false; + + res = size.width*size.height - count; + } + else + { + IppStatus status; + const Mat *arrays[] = {&src, NULL}; + uchar *ptrs[1] = {NULL}; + NAryMatIterator it(arrays, ptrs); + IppiSize size = {(int)it.size*src.channels(), 1}; + + for (size_t i = 0; i < it.nplanes; i++, ++it) + { + if(depth == CV_8U) + status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.ptr(), (int)src.step, size, &count, 0, 0); + else if(depth == CV_32F) + status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.ptr(), (int)src.step, size, &count, 0, 0); + else + return false; + + if(status < 0) + return false; + + res += (size.width*size.height - count); + } } - if (depth == CV_8U) - status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.data, srcstep, roiSize, &count, 0, 0); - else if (depth == CV_32F) - status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.data, srcstep, roiSize, &count, 0, 0); - - if (status >= 0) - { - res = ((Ipp32s)src.total() - count); - return true; - } - return false; + return true; } } #endif @@ -1356,7 +1377,7 @@ int cv::countNonZero( InputArray _src ) #endif Mat src = _src.getMat(); - CV_IPP_RUN(0 && (_src.dims() <= 2 || _src.isContinuous()), ipp_countNonZero(src, res), res); + CV_IPP_RUN_FAST(ipp_countNonZero(src, res), res); CountNonZeroFunc func = getCountNonZeroTab(src.depth()); CV_Assert( func != 0 ); @@ -2373,109 +2394,273 @@ static bool openvx_minMaxIdx(Mat &src, double* minVal, double* maxVal, int* minI #endif #ifdef HAVE_IPP -static bool ipp_minMaxIdx( Mat &src, double* minVal, double* maxVal, int* minIdx, int* maxIdx, Mat &mask) +static IppStatus ipp_minMaxIndex_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, + float* pMinVal, float* pMaxVal, IppiPoint* pMinIndex, IppiPoint* pMaxIndex, const Ipp8u*, int) { + switch(dataType) + { + case ipp8u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex); + case ipp16u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex); + case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex); + default: return ippStsDataTypeErr; + } +} + +static IppStatus ipp_minMaxIndexMask_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, + float* pMinVal, float* pMaxVal, IppiPoint* pMinIndex, IppiPoint* pMaxIndex, const Ipp8u* pMask, int maskStep) +{ + switch(dataType) + { + case ipp8u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_8u_C1MR, (const Ipp8u*)pSrc, srcStep, pMask, maskStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex); + case ipp16u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_16u_C1MR, (const Ipp16u*)pSrc, srcStep, pMask, maskStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex); + case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_32f_C1MR, (const Ipp32f*)pSrc, srcStep, pMask, maskStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex); + default: return ippStsDataTypeErr; + } +} + +static IppStatus ipp_minMax_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, + float* pMinVal, float* pMaxVal, IppiPoint*, IppiPoint*, const Ipp8u*, int) +{ + IppStatus status; + + switch(dataType) + { +#if IPP_VERSION_X100 > 201701 // wrong min values + case ipp8u: + { + Ipp8u val[2]; + status = CV_INSTRUMENT_FUN_IPP(ippiMinMax_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, &val[0], &val[1]); + *pMinVal = val[0]; + *pMaxVal = val[1]; + return status; + } +#endif + case ipp16u: + { + Ipp16u val[2]; + status = CV_INSTRUMENT_FUN_IPP(ippiMinMax_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, &val[0], &val[1]); + *pMinVal = val[0]; + *pMaxVal = val[1]; + return status; + } + case ipp16s: + { + Ipp16s val[2]; + status = CV_INSTRUMENT_FUN_IPP(ippiMinMax_16s_C1R, (const Ipp16s*)pSrc, srcStep, size, &val[0], &val[1]); + *pMinVal = val[0]; + *pMaxVal = val[1]; + return status; + } + case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinMax_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMinVal, pMaxVal); + default: return ipp_minMaxIndex_wrap(pSrc, srcStep, size, dataType, pMinVal, pMaxVal, NULL, NULL, NULL, 0); + } +} + +static IppStatus ipp_minIdx_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, + float* pMinVal, float*, IppiPoint* pMinIndex, IppiPoint*, const Ipp8u*, int) +{ + IppStatus status; + + switch(dataType) + { + case ipp8u: + { + Ipp8u val; + status = CV_INSTRUMENT_FUN_IPP(ippiMinIndx_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, &val, &pMinIndex->x, &pMinIndex->y); + *pMinVal = val; + return status; + } + case ipp16u: + { + Ipp16u val; + status = CV_INSTRUMENT_FUN_IPP(ippiMinIndx_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, &val, &pMinIndex->x, &pMinIndex->y); + *pMinVal = val; + return status; + } + case ipp16s: + { + Ipp16s val; + status = CV_INSTRUMENT_FUN_IPP(ippiMinIndx_16s_C1R, (const Ipp16s*)pSrc, srcStep, size, &val, &pMinIndex->x, &pMinIndex->y); + *pMinVal = val; + return status; + } + case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinIndx_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMinVal, &pMinIndex->x, &pMinIndex->y); + default: return ipp_minMaxIndex_wrap(pSrc, srcStep, size, dataType, pMinVal, NULL, pMinIndex, NULL, NULL, 0); + } +} + +static IppStatus ipp_maxIdx_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, + float*, float* pMaxVal, IppiPoint*, IppiPoint* pMaxIndex, const Ipp8u*, int) +{ + IppStatus status; + + switch(dataType) + { + case ipp8u: + { + Ipp8u val; + status = CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, &val, &pMaxIndex->x, &pMaxIndex->y); + *pMaxVal = val; + return status; + } + case ipp16u: + { + Ipp16u val; + status = CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, &val, &pMaxIndex->x, &pMaxIndex->y); + *pMaxVal = val; + return status; + } + case ipp16s: + { + Ipp16s val; + status = CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_16s_C1R, (const Ipp16s*)pSrc, srcStep, size, &val, &pMaxIndex->x, &pMaxIndex->y); + *pMaxVal = val; + return status; + } + case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMaxVal, &pMaxIndex->x, &pMaxIndex->y); + default: return ipp_minMaxIndex_wrap(pSrc, srcStep, size, dataType, NULL, pMaxVal, NULL, pMaxIndex, NULL, 0); + } +} + +typedef IppStatus (*IppMinMaxSelector)(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType, + float* pMinVal, float* pMaxVal, IppiPoint* pMinIndex, IppiPoint* pMaxIndex, const Ipp8u* pMask, int maskStep); + +static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minIdx, int* _maxIdx, Mat &mask) +{ +#if IPP_VERSION_X100 >= 700 CV_INSTRUMENT_REGION_IPP() -#if IPP_VERSION_X100 >= 700 - int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - size_t total_size = src.total(); - int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0; - if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) ) + IppStatus status; + IppDataType dataType = ippiGetDataType(src.depth()); + float minVal = 0; + float maxVal = 0; + IppiPoint minIdx = {-1, -1}; + IppiPoint maxIdx = {-1, -1}; + + float *pMinVal = (_minVal)?&minVal:NULL; + float *pMaxVal = (_maxVal)?&maxVal:NULL; + IppiPoint *pMinIdx = (_minIdx)?&minIdx:NULL; + IppiPoint *pMaxIdx = (_maxIdx)?&maxIdx:NULL; + + IppMinMaxSelector ippMinMaxFun = ipp_minMaxIndexMask_wrap; + if(mask.empty()) { - IppiSize sz = { cols * cn, rows }; + if(_maxVal && _maxIdx && !_minVal && !_minIdx) + ippMinMaxFun = ipp_maxIdx_wrap; + else if(!_maxVal && !_maxIdx && _minVal && _minIdx) + ippMinMaxFun = ipp_minIdx_wrap; + else if(_maxVal && !_maxIdx && _minVal && !_minIdx) + ippMinMaxFun = ipp_minMax_wrap; + else + ippMinMaxFun = ipp_minMaxIndex_wrap; + } - if( !mask.empty() ) + if(src.dims <= 2) + { + IppiSize size = ippiSize(src.size()); + size.width *= src.channels(); + + status = ippMinMaxFun(src.ptr(), (int)src.step, size, dataType, pMinVal, pMaxVal, pMinIdx, pMaxIdx, (Ipp8u*)mask.ptr(), (int)mask.step); + if(status < 0 || status == ippStsNoOperation) + return false; + if(_minVal) + *_minVal = minVal; + if(_maxVal) + *_maxVal = maxVal; + if(_minIdx) { - typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int, - IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *); - - CV_SUPPRESS_DEPRECATED_START - ippiMaskMinMaxIndxFuncC1 ippiMinMaxIndx_C1MR = - type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1MR : -#endif - type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR : - type == CV_32FC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : 0; - CV_SUPPRESS_DEPRECATED_END - - if( ippiMinMaxIndx_C1MR ) + if(!mask.empty() && !minIdx.y && !minIdx.x) { - Ipp32f min, max; - IppiPoint minp, maxp; - if( CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 ) - { - if( minVal ) - *minVal = (double)min; - if( maxVal ) - *maxVal = (double)max; - if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.ptr()[0] ) - minp.x = maxp.x = -1; - if( minIdx ) - { - size_t minidx = minp.y * cols + minp.x + 1; - ofs2idx(src, minidx, minIdx); - } - if( maxIdx ) - { - size_t maxidx = maxp.y * cols + maxp.x + 1; - ofs2idx(src, maxidx, maxIdx); - } - return true; - } + _minIdx[0] = -1; + _minIdx[1] = -1; + } + else + { + _minIdx[0] = minIdx.y; + _minIdx[1] = minIdx.x; } } - else + if(_maxIdx) { - typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *); - - CV_SUPPRESS_DEPRECATED_START - ippiMinMaxIndxFuncC1 ippiMinMaxIndx_C1R = -#if IPP_VERSION_X100 != 900 // bug in 9.0.0 avx2 optimization - depth == CV_8U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R : -#endif -#if IPP_VERSION_X100 < 900 - depth == CV_8S ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1R : -#endif - depth == CV_16U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R : -#if IPP_DISABLE_BLOCK && !((defined _MSC_VER && defined _M_IX86) || defined __i386__) - // See bug #4955: the function fails with SEGFAULT when the source matrix contains NANs - // IPPICV version is 9.0.1. - depth == CV_32F ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R : -#endif - 0; - CV_SUPPRESS_DEPRECATED_END - - if( ippiMinMaxIndx_C1R ) + if(!mask.empty() && !maxIdx.y && !maxIdx.x) { - Ipp32f min, max; - IppiPoint minp, maxp; - if( CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_C1R, src.ptr(), (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 ) - { - if( minVal ) - *minVal = (double)min; - if( maxVal ) - *maxVal = (double)max; - if( minIdx ) - { - size_t minidx = minp.y * cols + minp.x + 1; - ofs2idx(src, minidx, minIdx); - } - if( maxIdx ) - { - size_t maxidx = maxp.y * cols + maxp.x + 1; - ofs2idx(src, maxidx, maxIdx); - } - return true; - } + _maxIdx[0] = -1; + _maxIdx[1] = -1; + } + else + { + _maxIdx[0] = maxIdx.y; + _maxIdx[1] = maxIdx.x; } } } + else + { + const Mat *arrays[] = {&src, mask.empty()?NULL:&mask, NULL}; + uchar *ptrs[3] = {NULL}; + NAryMatIterator it(arrays, ptrs); + IppiSize size = ippiSize(it.size*src.channels(), 1); + int srcStep = (int)(size.width*src.elemSize1()); + int maskStep = size.width; + size_t idxPos = 1; + size_t minIdxAll = 0; + size_t maxIdxAll = 0; + float minValAll = IPP_MAXABS_32F; + float maxValAll = -IPP_MAXABS_32F; + + for(size_t i = 0; i < it.nplanes; i++, ++it, idxPos += size.width) + { + status = ippMinMaxFun(ptrs[0], srcStep, size, dataType, pMinVal, pMaxVal, pMinIdx, pMaxIdx, ptrs[1], maskStep); + if(status < 0) + return false; +#if IPP_VERSION_X100 > 201701 + // Zero-mask check, function should return ippStsNoOperation warning + if(status == ippStsNoOperation) + continue; #else + // Crude zero-mask check, waiting for fix in IPP function + if(ptrs[1]) + { + Mat localMask(Size(size.width, 1), CV_8U, ptrs[1], maskStep); + if(!cv::countNonZero(localMask)) + continue; + } #endif + + if(_minVal && minVal < minValAll) + { + minValAll = minVal; + minIdxAll = idxPos+minIdx.x; + } + if(_maxVal && maxVal > maxValAll) + { + maxValAll = maxVal; + maxIdxAll = idxPos+maxIdx.x; + } + } + if(!src.empty() && mask.empty()) + { + if(minIdxAll == 0) + minIdxAll = 1; + if(maxValAll == 0) + maxValAll = 1; + } + + if(_minVal) + *_minVal = minValAll; + if(_maxVal) + *_maxVal = maxValAll; + if(_minIdx) + ofs2idx(src, minIdxAll, _minIdx); + if(_maxIdx) + ofs2idx(src, maxIdxAll, _maxIdx); + } + + return true; +#else CV_UNUSED(src); CV_UNUSED(minVal); CV_UNUSED(maxVal); CV_UNUSED(minIdx); CV_UNUSED(maxIdx); CV_UNUSED(mask); return false; +#endif } #endif @@ -2499,7 +2684,7 @@ void cv::minMaxIdx(InputArray _src, double* minVal, CV_OVX_RUN(true, openvx_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask)) - CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask)) + CV_IPP_RUN_FAST(ipp_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask)) MinMaxIdxFunc func = getMinmaxTab(depth); CV_Assert( func != 0 ); @@ -2837,42 +3022,31 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result) CV_INSTRUMENT_REGION_IPP() #if IPP_VERSION_X100 >= 700 - int cn = src.channels(); size_t total_size = src.total(); int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0; if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous())) - && cols > 0 && (size_t)rows*cols == total_size - && (normType == NORM_INF || normType == NORM_L1 || - normType == NORM_L2 || normType == NORM_L2SQR) ) + && cols > 0 && (size_t)rows*cols == total_size ) { - IppiSize sz = { cols, rows }; - int type = src.type(); if( !mask.empty() ) { + IppiSize sz = { cols, rows }; + int type = src.type(); + typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *); ippiMaskNormFuncC1 ippiNorm_C1MR = normType == NORM_INF ? (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR : -#endif -// type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR : + type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR : type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR : 0) : normType == NORM_L1 ? (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR : -#endif type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR : type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR : -#endif type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR : type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR : 0) : 0; @@ -2885,39 +3059,29 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result) return true; } } -#if IPP_DISABLE_BLOCK typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *); ippiMaskNormFuncC3 ippiNorm_C3CMR = normType == NORM_INF ? (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR : -#endif type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR : type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR : 0) : normType == NORM_L1 ? (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR : -#endif type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR : type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR : -#endif type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR : type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR : 0) : 0; if( ippiNorm_C3CMR ) { Ipp64f norm1, norm2, norm3; - if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, (src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1)) >= 0 && - CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, (src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2)) >= 0 && - CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, (src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3)) >= 0) + if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 && + CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 && + CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0) { Ipp64f norm = normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) : @@ -2928,81 +3092,46 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result) return true; } } -#endif } else { + IppiSize sz = { cols*src.channels(), rows }; + int type = src.depth(); + typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint); typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *); ippiNormFuncHint ippiNormHint = normType == NORM_L1 ? (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R : - type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L1_32f_C3R : - type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L1_32f_C4R : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R : - type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L2_32f_C3R : - type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L2_32f_C4R : 0) : 0; ippiNormFuncNoHint ippiNorm = normType == NORM_INF ? (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R : - type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C3R : - type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C4R : type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R : - type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R : - type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R : type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R : -#if (IPP_VERSION_X100 >= 810) - type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 - type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 -#endif type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R : - type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R : - type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R : 0) : normType == NORM_L1 ? (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R : - type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C3R : - type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C4R : type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R : - type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C3R : - type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C4R : type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R : - type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C3R : - type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C4R : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R : - type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C3R : - type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C4R : type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R : - type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C3R : - type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C4R : type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R : - type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C3R : - type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C4R : 0) : 0; - // Make sure only zero or one version of the function pointer is valid - CV_Assert(!ippiNormHint || !ippiNorm); if( ippiNormHint || ippiNorm ) { - Ipp64f norm_array[4]; - IppStatus ret = ippiNormHint ? CV_INSTRUMENT_FUN_IPP(ippiNormHint, src.ptr(), (int)src.step[0], sz, norm_array, ippAlgHintAccurate) : - CV_INSTRUMENT_FUN_IPP(ippiNorm, src.ptr(), (int)src.step[0], sz, norm_array); + Ipp64f norm; + IppStatus ret = ippiNormHint ? CV_INSTRUMENT_FUN_IPP(ippiNormHint, src.ptr(), (int)src.step[0], sz, &norm, ippAlgHintAccurate) : + CV_INSTRUMENT_FUN_IPP(ippiNorm, src.ptr(), (int)src.step[0], sz, &norm); if( ret >= 0 ) { - Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0]; - for( int i = 1; i < cn; i++ ) - { - norm = - normType == NORM_INF ? std::max(norm, norm_array[i]) : - normType == NORM_L1 ? norm + norm_array[i] : - normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] : - 0; - } - result = (normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm); + result = (normType == NORM_L2SQR) ? norm * norm : norm; return true; } } @@ -3248,53 +3377,38 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra if( normType & CV_RELATIVE ) { normType &= NORM_TYPE_MASK; - CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR || - ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) ); + size_t total_size = src1.total(); int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0; if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous())) - && cols > 0 && (size_t)rows*cols == total_size - && (normType == NORM_INF || normType == NORM_L1 || - normType == NORM_L2 || normType == NORM_L2SQR) ) + && cols > 0 && (size_t)rows*cols == total_size ) { - IppiSize sz = { cols, rows }; - int type = src1.type(); if( !mask.empty() ) { - typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *); - ippiMaskNormRelFuncC1 ippiNormDiff_C1MR = + IppiSize sz = { cols, rows }; + int type = src1.type(); + + typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *); + ippiMaskNormDiffFuncC1 ippiNormRel_C1MR = normType == NORM_INF ? - (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR : -#if IPP_VERSION_X100 < 900 -#ifndef __APPLE__ - type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR : -#endif -#endif - type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR : - type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR : + (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_8u_C1MR : + type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_32f_C1MR : 0) : normType == NORM_L1 ? - (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR : -#if IPP_VERSION_X100 < 900 -#ifndef __APPLE__ - type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR : -#endif -#endif - type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR : - type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR : + (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_8u_C1MR : + type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_32f_C1MR : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? - (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR : -#endif - type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR : - type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR : + (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_8u_C1MR : + type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_32f_C1MR : 0) : 0; - if( ippiNormDiff_C1MR ) + if( ippiNormRel_C1MR ) { Ipp64f norm; - if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 ) + if( CV_INSTRUMENT_FUN_IPP(ippiNormRel_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 ) { result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm); return true; @@ -3303,47 +3417,43 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra } else { - typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *); + IppiSize sz = { cols*src1.channels(), rows }; + int type = src1.depth(); + typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint); - ippiNormRelFuncNoHint ippiNormDiff = + typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *); + ippiNormRelFuncHint ippiNormRelHint = + normType == NORM_L1 ? + (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R : + 0) : + normType == NORM_L2 || normType == NORM_L2SQR ? + (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R : + 0) : 0; + ippiNormRelFuncNoHint ippiNormRel = normType == NORM_INF ? - (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R : - type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R : - type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R : - type == CV_32FC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R : + (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R : + type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R : + type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R : + type == CV_32F ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R : 0) : normType == NORM_L1 ? - (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R : - type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R : - type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R : + (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R : + type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R : + type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? - (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R : - type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R : - type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R : + (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R : + type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R : + type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R : 0) : 0; - ippiNormRelFuncHint ippiNormDiffHint = - normType == NORM_L1 ? - (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R : - 0) : - normType == NORM_L2 || normType == NORM_L2SQR ? - (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R : - 0) : 0; - if (ippiNormDiff) + if( ippiNormRelHint || ippiNormRel ) { Ipp64f norm; - if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm) >= 0 ) + IppStatus ret = ippiNormRelHint ? CV_INSTRUMENT_FUN_IPP(ippiNormRelHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) : + CV_INSTRUMENT_FUN_IPP(ippiNormRel, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm); + if( ret >= 0 ) { - result = (double)norm; - return true; - } - } - if (ippiNormDiffHint) - { - Ipp64f norm; - if( CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 ) - { - result = (double)norm; + result = (normType == NORM_L2SQR) ? norm * norm : norm; return true; } } @@ -3352,47 +3462,32 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra return false; } - normType &= 7; - CV_Assert( normType == NORM_INF || normType == NORM_L1 || - normType == NORM_L2 || normType == NORM_L2SQR || - ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) ); + normType &= NORM_TYPE_MASK; size_t total_size = src1.total(); int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0; if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous())) - && cols > 0 && (size_t)rows*cols == total_size - && (normType == NORM_INF || normType == NORM_L1 || - normType == NORM_L2 || normType == NORM_L2SQR) ) + && cols > 0 && (size_t)rows*cols == total_size ) { - IppiSize sz = { cols, rows }; - int type = src1.type(); if( !mask.empty() ) { + IppiSize sz = { cols, rows }; + int type = src1.type(); + typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *); ippiMaskNormDiffFuncC1 ippiNormDiff_C1MR = normType == NORM_INF ? (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR : -#endif type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR : type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR : 0) : normType == NORM_L1 ? (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR : -#if IPP_VERSION_X100 < 900 -#ifndef __APPLE__ - type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR : -#endif -#endif type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR : type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR : -#endif type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR : type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR : 0) : 0; @@ -3405,30 +3500,20 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra return true; } } -#ifndef __APPLE__ typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *); ippiMaskNormDiffFuncC3 ippiNormDiff_C3CMR = normType == NORM_INF ? (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR : -#endif type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR : type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR : 0) : normType == NORM_L1 ? (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR : -#endif type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR : type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR : -#if IPP_VERSION_X100 < 900 - type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR : -#endif type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR : type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR : 0) : 0; @@ -3448,83 +3533,46 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra return true; } } -#endif } else { + IppiSize sz = { cols*src1.channels(), rows }; + int type = src1.depth(); + typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint); typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *); ippiNormDiffFuncHint ippiNormDiffHint = normType == NORM_L1 ? - (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R : - type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C3R : - type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C4R : + (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? - (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R : - type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C3R : - type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C4R : + (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R : 0) : 0; ippiNormDiffFuncNoHint ippiNormDiff = normType == NORM_INF ? - (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R : - type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C3R : - type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C4R : - type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R : - type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R : - type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R : - type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R : -#if (IPP_VERSION_X100 >= 810) - type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 - type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 -#endif - type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R : - type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R : - type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R : + (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R : + type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R : + type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R : + type == CV_32F ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R : 0) : normType == NORM_L1 ? - (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R : - type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C3R : - type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C4R : - type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R : - type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C3R : - type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C4R : -#if !(IPP_VERSION_X100 == 820 || IPP_VERSION_X100 == 821) // Oct 2014: Accuracy issue with IPP 8.2 / 8.2.1 - type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R : -#endif - type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C3R : - type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C4R : + (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R : + type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R : + type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R : 0) : normType == NORM_L2 || normType == NORM_L2SQR ? - (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R : - type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C3R : - type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C4R : - type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R : - type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C3R : - type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C4R : - type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R : - type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C3R : - type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C4R : + (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R : + type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R : + type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R : 0) : 0; - // Make sure only zero or one version of the function pointer is valid - CV_Assert(!ippiNormDiffHint || !ippiNormDiff); if( ippiNormDiffHint || ippiNormDiff ) { - Ipp64f norm_array[4]; - IppStatus ret = ippiNormDiffHint ? CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) : - CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array); + Ipp64f norm; + IppStatus ret = ippiNormDiffHint ? CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) : + CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm); if( ret >= 0 ) { - Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0]; - for( int i = 1; i < src1.channels(); i++ ) - { - norm = - normType == NORM_INF ? std::max(norm, norm_array[i]) : - normType == NORM_L1 ? norm + norm_array[i] : - normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] : - 0; - } - result = (normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm); + result = (normType == NORM_L2SQR) ? norm * norm : norm; return true; } } diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index d10b5f7dfa..fbbb804dbf 100644 --- a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -51,14 +51,6 @@ #pragma warning( disable: 4127 ) // conditional expression is constant #endif - -#if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700) -#define USE_IPP_CANNY 1 -#else -#define USE_IPP_CANNY 0 -#endif - - namespace cv { @@ -66,73 +58,79 @@ static void CannyImpl(Mat& dx_, Mat& dy_, Mat& _dst, double low_thresh, double h #ifdef HAVE_IPP -template -static bool ippCanny(const Mat& _src, const Mat& dx_, const Mat& dy_, Mat& _dst, float low, float high) +static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst, float low, float high, bool L2gradient, int aperture_size) { +#ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP() -#if USE_IPP_CANNY - if (!useCustomDeriv && _src.isSubmatrix()) - return false; // IPP Sobel doesn't support transparent ROI border +#if IPP_DISABLE_PERF_CANNY_MT + if(cv::getNumThreads()>1) + return false; +#endif - int size = 0, size1 = 0; - IppiSize roi = { _src.cols, _src.rows }; + ::ipp::IwiSize size(dst.cols, dst.rows); + IppDataType type = ippiGetDataType(dst.depth()); + int channels = dst.channels(); + IppNormType norm = (L2gradient)?ippNormL2:ippNormL1; - if (ippiCannyGetSize(roi, &size) < 0) + if(size.width <= 3 || size.height <= 3) return false; - if (!useCustomDeriv) + if(channels != 1) + return false; + + if(type != ipp8u) + return false; + + if(src.empty()) { -#if IPP_VERSION_X100 < 900 - if (ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1) < 0) - return false; - size = std::max(size, size1); - if (ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1) < 0) - return false; -#else - if (ippiFilterSobelNegVertBorderGetBufferSize(roi, ippMskSize3x3, ipp8u, ipp16s, 1, &size1) < 0) - return false; - size = std::max(size, size1); - if (ippiFilterSobelHorizBorderGetBufferSize(roi, ippMskSize3x3, ipp8u, ipp16s, 1, &size1) < 0) - return false; -#endif - size = std::max(size, size1); - } + try + { + ::ipp::IwiImage iwSrcDx; + ::ipp::IwiImage iwSrcDy; + ::ipp::IwiImage iwDst; - AutoBuffer buf(size + 64); - uchar* buffer = alignPtr((uchar*)buf, 32); + ippiGetImage(dx_, iwSrcDx); + ippiGetImage(dy_, iwSrcDy); + ippiGetImage(dst, iwDst); - Mat dx, dy; - if (!useCustomDeriv) - { - Mat _dx(_src.rows, _src.cols, CV_16S); - if( CV_INSTRUMENT_FUN_IPP(ippiFilterSobelNegVertBorder_8u16s_C1R, _src.ptr(), (int)_src.step, - _dx.ptr(), (int)_dx.step, roi, - ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 ) + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, &iwSrcDx, &iwSrcDy, &iwDst, norm, low, high); + } + catch (::ipp::IwException ex) + { return false; - - Mat _dy(_src.rows, _src.cols, CV_16S); - if( CV_INSTRUMENT_FUN_IPP(ippiFilterSobelHorizBorder_8u16s_C1R, _src.ptr(), (int)_src.step, - _dy.ptr(), (int)_dy.step, roi, - ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 ) - return false; - - swap(dx, _dx); - swap(dy, _dy); + } } else { - dx = dx_; - dy = dy_; + IppiMaskSize kernel; + + if(aperture_size == 3) + kernel = ippMskSize3x3; + else if(aperture_size == 5) + kernel = ippMskSize5x5; + else + return false; + + try + { + ::ipp::IwiImage iwSrc; + ::ipp::IwiImage iwDst; + + ippiGetImage(src, iwSrc); + ippiGetImage(dst, iwDst); + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, &iwSrc, &iwDst, ippFilterSobel, kernel, norm, low, high, ippBorderRepl); + } + catch (::ipp::IwException) + { + return false; + } } - if( CV_INSTRUMENT_FUN_IPP(ippiCanny_16s8u_C1R, dx.ptr(), (int)dx.step, - dy.ptr(), (int)dy.step, - _dst.ptr(), (int)_dst.step, roi, low, high, buffer) < 0 ) - return false; return true; #else - CV_UNUSED(_src); CV_UNUSED(dx_); CV_UNUSED(dy_); CV_UNUSED(_dst); CV_UNUSED(low); CV_UNUSED(high); + CV_UNUSED(src); CV_UNUSED(dx_); CV_UNUSED(dy_); CV_UNUSED(dst); CV_UNUSED(low); CV_UNUSED(high); CV_UNUSED(L2gradient); CV_UNUSED(aperture_size); return false; #endif } @@ -318,6 +316,8 @@ public: // In sobel transform we calculate ksize2 extra lines for the first and last rows of each slice // because IPPDerivSobel expects only isolated ROIs, in contrast with the opencv version which // uses the pixels outside of the ROI to form a border. + // + // TODO: statement above is not true anymore, so adjustments may be required int ksize2 = aperture_size / 2; // If Scharr filter: aperture_size is 3 and ksize2 is 1 if(aperture_size == -1) @@ -882,18 +882,18 @@ void Canny( InputArray _src, OutputArray _dst, return; #endif - CV_IPP_RUN(USE_IPP_CANNY && (aperture_size == 3 && !L2gradient && 1 == cn), ippCanny(src, Mat(), Mat(), dst, (float)low_thresh, (float)high_thresh)) + CV_IPP_RUN_FAST(ipp_Canny(src, Mat(), Mat(), dst, (float)low_thresh, (float)high_thresh, L2gradient, aperture_size)) -if (L2gradient) -{ - low_thresh = std::min(32767.0, low_thresh); - high_thresh = std::min(32767.0, high_thresh); + if (L2gradient) + { + low_thresh = std::min(32767.0, low_thresh); + high_thresh = std::min(32767.0, high_thresh); - if (low_thresh > 0) low_thresh *= low_thresh; - if (high_thresh > 0) high_thresh *= high_thresh; -} -int low = cvFloor(low_thresh); -int high = cvFloor(high_thresh); + if (low_thresh > 0) low_thresh *= low_thresh; + if (high_thresh > 0) high_thresh *= high_thresh; + } + int low = cvFloor(low_thresh); + int high = cvFloor(high_thresh); ptrdiff_t mapstep = src.cols + 2; AutoBuffer buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int)); @@ -938,15 +938,15 @@ int high = cvFloor(high_thresh); { m = borderPeaksParallel.front(); borderPeaksParallel.pop(); - if (!m[-1]) CANNY_PUSH_SERIAL(m - 1); - if (!m[1]) CANNY_PUSH_SERIAL(m + 1); - if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1); - if (!m[-mapstep]) CANNY_PUSH_SERIAL(m - mapstep); - if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1); - if (!m[mapstep-1]) CANNY_PUSH_SERIAL(m + mapstep - 1); - if (!m[mapstep]) CANNY_PUSH_SERIAL(m + mapstep); - if (!m[mapstep+1]) CANNY_PUSH_SERIAL(m + mapstep + 1); -} + if (!m[-1]) CANNY_PUSH_SERIAL(m - 1); + if (!m[1]) CANNY_PUSH_SERIAL(m + 1); + if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1); + if (!m[-mapstep]) CANNY_PUSH_SERIAL(m - mapstep); + if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1); + if (!m[mapstep-1]) CANNY_PUSH_SERIAL(m + mapstep - 1); + if (!m[mapstep]) CANNY_PUSH_SERIAL(m + mapstep); + if (!m[mapstep+1]) CANNY_PUSH_SERIAL(m + mapstep + 1); + } parallel_for_(Range(0, dst.rows), finalPass(map, dst, mapstep), dst.total()/(double)(1<<16)); } @@ -955,6 +955,8 @@ void Canny( InputArray _dx, InputArray _dy, OutputArray _dst, double low_thresh, double high_thresh, bool L2gradient ) { + CV_INSTRUMENT_REGION() + CV_Assert(_dx.dims() == 2); CV_Assert(_dx.type() == CV_16SC1 || _dx.type() == CV_16SC3); CV_Assert(_dy.type() == _dx.type()); @@ -975,7 +977,7 @@ void Canny( InputArray _dx, InputArray _dy, OutputArray _dst, Mat dx = _dx.getMat(); Mat dy = _dy.getMat(); - CV_IPP_RUN(USE_IPP_CANNY && (!L2gradient && 1 == cn), ippCanny(Mat(), dx, dy, dst, (float)low_thresh, (float)high_thresh)) + CV_IPP_RUN_FAST(ipp_Canny(Mat(), dx, dy, dst, (float)low_thresh, (float)high_thresh, L2gradient, 0)) if (cn > 1) { diff --git a/modules/imgproc/src/corner.cpp b/modules/imgproc/src/corner.cpp index da2e08ddb6..2f37ff6c90 100644 --- a/modules/imgproc/src/corner.cpp +++ b/modules/imgproc/src/corner.cpp @@ -604,9 +604,9 @@ namespace cv { static bool ipp_cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType ) { +#if IPP_VERSION_X100 >= 800 CV_INSTRUMENT_REGION_IPP() -#if IPP_VERSION_X100 >= 800 Mat src = _src.getMat(); _dst.create( src.size(), CV_32FC1 ); Mat dst = _dst.getMat(); @@ -703,15 +703,11 @@ void cv::cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, in #if defined(HAVE_IPP) namespace cv { -static bool ipp_cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksize, double k, int borderType ) +static bool ipp_cornerHarris( Mat &src, Mat &dst, int blockSize, int ksize, double k, int borderType ) { +#if IPP_VERSION_X100 >= 810 CV_INSTRUMENT_REGION_IPP() -#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK - Mat src = _src.getMat(); - _dst.create( src.size(), CV_32FC1 ); - Mat dst = _dst.getMat(); - { int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int borderTypeNI = borderType & ~BORDER_ISOLATED; @@ -734,17 +730,17 @@ static bool ipp_cornerHarris( InputArray _src, OutputArray _dst, int blockSize, if (ippiHarrisCornerGetBufferSize(roisize, masksize, blockSize, datatype, cn, &bufsize) >= 0) { - Ipp8u * buffer = ippsMalloc_8u(bufsize); + Ipp8u * buffer = (Ipp8u*)CV_IPP_MALLOC(bufsize); IppiDifferentialKernel filterType = ksize > 0 ? ippFilterSobel : ippFilterScharr; IppiBorderType borderTypeIpp = borderTypeNI == BORDER_CONSTANT ? ippBorderConst : ippBorderRepl; IppStatus status = (IppStatus)-1; if (depth == CV_8U) - status = CV_INSTRUMENT_FUN_IPP(ippiHarrisCorner_8u32f_C1R,((const Ipp8u *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize, - filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer)); + status = CV_INSTRUMENT_FUN_IPP(ippiHarrisCorner_8u32f_C1R, (const Ipp8u *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize, + filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer); else if (depth == CV_32F) - status = CV_INSTRUMENT_FUN_IPP(ippiHarrisCorner_32f_C1R,((const Ipp32f *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize, - filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer)); + status = CV_INSTRUMENT_FUN_IPP(ippiHarrisCorner_32f_C1R, (const Ipp32f *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize, + filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer); ippsFree(buffer); if (status >= 0) @@ -756,7 +752,7 @@ static bool ipp_cornerHarris( InputArray _src, OutputArray _dst, int blockSize, } } #else - CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(blockSize); CV_UNUSED(ksize); CV_UNUSED(k); CV_UNUSED(borderType); + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(blockSize); CV_UNUSED(ksize); CV_UNUSED(k); CV_UNUSED(borderType); #endif return false; } @@ -770,19 +766,17 @@ void cv::cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksi CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, k, borderType, HARRIS)) + Mat src = _src.getMat(); + _dst.create( src.size(), CV_32FC1 ); + Mat dst = _dst.getMat(); + #ifdef HAVE_IPP int borderTypeNI = borderType & ~BORDER_ISOLATED; bool isolated = (borderType & BORDER_ISOLATED) != 0; #endif CV_IPP_RUN(((ksize == 3 || ksize == 5) && (_src.type() == CV_8UC1 || _src.type() == CV_32FC1) && (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE) && CV_MAT_CN(_src.type()) == 1 && - (!_src.isSubmatrix() || isolated)) && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK, ipp_cornerHarris( _src, _dst, blockSize, ksize, k, borderType )); - - - Mat src = _src.getMat(); - _dst.create( src.size(), CV_32FC1 ); - Mat dst = _dst.getMat(); - + (!_src.isSubmatrix() || isolated)) && IPP_VERSION_X100 >= 810, ipp_cornerHarris( src, dst, blockSize, ksize, k, borderType )); cornerEigenValsVecs( src, dst, blockSize, ksize, HARRIS, k, borderType ); } diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 51d0bc4f51..10d8315772 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -1360,14 +1360,14 @@ struct RowVec_32f { kernel = _kernel; haveSSE = checkHardwareSupport(CV_CPU_SSE); -#if defined USE_IPP_SEP_FILTERS && IPP_DISABLE_BLOCK +#if defined USE_IPP_SEP_FILTERS bufsz = -1; #endif } int operator()(const uchar* _src, uchar* _dst, int width, int cn) const { -#if defined USE_IPP_SEP_FILTERS && IPP_DISABLE_BLOCK +#if defined USE_IPP_SEP_FILTERS CV_IPP_CHECK() { int ret = ippiOperator(_src, _dst, width, cn); @@ -1408,7 +1408,7 @@ struct RowVec_32f Mat kernel; bool haveSSE; -#if defined USE_IPP_SEP_FILTERS && IPP_DISABLE_BLOCK +#if defined USE_IPP_SEP_FILTERS private: mutable int bufsz; int ippiOperator(const uchar* _src, uchar* _dst, int width, int cn) const @@ -1436,10 +1436,10 @@ private: float borderValue[] = {0.f, 0.f, 0.f}; // here is the trick. IPP needs border type and extrapolates the row. We did it already. // So we pass anchor=0 and ignore the right tail of results since they are incorrect there. - if( (cn == 1 && CV_INSTRUMENT_FUN_IPP(ippiFilterRowBorderPipeline_32f_C1R,(src, step, &dst, roisz, _kx, _ksize, 0, - ippBorderRepl, borderValue[0], bufptr)) < 0) || - (cn == 3 && CV_INSTRUMENT_FUN_IPP(ippiFilterRowBorderPipeline_32f_C3R,(src, step, &dst, roisz, _kx, _ksize, 0, - ippBorderRepl, borderValue, bufptr)) < 0)) + if( (cn == 1 && CV_INSTRUMENT_FUN_IPP(ippiFilterRowBorderPipeline_32f_C1R, src, step, &dst, roisz, _kx, _ksize, 0, + ippBorderRepl, borderValue[0], bufptr) < 0) || + (cn == 3 && CV_INSTRUMENT_FUN_IPP(ippiFilterRowBorderPipeline_32f_C3R, src, step, &dst, roisz, _kx, _ksize, 0, + ippBorderRepl, borderValue, bufptr) < 0)) { setIppErrorStatus(); return 0; diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index 13c11dbee3..6850499e65 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -96,7 +96,7 @@ HoughLinesStandard( const Mat& img, float rho, float theta, int numangle = cvRound((max_theta - min_theta) / theta); int numrho = cvRound(((width + height) * 2 + 1) / rho); -#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK +#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_HOUGH CV_IPP_CHECK() { IppiSize srcSize = { width, height }; @@ -108,8 +108,8 @@ HoughLinesStandard( const Mat& img, float rho, float theta, int linesCount = 0; lines.resize(ipp_linesMax); IppStatus ok = ippiHoughLineGetSize_8u_C1R(srcSize, delta, ipp_linesMax, &bufferSize); - Ipp8u* buffer = ippsMalloc_8u(bufferSize); - if (ok >= 0) {ok = CV_INSTRUMENT_FUN_IPP(ippiHoughLine_Region_8u32f_C1R,(image, step, srcSize, (IppPointPolar*) &lines[0], dstRoi, ipp_linesMax, &linesCount, delta, threshold, buffer))}; + Ipp8u* buffer = ippsMalloc_8u_L(bufferSize); + if (ok >= 0) {ok = CV_INSTRUMENT_FUN_IPP(ippiHoughLine_Region_8u32f_C1R, image, step, srcSize, (IppPointPolar*) &lines[0], dstRoi, ipp_linesMax, &linesCount, delta, threshold, buffer);}; ippsFree(buffer); if (ok >= 0) { @@ -429,7 +429,7 @@ HoughLinesProbabilistic( Mat& image, int numangle = cvRound(CV_PI / theta); int numrho = cvRound(((width + height) * 2 + 1) / rho); -#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK +#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_HOUGH CV_IPP_CHECK() { IppiSize srcSize = { width, height }; @@ -440,12 +440,12 @@ HoughLinesProbabilistic( Mat& image, int linesCount = 0; lines.resize(ipp_linesMax); IppStatus ok = ippiHoughProbLineGetSize_8u_C1R(srcSize, delta, &specSize, &bufferSize); - Ipp8u* buffer = ippsMalloc_8u(bufferSize); - pSpec = (IppiHoughProbSpec*) malloc(specSize); + Ipp8u* buffer = ippsMalloc_8u_L(bufferSize); + pSpec = (IppiHoughProbSpec*) ippsMalloc_8u_L(specSize); if (ok >= 0) ok = ippiHoughProbLineInit_8u32f_C1R(srcSize, delta, ippAlgHintNone, pSpec); - if (ok >= 0) {ok = CV_INSTRUMENT_FUN_IPP(ippiHoughProbLine_8u32f_C1R,(image.data, image.step, srcSize, threshold, lineLength, lineGap, (IppiPoint*) &lines[0], ipp_linesMax, &linesCount, buffer, pSpec))}; + if (ok >= 0) {ok = CV_INSTRUMENT_FUN_IPP(ippiHoughProbLine_8u32f_C1R, image.data, (int)image.step, srcSize, threshold, lineLength, lineGap, (IppiPoint*) &lines[0], ipp_linesMax, &linesCount, buffer, pSpec);}; - free(pSpec); + ippsFree(pSpec); ippsFree(buffer); if (ok >= 0) { diff --git a/modules/imgproc/src/moments.cpp b/modules/imgproc/src/moments.cpp index d445ed270d..d9ae2e2f25 100644 --- a/modules/imgproc/src/moments.cpp +++ b/modules/imgproc/src/moments.cpp @@ -556,13 +556,94 @@ static bool ocl_moments( InputArray _src, Moments& m, bool binary) m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); } + completeMomentState( &m ); + return true; } #endif -} +#ifdef HAVE_IPP +typedef IppStatus (CV_STDCALL * ippiMoments)(const void* pSrc, int srcStep, IppiSize roiSize, IppiMomentState_64f* pCtx); +static bool ipp_moments(Mat &src, Moments &m ) +{ +#if IPP_VERSION_X100 >= 900 + CV_INSTRUMENT_REGION_IPP() + + IppiSize roi = { src.cols, src.rows }; + IppiPoint point = { 0, 0 }; + int type = src.type(); + IppStatus ippStatus; + + IppAutoBuffer state; + int stateSize = 0; + + ippiMoments ippiMoments64f = + (type == CV_8UC1)?(ippiMoments)ippiMoments64f_8u_C1R: + (type == CV_16UC1)?(ippiMoments)ippiMoments64f_16u_C1R: + (type == CV_32FC1)?(ippiMoments)ippiMoments64f_32f_C1R: + NULL; + if(!ippiMoments64f) + return false; + + ippStatus = ippiMomentGetStateSize_64f(ippAlgHintAccurate, &stateSize); + if(ippStatus < 0) + return false; + + if(!state.allocate(stateSize) && stateSize) + return false; + + ippStatus = ippiMomentInit_64f(state, ippAlgHintAccurate); + if(ippStatus < 0) + return false; + + ippStatus = CV_INSTRUMENT_FUN_IPP(ippiMoments64f, src.ptr(), (int)src.step, roi, state); + if(ippStatus < 0) + return false; + + ippStatus = ippiGetSpatialMoment_64f(state, 0, 0, 0, point, &m.m00); + if(ippStatus < 0) + return false; + ippiGetSpatialMoment_64f(state, 1, 0, 0, point, &m.m10); + ippiGetSpatialMoment_64f(state, 0, 1, 0, point, &m.m01); + ippiGetSpatialMoment_64f(state, 2, 0, 0, point, &m.m20); + ippiGetSpatialMoment_64f(state, 1, 1, 0, point, &m.m11); + ippiGetSpatialMoment_64f(state, 0, 2, 0, point, &m.m02); + ippiGetSpatialMoment_64f(state, 3, 0, 0, point, &m.m30); + ippiGetSpatialMoment_64f(state, 2, 1, 0, point, &m.m21); + ippiGetSpatialMoment_64f(state, 1, 2, 0, point, &m.m12); + ippiGetSpatialMoment_64f(state, 0, 3, 0, point, &m.m03); + + ippStatus = ippiGetCentralMoment_64f(state, 2, 0, 0, &m.mu20); + if(ippStatus < 0) + return false; + ippiGetCentralMoment_64f(state, 1, 1, 0, &m.mu11); + ippiGetCentralMoment_64f(state, 0, 2, 0, &m.mu02); + ippiGetCentralMoment_64f(state, 3, 0, 0, &m.mu30); + ippiGetCentralMoment_64f(state, 2, 1, 0, &m.mu21); + ippiGetCentralMoment_64f(state, 1, 2, 0, &m.mu12); + ippiGetCentralMoment_64f(state, 0, 3, 0, &m.mu03); + + ippStatus = ippiGetNormalizedCentralMoment_64f(state, 2, 0, 0, &m.nu20); + if(ippStatus < 0) + return false; + ippiGetNormalizedCentralMoment_64f(state, 1, 1, 0, &m.nu11); + ippiGetNormalizedCentralMoment_64f(state, 0, 2, 0, &m.nu02); + ippiGetNormalizedCentralMoment_64f(state, 3, 0, 0, &m.nu30); + ippiGetNormalizedCentralMoment_64f(state, 2, 1, 0, &m.nu21); + ippiGetNormalizedCentralMoment_64f(state, 1, 2, 0, &m.nu12); + ippiGetNormalizedCentralMoment_64f(state, 0, 3, 0, &m.nu03); + + return true; +#else + CV_UNUSED(src); CV_UNUSED(m); + return false; +#endif +} +#endif + +} cv::Moments cv::moments( InputArray _src, bool binary ) { @@ -579,159 +660,93 @@ cv::Moments cv::moments( InputArray _src, bool binary ) return m; #ifdef HAVE_OPENCL - if( !(ocl::useOpenCL() && type == CV_8UC1 && - _src.isUMat() && ocl_moments(_src, m, binary)) ) + CV_OCL_RUN_(type == CV_8UC1 && _src.isUMat(), ocl_moments(_src, m, binary), m); #endif + + Mat mat = _src.getMat(); + if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S)) + return contourMoments(mat); + + if( cn > 1 ) + CV_Error( CV_StsBadArg, "Invalid image type (must be single-channel)" ); + + CV_IPP_RUN(!binary, ipp_moments(mat, m), m); + + if( binary || depth == CV_8U ) + func = momentsInTile; + else if( depth == CV_16U ) + func = momentsInTile; + else if( depth == CV_16S ) + func = momentsInTile; + else if( depth == CV_32F ) + func = momentsInTile; + else if( depth == CV_64F ) + func = momentsInTile; + else + CV_Error( CV_StsUnsupportedFormat, "" ); + + Mat src0(mat); + + for( int y = 0; y < size.height; y += TILE_SIZE ) { - Mat mat = _src.getMat(); - if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S)) - return contourMoments(mat); + Size tileSize; + tileSize.height = std::min(TILE_SIZE, size.height - y); - if( cn > 1 ) - CV_Error( CV_StsBadArg, "Invalid image type (must be single-channel)" ); - -#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK - CV_IPP_CHECK() + for( int x = 0; x < size.width; x += TILE_SIZE ) { - if (!binary) + tileSize.width = std::min(TILE_SIZE, size.width - x); + Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height)); + + if( binary ) { - IppiSize roi = { mat.cols, mat.rows }; - IppiMomentState_64f * moment = NULL; - // ippiMomentInitAlloc_64f, ippiMomentFree_64f are deprecated in 8.1, but there are not another way - // to initialize IppiMomentState_64f. When GetStateSize and Init functions will appear we have to - // change our code. - CV_SUPPRESS_DEPRECATED_START - if (ippiMomentInitAlloc_64f(&moment, ippAlgHintAccurate) >= 0) - { - typedef IppStatus (CV_STDCALL * ippiMoments)(const void * pSrc, int srcStep, IppiSize roiSize, IppiMomentState_64f* pCtx); - ippiMoments ippFunc = - type == CV_8UC1 ? (ippiMoments)ippiMoments64f_8u_C1R : - type == CV_16UC1 ? (ippiMoments)ippiMoments64f_16u_C1R : - type == CV_32FC1? (ippiMoments)ippiMoments64f_32f_C1R : 0; - - if (ippFunc) - { - if (CV_INSTRUMENT_FUN_IPP(ippFunc,(mat.data, (int)mat.step, roi, moment)) >= 0) - { - IppiPoint point = { 0, 0 }; - ippiGetSpatialMoment_64f(moment, 0, 0, 0, point, &m.m00); - ippiGetSpatialMoment_64f(moment, 1, 0, 0, point, &m.m10); - ippiGetSpatialMoment_64f(moment, 0, 1, 0, point, &m.m01); - - ippiGetSpatialMoment_64f(moment, 2, 0, 0, point, &m.m20); - ippiGetSpatialMoment_64f(moment, 1, 1, 0, point, &m.m11); - ippiGetSpatialMoment_64f(moment, 0, 2, 0, point, &m.m02); - - ippiGetSpatialMoment_64f(moment, 3, 0, 0, point, &m.m30); - ippiGetSpatialMoment_64f(moment, 2, 1, 0, point, &m.m21); - ippiGetSpatialMoment_64f(moment, 1, 2, 0, point, &m.m12); - ippiGetSpatialMoment_64f(moment, 0, 3, 0, point, &m.m03); - ippiGetCentralMoment_64f(moment, 2, 0, 0, &m.mu20); - ippiGetCentralMoment_64f(moment, 1, 1, 0, &m.mu11); - ippiGetCentralMoment_64f(moment, 0, 2, 0, &m.mu02); - ippiGetCentralMoment_64f(moment, 3, 0, 0, &m.mu30); - ippiGetCentralMoment_64f(moment, 2, 1, 0, &m.mu21); - ippiGetCentralMoment_64f(moment, 1, 2, 0, &m.mu12); - ippiGetCentralMoment_64f(moment, 0, 3, 0, &m.mu03); - ippiGetNormalizedCentralMoment_64f(moment, 2, 0, 0, &m.nu20); - ippiGetNormalizedCentralMoment_64f(moment, 1, 1, 0, &m.nu11); - ippiGetNormalizedCentralMoment_64f(moment, 0, 2, 0, &m.nu02); - ippiGetNormalizedCentralMoment_64f(moment, 3, 0, 0, &m.nu30); - ippiGetNormalizedCentralMoment_64f(moment, 2, 1, 0, &m.nu21); - ippiGetNormalizedCentralMoment_64f(moment, 1, 2, 0, &m.nu12); - ippiGetNormalizedCentralMoment_64f(moment, 0, 3, 0, &m.nu03); - - ippiMomentFree_64f(moment); - CV_IMPL_ADD(CV_IMPL_IPP); - return m; - } - setIppErrorStatus(); - } - ippiMomentFree_64f(moment); - } - else - setIppErrorStatus(); - CV_SUPPRESS_DEPRECATED_END + cv::Mat tmp(tileSize, CV_8U, nzbuf); + cv::compare( src, 0, tmp, CV_CMP_NE ); + src = tmp; } - } -#endif - if( binary || depth == CV_8U ) - func = momentsInTile; - else if( depth == CV_16U ) - func = momentsInTile; - else if( depth == CV_16S ) - func = momentsInTile; - else if( depth == CV_32F ) - func = momentsInTile; - else if( depth == CV_64F ) - func = momentsInTile; - else - CV_Error( CV_StsUnsupportedFormat, "" ); + double mom[10]; + func( src, mom ); - Mat src0(mat); - - for( int y = 0; y < size.height; y += TILE_SIZE ) - { - Size tileSize; - tileSize.height = std::min(TILE_SIZE, size.height - y); - - for( int x = 0; x < size.width; x += TILE_SIZE ) + if(binary) { - tileSize.width = std::min(TILE_SIZE, size.width - x); - Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height)); - - if( binary ) - { - cv::Mat tmp(tileSize, CV_8U, nzbuf); - cv::compare( src, 0, tmp, CV_CMP_NE ); - src = tmp; - } - - double mom[10]; - func( src, mom ); - - if(binary) - { - double s = 1./255; - for( int k = 0; k < 10; k++ ) - mom[k] *= s; - } - - double xm = x * mom[0], ym = y * mom[0]; - - // accumulate moments computed in each tile - - // + m00 ( = m00' ) - m.m00 += mom[0]; - - // + m10 ( = m10' + x*m00' ) - m.m10 += mom[1] + xm; - - // + m01 ( = m01' + y*m00' ) - m.m01 += mom[2] + ym; - - // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) - m.m20 += mom[3] + x * (mom[1] * 2 + xm); - - // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) - m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1]; - - // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) - m.m02 += mom[5] + y * (mom[2] * 2 + ym); - - // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) - m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); - - // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') - m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; - - // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') - m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; - - // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) - m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); + double s = 1./255; + for( int k = 0; k < 10; k++ ) + mom[k] *= s; } + + double xm = x * mom[0], ym = y * mom[0]; + + // accumulate moments computed in each tile + + // + m00 ( = m00' ) + m.m00 += mom[0]; + + // + m10 ( = m10' + x*m00' ) + m.m10 += mom[1] + xm; + + // + m01 ( = m01' + y*m00' ) + m.m01 += mom[2] + ym; + + // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) + m.m20 += mom[3] + x * (mom[1] * 2 + xm); + + // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) + m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1]; + + // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) + m.m02 += mom[5] + y * (mom[2] * 2 + ym); + + // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) + m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); + + // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') + m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; + + // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') + m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; + + // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) + m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); } } diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp index ff260d1783..86d376bbd2 100644 --- a/modules/imgproc/src/pyramids.cpp +++ b/modules/imgproc/src/pyramids.cpp @@ -1200,7 +1200,7 @@ static bool ipp_pyrdown( InputArray _src, OutputArray _dst, const Size& _dsz, in { CV_INSTRUMENT_REGION_IPP() -#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK +#if IPP_VERSION_X100 >= 810 && !IPP_DISABLE_PYRAMIDS_DOWN Size dsz = _dsz.area() == 0 ? Size((_src.cols() + 1)/2, (_src.rows() + 1)/2) : _dsz; bool isolated = (borderType & BORDER_ISOLATED) != 0; int borderTypeNI = borderType & ~BORDER_ISOLATED; @@ -1235,7 +1235,7 @@ static bool ipp_pyrdown( InputArray _src, OutputArray _dst, const Size& _dsz, in CV_SUPPRESS_DEPRECATED_END if (ok >= 0) { - Ipp8u* buffer = ippsMalloc_8u(bufferSize); + Ipp8u* buffer = ippsMalloc_8u_L(bufferSize); ok = pyrUpFunc(src.data, (int) src.step, dst.data, (int) dst.step, srcRoi, buffer); ippsFree(buffer); @@ -1388,7 +1388,7 @@ static bool ipp_pyrup( InputArray _src, OutputArray _dst, const Size& _dsz, int { CV_INSTRUMENT_REGION_IPP() -#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK +#if IPP_VERSION_X100 >= 810 && !IPP_DISABLE_PYRAMIDS_UP Size sz = _src.dims() <= 2 ? _src.size() : Size(); Size dsz = _dsz.area() == 0 ? Size(_src.cols()*2, _src.rows()*2) : _dsz; @@ -1421,7 +1421,7 @@ static bool ipp_pyrup( InputArray _src, OutputArray _dst, const Size& _dsz, int CV_SUPPRESS_DEPRECATED_END if (ok >= 0) { - Ipp8u* buffer = ippsMalloc_8u(bufferSize); + Ipp8u* buffer = ippsMalloc_8u_L(bufferSize); ok = pyrUpFunc(src.data, (int) src.step, dst.data, (int) dst.step, srcRoi, buffer); ippsFree(buffer); @@ -1496,7 +1496,7 @@ static bool ipp_buildpyramid( InputArray _src, OutputArrayOfArrays _dst, int max { CV_INSTRUMENT_REGION_IPP() -#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK +#if IPP_VERSION_X100 >= 810 && !IPP_DISABLE_PYRAMIDS_BUILD Mat src = _src.getMat(); _dst.create( maxlevel + 1, 1, 0 ); _dst.getMatRef(0) = src; @@ -1626,7 +1626,7 @@ void cv::buildPyramid( InputArray _src, OutputArrayOfArrays _dst, int maxlevel, int i=1; - CV_IPP_RUN(((IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK) && ((borderType & ~BORDER_ISOLATED) == BORDER_DEFAULT && (!_src.isSubmatrix() || ((borderType & BORDER_ISOLATED) != 0)))), + CV_IPP_RUN(((IPP_VERSION_X100 >= 810) && ((borderType & ~BORDER_ISOLATED) == BORDER_DEFAULT && (!_src.isSubmatrix() || ((borderType & BORDER_ISOLATED) != 0)))), ipp_buildpyramid( _src, _dst, maxlevel, borderType)); for( ; i <= maxlevel; i++ ) diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index ae6629f57c..81d3a7ccb0 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -1734,98 +1734,84 @@ namespace cv } #endif -// TODO: IPP performance regression -#if defined(HAVE_IPP) && IPP_DISABLE_BLOCK +#if defined(HAVE_IPP) namespace cv { -static bool ipp_boxfilter( InputArray _src, OutputArray _dst, int ddepth, - Size ksize, Point anchor, - bool normalize, int borderType ) +static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType) { CV_INSTRUMENT_REGION_IPP() - int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); - if( ddepth < 0 ) - ddepth = sdepth; - int ippBorderType = borderType & ~BORDER_ISOLATED; + // Problem with SSE42 optimization for 16s +#if IPP_DISABLE_PERF_BOX16S_SSE42 + if(src.depth() == CV_16S && !(ipp::getIppFeatures()&ippCPUID_AVX)) + return false; +#endif + + int stype = src.type(), cn = CV_MAT_CN(stype); + IppiBorderType ippBorderType = ippiGetBorderType(borderType & ~BORDER_ISOLATED); + IppDataType ippType = ippiGetDataType(stype); Point ocvAnchor, ippAnchor; ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x; ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y; ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0); ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0); - Mat src = _src.getMat(); - _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) ); - Mat dst = _dst.getMat(); - if( borderType != BORDER_CONSTANT && normalize && (borderType & BORDER_ISOLATED) != 0 ) + if(normalize && (!src.isSubmatrix() || borderType&BORDER_ISOLATED) && stype == dst.type() && + (ippBorderType == ippBorderRepl || /* returns ippStsStepErr: Step value is not valid */ + ippBorderType == ippBorderConst || + ippBorderType == ippBorderMirror) && ocvAnchor == ippAnchor) // returns ippStsMaskSizeErr: mask has an illegal value { - if( src.rows == 1 ) - ksize.height = 1; - if( src.cols == 1 ) - ksize.width = 1; - } + IppStatus status; + Ipp32s bufSize = 0; + IppiSize roiSize = { dst.cols, dst.rows }; + IppiSize maskSize = { ksize.width, ksize.height }; + IppAutoBuffer buffer; - { - if (normalize && !src.isSubmatrix() && ddepth == sdepth && - (/*ippBorderType == BORDER_REPLICATE ||*/ /* returns ippStsStepErr: Step value is not valid */ - ippBorderType == BORDER_CONSTANT) && ocvAnchor == ippAnchor && - dst.cols != ksize.width && dst.rows != ksize.height) // returns ippStsMaskSizeErr: mask has an illegal value - { - Ipp32s bufSize = 0; - IppiSize roiSize = { dst.cols, dst.rows }, maskSize = { ksize.width, ksize.height }; + if(ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippType, cn, &bufSize) < 0) + return false; -#define IPP_FILTER_BOX_BORDER(ippType, ippDataType, flavor) \ - do \ - { \ - if (ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippDataType, cn, &bufSize) >= 0) \ - { \ - Ipp8u * buffer = ippsMalloc_8u(bufSize); \ - ippType borderValue[4] = { 0, 0, 0, 0 }; \ - ippBorderType = ippBorderType == BORDER_CONSTANT ? ippBorderConst : ippBorderRepl; \ - IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiFilterBoxBorder_##flavor, src.ptr(), (int)src.step, dst.ptr(), \ - (int)dst.step, roiSize, maskSize, \ - (IppiBorderType)ippBorderType, borderValue, buffer); \ - ippsFree(buffer); \ - if (status >= 0) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return true; \ - } \ - } \ - } while ((void)0, 0) + buffer.allocate(bufSize); - if (stype == CV_8UC1) - IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C1R); - else if (stype == CV_8UC3) - IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C3R); - else if (stype == CV_8UC4) - IPP_FILTER_BOX_BORDER(Ipp8u, ipp8u, 8u_C4R); - - // Oct 2014: performance with BORDER_CONSTANT - //else if (stype == CV_16UC1) - // IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C1R); - else if (stype == CV_16UC3) - IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C3R); - else if (stype == CV_16UC4) - IPP_FILTER_BOX_BORDER(Ipp16u, ipp16u, 16u_C4R); - - // Oct 2014: performance with BORDER_CONSTANT - //else if (stype == CV_16SC1) - // IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C1R); - else if (stype == CV_16SC3) - IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C3R); - else if (stype == CV_16SC4) - IPP_FILTER_BOX_BORDER(Ipp16s, ipp16s, 16s_C4R); - - else if (stype == CV_32FC1) - IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C1R); - else if (stype == CV_32FC3) - IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C3R); - else if (stype == CV_32FC4) - IPP_FILTER_BOX_BORDER(Ipp32f, ipp32f, 32f_C4R); + #define IPP_FILTER_BOX_BORDER(ippType, flavor)\ + {\ + ippType borderValue[4] = { 0, 0, 0, 0 };\ + status = CV_INSTRUMENT_FUN_IPP(ippiFilterBoxBorder_##flavor, src.ptr(), (int)src.step, dst.ptr(),\ + (int)dst.step, roiSize, maskSize,\ + ippBorderType, borderValue, buffer);\ } -#undef IPP_FILTER_BOX_BORDER + + if (stype == CV_8UC1) + IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C1R) + else if (stype == CV_8UC3) + IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C3R) + else if (stype == CV_8UC4) + IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C4R) + else if (stype == CV_16UC1) + IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C1R) + else if (stype == CV_16UC3) + IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C3R) + else if (stype == CV_16UC4) + IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C4R) + else if (stype == CV_16SC1) + IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C1R) + else if (stype == CV_16SC3) + IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C3R) + else if (stype == CV_16SC4) + IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C4R) + else if (stype == CV_32FC1) + IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C1R) + else if (stype == CV_32FC3) + IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C3R) + else if (stype == CV_32FC4) + IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C4R) + else + return false; + + if(status >= 0) + return true; } +#undef IPP_FILTER_BOX_BORDER + return false; } } @@ -1866,19 +1852,7 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth, return; #endif -#if defined HAVE_IPP && IPP_DISABLE_BLOCK - int ippBorderType = borderType & ~BORDER_ISOLATED; - Point ocvAnchor, ippAnchor; - ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x; - ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y; - ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0); - ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0); - CV_IPP_RUN((normalize && !_src.isSubmatrix() && ddepth == sdepth && - (/*ippBorderType == BORDER_REPLICATE ||*/ /* returns ippStsStepErr: Step value is not valid */ - ippBorderType == BORDER_CONSTANT) && ocvAnchor == ippAnchor && - _dst.cols() != ksize.width && _dst.rows() != ksize.height), - ipp_boxfilter( _src, _dst, ddepth, ksize, anchor, normalize, borderType)); -#endif + CV_IPP_RUN_FAST(ipp_boxfilter(src, dst, ksize, anchor, normalize, borderType)); Point ofs; Size wsz(src.cols, src.rows); @@ -3691,53 +3665,6 @@ private: float *space_weight, *color_weight; }; -#if defined (HAVE_IPP) && IPP_DISABLE_BLOCK -class IPPBilateralFilter_8u_Invoker : - public ParallelLoopBody -{ -public: - IPPBilateralFilter_8u_Invoker(Mat &_src, Mat &_dst, double _sigma_color, double _sigma_space, int _radius, bool *_ok) : - ParallelLoopBody(), src(_src), dst(_dst), sigma_color(_sigma_color), sigma_space(_sigma_space), radius(_radius), ok(_ok) - { - *ok = true; - } - - virtual void operator() (const Range& range) const - { - int d = radius * 2 + 1; - IppiSize kernel = {d, d}; - IppiSize roi={dst.cols, range.end - range.start}; - int bufsize=0; - if (0 > ippiFilterBilateralGetBufSize_8u_C1R( ippiFilterBilateralGauss, roi, kernel, &bufsize)) - { - *ok = false; - return; - } - AutoBuffer buf(bufsize); - IppiFilterBilateralSpec *pSpec = (IppiFilterBilateralSpec *)alignPtr(&buf[0], 32); - if (0 > ippiFilterBilateralInit_8u_C1R( ippiFilterBilateralGauss, kernel, (Ipp32f)sigma_color, (Ipp32f)sigma_space, 1, pSpec )) - { - *ok = false; - return; - } - if (0 > ippiFilterBilateral_8u_C1R( src.ptr(range.start) + radius * ((int)src.step[0] + 1), (int)src.step[0], dst.ptr(range.start), (int)dst.step[0], roi, kernel, pSpec )) - *ok = false; - else - { - CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); - } - } -private: - Mat &src; - Mat &dst; - double sigma_color; - double sigma_space; - int radius; - bool *ok; - const IPPBilateralFilter_8u_Invoker& operator= (const IPPBilateralFilter_8u_Invoker&); -}; -#endif - #ifdef HAVE_OPENCL static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d, @@ -3861,24 +3788,6 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d, Mat temp; copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); -#if defined HAVE_IPP && (IPP_VERSION_X100 >= 700) && IPP_DISABLE_BLOCK - CV_IPP_CHECK() - { - if( cn == 1 ) - { - bool ok; - IPPBilateralFilter_8u_Invoker body(temp, dst, sigma_color * sigma_color, sigma_space * sigma_space, radius, &ok ); - parallel_for_(Range(0, dst.rows), body, dst.total()/(double)(1<<16)); - if( ok ) - { - CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); - return; - } - setIppErrorStatus(); - } - } -#endif - std::vector _color_weight(cn*256); std::vector _space_weight(d*d); std::vector _space_ofs(d*d); @@ -4293,6 +4202,107 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d, parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16)); } +#ifdef HAVE_IPP +#define IPP_BILATERAL_PARALLEL 1 + +#ifdef HAVE_IPP_IW +class ipp_bilateralFilterParallel: public ParallelLoopBody +{ +public: + ipp_bilateralFilterParallel(::ipp::IwiImage &_src, ::ipp::IwiImage &_dst, int _radius, Ipp32f _valSquareSigma, Ipp32f _posSquareSigma, ::ipp::IwiBorderType _borderType, bool *_ok): + src(_src), dst(_dst) + { + pOk = _ok; + + radius = _radius; + valSquareSigma = _valSquareSigma; + posSquareSigma = _posSquareSigma; + borderType = _borderType; + + *pOk = true; + } + ~ipp_bilateralFilterParallel() {} + + virtual void operator() (const Range& range) const + { + if(*pOk == false) + return; + + try + { + ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, dst.m_size.width, range.end - range.start); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &src, &dst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, borderType, &roi); + } + catch(::ipp::IwException) + { + *pOk = false; + return; + } + } +private: + ::ipp::IwiImage &src; + ::ipp::IwiImage &dst; + + int radius; + Ipp32f valSquareSigma; + Ipp32f posSquareSigma; + ::ipp::IwiBorderType borderType; + + bool *pOk; + const ipp_bilateralFilterParallel& operator= (const ipp_bilateralFilterParallel&); +}; +#endif + +static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, double sigmaSpace, int borderType) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP() + + int radius = IPP_MAX(((d <= 0)?cvRound(sigmaSpace*1.5):d/2), 1); + Ipp32f valSquareSigma = (Ipp32f)((sigmaColor <= 0)?1:sigmaColor*sigmaColor); + Ipp32f posSquareSigma = (Ipp32f)((sigmaSpace <= 0)?1:sigmaSpace*sigmaSpace); + + // Acquire data and begin processing + try + { + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + ::ipp::IwiBorderSize borderSize(radius); + ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); + if(!ippBorder.m_borderType) + return false; + + // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling + if((((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem)) + return false; + + bool ok = true; + int threads = ippiSuggestThreadsNum(iwDst, 2); + Range range(0, (int)iwDst.m_size.height); + ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok); + if(!ok) + return false; + + if(IPP_BILATERAL_PARALLEL && threads > 1) + parallel_for_(range, invoker, threads*4); + else + invoker(range); + + if(!ok) + return false; + } + catch (::ipp::IwException) + { + return false; + } + return true; +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(d); CV_UNUSED(sigmaColor); CV_UNUSED(sigmaSpace); CV_UNUSED(borderType); + return false; +#endif +} +#endif + } void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d, @@ -4308,6 +4318,8 @@ void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d, Mat src = _src.getMat(), dst = _dst.getMat(); + CV_IPP_RUN_FAST(ipp_bilateralFilter(src, dst, d, sigmaColor, sigmaSpace, borderType)); + if( src.depth() == CV_8U ) bilateralFilter_8u( src, dst, d, sigmaColor, sigmaSpace, borderType ); else if( src.depth() == CV_32F ) diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.cpp index c9793b299a..d19ef3b0a9 100755 --- a/modules/imgproc/src/sumpixels.cpp +++ b/modules/imgproc/src/sumpixels.cpp @@ -405,58 +405,43 @@ static bool ipp_integral( const uchar* src, size_t srcstep, uchar* sum, size_t sumstep, uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, int width, int height, int cn) { CV_INSTRUMENT_REGION_IPP() -#if IPP_VERSION_X100 != 900 // Disabled on ICV due invalid results - if( sdepth <= 0 ) - sdepth = depth == CV_8U ? CV_32S : CV_64F; - if ( sqdepth <= 0 ) - sqdepth = CV_64F; - sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth); + IppiSize size = {width, height}; - if( ( depth == CV_8U ) && ( sdepth == CV_32F || sdepth == CV_32S ) && ( !sqsum || sqdepth == CV_64F ) && ( cn == 1 ) ) + if(cn > 1) + return false; + if(tilted) { - IppStatus status = ippStsErr; - IppiSize srcRoiSize = ippiSize( width, height ); - if( sdepth == CV_32F ) - { - if( sqsum ) - { - status = CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, srcRoiSize, 0, 0); - } - else - { - status = CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, srcRoiSize, 0); - } - } - else if( sdepth == CV_32S ) - { - if( sqsum ) - { - status = CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, srcRoiSize, 0, 0); - } - else - { - status = CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, srcRoiSize, 0); - } - } - if (0 <= status) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return true; - } + CV_UNUSED(tstep); + return false; + } + + if(!sqsum) + { + if(depth == CV_8U && sdepth == CV_32S) + return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0; + else if(depth == CV_8UC1 && sdepth == CV_32F) + return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0; + else if(depth == CV_32FC1 && sdepth == CV_32F) + return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0; + else + return false; + } + else + { + if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S) + return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; + else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F) + return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; + else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F) + return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; + else + return false; } -#else - CV_UNUSED(depth); CV_UNUSED(sdepth); CV_UNUSED(sqdepth); - CV_UNUSED(src); CV_UNUSED(srcstep); - CV_UNUSED(sum); CV_UNUSED(sumstep); - CV_UNUSED(sqsum); CV_UNUSED(sqsumstep); - CV_UNUSED(tilted); CV_UNUSED(tstep); - CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn); -#endif - return false; } } #endif @@ -471,12 +456,7 @@ void integral(int depth, int sdepth, int sqdepth, int width, int height, int cn) { CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn); - CV_IPP_RUN(( depth == CV_8U ) - && ( sdepth == CV_32F || sdepth == CV_32S ) - && ( !tilted ) - && ( !sqsum || sqdepth == CV_64F ) - && ( cn == 1 ), - ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, width, height, cn)); + CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn)); #define ONE_CALL(A, B, C) integral_((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn) diff --git a/modules/imgproc/test/test_bilateral_filter.cpp b/modules/imgproc/test/test_bilateral_filter.cpp index 994f8544e8..48f066b6cd 100644 --- a/modules/imgproc/test/test_bilateral_filter.cpp +++ b/modules/imgproc/test/test_bilateral_filter.cpp @@ -251,20 +251,23 @@ namespace cvtest int CV_BilateralFilterTest::validate_test_results(int test_case_index) { - static const double eps = 4; - + double eps = (_src.depth() < CV_32F)?1:5e-3; + double e; Mat reference_dst, reference_src; if (_src.depth() == CV_32F) + { reference_bilateral_filter(_src, reference_dst, _d, _sigma_color, _sigma_space); + e = cvtest::norm(reference_dst, _parallel_dst, NORM_INF|NORM_RELATIVE); + } else { int type = _src.type(); _src.convertTo(reference_src, CV_32F); reference_bilateral_filter(reference_src, reference_dst, _d, _sigma_color, _sigma_space); reference_dst.convertTo(reference_dst, type); + e = cvtest::norm(reference_dst, _parallel_dst, NORM_INF); } - double e = cvtest::norm(reference_dst, _parallel_dst, NORM_L2); if (e > eps) { ts->printf(cvtest::TS::CONSOLE, "actual error: %g, expected: %g", e, eps); diff --git a/modules/imgproc/test/test_houghLines.cpp b/modules/imgproc/test/test_houghLines.cpp index 93a1202ee1..a233342ae2 100644 --- a/modules/imgproc/test/test_houghLines.cpp +++ b/modules/imgproc/test/test_houghLines.cpp @@ -189,7 +189,7 @@ void BaseHoughLineTest::run_test(int type) else if (type == PROBABILISTIC) count = countMatIntersection(exp_lines, lines, 1e-4f, 0.f); -#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK +#if defined HAVE_IPP && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_HOUGH EXPECT_GE( count, (int) (exp_lines.total() * 0.8) ); #else EXPECT_EQ( count, (int) exp_lines.total()); diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 1670ddf69f..57994fc385 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -484,6 +484,8 @@ bool FeatureEvaluator::updateScaleData( Size imgsz, const std::vector& _s bool FeatureEvaluator::setImage( InputArray _image, const std::vector& _scales ) { + CV_INSTRUMENT_REGION() + Size imgsz = _image.size(); bool recalcOptFeatures = updateScaleData(imgsz, _scales); @@ -628,6 +630,8 @@ Ptr HaarEvaluator::clone() const void HaarEvaluator::computeChannels(int scaleIdx, InputArray img) { + CV_INSTRUMENT_REGION() + const ScaleData& s = scaleData->at(scaleIdx); sqofs = hasTiltedFeatures ? sbufSize.area() * 2 : sbufSize.area(); @@ -670,6 +674,8 @@ void HaarEvaluator::computeChannels(int scaleIdx, InputArray img) void HaarEvaluator::computeOptFeatures() { + CV_INSTRUMENT_REGION() + if (hasTiltedFeatures) tofs = sbufSize.area(); @@ -916,6 +922,8 @@ void CascadeClassifierImpl::read(const FileNode& node) int CascadeClassifierImpl::runAt( Ptr& evaluator, Point pt, int scaleIdx, double& weight ) { + CV_INSTRUMENT_REGION() + assert( !oldCascade && (data.featureType == FeatureEvaluator::HAAR || data.featureType == FeatureEvaluator::LBP || @@ -984,6 +992,8 @@ public: void operator()(const Range& range) const { + CV_INSTRUMENT_REGION() + Ptr evaluator = classifier->featureEvaluator->clone(); double gypWeight = 0.; Size origWinSize = classifier->data.origWinSize; diff --git a/modules/objdetect/src/cascadedetect.hpp b/modules/objdetect/src/cascadedetect.hpp index a395428d67..f359ef5623 100644 --- a/modules/objdetect/src/cascadedetect.hpp +++ b/modules/objdetect/src/cascadedetect.hpp @@ -489,6 +489,8 @@ template inline int predictOrdered( CascadeClassifierImpl& cascade, Ptr &_featureEvaluator, double& sum ) { + CV_INSTRUMENT_REGION() + int nstages = (int)cascade.data.stages.size(); int nodeOfs = 0, leafOfs = 0; FEval& featureEvaluator = (FEval&)*_featureEvaluator; @@ -529,6 +531,8 @@ template inline int predictCategorical( CascadeClassifierImpl& cascade, Ptr &_featureEvaluator, double& sum ) { + CV_INSTRUMENT_REGION() + int nstages = (int)cascade.data.stages.size(); int nodeOfs = 0, leafOfs = 0; FEval& featureEvaluator = (FEval&)*_featureEvaluator; @@ -571,6 +575,8 @@ template inline int predictOrderedStump( CascadeClassifierImpl& cascade, Ptr &_featureEvaluator, double& sum ) { + CV_INSTRUMENT_REGION() + CV_Assert(!cascade.data.stumps.empty()); FEval& featureEvaluator = (FEval&)*_featureEvaluator; const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0]; @@ -608,6 +614,8 @@ template inline int predictCategoricalStump( CascadeClassifierImpl& cascade, Ptr &_featureEvaluator, double& sum ) { + CV_INSTRUMENT_REGION() + CV_Assert(!cascade.data.stumps.empty()); int nstages = (int)cascade.data.stages.size(); FEval& featureEvaluator = (FEval&)*_featureEvaluator; diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp index bb37ee91e0..c66d4de329 100644 --- a/modules/objdetect/src/haar.cpp +++ b/modules/objdetect/src/haar.cpp @@ -340,8 +340,8 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade ) out->isStumpBased &= node_count == 1; } } -/* -#ifdef HAVE_IPP + +#if defined HAVE_IPP && !IPP_DISABLE_HAAR int can_use_ipp = CV_IPP_CHECK_COND && (!out->has_tilted_features && !out->is_tree && out->isStumpBased); if( can_use_ipp ) @@ -396,7 +396,7 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade ) } } #endif -*/ + cascade->hid_cascade = out; assert( (char*)haar_node_ptr - (char*)out <= datasize );