Add an IPP-backed DCT path to cv::dct and a DCT performance test.

modules/core/perf/perf_dft.cpp:
    Adds a "dct" perf test over six image sizes, CV_32FC1/CV_64FC1 and every
    DCT_FlagsType value (0, DCT_INVERSE, DCT_ROWS, DCT_INVERSE|DCT_ROWS).

modules/core/src/dxt.cpp:
    When built with HAVE_IPP, IPP_VERSION_MAJOR >= 7 and without HAVE_IPP_ICV_ONLY,
    cv::dct() first hands CV_32F input to ippi_DCT_32f() and returns if that succeeds;
    otherwise it calls setIppErrorStatus() and continues with the existing
    table-driven implementation. With DCT_ROWS the rows are transformed one by one
    inside parallel_for_. The commented-out DFT spec block and the matching
    commented-out spec_dft declaration are removed.

modules/imgproc/src/distransform.cpp:
    trueDistTrans() now passes src.total()/(double)(1<<16) as the third argument of
    its first parallel_for_ call.

diff --git a/modules/core/perf/perf_dft.cpp b/modules/core/perf/perf_dft.cpp
index a2d3d503d1..1a025affc3 100644
--- a/modules/core/perf/perf_dft.cpp
+++ b/modules/core/perf/perf_dft.cpp
@@ -6,6 +6,8 @@ using namespace perf;
 using std::tr1::make_tuple;
 using std::tr1::get;
 
+///////////////////////////////////////////////////////dft//////////////////////////////////////////////////////////////
+
 #define MAT_TYPES_DFT CV_32FC1, CV_32FC2, CV_64FC1
 #define MAT_SIZES_DFT cv::Size(320, 480), cv::Size(800, 600), cv::Size(1280, 1024), sz1080p, sz2K
 CV_ENUM(FlagsType, 0, DFT_INVERSE, DFT_SCALE, DFT_COMPLEX_OUTPUT, DFT_ROWS, DFT_INVERSE|DFT_COMPLEX_OUTPUT)
@@ -27,5 +29,35 @@ PERF_TEST_P(Size_MatType_FlagsType, dft, TEST_MATS_DFT)
 
     TEST_CYCLE() dft(src, dst, flags);
 
+    SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
+}
+
+///////////////////////////////////////////////////////dct//////////////////////////////////////////////////////
+
+CV_ENUM(DCT_FlagsType, 0, DCT_INVERSE , DCT_ROWS, DCT_INVERSE|DCT_ROWS)
+
+typedef std::tr1::tuple<Size, MatType, DCT_FlagsType> Size_MatType_Flag_t; // (size, matrix type, dct flags), read back with get<0..2> below
+typedef perf::TestBaseWithParam<Size_MatType_Flag_t> Size_MatType_Flag;
+
+PERF_TEST_P(Size_MatType_Flag, dct, testing::Combine(
+                testing::Values(cv::Size(320, 240),cv::Size(800, 600),
+                                cv::Size(1024, 768), cv::Size(1280, 1024),
+                                sz1080p, sz2K),
+                testing::Values(CV_32FC1, CV_64FC1), DCT_FlagsType::all()))
+{
+    Size sz = get<0>(GetParam());
+    int type = get<1>(GetParam());
+    int flags = get<2>(GetParam());
+
+    Mat src(sz, type);
+    Mat dst(sz, type);
+
+    declare
+        .in(src, WARMUP_RNG)
+        .out(dst)
+        .time(60);
+
+    TEST_CYCLE() dct(src, dst, flags);
+
     SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
 }
\ No newline at end of file
diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp
index e3525b98aa..6a1238ade2 100644
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@@ -2880,6 +2880,131 @@ static void IDCT_64f(const double* src, int src_step, double* dft_src, double* d
 
 }
 
+namespace cv
+{
+#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7 && !defined HAVE_IPP_ICV_ONLY
+
+typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*);
+typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
+typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
+typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
+
+template <typename Dct>
+class DctIPPLoop_Invoker : public ParallelLoopBody
+{
+public:
+
+    DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dct* _ippidct, bool _inv, bool *_ok) :
+        ParallelLoopBody(), src(&_src), dst(&_dst), ippidct(_ippidct), inv(_inv), ok(_ok)
+    {
+        *ok = true; // only ever cleared afterwards, by a stripe whose IPP setup or call fails
+    }
+
+    virtual void operator()(const Range& range) const
+    {
+        void* pDCTSpec = 0; // stays null if InitAlloc fails, so the cleanup check below never reads an indeterminate pointer
+        AutoBuffer<uchar> buf;
+        uchar* pBuffer = 0;
+        int bufSize=0;
+
+        IppiSize srcRoiSize = {src->cols, 1}; // the spec describes a single row; the loop below applies it row by row
+
+        CV_SUPPRESS_DEPRECATED_START
+
+        ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
+        ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
+        ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
+
+        if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
+        {
+            buf.allocate( bufSize );
+            pBuffer = (uchar*)buf;
+
+            for( int i = range.start; i < range.end; ++i)
+                if(!(*ippidct)((float*)(src->data+i*src->step), (int)src->step,(float*)(dst->data+i*dst->step), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer))
+                    *ok = false;
+        }
+        else
+            *ok = false;
+
+        if (pDCTSpec)
+            ippFree(pDCTSpec);
+
+        CV_SUPPRESS_DEPRECATED_END
+    }
+
+private:
+    const Mat* src;
+    Mat* dst;
+    const Dct* ippidct;
+    bool inv;
+    bool *ok;
+};
+
+template <typename Dct>
+bool DctIPPLoop(const Mat& src, Mat& dst, const Dct& ippidct, bool inv)
+{
+    bool ok; // assigned by the DctIPPLoop_Invoker constructor before parallel_for_ runs
+    parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker<Dct>(src, dst, &ippidct, inv, &ok), src.rows/(double)(1<<4) );
+    return ok;
+}
+
+struct IPPDCTFunctor
+{
+    IPPDCTFunctor(ippiDCTFunc _func) : func(_func){}
+
+    bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer) const
+    {
+        return func ? func(src, srcStep, dst, dstStep, pDCTSpec, pBuffer) >= 0 : false; // a null function pointer is reported as failure
+    }
+private:
+    ippiDCTFunc func;
+};
+
+static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
+{
+    ippiDCTFunc ippFunc = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R ;
+
+    if (row)
+        return(DctIPPLoop(src,dst,IPPDCTFunctor(ippFunc),inv));
+    else
+    {
+        IppStatus status;
+        void* pDCTSpec = 0; // stays null if InitAlloc fails, so the cleanup check below never reads an indeterminate pointer
+        AutoBuffer<uchar> buf;
+        uchar* pBuffer = 0;
+        int bufSize=0;
+
+        IppiSize srcRoiSize = {src.cols, src.rows};
+
+        CV_SUPPRESS_DEPRECATED_START
+
+        ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
+        ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
+        ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
+
+        status = ippStsErr; // reported as failure unless the transform call below replaces it
+
+        if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
+        {
+            buf.allocate( bufSize );
+            pBuffer = (uchar*)buf;
+
+            status = ippFunc((float*)src.data, (int)src.step, (float*)dst.data, (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer);
+        }
+
+        if (pDCTSpec)
+            ippFree(pDCTSpec);
+
+        CV_SUPPRESS_DEPRECATED_END
+
+        return status >= 0;
+    }
+}
+
+#endif
+}
+
 void cv::dct( InputArray _src0, OutputArray _dst, int flags )
 {
     static DCTFunc dct_tbl[4] =
@@ -2893,7 +3018,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
     bool inv = (flags & DCT_INVERSE) != 0;
     Mat src0 = _src0.getMat(), src = src0;
     int type = src.type(), depth = src.depth();
-    void /* *spec_dft = 0, */ *spec = 0;
+    void *spec = 0;
 
     double scale = 1.;
     int prev_len = 0, nf = 0, stage, end_stage;
@@ -2910,6 +3035,16 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
     _dst.create( src.rows, src.cols, type );
     Mat dst = _dst.getMat();
 
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) && !defined HAVE_IPP_ICV_ONLY
+    bool row = (flags & DCT_ROWS) != 0;
+    if (src.type() == CV_32F)
+    {
+        if(ippi_DCT_32f(src,dst,inv, row))
+            return;
+        setIppErrorStatus(); // IPP attempt failed; continue with the table-driven code below
+    }
+#endif
+
     DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
 
     if( (flags & DCT_ROWS) || src.rows == 1 ||
@@ -2962,27 +3097,6 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
 
             spec = 0;
             inplace_transform = 1;
-            /*if( len*count >= 64 && DFTInitAlloc_R_32f_p )
-            {
-                int ipp_sz = 0;
-                if( depth == CV_32F )
-                {
-                    if( spec_dft )
-                        IPPI_CALL( DFTFree_R_32f_p( spec_dft ));
-                    IPPI_CALL( DFTInitAlloc_R_32f_p( &spec_dft, len, 8, cvAlgHintNone ));
-                    IPPI_CALL( DFTGetBufSize_R_32f_p( spec_dft, &ipp_sz ));
-                }
-                else
-                {
-                    if( spec_dft )
-                        IPPI_CALL( DFTFree_R_64f_p( spec_dft ));
-                    IPPI_CALL( DFTInitAlloc_R_64f_p( &spec_dft, len, 8, cvAlgHintNone ));
-                    IPPI_CALL( DFTGetBufSize_R_64f_p( spec_dft, &ipp_sz ));
-                }
-                spec = spec_dft;
-                sz += ipp_sz;
-            }
-            else*/
             {
                 sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size;
 
diff --git a/modules/imgproc/src/distransform.cpp b/modules/imgproc/src/distransform.cpp
index c03d4b9a8e..55a58c7fe5 100644
--- a/modules/imgproc/src/distransform.cpp
+++ b/modules/imgproc/src/distransform.cpp
@@ -577,7 +577,7 @@ trueDistTrans( const Mat& src, Mat& dst )
     for( ; i <= m*3; i++ )
         sat_tab[i] = i - shift;
 
-    cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab));
+    cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab), src.total()/(double)(1<<16));
 
     // stage 2: compute modified distance transform for each row
     float* inv_tab = sqr_tab + n;