From 863d61e9eb64bd7d2ab6ddd2513719e32b7a5716 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 8 Nov 2010 09:55:10 +0000 Subject: [PATCH] fix gpu module compilation under linux --- modules/gpu/src/arithm.cpp | 289 ++++++++++++++++++-------- modules/gpu/src/graphcuts.cpp | 12 +- modules/gpu/src/imgproc_gpu.cpp | 288 +++++++++++++------------ modules/gpu/src/matrix_operations.cpp | 88 +++++--- 4 files changed, 425 insertions(+), 252 deletions(-) diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp index db118d0f24..510ec114ea 100644 --- a/modules/gpu/src/arithm.cpp +++ b/modules/gpu/src/arithm.cpp @@ -84,162 +84,230 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, #else /* !defined (HAVE_CUDA) */ +#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR) + +#if (defined(_WIN32) || defined(_WIN64)) && (NPP_VERSION >= 32) +# define NPP_HAVE_COMPLEX_TYPE +#endif + //////////////////////////////////////////////////////////////////////// // add subtract multiply divide namespace { - typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep, - NppiSize oSizeROI, int nScaleFactor); - typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, - int nDstStep, NppiSize oSizeROI); - typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, - int nDstStep, NppiSize oSizeROI); + typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep, + NppiSize oSizeROI, int nScaleFactor); + typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, + int nDstStep, NppiSize oSizeROI); + typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, + int nDstStep, NppiSize oSizeROI); - void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, - npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4, + void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, + npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4, npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1) - { + { CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type()); +#if NPP_VERSION >= 32 CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1); +#else + CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1); +#endif dst.create( src1.size(), src1.type() ); - NppiSize sz; - sz.width = src1.cols; - sz.height = src1.rows; + NppiSize sz; + sz.width = src1.cols; + sz.height = src1.rows; switch (src1.type()) { case CV_8UC1: - nppSafeCall( npp_func_8uc1(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz, 0) ); + nppSafeCall( npp_func_8uc1(src1.ptr(), src1.step, + src2.ptr(), src2.step, + dst.ptr(), dst.step, sz, 0) ); break; case CV_8UC4: - nppSafeCall( npp_func_8uc4(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz, 0) ); + nppSafeCall( npp_func_8uc4(src1.ptr(), src1.step, + src2.ptr(), src2.step, + dst.ptr(), dst.step, sz, 0) ); break; +#if NPP_VERSION >= 32 case CV_32SC1: - nppSafeCall( npp_func_32sc1(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz) ); + nppSafeCall( npp_func_32sc1(src1.ptr(), src1.step, + src2.ptr(), src2.step, + dst.ptr(), dst.step, sz) ); break; +#endif case CV_32FC1: - nppSafeCall( npp_func_32fc1(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz) ); + nppSafeCall( npp_func_32fc1(src1.ptr(), src1.step, + src2.ptr(), src2.step, + dst.ptr(), dst.step, sz) ); break; default: CV_Assert(!"Unsupported source type"); } - } + } template struct NppArithmScalarFunc; template<> struct NppArithmScalarFunc<1> { - typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst, + typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst, int nDstStep, NppiSize oSizeROI); }; +#ifdef NPP_HAVE_COMPLEX_TYPE template<> struct NppArithmScalarFunc<2> - { + { typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst, int nDstStep, NppiSize oSizeROI); }; +#endif template::func_ptr func> struct NppArithmScalar; template::func_ptr func> struct NppArithmScalar<1, func> { static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst) - { + { dst.create(src.size(), src.type()); - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; - nppSafeCall( func(src.ptr(), src.step, (Npp32f)sc[0], dst.ptr(), dst.step, sz) ); - } + nppSafeCall( func(src.ptr(), src.step, (Npp32f)sc[0], dst.ptr(), dst.step, sz) ); + } }; +#ifdef NPP_HAVE_COMPLEX_TYPE template::func_ptr func> struct NppArithmScalar<2, func> { static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst) - { + { dst.create(src.size(), src.type()); - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; Npp32fc nValue; nValue.re = (Npp32f)sc[0]; nValue.im = (Npp32f)sc[1]; - nppSafeCall( func(src.ptr(), src.step, nValue, dst.ptr(), dst.step, sz) ); - } + nppSafeCall( func(src.ptr(), src.step, nValue, dst.ptr(), dst.step, sz) ); + } }; +#endif } void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { +#if NPP_VERSION >= 32 nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R); +#else + nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, 0, nppiAdd_32f_C1R); +#endif } -void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) +void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { - nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R); +#if NPP_VERSION >= 32 + nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R); +#else + nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, 0, nppiSub_32f_C1R); +#endif } void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { - nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R); +#if NPP_VERSION >= 32 + nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R); +#else + nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, 0, nppiMul_32f_C1R); +#endif } void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { - nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R); +#if NPP_VERSION >= 32 + nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R); +#else + nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, 0, nppiDiv_32f_C1R); +#endif } void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst) { +#ifdef NPP_HAVE_COMPLEX_TYPE typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst); - static const caller_t callers[] = {NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc}; + static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc}; CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2); callers[src.channels()](src, sc, dst); +#else +# if NPP_VERSION >= 32 + CV_Assert(src.type() == CV_32FC1); + NppArithmScalar<1, nppiAddC_32f_C1R>::calc(src, sc, dst); +# else + CV_Assert(!"This function doesn't supported"); +# endif +#endif } void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst) { +#ifdef NPP_HAVE_COMPLEX_TYPE typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst); - static const caller_t callers[] = {NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc}; + static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc}; CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2); callers[src.channels()](src, sc, dst); +#else +# if NPP_VERSION >= 32 + CV_Assert(src.type() == CV_32FC1); + NppArithmScalar<1, nppiSubC_32f_C1R>::calc(src, sc, dst); +# else + CV_Assert(!"This function doesn't supported"); +# endif +#endif } void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst) { +#ifdef NPP_HAVE_COMPLEX_TYPE typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst); - static const caller_t callers[] = {NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc}; + static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc}; CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2); callers[src.channels()](src, sc, dst); +#else +# if NPP_VERSION >= 32 + CV_Assert(src.type() == CV_32FC1); + NppArithmScalar<1, nppiMulC_32f_C1R>::calc(src, sc, dst); +# else + CV_Assert(!"This function doesn't supported"); +# endif +#endif } void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst) { +#ifdef NPP_HAVE_COMPLEX_TYPE typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst); - static const caller_t callers[] = {NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc}; + static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc}; CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2); callers[src.channels()](src, sc, dst); +#else +# if NPP_VERSION >= 32 + CV_Assert(src.type() == CV_32FC1); + NppArithmScalar<1, nppiDivC_32f_C1R>::calc(src, sc, dst); +# else + CV_Assert(!"This function doesn't supported"); +# endif +#endif } //////////////////////////////////////////////////////////////////////// @@ -263,9 +331,13 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst) void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) { - CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type()); + CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type()); - CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1); +#if NPP_VERSION >= 32 + CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1); +#else + CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1); +#endif dst.create( src1.size(), src1.type() ); @@ -276,20 +348,22 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) switch (src1.type()) { case CV_8UC1: - nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr(), src1.step, - src2.ptr(), src2.step, + nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr(), src1.step, + src2.ptr(), src2.step, dst.ptr(), dst.step, sz) ); break; case CV_8UC4: - nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr(), src1.step, - src2.ptr(), src2.step, + nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr(), src1.step, + src2.ptr(), src2.step, dst.ptr(), dst.step, sz) ); break; +#if NPP_VERSION >= 32 case CV_32SC1: nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz) ); break; +#endif case CV_32FC1: nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr(), src1.step, src2.ptr(), src2.step, @@ -302,7 +376,8 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst) { - CV_Assert(src.type() == CV_32FC1); +#if NPP_VERSION >= 32 + CV_Assert(src.type() == CV_32FC1); dst.create( src.size(), src.type() ); @@ -311,6 +386,9 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst) sz.height = src.rows; nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr(), src.step, dst.ptr(), dst.step, sz, (Npp32f)s[0]) ); +#else + CV_Assert(!"This function doesn't supported"); +#endif } //////////////////////////////////////////////////////////////////////// @@ -322,7 +400,7 @@ namespace cv { namespace gpu { namespace mathfunc void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst); }}} -void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop) +void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop) { CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type()); @@ -340,8 +418,8 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c { if (cmpop != CMP_NE) { - nppSafeCall( nppiCompare_8u_C4R(src1.ptr(), src1.step, - src2.ptr(), src2.step, + nppSafeCall( nppiCompare_8u_C4R(src1.ptr(), src1.step, + src2.ptr(), src2.step, dst.ptr(), dst.step, sz, nppCmpOp[cmpop]) ); } else @@ -367,7 +445,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c //////////////////////////////////////////////////////////////////////// // meanStdDev -void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev) +void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev) { CV_Assert(src.type() == CV_8UC1); @@ -381,7 +459,7 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev) //////////////////////////////////////////////////////////////////////// // norm -double cv::gpu::norm(const GpuMat& src1, int normType) +double cv::gpu::norm(const GpuMat& src1, int normType) { return norm(src1, GpuMat(src1.size(), src1.type(), Scalar::all(0.0)), normType); } @@ -393,7 +471,7 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) CV_Assert(src1.type() == CV_8UC1); CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); - typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, + typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal); static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; @@ -405,8 +483,8 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) int funcIdx = normType >> 1; double retVal; - nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr(), src1.step, - src2.ptr(), src2.step, + nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr(), src1.step, + src2.ptr(), src2.step, sz, &retVal) ); return retVal; @@ -427,14 +505,14 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode) if (src.type() == CV_8UC1) { - nppSafeCall( nppiMirror_8u_C1R(src.ptr(), src.step, - dst.ptr(), dst.step, sz, + nppSafeCall( nppiMirror_8u_C1R(src.ptr(), src.step, + dst.ptr(), dst.step, sz, (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) ); } else { - nppSafeCall( nppiMirror_8u_C4R(src.ptr(), src.step, - dst.ptr(), dst.step, sz, + nppSafeCall( nppiMirror_8u_C4R(src.ptr(), src.step, + dst.ptr(), dst.step, sz, (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) ); } } @@ -444,33 +522,40 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode) Scalar cv::gpu::sum(const GpuMat& src) { - CV_Assert(!"disabled until fix crash"); - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4); + CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4); NppiSize sz; sz.width = src.cols; sz.height = src.rows; + Scalar res; +#if NPP_VERSION >= 32 + CV_Assert(!"disabled until fix crash"); + int bufsz; - + if (src.type() == CV_8UC1) - { + { nppiReductionGetBufferHostSize_8u_C1R(sz, &bufsz); GpuMat buf(1, bufsz, CV_32S); - Scalar res; nppSafeCall( nppiSum_8u_C1R(src.ptr(), src.step, sz, buf.ptr(), res.val) ); - return res; } else - { + { nppiReductionGetBufferHostSize_8u_C4R(sz, &bufsz); GpuMat buf(1, bufsz, CV_32S); - Scalar res; nppSafeCall( nppiSum_8u_C4R(src.ptr(), src.step, sz, buf.ptr(), res.val) ); - return res; } +#else + if (src.type() == CV_8UC1) + nppSafeCall( nppiSum_8u_C1R(src.ptr(), src.step, sz, res.val) ); + else + nppSafeCall( nppiSum_8u_C4R(src.ptr(), src.step, sz, res.val) ); +#endif + + return res; } //////////////////////////////////////////////////////////////////////// @@ -501,22 +586,30 @@ namespace sz.width = src.cols; sz.height = src.rows; - Npp8u* cuMin = nppsMalloc_8u(4); - Npp8u* cuMax = nppsMalloc_8u(4); + Npp8u* cuMem; - nppSafeCall( nppiMinMax_8u_C4R(src.ptr(), src.step, sz, cuMin, cuMax) ); +#if NPP_VERSION >= 32 + cuMem = nppsMalloc_8u(8); +#else + cudaSafeCall( cudaMalloc((void**)&cuMem, 8 * sizeof(Npp8u)) ); +#endif + + nppSafeCall( nppiMinMax_8u_C4R(src.ptr(), src.step, sz, cuMem, cuMem + 4) ); if (minVal) - cudaMemcpy(minVal, cuMin, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost); + cudaMemcpy(minVal, cuMem, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost); if (maxVal) - cudaMemcpy(maxVal, cuMax, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost); + cudaMemcpy(maxVal, cuMem + 4, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost); - nppsFree(cuMin); - nppsFree(cuMax); +#if NPP_VERSION >= 32 + nppsFree(cuMem); +#else + cudaSafeCall( cudaFree(cuMem) ); +#endif } } -void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal) +void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal) { typedef void (*minMax_t)(const GpuMat& src, double* minVal, double* maxVal); static const minMax_t minMax_callers[] = {0, minMax_c1, 0, 0, minMax_c4}; @@ -559,13 +652,13 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst) NppiSize sz; sz.height = src.rows; sz.width = src.cols; - + Mat nppLut; lut.convertTo(nppLut, CV_32S); if (src.type() == CV_8UC1) { - nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), src.step, dst.ptr(), dst.step, sz, + nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), src.step, dst.ptr(), dst.step, sz, nppLut.ptr(), lvls.pLevels, 256) ); } else @@ -578,10 +671,10 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst) { cv::split(nppLut, nppLut3); pValues3[0] = nppLut3[0].ptr(); - pValues3[1] = nppLut3[1].ptr(); + pValues3[1] = nppLut3[1].ptr(); pValues3[2] = nppLut3[2].ptr(); } - nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), src.step, dst.ptr(), dst.step, sz, + nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), src.step, dst.ptr(), dst.step, sz, pValues3, lvls.pLevels3, lvls.nValues3) ); } } @@ -591,6 +684,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst) void cv::gpu::exp(const GpuMat& src, GpuMat& dst) { +#if NPP_VERSION >= 32 CV_Assert(src.type() == CV_32FC1); dst.create(src.size(), src.type()); @@ -600,6 +694,9 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst) sz.height = src.rows; nppSafeCall( nppiExp_32f_C1R(src.ptr(), src.step, dst.ptr(), dst.step, sz) ); +#else + CV_Assert(!"This function doesn't supported"); +#endif } //////////////////////////////////////////////////////////////////////// @@ -607,6 +704,7 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst) void cv::gpu::log(const GpuMat& src, GpuMat& dst) { +#if NPP_VERSION >= 32 CV_Assert(src.type() == CV_32FC1); dst.create(src.size(), src.type()); @@ -616,11 +714,15 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst) sz.height = src.rows; nppSafeCall( nppiLn_32f_C1R(src.ptr(), src.step, dst.ptr(), dst.step, sz) ); +#else + CV_Assert(!"This function doesn't supported"); +#endif } //////////////////////////////////////////////////////////////////////// // NPP magnitide +#ifdef NPP_HAVE_COMPLEX_TYPE namespace { typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI); @@ -638,21 +740,30 @@ namespace nppSafeCall( func(src.ptr(), src.step, dst.ptr(), dst.step, sz) ); } } +#endif void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst) { +#ifdef NPP_HAVE_COMPLEX_TYPE ::npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R); +#else + CV_Assert(!"This function doesn't supported"); +#endif } void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst) { +#ifdef NPP_HAVE_COMPLEX_TYPE ::npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R); +#else + CV_Assert(!"This function doesn't supported"); +#endif } //////////////////////////////////////////////////////////////////////// // Polar <-> Cart -namespace cv { namespace gpu { namespace mathfunc +namespace cv { namespace gpu { namespace mathfunc { void cartToPolar_gpu(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, bool magSqr, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream); void polarToCart_gpu(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream); @@ -721,7 +832,7 @@ void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleI } void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, const Stream& stream) -{ +{ ::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream)); } diff --git a/modules/gpu/src/graphcuts.cpp b/modules/gpu/src/graphcuts.cpp index 6b29af1cd7..c538a458c7 100644 --- a/modules/gpu/src/graphcuts.cpp +++ b/modules/gpu/src/graphcuts.cpp @@ -48,15 +48,18 @@ void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Gpu #else /* !defined (HAVE_CUDA) */ +#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR) + void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf) { +#if NPP_VERSION >= 32 CV_Assert(leftTransp.type() == CV_32S && rightTransp.type() == CV_32S); CV_Assert(terminals.type() == CV_32S && bottom.type() == CV_32S && top.type() == CV_32S); CV_Assert(terminals.size() == leftTransp.size()); CV_Assert(terminals.size() == rightTransp.size()); - CV_Assert(terminals.size() == top.size() && terminals.size() == bottom.size()); + CV_Assert(terminals.size() == top.size() && terminals.size() == bottom.size()); CV_Assert(top.step == bottom.step && top.step == terminals.step && rightTransp.step == leftTransp.step); - + labels.create(terminals.size(), CV_8U); NppiSize sznpp; @@ -69,8 +72,11 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans if ((size_t)bufsz > buf.cols * buf.rows * buf.elemSize()) buf.create(1, bufsz, CV_8U); - nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), + nppSafeCall( nppiGraphcut_32s8u(terminals.ptr(), leftTransp.ptr(), rightTransp.ptr(), top.ptr(), bottom.ptr(), terminals.step, leftTransp.step, sznpp, labels.ptr(), labels.step, buf.ptr()) ); +#else + CV_Assert(!"This function doesn't supported"); +#endif } diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index c7619120a5..357b655019 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -71,7 +71,9 @@ void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu(); #else /* !defined (HAVE_CUDA) */ -namespace cv { namespace gpu { namespace imgproc +#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR) + +namespace cv { namespace gpu { namespace imgproc { void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst); void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst); @@ -83,7 +85,7 @@ namespace cv { namespace gpu { namespace imgproc void drawColorDisp_gpu(const DevMem2D_& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream); void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream); - void reprojectImageTo3D_gpu(const DevMem2D_& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream); + void reprojectImageTo3D_gpu(const DevMem2D_& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream); }}} //////////////////////////////////////////////////////////////////////// @@ -101,9 +103,9 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp out = dst; out.create(xmap.size(), src.type()); - + callers[src.channels() - 1](src, xmap, ymap, out); - + dst = out; } @@ -111,7 +113,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp // meanShiftFiltering_GPU void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria) -{ +{ if( src.empty() ) CV_Error( CV_StsBadArg, "The input image is empty" ); @@ -119,25 +121,25 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); dst.create( src.size(), CV_8UC4 ); - + if( !(criteria.type & TermCriteria::MAX_ITER) ) criteria.maxCount = 5; - + int maxIter = std::min(std::max(criteria.maxCount, 1), 100); - + float eps; if( !(criteria.type & TermCriteria::EPS) ) eps = 1.f; - eps = (float)std::max(criteria.epsilon, 0.0); + eps = (float)std::max(criteria.epsilon, 0.0); - imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps); + imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps); } //////////////////////////////////////////////////////////////////////// // meanShiftProc_GPU void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria) -{ +{ if( src.empty() ) CV_Error( CV_StsBadArg, "The input image is empty" ); @@ -146,18 +148,18 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int dstr.create( src.size(), CV_8UC4 ); dstsp.create( src.size(), CV_16SC2 ); - + if( !(criteria.type & TermCriteria::MAX_ITER) ) criteria.maxCount = 5; - + int maxIter = std::min(std::max(criteria.maxCount, 1), 100); - + float eps; if( !(criteria.type & TermCriteria::EPS) ) eps = 1.f; - eps = (float)std::max(criteria.epsilon, 0.0); + eps = (float)std::max(criteria.epsilon, 0.0); - imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps); + imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps); } //////////////////////////////////////////////////////////////////////// @@ -167,7 +169,7 @@ namespace { template void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream) - { + { GpuMat out; if (dst.data != src.data) out = dst; @@ -186,14 +188,14 @@ namespace void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp) { CV_Assert(src.type() == CV_8U || src.type() == CV_16S); - + drawColorDisp_callers[src.type()](src, dst, ndisp, 0); } void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, const Stream& stream) { CV_Assert(src.type() == CV_8U || src.type() == CV_16S); - + drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream)); } @@ -204,35 +206,35 @@ namespace { template void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream) - { + { xyzw.create(disp.rows, disp.cols, CV_32FC4); imgproc::reprojectImageTo3D_gpu((DevMem2D_)disp, xyzw, Q.ptr(), stream); } - + typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream); - + const reprojectImageTo3D_caller_t reprojectImageTo3D_callers[] = {reprojectImageTo3D_caller, 0, 0, reprojectImageTo3D_caller, 0, 0, 0, 0}; } void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q) { CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4); - + reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, 0); } void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const Stream& stream) { CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4); - + reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream)); } //////////////////////////////////////////////////////////////////////// // threshold -double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh) -{ +double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh) +{ CV_Assert(src.type() == CV_32FC1); dst.create( src.size(), src.type() ); @@ -241,7 +243,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh) sz.width = src.cols; sz.height = src.rows; - nppSafeCall( nppiThreshold_32f_C1R(src.ptr(), src.step, + nppSafeCall( nppiThreshold_32f_C1R(src.ptr(), src.step, dst.ptr(), dst.step, sz, static_cast(thresh), NPP_CMP_GREATER) ); return thresh; @@ -298,7 +300,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub //////////////////////////////////////////////////////////////////////// // copyMakeBorder -void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value) +void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value) { CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1); @@ -308,32 +310,32 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom srcsz.width = src.cols; srcsz.height = src.rows; NppiSize dstsz; - dstsz.width = dst.cols; - dstsz.height = dst.rows; + dstsz.width = dst.cols; + dstsz.height = dst.rows; switch (src.type()) { case CV_8UC1: - { + { Npp8u nVal = static_cast(value[0]); - nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr(), src.step, srcsz, + nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr(), src.step, srcsz, dst.ptr(), dst.step, dstsz, top, left, nVal) ); break; - } + } case CV_8UC4: - { + { Npp8u nVal[] = {static_cast(value[0]), static_cast(value[1]), static_cast(value[2]), static_cast(value[3])}; - nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr(), src.step, srcsz, + nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr(), src.step, srcsz, dst.ptr(), dst.step, dstsz, top, left, nVal) ); break; - } + } case CV_32SC1: - { + { Npp32s nVal = static_cast(value[0]); - nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr(), src.step, srcsz, + nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr(), src.step, srcsz, dst.ptr(), dst.step, dstsz, top, left, nVal) ); break; - } + } default: CV_Assert(!"Unsupported source type"); } @@ -343,26 +345,26 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom // warp namespace -{ - typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst, - int dstStep, NppiRect dstRoi, const double coeffs[][3], +{ + typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst, + int dstStep, NppiRect dstRoi, const double coeffs[][3], int interpolation); - typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst, - int dstStep, NppiRect dstRoi, const double coeffs[][3], + typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst, + int dstStep, NppiRect dstRoi, const double coeffs[][3], int interpolation); - typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst, - int dstStep, NppiRect dstRoi, const double coeffs[][3], + typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst, + int dstStep, NppiRect dstRoi, const double coeffs[][3], int interpolation); - typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst, - int dstStep, NppiRect dstRoi, const double coeffs[][3], + typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst, + int dstStep, NppiRect dstRoi, const double coeffs[][3], int interpolation); - void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags, - npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2], - npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2]) + void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags, + npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2], + npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2]) { static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC}; - + int interpolation = flags & INTER_MAX; CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2); @@ -387,19 +389,19 @@ namespace switch (src.depth()) { case CV_8U: - nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr(), srcsz, src.step, srcroi, + nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr(), srcsz, src.step, srcroi, dst.ptr(), dst.step, dstroi, coeffs, npp_inter[interpolation]) ); break; case CV_16U: - nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr(), srcsz, src.step, srcroi, + nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr(), srcsz, src.step, srcroi, dst.ptr(), dst.step, dstroi, coeffs, npp_inter[interpolation]) ); break; case CV_32S: - nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr(), srcsz, src.step, srcroi, + nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr(), srcsz, src.step, srcroi, dst.ptr(), dst.step, dstroi, coeffs, npp_inter[interpolation]) ); break; case CV_32F: - nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr(), srcsz, src.step, srcroi, + nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr(), srcsz, src.step, srcroi, dst.ptr(), dst.step, dstroi, coeffs, npp_inter[interpolation]) ); break; default: @@ -408,38 +410,38 @@ namespace } } -void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags) +void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags) { - static npp_warp_8u_t npp_warpAffine_8u[][2] = + static npp_warp_8u_t npp_warpAffine_8u[][2] = { - {0, 0}, - {nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R}, - {0, 0}, - {nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R}, + {0, 0}, + {nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R}, + {0, 0}, + {nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R}, {nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R} }; - static npp_warp_16u_t npp_warpAffine_16u[][2] = + static npp_warp_16u_t npp_warpAffine_16u[][2] = { - {0, 0}, - {nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R}, - {0, 0}, - {nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R}, + {0, 0}, + {nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R}, + {0, 0}, + {nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R}, {nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R} }; - static npp_warp_32s_t npp_warpAffine_32s[][2] = + static npp_warp_32s_t npp_warpAffine_32s[][2] = { - {0, 0}, - {nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R}, - {0, 0}, - {nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R}, + {0, 0}, + {nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R}, + {0, 0}, + {nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R}, {nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R} }; - static npp_warp_32f_t npp_warpAffine_32f[][2] = + static npp_warp_32f_t npp_warpAffine_32f[][2] = { - {0, 0}, - {nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R}, - {0, 0}, - {nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R}, + {0, 0}, + {nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R}, + {0, 0}, + {nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R}, {nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R} }; @@ -454,36 +456,36 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags) { - static npp_warp_8u_t npp_warpPerspective_8u[][2] = + static npp_warp_8u_t npp_warpPerspective_8u[][2] = { - {0, 0}, - {nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R}, - {0, 0}, - {nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R}, + {0, 0}, + {nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R}, + {0, 0}, + {nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R}, {nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R} }; - static npp_warp_16u_t npp_warpPerspective_16u[][2] = + static npp_warp_16u_t npp_warpPerspective_16u[][2] = { - {0, 0}, - {nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R}, - {0, 0}, - {nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R}, + {0, 0}, + {nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R}, + {0, 0}, + {nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R}, {nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R} }; - static npp_warp_32s_t npp_warpPerspective_32s[][2] = + static npp_warp_32s_t npp_warpPerspective_32s[][2] = { - {0, 0}, - {nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R}, - {0, 0}, - {nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R}, + {0, 0}, + {nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R}, + {0, 0}, + {nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R}, {nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R} }; - static npp_warp_32f_t npp_warpPerspective_32f[][2] = + static npp_warp_32f_t npp_warpPerspective_32f[][2] = { - {0, 0}, - {nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R}, - {0, 0}, - {nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R}, + {0, 0}, + {nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R}, + {0, 0}, + {nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R}, {nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R} }; @@ -502,7 +504,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation) { static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC}; - + CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4); CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC); @@ -522,12 +524,12 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d if (src.type() == CV_8UC1) { - nppSafeCall( nppiRotate_8u_C1R(src.ptr(), srcsz, src.step, srcroi, + nppSafeCall( nppiRotate_8u_C1R(src.ptr(), srcsz, src.step, srcroi, dst.ptr(), dst.step, dstroi, angle, xShift, yShift, npp_inter[interpolation]) ); } else { - nppSafeCall( nppiRotate_8u_C4R(src.ptr(), srcsz, src.step, srcroi, + nppSafeCall( nppiRotate_8u_C4R(src.ptr(), srcsz, src.step, srcroi, dst.ptr(), dst.step, dstroi, angle, xShift, yShift, npp_inter[interpolation]) ); } } @@ -538,7 +540,7 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum) { CV_Assert(src.type() == CV_8UC1); - + int w = src.cols + 1, h = src.rows + 1; sum.create(h, w, CV_32S); @@ -548,7 +550,7 @@ void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum) sz.width = src.cols; sz.height = src.rows; - nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(src.ptr(), src.step, sum.ptr(), + nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(src.ptr(), src.step, sum.ptr(), sum.step, sqsum.ptr(), sqsum.step, sz, 0, 0.0f, h) ); } @@ -569,7 +571,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons nppRect.y = rect.y; nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr(), src.step, sqr.ptr(), sqr.step, - dst.ptr(), dst.step, sz, nppRect) ); + dst.ptr(), dst.step, sz, nppRect) ); } //////////////////////////////////////////////////////////////////////// @@ -577,6 +579,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize) { +#if NPP_VERSION >= 32 CV_Assert(!"disabled until fix crash"); CV_Assert(image.type() == CV_8UC1); @@ -598,8 +601,11 @@ void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, doubl nppSafeCall( nppiCannyGetBufferSize(sz, &bufsz) ); GpuMat buf(1, bufsz, CV_8UC1); - nppSafeCall( nppiCanny_32f8u_C1R(srcDx.ptr(), srcDx.step, srcDy.ptr(), srcDy.step, + nppSafeCall( nppiCanny_32f8u_C1R(srcDx.ptr(), srcDx.step, srcDy.ptr(), srcDy.step, edges.ptr(), edges.step, sz, (Npp32f)threshold1, (Npp32f)threshold2, buf.ptr()) ); +#else + CV_Assert(!"This function doesn't supported"); +#endif } //////////////////////////////////////////////////////////////////////// @@ -612,7 +618,7 @@ namespace template<> struct NPPTypeTraits { typedef Npp16u npp_type; }; template<> struct NPPTypeTraits { typedef Npp16s npp_type; }; template<> struct NPPTypeTraits { typedef Npp32f npp_type; }; - + typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize); typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize); @@ -620,20 +626,20 @@ namespace { typedef typename NPPTypeTraits::npp_type src_t; - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist, + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist, int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer); }; template struct NppHistogramEvenFuncC4 { typedef typename NPPTypeTraits::npp_type src_t; - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer); }; - - template::func_ptr func, get_buf_size_c1_t get_buf_size> + + template::func_ptr func, get_buf_size_c1_t get_buf_size> struct NppHistogramEvenC1 - { + { typedef typename NppHistogramEvenFuncC1::src_t src_t; static void hist(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel) @@ -650,13 +656,13 @@ namespace get_buf_size(sz, levels, &buf_size); buffer.create(1, buf_size, CV_8U); - nppSafeCall( func(src.ptr(), src.step, sz, hist.ptr(), levels, + nppSafeCall( func(src.ptr(), src.step, sz, hist.ptr(), levels, lowerLevel, upperLevel, buffer.ptr()) ); } - }; - template::func_ptr func, get_buf_size_c4_t get_buf_size> + }; + template::func_ptr func, get_buf_size_c4_t get_buf_size> struct NppHistogramEvenC4 - { + { typedef typename NppHistogramEvenFuncC4::src_t src_t; static void hist(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4]) @@ -688,7 +694,7 @@ namespace typedef Npp32s level_t; enum {LEVEL_TYPE_CODE=CV_32SC1}; - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist, + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist, const Npp32s* pLevels, int nLevels, Npp8u* pBuffer); }; template<> struct NppHistogramRangeFuncC1 @@ -697,7 +703,7 @@ namespace typedef Npp32f level_t; enum {LEVEL_TYPE_CODE=CV_32FC1}; - typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist, + typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist, const Npp32f* pLevels, int nLevels, Npp8u* pBuffer); }; template struct NppHistogramRangeFuncC4 @@ -706,7 +712,7 @@ namespace typedef Npp32s level_t; enum {LEVEL_TYPE_CODE=CV_32SC1}; - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4], + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4], const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer); }; template<> struct NppHistogramRangeFuncC4 @@ -715,19 +721,19 @@ namespace typedef Npp32f level_t; enum {LEVEL_TYPE_CODE=CV_32FC1}; - typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4], + typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4], const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer); }; - - template::func_ptr func, get_buf_size_c1_t get_buf_size> + + template::func_ptr func, get_buf_size_c1_t get_buf_size> struct NppHistogramRangeC1 - { + { typedef typename NppHistogramRangeFuncC1::src_t src_t; typedef typename NppHistogramRangeFuncC1::level_t level_t; enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1::LEVEL_TYPE_CODE}; static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels) - { + { CV_Assert(levels.type() == LEVEL_TYPE_CODE && levels.rows == 1); hist.create(1, levels.cols - 1, CV_32S); @@ -743,10 +749,10 @@ namespace buffer.create(1, buf_size, CV_8U); nppSafeCall( func(src.ptr(), src.step, sz, hist.ptr(), levels.ptr(), levels.cols, buffer.ptr()) ); } - }; - template::func_ptr func, get_buf_size_c4_t get_buf_size> + }; + template::func_ptr func, get_buf_size_c4_t get_buf_size> struct NppHistogramRangeC4 - { + { typedef typename NppHistogramRangeFuncC4::src_t src_t; typedef typename NppHistogramRangeFuncC1::level_t level_t; enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1::LEVEL_TYPE_CODE}; @@ -778,22 +784,27 @@ namespace buffer.create(1, buf_size, CV_8U); nppSafeCall( func(src.ptr(), src.step, sz, pHist, pLevels, nLevels, buffer.ptr()) ); } - }; + }; } void cv::gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel) { +#if NPP_VERSION >= 32 Mat host_levels(1, nLevels, CV_32SC1); nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr(), nLevels, lowerLevel, upperLevel) ); levels.upload(host_levels); +#else + CV_Assert(!"This function doesn't supported"); +#endif } void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel) { +#if NPP_VERSION >= 32 CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 ); typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, int levels, int lowerLevel, int upperLevel); - static const hist_t hist_callers[] = + static const hist_t hist_callers[] = { NppHistogramEvenC1::hist, 0, @@ -802,14 +813,18 @@ void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerL }; hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel); +#else + CV_Assert(!"This function doesn't supported"); +#endif } void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4]) { +#if NPP_VERSION >= 32 CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 ); - + typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], int levels[4], int lowerLevel[4], int upperLevel[4]); - static const hist_t hist_callers[] = + static const hist_t hist_callers[] = { NppHistogramEvenC4::hist, 0, @@ -818,14 +833,18 @@ void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int l }; hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel); +#else + CV_Assert(!"This function doesn't supported"); +#endif } void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels) { +#if NPP_VERSION >= 32 CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1); typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels); - static const hist_t hist_callers[] = + static const hist_t hist_callers[] = { NppHistogramRangeC1::hist, 0, @@ -836,14 +855,18 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels) }; hist_callers[src.depth()](src, hist, levels); +#else + CV_Assert(!"This function doesn't supported"); +#endif } void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4]) { +#if NPP_VERSION >= 32 CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4); typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4]); - static const hist_t hist_callers[] = + static const hist_t hist_callers[] = { NppHistogramRangeC4::hist, 0, @@ -854,6 +877,9 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4 }; hist_callers[src.depth()](src, hist, levels); +#else + CV_Assert(!"This function doesn't supported"); +#endif } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp index 814c79c6b4..99db93af5d 100644 --- a/modules/gpu/src/matrix_operations.cpp +++ b/modules/gpu/src/matrix_operations.cpp @@ -77,12 +77,14 @@ namespace cv #else /* !defined (HAVE_CUDA) */ -namespace cv +#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR) + +namespace cv { namespace gpu { namespace matrix_operations - { + { void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0); void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0); @@ -162,9 +164,9 @@ namespace typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode); }; - - template::func_ptr func> struct NppCvt - { + + template::func_ptr func> struct NppCvt + { typedef typename NPPTypeTraits::npp_type src_t; typedef typename NPPTypeTraits::npp_type dst_t; @@ -177,7 +179,7 @@ namespace } }; template::func_ptr func> struct NppCvt - { + { typedef typename NPPTypeTraits::npp_type dst_t; static void cvt(const GpuMat& src, GpuMat& dst) @@ -203,7 +205,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be rtype = type(); else rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); - + int scn = channels(); int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); if( sdepth == ddepth && noScale ) @@ -224,7 +226,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be else { typedef void (*convert_caller_t)(const GpuMat& src, GpuMat& dst); - static const convert_caller_t convert_callers[8][8][4] = + static const convert_caller_t convert_callers[8][8][4] = { { {0,0,0,0}, @@ -232,7 +234,11 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, +#if NPP_VERSION >= 32 {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, +#else + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, +#endif {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {0,0,0,0} }, @@ -251,8 +257,8 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {0,0,0,0}, {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {0,0,0,0} }, @@ -261,8 +267,8 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {0,0,0,0}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {0,0,0,0} }, @@ -277,10 +283,14 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be {0,0,0,0} }, { +#if NPP_VERSION >= 32 {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, +#else {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, +#endif + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, {0,0,0,0}, {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, @@ -325,9 +335,9 @@ namespace typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI); }; - - template::func_ptr func> struct NppSet - { + + template::func_ptr func> struct NppSet + { typedef typename NPPTypeTraits::npp_type src_t; static void set(GpuMat& src, const Scalar& s) @@ -340,7 +350,7 @@ namespace } }; template::func_ptr func> struct NppSet - { + { typedef typename NPPTypeTraits::npp_type src_t; static void set(GpuMat& src, const Scalar& s) @@ -357,7 +367,7 @@ namespace { matrix_operations::set_to_without_mask(src, src.depth(), s.val, src.channels()); } - + template struct NppSetMaskFunc { typedef typename NPPTypeTraits::npp_type src_t; @@ -370,9 +380,9 @@ namespace typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); }; - + template::func_ptr func> struct NppSetMask - { + { typedef typename NPPTypeTraits::npp_type src_t; static void set(GpuMat& src, const Scalar& s, const GpuMat& mask) @@ -385,7 +395,7 @@ namespace } }; template::func_ptr func> struct NppSetMask - { + { typedef typename NPPTypeTraits::npp_type src_t; static void set(GpuMat& src, const Scalar& s, const GpuMat& mask) @@ -397,7 +407,7 @@ namespace nppSafeCall( func(nppS[0], src.ptr(), src.step, sz, mask.ptr(), mask.step) ); } }; - + void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask) { matrix_operations::set_to_with_mask(src, src.depth(), s.val, mask, src.channels()); @@ -409,7 +419,7 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask) CV_Assert(mask.type() == CV_8UC1); CV_DbgAssert(!this->empty()); - + NppiSize sz; sz.width = cols; sz.height = rows; @@ -421,17 +431,34 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask) { {NppSet::set,kernelSet,kernelSet,NppSet::set}, {kernelSet,kernelSet,kernelSet,kernelSet}, +#if NPP_VERSION >= 32 {NppSet::set,kernelSet,kernelSet,NppSet::set}, +#else + {kernelSet,kernelSet,kernelSet,kernelSet}, +#endif +#if NPP_VERSION >= 32 {NppSet::set,kernelSet,kernelSet,NppSet::set}, +#else + {kernelSet,kernelSet,kernelSet,kernelSet}, +#endif +#if NPP_VERSION >= 32 {NppSet::set,kernelSet,kernelSet,NppSet::set}, +#else + {NppSet::set,kernelSet,kernelSet,kernelSet}, +#endif +#if NPP_VERSION >= 32 {NppSet::set,kernelSet,kernelSet,NppSet::set}, +#else + {NppSet::set,kernelSet,kernelSet,kernelSet}, +#endif {kernelSet,kernelSet,kernelSet,kernelSet}, {0,0,0,0} }; - set_callers[depth()][channels()-1](*this, s); + set_callers[depth()][channels()-1](*this, s); } else { +#if NPP_VERSION >= 32 typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask); static const set_caller_t set_callers[8][4] = { @@ -445,6 +472,9 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask) {0,0,0,0} }; set_callers[depth()][channels()-1](*this, s, mask); +#else + kernelSetMask(*this, s, mask); +#endif } return *this; @@ -550,7 +580,7 @@ bool cv::gpu::CudaMem::can_device_map_to_host() } void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type) -{ +{ if (_alloc_type == ALLOC_ZEROCOPY && !can_device_map_to_host()) cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__); @@ -561,7 +591,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type) release(); CV_DbgAssert( _rows >= 0 && _cols >= 0 ); if( _rows > 0 && _cols > 0 ) - { + { flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type; rows = _rows; cols = _cols; @@ -575,7 +605,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type) //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount)); alloc_type = _alloc_type; void *ptr; - + switch (alloc_type) { case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break; @@ -603,7 +633,7 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const } else cv::gpu::error("Zero-copy is not supported or memory was allocated without zero-copy flag", __FILE__, __LINE__); - + return res; }