diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp index a7b39dd1c9..3be2e052b2 100644 --- a/modules/gpu/src/imgproc.cpp +++ b/modules/gpu/src/imgproc.cpp @@ -2034,7 +2034,6 @@ namespace cv { namespace gpu { namespace device void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stream) { -#ifdef _WIN32 using namespace cv::gpu::device::pyramid; typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream); @@ -2081,14 +2080,10 @@ void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stre szLastLayer = szCurLayer; } -#else - throw_nogpu(); -#endif } void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream) const { -#ifdef _WIN32 using namespace cv::gpu::device::pyramid; typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream); @@ -2145,9 +2140,6 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream CV_Assert(func != 0); func(lastLayer, outImg, StreamAccessor::getStream(stream)); -#else - throw_nogpu(); -#endif } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/nvidia/core/NCVPyramid.cu b/modules/gpu/src/nvidia/core/NCVPyramid.cu index a75f3b0024..c9f5971338 100644 --- a/modules/gpu/src/nvidia/core/NCVPyramid.cu +++ b/modules/gpu/src/nvidia/core/NCVPyramid.cu @@ -48,8 +48,6 @@ #include "NCVPixelOperations.hpp" #include "opencv2/gpu/device/common.hpp" -#ifdef _WIN32 - template struct __average4_CN {static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11);}; template struct __average4_CN { @@ -179,37 +177,6 @@ template static __host__ __device__ Tout _lerp(cons } -template -static T _interpLinear(const T &a, const T &b, Ncv32f d) -{ - typedef typename TConvBase2Vec::TVec TVFlt; - TVFlt tmp = _lerp(a, b, d); - return _pixDemoteClampZ(tmp); -} - - -template -static T _interpBilinear(const NCVMatrix &refLayer, Ncv32f x, Ncv32f y) -{ - Ncv32u xl = (Ncv32u)x; - Ncv32u xh = xl+1; - Ncv32f dx = x - xl; - Ncv32u yl = (Ncv32u)y; - Ncv32u yh = yl+1; - Ncv32f dy = y - yl; - T p00, p01, p10, p11; - p00 = refLayer.at(xl, yl); - p01 = xh < refLayer.width() ? refLayer.at(xh, yl) : p00; - p10 = yh < refLayer.height() ? refLayer.at(xl, yh) : p00; - p11 = (xh < refLayer.width() && yh < refLayer.height()) ? refLayer.at(xh, yh) : p00; - typedef typename TConvBase2Vec::TVec TVFlt; - TVFlt m_00_01 = _lerp(p00, p01, dx); - TVFlt m_10_11 = _lerp(p10, p11, dx); - TVFlt mixture = _lerp(m_00_01, m_10_11, dy); - return _pixDemoteClampZ(mixture); -} - - template __global__ void kernelDownsampleX2(T *d_src, Ncv32u srcPitch, @@ -342,6 +309,38 @@ namespace cv { namespace gpu { namespace device }}} +#ifdef _WIN32 + +template +static T _interpLinear(const T &a, const T &b, Ncv32f d) +{ + typedef typename TConvBase2Vec::TVec TVFlt; + TVFlt tmp = _lerp(a, b, d); + return _pixDemoteClampZ(tmp); +} + + +template +static T _interpBilinear(const NCVMatrix &refLayer, Ncv32f x, Ncv32f y) +{ + Ncv32u xl = (Ncv32u)x; + Ncv32u xh = xl+1; + Ncv32f dx = x - xl; + Ncv32u yl = (Ncv32u)y; + Ncv32u yh = yl+1; + Ncv32f dy = y - yl; + T p00, p01, p10, p11; + p00 = refLayer.at(xl, yl); + p01 = xh < refLayer.width() ? refLayer.at(xh, yl) : p00; + p10 = yh < refLayer.height() ? refLayer.at(xl, yh) : p00; + p11 = (xh < refLayer.width() && yh < refLayer.height()) ? refLayer.at(xh, yh) : p00; + typedef typename TConvBase2Vec::TVec TVFlt; + TVFlt m_00_01 = _lerp(p00, p01, dx); + TVFlt m_10_11 = _lerp(p10, p11, dx); + TVFlt mixture = _lerp(m_00_01, m_10_11, dy); + return _pixDemoteClampZ(mixture); +} + template NCVImagePyramid::NCVImagePyramid(const NCVMatrix &img, Ncv8u numLayers, diff --git a/modules/gpu/src/orb.cpp b/modules/gpu/src/orb.cpp index 0a02c3b3b0..d038efabc5 100644 --- a/modules/gpu/src/orb.cpp +++ b/modules/gpu/src/orb.cpp @@ -577,7 +577,8 @@ void cv::gpu::ORB_GPU::computeKeyPointsPyramid() ensureSizeIsEnough(3, keyPointsCount_[level], CV_32FC1, keyPointsPyr_[level]); - keyPointsCount_[level] = fastDetector_.getKeyPoints(keyPointsPyr_[level].rowRange(0, 2)); + GpuMat fastKpRange = keyPointsPyr_[level].rowRange(0, 2); + keyPointsCount_[level] = fastDetector_.getKeyPoints(fastKpRange); int n_features = n_features_per_level_[level]; @@ -664,7 +665,8 @@ void cv::gpu::ORB_GPU::mergeKeyPoints(GpuMat& keypoints) mergeLocation_gpu(keyPointsPyr_[level].ptr(0), keyPointsRange.ptr(0), keyPointsRange.ptr(1), keyPointsCount_[level], locScale, 0); - keyPointsPyr_[level].rowRange(1, 3).copyTo(keyPointsRange.rowRange(2, 4)); + GpuMat range = keyPointsRange.rowRange(2, 4); + keyPointsPyr_[level].rowRange(1, 3).copyTo(range); keyPointsRange.row(4).setTo(Scalar::all(level)); keyPointsRange.row(5).setTo(Scalar::all(params_.patch_size_ * sf));