From ea2f5b13918e0305ef54c6f71f05d06917b0e054 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Aug 2011 13:21:14 +0000 Subject: [PATCH] added BORDER_REFLECT and BORDER_WRAP support to gpu module switched to gpu::remap in opencv_stitching --- modules/gpu/src/cuda/filters.cu | 92 ++++- modules/gpu/src/cuda/imgproc.cu | 10 +- modules/gpu/src/cuda/internal_shared.hpp | 4 +- modules/gpu/src/filtering.cpp | 4 +- modules/gpu/src/imgproc.cpp | 30 +- .../opencv2/gpu/device/border_interpolate.hpp | 350 +++++++++++++++--- modules/gpu/test/test_imgproc.cpp | 23 +- modules/stitching/matchers.cpp | 8 - modules/stitching/warpers.cpp | 30 +- modules/stitching/warpers.hpp | 6 +- 10 files changed, 450 insertions(+), 107 deletions(-) diff --git a/modules/gpu/src/cuda/filters.cu b/modules/gpu/src/cuda/filters.cu index 3384249ad3..41f403a281 100644 --- a/modules/gpu/src/cuda/filters.cu +++ b/modules/gpu/src/cuda/filters.cu @@ -151,7 +151,7 @@ namespace cv { namespace gpu { namespace filters void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream) { typedef void (*caller_t)(const DevMem2D_& src, const DevMem2D_& dst, int anchor, cudaStream_t stream); - static const caller_t callers[3][17] = + static const caller_t callers[5][17] = { { 0, @@ -170,7 +170,7 @@ namespace cv { namespace gpu { namespace filters linearRowFilter_caller<13, T, D, BrdRowReflect101>, linearRowFilter_caller<14, T, D, BrdRowReflect101>, linearRowFilter_caller<15, T, D, BrdRowReflect101>, - linearRowFilter_caller<16, T, D, BrdRowReflect101>, + linearRowFilter_caller<16, T, D, BrdRowReflect101> }, { 0, @@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace filters linearRowFilter_caller<13, T, D, BrdRowReplicate>, linearRowFilter_caller<14, T, D, BrdRowReplicate>, linearRowFilter_caller<15, T, D, BrdRowReplicate>, - linearRowFilter_caller<16, T, D, BrdRowReplicate>, + linearRowFilter_caller<16, T, D, BrdRowReplicate> }, { 0, @@ -208,7 +208,45 @@ namespace cv { namespace gpu { namespace filters linearRowFilter_caller<13, T, D, BrdRowConstant>, linearRowFilter_caller<14, T, D, BrdRowConstant>, linearRowFilter_caller<15, T, D, BrdRowConstant>, - linearRowFilter_caller<16, T, D, BrdRowConstant>, + linearRowFilter_caller<16, T, D, BrdRowConstant> + }, + { + 0, + linearRowFilter_caller<1 , T, D, BrdRowReflect>, + linearRowFilter_caller<2 , T, D, BrdRowReflect>, + linearRowFilter_caller<3 , T, D, BrdRowReflect>, + linearRowFilter_caller<4 , T, D, BrdRowReflect>, + linearRowFilter_caller<5 , T, D, BrdRowReflect>, + linearRowFilter_caller<6 , T, D, BrdRowReflect>, + linearRowFilter_caller<7 , T, D, BrdRowReflect>, + linearRowFilter_caller<8 , T, D, BrdRowReflect>, + linearRowFilter_caller<9 , T, D, BrdRowReflect>, + linearRowFilter_caller<10, T, D, BrdRowReflect>, + linearRowFilter_caller<11, T, D, BrdRowReflect>, + linearRowFilter_caller<12, T, D, BrdRowReflect>, + linearRowFilter_caller<13, T, D, BrdRowReflect>, + linearRowFilter_caller<14, T, D, BrdRowReflect>, + linearRowFilter_caller<15, T, D, BrdRowReflect>, + linearRowFilter_caller<16, T, D, BrdRowReflect> + }, + { + 0, + linearRowFilter_caller<1 , T, D, BrdRowWrap>, + linearRowFilter_caller<2 , T, D, BrdRowWrap>, + linearRowFilter_caller<3 , T, D, BrdRowWrap>, + linearRowFilter_caller<4 , T, D, BrdRowWrap>, + linearRowFilter_caller<5 , T, D, BrdRowWrap>, + linearRowFilter_caller<6 , T, D, BrdRowWrap>, + linearRowFilter_caller<7 , T, D, BrdRowWrap>, + linearRowFilter_caller<8 , T, D, 
BrdRowWrap>, + linearRowFilter_caller<9 , T, D, BrdRowWrap>, + linearRowFilter_caller<10, T, D, BrdRowWrap>, + linearRowFilter_caller<11, T, D, BrdRowWrap>, + linearRowFilter_caller<12, T, D, BrdRowWrap>, + linearRowFilter_caller<13, T, D, BrdRowWrap>, + linearRowFilter_caller<14, T, D, BrdRowWrap>, + linearRowFilter_caller<15, T, D, BrdRowWrap>, + linearRowFilter_caller<16, T, D, BrdRowWrap> } }; @@ -292,7 +330,7 @@ namespace cv { namespace gpu { namespace filters void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream) { typedef void (*caller_t)(const DevMem2D_& src, const DevMem2D_& dst, int anchor, cudaStream_t stream); - static const caller_t callers[3][17] = + static const caller_t callers[5][17] = { { 0, @@ -311,7 +349,7 @@ namespace cv { namespace gpu { namespace filters linearColumnFilter_caller<13, T, D, BrdColReflect101>, linearColumnFilter_caller<14, T, D, BrdColReflect101>, linearColumnFilter_caller<15, T, D, BrdColReflect101>, - linearColumnFilter_caller<16, T, D, BrdColReflect101>, + linearColumnFilter_caller<16, T, D, BrdColReflect101> }, { 0, @@ -330,7 +368,7 @@ namespace cv { namespace gpu { namespace filters linearColumnFilter_caller<13, T, D, BrdColReplicate>, linearColumnFilter_caller<14, T, D, BrdColReplicate>, linearColumnFilter_caller<15, T, D, BrdColReplicate>, - linearColumnFilter_caller<16, T, D, BrdColReplicate>, + linearColumnFilter_caller<16, T, D, BrdColReplicate> }, { 0, @@ -349,7 +387,45 @@ namespace cv { namespace gpu { namespace filters linearColumnFilter_caller<13, T, D, BrdColConstant>, linearColumnFilter_caller<14, T, D, BrdColConstant>, linearColumnFilter_caller<15, T, D, BrdColConstant>, - linearColumnFilter_caller<16, T, D, BrdColConstant>, + linearColumnFilter_caller<16, T, D, BrdColConstant> + }, + { + 0, + linearColumnFilter_caller<1 , T, D, BrdColReflect>, + linearColumnFilter_caller<2 , T, D, BrdColReflect>, + linearColumnFilter_caller<3 , T, D, BrdColReflect>, + linearColumnFilter_caller<4 , T, D, BrdColReflect>, + linearColumnFilter_caller<5 , T, D, BrdColReflect>, + linearColumnFilter_caller<6 , T, D, BrdColReflect>, + linearColumnFilter_caller<7 , T, D, BrdColReflect>, + linearColumnFilter_caller<8 , T, D, BrdColReflect>, + linearColumnFilter_caller<9 , T, D, BrdColReflect>, + linearColumnFilter_caller<10, T, D, BrdColReflect>, + linearColumnFilter_caller<11, T, D, BrdColReflect>, + linearColumnFilter_caller<12, T, D, BrdColReflect>, + linearColumnFilter_caller<13, T, D, BrdColReflect>, + linearColumnFilter_caller<14, T, D, BrdColReflect>, + linearColumnFilter_caller<15, T, D, BrdColReflect>, + linearColumnFilter_caller<16, T, D, BrdColReflect> + }, + { + 0, + linearColumnFilter_caller<1 , T, D, BrdColWrap>, + linearColumnFilter_caller<2 , T, D, BrdColWrap>, + linearColumnFilter_caller<3 , T, D, BrdColWrap>, + linearColumnFilter_caller<4 , T, D, BrdColWrap>, + linearColumnFilter_caller<5 , T, D, BrdColWrap>, + linearColumnFilter_caller<6 , T, D, BrdColWrap>, + linearColumnFilter_caller<7 , T, D, BrdColWrap>, + linearColumnFilter_caller<8 , T, D, BrdColWrap>, + linearColumnFilter_caller<9 , T, D, BrdColWrap>, + linearColumnFilter_caller<10, T, D, BrdColWrap>, + linearColumnFilter_caller<11, T, D, BrdColWrap>, + linearColumnFilter_caller<12, T, D, BrdColWrap>, + linearColumnFilter_caller<13, T, D, BrdColWrap>, + linearColumnFilter_caller<14, T, D, BrdColWrap>, + linearColumnFilter_caller<15, T, D, BrdColWrap>, + linearColumnFilter_caller<16, T, D, 
BrdColWrap>, } }; diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index b5f3b6b128..40f0d9937c 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -120,10 +120,10 @@ namespace cv { namespace gpu { namespace imgproc { typedef void (*caller_t)(const DevMem2D_& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_& dst, T borderValue); - static const caller_t callers[2][3] = + static const caller_t callers[2][5] = { - { remap_caller, remap_caller, remap_caller }, - { remap_caller, remap_caller, remap_caller } + { remap_caller, remap_caller, remap_caller, remap_caller, remap_caller }, + { remap_caller, remap_caller, remap_caller, remap_caller, remap_caller } }; typename VecTraits::elem_type brd[] = {(typename VecTraits::elem_type)borderValue[0], (typename VecTraits::elem_type)borderValue[1], (typename VecTraits::elem_type)borderValue[2], (typename VecTraits::elem_type)borderValue[3]}; @@ -1089,7 +1089,7 @@ namespace cv { namespace gpu { namespace imgproc static const caller_t callers[] = { - pyrDown_caller, pyrDown_caller, pyrDown_caller + pyrDown_caller, pyrDown_caller, pyrDown_caller, pyrDown_caller, pyrDown_caller }; callers[borderType](static_cast< DevMem2D_ >(src), static_cast< DevMem2D_ >(dst), stream); @@ -1219,7 +1219,7 @@ namespace cv { namespace gpu { namespace imgproc static const caller_t callers[] = { - pyrUp_caller, pyrUp_caller, pyrUp_caller + pyrUp_caller, pyrUp_caller, pyrUp_caller, pyrUp_caller, pyrUp_caller }; callers[borderType](static_cast< DevMem2D_ >(src), static_cast< DevMem2D_ >(dst), stream); diff --git a/modules/gpu/src/cuda/internal_shared.hpp b/modules/gpu/src/cuda/internal_shared.hpp index 9a3086b67b..1d13735f99 100644 --- a/modules/gpu/src/cuda/internal_shared.hpp +++ b/modules/gpu/src/cuda/internal_shared.hpp @@ -70,7 +70,9 @@ namespace cv { BORDER_REFLECT101_GPU = 0, BORDER_REPLICATE_GPU, - BORDER_CONSTANT_GPU + BORDER_CONSTANT_GPU, + BORDER_REFLECT_GPU, + BORDER_WRAP_GPU }; // Converts CPU border extrapolation mode into GPU internal analogue. 
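Note (editor's illustration, not part of the patch): the two new enum values implement OpenCV's BORDER_REFLECT (fedcba|abcdefgh|hgfedcb) and BORDER_WRAP (cdefgh|abcdefgh|abcdefg) extrapolation modes on the GPU. The standalone host-side sketch below mirrors the index arithmetic used by the BrdRowReflect and BrdRowWrap functors this patch adds to border_interpolate.hpp; the helper names are hypothetical, and, as with the functors' is_range_safe() contract, the reflect formula is only meant for indices within one image width of the border.

    // Host-side reference for the new border modes (illustration only).
    #include <cassert>
    #include <cstdlib>

    // BORDER_REFLECT: fedcba|abcdefgh|hgfedcb (the edge pixel is repeated).
    // Mirrors BrdRowReflect::idx_col_low / idx_col_high from the patch.
    inline int reflect_idx(int x, int width)
    {
        const int last = width - 1;
        x = std::abs(x) - (x < 0);                      // fold x < 0 back into the row
        return last - std::abs(last - x) + (x > last);  // fold x > last back into the row
    }

    // BORDER_WRAP: cdefgh|abcdefgh|abcdefg (the row tiles periodically).
    // Mirrors BrdRowWrap::idx_col_low / idx_col_high from the patch.
    inline int wrap_idx(int x, int width)
    {
        if (x < 0)
            x -= ((x - width + 1) / width) * width;  // floor-modulo for negative x
        if (x >= width)
            x %= width;
        return x;
    }

    int main()
    {
        assert(reflect_idx(-2, 8) == 1 && reflect_idx(9, 8) == 6);  // b and g
        assert(wrap_idx(-1, 8) == 7 && wrap_idx(10, 8) == 2);       // h and c
        return 0;
    }
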
diff --git a/modules/gpu/src/filtering.cpp b/modules/gpu/src/filtering.cpp
index 2d1a934101..0144d4b4f8 100644
--- a/modules/gpu/src/filtering.cpp
+++ b/modules/gpu/src/filtering.cpp
@@ -718,7 +718,7 @@ Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
             nppFilter1D_callers[CV_MAT_CN(srcType)]));
     }
 
-    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);
+    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
 
     int gpuBorderType;
     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
@@ -833,7 +833,7 @@ Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
             nppFilter1D_callers[CV_MAT_CN(bufType)]));
     }
 
-    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);
+    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
 
     int gpuBorderType;
     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp
index 20782c6fb9..f65eee1938 100644
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -133,7 +133,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
 
     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR);
 
-    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT);
+    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
 
     int gpuBorderType;
     CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
@@ -1228,24 +1228,26 @@ namespace
 
 bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
 {
-    if (cpuBorderType == cv::BORDER_REFLECT101)
+    switch (cpuBorderType)
     {
+    case cv::BORDER_REFLECT101:
         gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;
         return true;
-    }
-
-    if (cpuBorderType == cv::BORDER_REPLICATE)
-    {
+    case cv::BORDER_REPLICATE:
         gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
         return true;
-    }
-
-    if (cpuBorderType == cv::BORDER_CONSTANT)
-    {
+    case cv::BORDER_CONSTANT:
         gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
         return true;
-    }
-
+    case cv::BORDER_REFLECT:
+        gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;
+        return true;
+    case cv::BORDER_WRAP:
+        gpuBorderType = cv::gpu::BORDER_WRAP_GPU;
+        return true;
+    default:
+        return false;
+    };
 
     return false;
 }
@@ -1647,7 +1649,7 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& st
 
     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
 
-    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);
+    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
 
     int gpuBorderType;
     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
@@ -1683,7 +1685,7 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stre
 
     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
 
-    CV_Assert(borderType == BORDER_REFLECT101 || borderType ==
BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP); int gpuBorderType; CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType)); diff --git a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp index 2ac09a0a30..53b8189f77 100644 --- a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp +++ b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp @@ -246,55 +246,31 @@ namespace cv { namespace gpu { namespace device { return ::max(y, 0); } - __device__ __forceinline__ float idx_row_low(float y) const - { - return ::fmax(y, 0.0f); - } __device__ __forceinline__ int idx_row_high(int y) const { return ::min(y, last_row); } - __device__ __forceinline__ float idx_row_high(float y) const - { - return ::fmin(y, last_row); - } __device__ __forceinline__ int idx_row(int y) const { return idx_row_low(idx_row_high(y)); } - __device__ __forceinline__ float idx_row(float y) const - { - return idx_row_low(idx_row_high(y)); - } __device__ __forceinline__ int idx_col_low(int x) const { return ::max(x, 0); } - __device__ __forceinline__ float idx_col_low(float x) const - { - return ::fmax(x, 0); - } __device__ __forceinline__ int idx_col_high(int x) const { return ::min(x, last_col); } - __device__ __forceinline__ float idx_col_high(float x) const - { - return ::fmin(x, last_col); - } __device__ __forceinline__ int idx_col(int x) const { return idx_col_low(idx_col_high(x)); } - __device__ __forceinline__ float idx_col(float x) const - { - return idx_col_low(idx_col_high(x)); - } template __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const { @@ -421,55 +397,31 @@ namespace cv { namespace gpu { namespace device { return ::abs(y); } - __device__ __forceinline__ float idx_row_low(float y) const - { - return ::fabs(y); - } __device__ __forceinline__ int idx_row_high(int y) const { return last_row - ::abs(last_row - y); } - __device__ __forceinline__ float idx_row_high(float y) const - { - return last_row - ::fabs(last_row - y); - } __device__ __forceinline__ int idx_row(int y) const { return idx_row_low(idx_row_high(y)); } - __device__ __forceinline__ float idx_row(float y) const - { - return idx_row_low(idx_row_high(y)); - } __device__ __forceinline__ int idx_col_low(int x) const { return ::abs(x); } - __device__ __forceinline__ float idx_col_low(float x) const - { - return ::fabs(x); - } __device__ __forceinline__ int idx_col_high(int x) const { return last_col - ::abs(last_col - x); } - __device__ __forceinline__ float idx_col_high(float x) const - { - return last_col - ::fabs(last_col - x); - } __device__ __forceinline__ int idx_col(int x) const { return idx_col_low(idx_col_high(x)); } - __device__ __forceinline__ float idx_col(float x) const - { - return idx_col_low(idx_col_high(x)); - } template __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const { @@ -485,6 +437,308 @@ namespace cv { namespace gpu { namespace device const int last_col; }; + ////////////////////////////////////////////////////////////// + // BrdReflect + + template struct BrdRowReflect + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {} + template __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::abs(x) - (x < 0); + } + + __device__ __forceinline__ int 
idx_col_high(int x) const + { + return last_col - ::abs(last_col - x) + (x > last_col); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast(data[idx_col_low(x)]); + } + + template __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast(data[idx_col_high(x)]); + } + + template __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast(data[idx_col(x)]); + } + + __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const + { + return -last_col <= mini && maxi <= 2 * last_col; + } + + const int last_col; + }; + + template struct BrdColReflect + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {} + template __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::abs(y) - (y < 0); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return last_row - ::abs(last_row - y) + (y > last_row); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + template __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_low(y) * step)); + } + + template __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_high(y) * step)); + } + + template __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row(y) * step)); + } + + __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const + { + return -last_row <= mini && maxi <= 2 * last_row; + } + + const int last_row; + }; + + template struct BrdReflect + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdReflect(int height, int width) : + last_row(height - 1), last_col(width - 1) + { + } + template + __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : + last_row(height - 1), last_col(width - 1) + { + } + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::abs(y) - (y < 0); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return last_row - ::abs(last_row - y) + (y > last_row); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::abs(x) - (x < 0); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return last_col - ::abs(last_col - x) + (x > last_col); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return saturate_cast(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast(src(idx_row(y), idx_col(x))); + } + + const int last_row; + const int last_col; + }; + + ////////////////////////////////////////////////////////////// + // BrdWrap + + 
template struct BrdRowWrap + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {} + template __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return (x < width) * x + (x >= width) * (x % width); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_high(idx_col_low(x)); + } + + template __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast(data[idx_col_low(x)]); + } + + template __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast(data[idx_col_high(x)]); + } + + template __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast(data[idx_col(x)]); + } + + __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const + { + return true; + } + + const int width; + }; + + template struct BrdColWrap + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {} + template __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return (y < height) * y + (y >= height) * (y % height); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_high(idx_row_low(y)); + } + + template __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_low(y) * step)); + } + + template __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row_high(y) * step)); + } + + template __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast(*(const D*)((const char*)data + idx_row(y) * step)); + } + + __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const + { + return true; + } + + const int height; + }; + + template struct BrdWrap + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdWrap(int height_, int width_) : + height(height_), width(width_) + { + } + template + __host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) : + height(height_), width(width_) + { + } + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return (y < height) * y + (y >= height) * (y % height); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_high(idx_row_low(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return (x < width) * x + (x >= width) * (x % width); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_high(idx_col_low(x)); + } + + template __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) 
const + { + return saturate_cast(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast(src(idx_row(y), idx_col(x))); + } + + const int height; + const int width; + }; + ////////////////////////////////////////////////////////////// // BorderReader diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index 8236c16f04..5e26581425 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -192,7 +192,6 @@ struct Remap : testing::TestWithParam< std::tr1::tuple 0) - { - DeviceInfo info; - if (info.majorVersion() >= 2 && cv::getNumberOfCPUs() < 4) - use_gpu = true; - } - - if (use_gpu) impl_ = new GpuMatcher(match_conf); else impl_ = new CpuMatcher(match_conf); diff --git a/modules/stitching/warpers.cpp b/modules/stitching/warpers.cpp index aceedac53c..e9d9cd9ce2 100644 --- a/modules/stitching/warpers.cpp +++ b/modules/stitching/warpers.cpp @@ -118,8 +118,14 @@ Point PlaneWarperGpu::warp(const Mat &src, float focal, const cv::Mat &R, cv::Ma gpu::buildWarpPlaneMaps(src.size(), Rect(dst_tl, Point(dst_br.x+1, dst_br.y+1)), R, focal, projector_.scale, projector_.plane_dist, d_xmap_, d_ymap_); - dst.create(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, src.type()); - remap(src, dst, Mat(d_xmap_), Mat(d_ymap_), interp_mode, border_mode); + gpu::ensureSizeIsEnough(src.size(), src.type(), d_src_); + d_src_.upload(src); + + gpu::ensureSizeIsEnough(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, src.type(), d_dst_); + + gpu::remap(d_src_, d_dst_, d_xmap_, d_ymap_, interp_mode, border_mode); + + d_dst_.download(dst); return dst_tl; } @@ -183,8 +189,14 @@ Point SphericalWarperGpu::warp(const Mat &src, float focal, const Mat &R, Mat &d gpu::buildWarpSphericalMaps(src.size(), Rect(dst_tl, Point(dst_br.x+1, dst_br.y+1)), R, focal, projector_.scale, d_xmap_, d_ymap_); - dst.create(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, src.type()); - remap(src, dst, Mat(d_xmap_), Mat(d_ymap_), interp_mode, border_mode); + gpu::ensureSizeIsEnough(src.size(), src.type(), d_src_); + d_src_.upload(src); + + gpu::ensureSizeIsEnough(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, src.type(), d_dst_); + + gpu::remap(d_src_, d_dst_, d_xmap_, d_ymap_, interp_mode, border_mode); + + d_dst_.download(dst); return dst_tl; } @@ -204,8 +216,14 @@ Point CylindricalWarperGpu::warp(const Mat &src, float focal, const Mat &R, Mat gpu::buildWarpCylindricalMaps(src.size(), Rect(dst_tl, Point(dst_br.x+1, dst_br.y+1)), R, focal, projector_.scale, d_xmap_, d_ymap_); - dst.create(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, src.type()); - remap(src, dst, Mat(d_xmap_), Mat(d_ymap_), interp_mode, border_mode); + gpu::ensureSizeIsEnough(src.size(), src.type(), d_src_); + d_src_.upload(src); + + gpu::ensureSizeIsEnough(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, src.type(), d_dst_); + + gpu::remap(d_src_, d_dst_, d_xmap_, d_ymap_, interp_mode, border_mode); + + d_dst_.download(dst); return dst_tl; } diff --git a/modules/stitching/warpers.hpp b/modules/stitching/warpers.hpp index 47a162e6c7..b39fb1a958 100644 --- a/modules/stitching/warpers.hpp +++ b/modules/stitching/warpers.hpp @@ -122,7 +122,7 @@ public: int interp_mode, int border_mode); private: - cv::gpu::GpuMat d_xmap_, d_ymap_, d_dst_; + cv::gpu::GpuMat d_xmap_, d_ymap_, d_dst_, d_src_; }; @@ -153,7 +153,7 @@ public: int interp_mode, int 
border_mode);
 
 private:
-    cv::gpu::GpuMat d_xmap_, d_ymap_, d_dst_;
+    cv::gpu::GpuMat d_xmap_, d_ymap_, d_dst_, d_src_;
 };
 
 
@@ -186,7 +186,7 @@ public:
                int interp_mode, int border_mode);
 
 private:
-    cv::gpu::GpuMat d_xmap_, d_ymap_, d_dst_;
+    cv::gpu::GpuMat d_xmap_, d_ymap_, d_dst_, d_src_;
 };
 
 #include "warpers_inl.hpp"
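Usage note (editor's sketch, not part of the patch): after this change gpu::remap, gpu::pyrDown and gpu::pyrUp accept the two extra border modes, and the stitching warpers above exercise them through the upload / ensureSizeIsEnough / remap / download pattern. Below is a minimal, self-contained example of driving the updated API; the input file name and the shifted identity map are hypothetical, chosen only to push sampling past the left border.

    // Minimal sketch assuming an OpenCV 2.3-era build with the gpu module,
    // a CUDA-capable device, and a hypothetical image "input.png".
    #include <opencv2/opencv.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::Mat src = cv::imread("input.png");
        cv::gpu::GpuMat d_src(src), d_dst, d_xmap, d_ymap;

        // Build a map that shifts the lookup 10 pixels to the left so it runs
        // off the image and the border mode actually matters.
        cv::Mat xmap(src.size(), CV_32FC1), ymap(src.size(), CV_32FC1);
        for (int y = 0; y < src.rows; ++y)
        {
            for (int x = 0; x < src.cols; ++x)
            {
                xmap.at<float>(y, x) = static_cast<float>(x - 10);
                ymap.at<float>(y, x) = static_cast<float>(y);
            }
        }
        d_xmap.upload(xmap);
        d_ymap.upload(ymap);

        // BORDER_WRAP (and BORDER_REFLECT) are now accepted here; this is the
        // same call pattern the GPU warpers in warpers.cpp use.
        cv::gpu::remap(d_src, d_dst, d_xmap, d_ymap, cv::INTER_LINEAR, cv::BORDER_WRAP);

        // The pyramid functions take the new modes as well.
        cv::gpu::GpuMat d_small;
        cv::gpu::pyrDown(d_dst, d_small, cv::BORDER_REFLECT);

        cv::Mat dst;
        d_small.download(dst);
        cv::imwrite("out.png", dst);
        return 0;
    }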