From edd68e588b3c1c182b731f7e68567de706b0ae8d Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 23 Jul 2010 07:06:33 +0000 Subject: [PATCH] Added implementation of remap on GPU. Minor fixes of convertTo. Added opencv_imgproc to gpu_test dependencies. --- modules/gpu/include/opencv2/gpu/gpu.hpp | 4 + modules/gpu/src/cuda/imgproc.cu | 85 +++++++++++++++++ modules/gpu/src/cuda/matrix_operations.cu | 106 +++++++++++----------- modules/gpu/src/imgproc_gpu.cpp | 75 +++++++++++++++ tests/gpu/CMakeLists.txt | 4 +- tests/gpu/src/convert_to.cpp | 15 +-- 6 files changed, 219 insertions(+), 70 deletions(-) create mode 100644 modules/gpu/src/cuda/imgproc.cu create mode 100644 modules/gpu/src/imgproc_gpu.cpp diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index ca6c9ad8df..6fd835a4a2 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -321,6 +321,10 @@ namespace cv friend struct StreamAccessor; }; + ////////////////////////////// Image processing ////////////////////////////// + + void CV_EXPORTS remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, GpuMat& dst); + //////////////////////////////// StereoBM_GPU //////////////////////////////// class CV_EXPORTS StereoBM_GPU diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu new file mode 100644 index 0000000000..5093cf654f --- /dev/null +++ b/modules/gpu/src/cuda/imgproc.cu @@ -0,0 +1,85 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "cuda_shared.hpp" + +using namespace cv::gpu; + +namespace imgproc +{ + texture tex; + + __global__ void kernel_remap(const float *mapx, const float *mapy, size_t map_step, unsigned char* out, size_t out_step, int width, int height) + { + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; + if (x < width && y < height) + { + int idx = y * (map_step >> 2) + x; /* map_step >> 2 <=> map_step / sizeof(float)*/ + + float xcoo = mapx[idx]; + float ycoo = mapy[idx]; + + out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex, xcoo, ycoo)); + } + } +} + +namespace cv { namespace gpu { namespace impl { + extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_& xmap, const DevMem2D_& ymap, DevMem2D dst, size_t width, size_t height) + { + dim3 block(16, 16, 1); + dim3 grid(1, 1, 1); + grid.x = divUp( width, block.x); + grid.y = divUp(height, block.y); + + ::imgproc::tex.filterMode = cudaFilterModeLinear; + ::imgproc::tex.addressMode[0] = ::imgproc::tex.addressMode[1] = cudaAddressModeWrap; + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + cudaSafeCall( cudaBindTexture2D(0, ::imgproc::tex, src.ptr, desc, width, height, src.step) ); + + ::imgproc::kernel_remap<<>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, width, height); + + cudaSafeCall( cudaThreadSynchronize() ); + cudaSafeCall( cudaUnbindTexture(::imgproc::tex) ); + } +}}} \ No newline at end of file diff --git a/modules/gpu/src/cuda/matrix_operations.cu b/modules/gpu/src/cuda/matrix_operations.cu index df68b997cd..ee758d798c 100644 --- a/modules/gpu/src/cuda/matrix_operations.cu +++ b/modules/gpu/src/cuda/matrix_operations.cu @@ -109,32 +109,32 @@ namespace mat_operators /////////////////////////////////////////////////////////////////////////// template - struct CalcTraits + struct ScaleTraits { - __device__ static DT calc(T src, double alpha, double beta) + __device__ static DT scale(T src, double alpha, double beta) { return (DT)__double2int_rn(alpha * src + beta); } }; template - struct CalcTraits + struct ScaleTraits { - __device__ static float calc(T src, double alpha, double beta) + __device__ static float scale(T src, double alpha, double beta) { return (float)(alpha * src + beta); } }; template - struct CalcTraits + struct ScaleTraits { - __device__ static double calc(T src, double alpha, double beta) + __device__ static double scale(T src, double alpha, double beta) { return alpha * src + beta; } }; template - struct ConverterTraits + struct ReadWriteTraits { enum {shift=1}; @@ -142,7 +142,7 @@ namespace mat_operators typedef DT write_type; }; template - struct ConverterTraits + struct ReadWriteTraits { enum {shift=4}; @@ -150,7 +150,7 @@ namespace mat_operators typedef char4 write_type; }; template - struct ConverterTraits + struct ReadWriteTraits { enum {shift=4}; @@ -158,7 +158,7 @@ namespace mat_operators typedef char4 write_type; }; template - struct ConverterTraits + struct ReadWriteTraits { enum {shift=4}; @@ -166,7 +166,7 @@ namespace mat_operators typedef char4 write_type; }; template - struct ConverterTraits + struct ReadWriteTraits { enum {shift=2}; @@ -174,7 +174,7 @@ namespace mat_operators typedef short2 write_type; }; template - struct ConverterTraits + struct ReadWriteTraits { enum {shift=2}; @@ -182,56 +182,48 @@ namespace mat_operators typedef short2 write_type; }; template - struct ConverterTraits + struct ReadWriteTraits { enum {shift=2}; typedef int2 read_type; typedef short2 write_type; }; - - template - struct Converter - { - __device__ static void convert(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta) - { - size_t x = threadIdx.x + blockIdx.x * blockDim.x; - size_t y = threadIdx.y + blockIdx.y * blockDim.y; - if (y < height) - { - const T* src = (const T*)(srcmat + src_step * y); - DT* dst = (DT*)(dstmat + dst_step * y); - if ((x * ConverterTraits::shift) + ConverterTraits::shift - 1 < width) - { - typename ConverterTraits::read_type srcn_el = ((const typename ConverterTraits::read_type*)src)[x]; - typename ConverterTraits::write_type dstn_el; - - const T* src1_el = (const T*) &srcn_el; - DT* dst1_el = (DT*) &dstn_el; - - for (int i = 0; i < ConverterTraits::shift; ++i) - dst1_el[i] = CalcTraits::calc(src1_el[i], alpha, beta); - - ((typename ConverterTraits::write_type*)dst)[x] = dstn_el; - } - else - { - for (int i = 0; i < ConverterTraits::shift - 1; ++i) - if ((x * ConverterTraits::shift) + i < width) - dst[(x * ConverterTraits::shift) + i] = CalcTraits::calc(src[(x * ConverterTraits::shift) + i], alpha, beta); - } - } - } - __host__ static inline dim3 calcGrid(size_t width, size_t height, dim3 block) - { - return dim3(divUp(width, block.x * ConverterTraits::shift), divUp(height, block.y)); - } - }; template __global__ static void kernel_convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta) { - Converter::convert(srcmat, src_step, dstmat, dst_step, width, height, alpha, beta); + typedef typename ReadWriteTraits::read_type read_type; + typedef typename ReadWriteTraits::write_type write_type; + const int shift = ReadWriteTraits::shift; + + const size_t x = threadIdx.x + blockIdx.x * blockDim.x; + const size_t y = threadIdx.y + blockIdx.y * blockDim.y; + + if (y < height) + { + const T* src = (const T*)(srcmat + src_step * y); + DT* dst = (DT*)(dstmat + dst_step * y); + if ((x * shift) + shift - 1 < width) + { + read_type srcn_el = ((read_type*)src)[x]; + write_type dstn_el; + + const T* src1_el = (const T*) &srcn_el; + DT* dst1_el = (DT*) &dstn_el; + + for (int i = 0; i < shift; ++i) + dst1_el[i] = ScaleTraits::scale(src1_el[i], alpha, beta); + + ((write_type*)dst)[x] = dstn_el; + } + else + { + for (int i = 0; i < shift - 1; ++i) + if ((x * shift) + i < width) + dst[(x * shift) + i] = ScaleTraits::scale(src[(x * shift) + i], alpha, beta); + } + } } } // namespace mat_operators @@ -373,10 +365,14 @@ namespace cv template void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta) { + const int shift = ::mat_operators::ReadWriteTraits::shift; + dim3 block(32, 8); - dim3 grid = ::mat_operators::Converter::calcGrid(width, height, block); + dim3 grid(divUp(width, block.x * shift), divUp(height, block.y)); + ::mat_operators::kernel_convert_to<<>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta); - cudaSafeCall( cudaThreadSynchronize() ); + + cudaSafeCall( cudaThreadSynchronize() ); } extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta) @@ -409,7 +405,7 @@ namespace cv CvtFunc func = tab[sdepth][ddepth]; if (func == 0) - cv::gpu::error("Operation \'ConvertTo\' doesn't supported on your GPU model", __FILE__, __LINE__); + cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__); func(src, dst, width, height, alpha, beta); } } // namespace impl diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp new file mode 100644 index 0000000000..7874e0bb47 --- /dev/null +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -0,0 +1,75 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::gpu; + +#if !defined (HAVE_CUDA) + +namespace cv +{ + namespace gpu + { + remap(const GpuMat& /*src*/, const GpuMat& /*xmap*/, const GpuMat& /*ymap*/, GpuMat& /*dst*/) { throw_nogpu(); } + } + +} + +#else /* !defined (HAVE_CUDA) */ + +namespace cv { namespace gpu { namespace impl { + extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_& xmap, const DevMem2D_& ymap, DevMem2D dst, size_t width, size_t height); +}}} + + +void cv::gpu::remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, GpuMat& dst) +{ + CV_Assert((!xmap.data || xmap.size() == ymap.size())); + dst.create(xmap.size(), src.type()); + CV_Assert(dst.data != src.data ); + + impl::remap_gpu(src, xmap, ymap, dst, dst.cols, dst.rows); +} + +#endif /* !defined (HAVE_CUDA) */ \ No newline at end of file diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index 1069df5db0..d20c65020b 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -36,10 +36,10 @@ set_target_properties(${the_target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/" ) -add_dependencies(${the_target} opencv_ts opencv_gpu opencv_highgui) +add_dependencies(${the_target} opencv_ts opencv_gpu opencv_highgui opencv_imgproc) # Add the required libraries for linking: -target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} opencv_ts opencv_gpu opencv_highgui) +target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} opencv_ts opencv_gpu opencv_highgui opencv_imgproc) enable_testing() get_target_property(LOC ${the_target} LOCATION) diff --git a/tests/gpu/src/convert_to.cpp b/tests/gpu/src/convert_to.cpp index b2e231a065..d9fa61d126 100644 --- a/tests/gpu/src/convert_to.cpp +++ b/tests/gpu/src/convert_to.cpp @@ -50,22 +50,11 @@ void CV_GpuMatOpConvertTo::run( int /* start_from */) Mat cpumatdst; GpuMat gpumatdst; - //TickMeter tm; - //tm.start(); - //for(int i = 0; i < 50; ++i) - cpumatsrc.convertTo(cpumatdst, dst_type, alpha, beta); - //tm.stop(); - //cout << "SRC_TYPE=" << types_str[i] << "C" << c << " DST_TYPE=" << types_str[j] << endl << "\tCPU FPS = " << 50.0/tm.getTimeSec() << endl; - - //tm.reset(); + cpumatsrc.convertTo(cpumatdst, dst_type, alpha, beta); try { - //tm.start(); - //for(int i = 0; i < 50; ++i) - gpumatsrc.convertTo(gpumatdst, dst_type, alpha, beta); - //tm.stop(); - //cout << "\tGPU FPS = " << 50.0/tm.getTimeSec() << endl; + gpumatsrc.convertTo(gpumatdst, dst_type, alpha, beta); } catch(cv::Exception& e) {