From 5434a9a5ecfe04b7585f4347cd0226f6bb6ff38a Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Mon, 26 Mar 2012 09:19:33 +0000 Subject: [PATCH] Fixed gpu::matchTemplate for correct handling of big templates. Added tests --- modules/gpu/src/cuda/match_template.cu | 18 +++++---- modules/gpu/src/cuda/matrix_reductions.cu | 2 +- modules/gpu/src/match_template.cpp | 8 ++-- modules/gpu/test/test_imgproc.cpp | 46 +++++++++++++++++++++++ 4 files changed, 61 insertions(+), 13 deletions(-) diff --git a/modules/gpu/src/cuda/match_template.cu b/modules/gpu/src/cuda/match_template.cu index afa1d1f7c0..3d4f8eb43b 100644 --- a/modules/gpu/src/cuda/match_template.cu +++ b/modules/gpu/src/cuda/match_template.cu @@ -216,7 +216,7 @@ namespace cv { namespace gpu { namespace device // Prepared_SQDIFF template - __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep image_sqsum, unsigned int templ_sqsum, DevMem2Df result) + __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep image_sqsum, unsigned long long templ_sqsum, DevMem2Df result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -232,7 +232,7 @@ namespace cv { namespace gpu { namespace device } template - void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream) + void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream) { const dim3 threads(32, 8); const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); @@ -244,10 +244,10 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned int templ_sqsum, DevMem2Df result, int cn, + void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream) { - typedef void (*caller_t)(int w, int h, const DevMem2D_ image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream); + typedef void (*caller_t)(int w, int h, const DevMem2D_ image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream); static const caller_t callers[] = { @@ -284,7 +284,9 @@ namespace cv { namespace gpu { namespace device template - __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(int w, int h, const PtrStep image_sqsum, unsigned int templ_sqsum, DevMem2Df result) + __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U( + int w, int h, const PtrStep image_sqsum, + unsigned long long templ_sqsum, DevMem2Df result) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -301,7 +303,7 @@ namespace cv { namespace gpu { namespace device } template - void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned int templ_sqsum, + void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream) { const dim3 threads(32, 8); @@ -315,10 +317,10 @@ namespace cv { namespace gpu { namespace device } - void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned int templ_sqsum, + void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream) { - typedef void (*caller_t)(int w, int h, const DevMem2D_ image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream); + typedef void (*caller_t)(int w, int h, const DevMem2D_ image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream); static const caller_t callers[] = { 0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4> diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index 6d3d7c5e62..fbf5ce5240 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -1739,7 +1739,7 @@ namespace cv { namespace gpu { namespace device template void sqrSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn) { - typedef typename SumType::R R; + typedef double R; dim3 threads, grid; estimateThreadCfg(src.cols, src.rows, threads, grid); diff --git a/modules/gpu/src/match_template.cpp b/modules/gpu/src/match_template.cpp index 7066586471..59b97e7192 100644 --- a/modules/gpu/src/match_template.cpp +++ b/modules/gpu/src/match_template.cpp @@ -62,10 +62,10 @@ namespace cv { namespace gpu { namespace device void matchTemplateNaive_SQDIFF_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream); void matchTemplateNaive_SQDIFF_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream); - void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned int templ_sqsum, DevMem2Df result, + void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream); - void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned int templ_sqsum, DevMem2Df result, + void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_ image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream); void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_ image_sum, unsigned int templ_sum, DevMem2Df result, cudaStream_t stream); @@ -248,7 +248,7 @@ namespace GpuMat img_sqsum; sqrIntegral(image.reshape(1), img_sqsum, stream); - unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0]; + unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; matchTemplate_CCORR_8U(image, templ, result, stream); matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream)); @@ -260,7 +260,7 @@ namespace GpuMat img_sqsum; sqrIntegral(image.reshape(1), img_sqsum, stream); - unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0]; + unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; matchTemplate_CCORR_8U(image, templ, result, stream); matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream)); diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index bd24f7d139..a4691e4fa8 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -2775,6 +2775,52 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, Combine( ALL_DEVICES, Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png"))))); + +class MatchTemplate_CanFindBigTemplate : public TestWithParam +{ + virtual void SetUp() + { + cv::gpu::setDevice(GetParam().deviceID()); + } +}; + +TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF_NORMED) +{ + cv::Mat scene = readImage("matchtemplate/scene.jpg"); + cv::Mat templ = readImage("matchtemplate/template.jpg"); + + cv::gpu::GpuMat d_scene(scene), d_templ(templ), d_result; + cv::gpu::matchTemplate(d_scene, d_templ, d_result, CV_TM_SQDIFF_NORMED); + + double minVal; + cv::Point minLoc; + cv::gpu::minMaxLoc(d_result, &minVal, 0, &minLoc, 0); + + ASSERT_GE(minVal, 0); + ASSERT_LT(minVal, 1e-3); + ASSERT_EQ(344, minLoc.x); + ASSERT_EQ(0, minLoc.y); +} + +TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF) +{ + cv::Mat scene = readImage("matchtemplate/scene.jpg"); + cv::Mat templ = readImage("matchtemplate/template.jpg"); + + cv::gpu::GpuMat d_scene(scene), d_templ(templ), d_result; + cv::gpu::matchTemplate(d_scene, d_templ, d_result, CV_TM_SQDIFF); + + double minVal; + cv::Point minLoc; + cv::gpu::minMaxLoc(d_result, &minVal, 0, &minLoc, 0); + + ASSERT_GE(minVal, 0); + ASSERT_EQ(344, minLoc.x); + ASSERT_EQ(0, minLoc.y); +} + +INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CanFindBigTemplate, ALL_DEVICES); + //////////////////////////////////////////////////////////////////////////// // MulSpectrums