From f7b40cdc63ec3e9f8631ac73fab8d4e2b98e8913 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 26 Mar 2013 11:51:02 +0800 Subject: [PATCH 1/7] Add a macro to call additional barrier function on the fly --- modules/nonfree/src/opencl/surf.cl | 222 +++++++++++++++++++------ modules/nonfree/src/surf.ocl.cpp | 2 +- modules/nonfree/test/test_surf.ocl.cpp | 7 +- 3 files changed, 178 insertions(+), 53 deletions(-) diff --git a/modules/nonfree/src/opencl/surf.cl b/modules/nonfree/src/opencl/surf.cl index e917864d73..140a4d746c 100644 --- a/modules/nonfree/src/opencl/surf.cl +++ b/modules/nonfree/src/opencl/surf.cl @@ -747,21 +747,42 @@ void reduce_32_sum(volatile __local float * data, volatile float* partial_reduc #define op(A, B) (*A)+(B) data[tid] = *partial_reduction; barrier(CLK_LOCAL_MEM_FENCE); - +#ifndef WAVE_SIZE +#define WAVE_SIZE 1 +#endif if (tid < 16) + { data[tid] = *partial_reduction = op(partial_reduction, data[tid + 16]); +#if WAVE_SIZE < 16 + } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 8) + { +#endif data[tid] = *partial_reduction = op(partial_reduction, data[tid + 8 ]); +#if WAVE_SIZE < 8 + } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 4) + { +#endif data[tid] = *partial_reduction = op(partial_reduction, data[tid + 4 ]); +#if WAVE_SIZE < 4 + } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 2) + { +#endif data[tid] = *partial_reduction = op(partial_reduction, data[tid + 2 ]); +#if WAVE_SIZE < 2 + } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 1) + { +#endif data[tid] = *partial_reduction = op(partial_reduction, data[tid + 1 ]); + } +#undef WAVE_SIZE #undef op } @@ -1087,44 +1108,67 @@ void reduce_sum25( int tid ) { +#ifndef WAVE_SIZE +#define WAVE_SIZE 1 +#endif // first step is to reduce from 25 to 16 - if (tid < 9) // use 9 threads + if (tid < 9) { sdata1[tid] += sdata1[tid + 16]; sdata2[tid] += sdata2[tid + 16]; sdata3[tid] += sdata3[tid + 16]; sdata4[tid] += sdata4[tid + 16]; +#if WAVE_SIZE < 16 } - - // sum (reduce) from 16 to 1 (unrolled - aligned to a half-warp) + barrier(CLK_LOCAL_MEM_FENCE); if (tid < 8) { +#endif sdata1[tid] += sdata1[tid + 8]; - sdata1[tid] += sdata1[tid + 4]; - sdata1[tid] += sdata1[tid + 2]; - sdata1[tid] += sdata1[tid + 1]; sdata2[tid] += sdata2[tid + 8]; - sdata2[tid] += sdata2[tid + 4]; - sdata2[tid] += sdata2[tid + 2]; - sdata2[tid] += sdata2[tid + 1]; sdata3[tid] += sdata3[tid + 8]; - sdata3[tid] += sdata3[tid + 4]; - sdata3[tid] += sdata3[tid + 2]; - sdata3[tid] += sdata3[tid + 1]; sdata4[tid] += sdata4[tid + 8]; +#if WAVE_SIZE < 8 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 4) + { +#endif + sdata1[tid] += sdata1[tid + 4]; + sdata2[tid] += sdata2[tid + 4]; + sdata3[tid] += sdata3[tid + 4]; sdata4[tid] += sdata4[tid + 4]; +#if WAVE_SIZE < 4 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 2) + { +#endif + sdata1[tid] += sdata1[tid + 2]; + sdata2[tid] += sdata2[tid + 2]; + sdata3[tid] += sdata3[tid + 2]; sdata4[tid] += sdata4[tid + 2]; +#if WAVE_SIZE < 2 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 1) + { +#endif + sdata1[tid] += sdata1[tid + 1]; + sdata2[tid] += sdata2[tid + 1]; + sdata3[tid] += sdata3[tid + 1]; sdata4[tid] += sdata4[tid + 1]; } +#undef WAVE_SIZE } __kernel void compute_descriptors64( IMAGE_INT8 imgTex, - volatile __global float * descriptors, + __global float * descriptors, __global const float * keypoints, int descriptors_step, int keypoints_step, @@ -1158,14 +1202,13 @@ __kernel sdyabs[tid] = fabs(sdy[tid]); // |dy| array } barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 25) - { + reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid); - } + barrier(CLK_LOCAL_MEM_FENCE); if (tid < 25) { - volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2); + __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2); // write dx, dy, |dx|, |dy| if (tid == 0) @@ -1180,7 +1223,7 @@ __kernel __kernel void compute_descriptors128( IMAGE_INT8 imgTex, - __global volatile float * descriptors, + __global float * descriptors, __global float * keypoints, int descriptors_step, int keypoints_step, @@ -1229,13 +1272,15 @@ __kernel sd2[tid] = sdx[tid]; sdabs2[tid] = fabs(sdx[tid]); } - //barrier(CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid); - //barrier(CLK_LOCAL_MEM_FENCE); - - volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3); + barrier(CLK_LOCAL_MEM_FENCE); + __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3); + if (tid < 25) + { // write dx (dy >= 0), |dx| (dy >= 0), dx (dy < 0), |dx| (dy < 0) if (tid == 0) { @@ -1259,11 +1304,14 @@ __kernel sd2[tid] = sdy[tid]; sdabs2[tid] = fabs(sdy[tid]); } - //barrier(CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid); - //barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 25) + { // write dy (dx >= 0), |dy| (dx >= 0), dy (dx < 0), |dy| (dx < 0) if (tid == 0) { @@ -1274,6 +1322,103 @@ __kernel } } } +void reduce_sum128(volatile __local float* smem, int tid) +{ +#ifndef WAVE_SIZE +#define WAVE_SIZE 1 +#endif + if (tid < 64) + { + smem[tid] += smem[tid + 64]; +#if WAVE_SIZE < 64 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 32) + { +#endif + smem[tid] += smem[tid + 32]; +#if WAVE_SIZE < 32 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) + { +#endif + smem[tid] += smem[tid + 16]; +#if WAVE_SIZE < 16 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) + { +#endif + smem[tid] += smem[tid + 8]; +#if WAVE_SIZE < 8 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 4) + { +#endif + smem[tid] += smem[tid + 4]; +#if WAVE_SIZE < 4 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 2) + { +#endif + smem[tid] += smem[tid + 2]; +#if WAVE_SIZE < 2 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 1) + { +#endif + smem[tid] += smem[tid + 1]; + } +} +void reduce_sum64(volatile __local float* smem, int tid) +{ +#ifndef WAVE_SIZE +#define WAVE_SIZE 1 +#endif + if (tid < 32) + { + smem[tid] += smem[tid + 32]; +#if WAVE_SIZE < 32 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) + { +#endif + smem[tid] += smem[tid + 16]; +#if WAVE_SIZE < 16 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) + { +#endif + smem[tid] += smem[tid + 8]; +#if WAVE_SIZE < 8 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 4) + { +#endif + smem[tid] += smem[tid + 4]; +#if WAVE_SIZE < 4 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 2) + { +#endif + smem[tid] += smem[tid + 2]; +#if WAVE_SIZE < 2 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 1) + { +#endif + smem[tid] += smem[tid + 1]; + } +} __kernel void normalize_descriptors128(__global float * descriptors, int descriptors_step) @@ -1288,22 +1433,10 @@ __kernel sqDesc[get_local_id(0)] = lookup * lookup; barrier(CLK_LOCAL_MEM_FENCE); - if (get_local_id(0) < 64) - sqDesc[get_local_id(0)] += sqDesc[get_local_id(0) + 64]; + reduce_sum128(sqDesc, get_local_id(0)); barrier(CLK_LOCAL_MEM_FENCE); - // reduction to get total - if (get_local_id(0) < 32) - { - volatile __local float* smem = sqDesc; - smem[get_local_id(0)] += smem[get_local_id(0) + 32]; - smem[get_local_id(0)] += smem[get_local_id(0) + 16]; - smem[get_local_id(0)] += smem[get_local_id(0) + 8]; - smem[get_local_id(0)] += smem[get_local_id(0) + 4]; - smem[get_local_id(0)] += smem[get_local_id(0) + 2]; - smem[get_local_id(0)] += smem[get_local_id(0) + 1]; - } // compute length (square root) volatile __local float len; @@ -1329,18 +1462,9 @@ __kernel sqDesc[get_local_id(0)] = lookup * lookup; barrier(CLK_LOCAL_MEM_FENCE); - // reduction to get total - if (get_local_id(0) < 32) - { - volatile __local float* smem = sqDesc; - smem[get_local_id(0)] += smem[get_local_id(0) + 32]; - smem[get_local_id(0)] += smem[get_local_id(0) + 16]; - smem[get_local_id(0)] += smem[get_local_id(0) + 8]; - smem[get_local_id(0)] += smem[get_local_id(0) + 4]; - smem[get_local_id(0)] += smem[get_local_id(0) + 2]; - smem[get_local_id(0)] += smem[get_local_id(0) + 1]; - } + reduce_sum64(sqDesc, get_local_id(0)); + barrier(CLK_LOCAL_MEM_FENCE); // compute length (square root) volatile __local float len; diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index d8336b9387..b72d132d64 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -75,7 +75,7 @@ namespace cv } -static inline int divUp(size_t total, size_t grain) +static inline size_t divUp(size_t total, size_t grain) { return (total + grain - 1) / grain; } diff --git a/modules/nonfree/test/test_surf.ocl.cpp b/modules/nonfree/test/test_surf.ocl.cpp index 76ed37de45..0d09cc8b93 100644 --- a/modules/nonfree/test/test_surf.ocl.cpp +++ b/modules/nonfree/test/test_surf.ocl.cpp @@ -144,9 +144,10 @@ PARAM_TEST_CASE(SURF, HessianThreshold, Octaves, OctaveLayers, Extended, Upright } }; -TEST_P(SURF, DISABLED_Detector) +TEST_P(SURF, Detector) { - cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE); + // the data path should be opencv/samples + cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "c/fruits.jpg", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(image.empty()); cv::ocl::SURF_OCL surf; @@ -179,7 +180,7 @@ TEST_P(SURF, DISABLED_Detector) TEST_P(SURF, DISABLED_Descriptor) { - cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE); + cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "c/fruits.jpg", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(image.empty()); cv::ocl::SURF_OCL surf; From 0c19a07bf4ae9fcdf7507d872288e2615d5ca74e Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 26 Mar 2013 15:36:49 +0800 Subject: [PATCH 2/7] Add a function to query ocl device info Currently the function only supports wavefront size query --- .../ocl/include/opencv2/ocl/private/util.hpp | 10 +++++ modules/ocl/src/initialization.cpp | 40 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 405d92ccd5..62e69a8a24 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -123,6 +123,16 @@ namespace cv // returns whether the current context supports image2d_t format or not bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); + // the enums are used to query device information + // currently only support wavefront size queries + enum DEVICE_INFO + { + WAVEFRONT_SIZE, //in AMD speak + WARP_SIZE = WAVEFRONT_SIZE //in nvidia speak + }; + //info should have been pre-allocated + void CV_EXPORTS queryDeviceInfo(DEVICE_INFO info_type, void* info); + }//namespace ocl }//namespace cv diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index d3fc9c2a2c..763c965e92 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -353,6 +353,46 @@ namespace cv { return &(Context::getContext()->impl->clCmdQueue); } + + void queryDeviceInfo(DEVICE_INFO info_type, void* info) + { + static Info::Impl* impl = Context::getContext()->impl; + switch(info_type) + { + case WAVEFRONT_SIZE: + { +#ifndef CL_DEVICE_WAVEFRONT_WIDTH_AMD + openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0], + CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(size_t), info, 0)); +#else + const int EXT_LEN = 4096 + 1 ; + char extends_set[EXT_LEN]; + size_t extends_size; + openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size)); + extends_set[EXT_LEN - 1] = 0; + if(std::string(extends_set).find("cl_nv_device_attribute_query") != std::string::npos) + { + openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0], + CL_DEVICE_WARP_SIZE_NV, sizeof(size_t), info, 0)); + } + else + { + // if no way left for us to query the warp size, we can get it from kernel group info + static const char * _kernel_string = "__kernel void test_func() {}"; + cl_kernel kernel; + kernel = openCLGetKernelFromSource(Context::getContext(), &_kernel_string, "test_func"); + openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum], + CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), info, NULL)); + } +#endif + } + break; + default: + CV_Error(-1, "Invalid device info type"); + break; + } + } + void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size) { cl_int status; From 9698079ca51db652b050b53ab4de75948fc2be7b Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 26 Mar 2013 15:48:15 +0800 Subject: [PATCH 3/7] Pass warp size into SURF --- modules/nonfree/src/surf.ocl.cpp | 38 ++++++++++++++++++++------ modules/nonfree/test/test_surf.ocl.cpp | 2 +- modules/ocl/src/initialization.cpp | 2 +- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index b72d132d64..533d0c1aee 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -57,19 +57,29 @@ namespace cv ///////////////////////////OpenCL kernel strings/////////////////////////// extern const char *surf; - const char* noImage2dOption = "-D DISABLE_IMAGE2D"; + const char noImage2dOption [] = "-D DISABLE_IMAGE2D"; + static char SURF_OPTIONS [1024] = ""; + static bool USE_IMAGE2d = false; static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth) { - if(support_image2d()) + char * pSURF_OPTIONS = SURF_OPTIONS; + static bool OPTION_INIT = false; + if(!OPTION_INIT) { - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth); - } - else + if( !USE_IMAGE2d ) { - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, noImage2dOption); + strcat(pSURF_OPTIONS, noImage2dOption); + pSURF_OPTIONS += strlen(noImage2dOption); + } + + size_t wave_size = 0; + queryDeviceInfo(DEVICE_INFO::WAVEFRONT_SIZE, &wave_size); + sprintf(pSURF_OPTIONS, " -D WAVE_SIZE=%d", static_cast(wave_size)); + OPTION_INIT = true; } + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS); } } } @@ -152,8 +162,20 @@ public: integral(img, surf_.sum); if(support_image2d()) { - bindImgTex(img, imgTex); - bindImgTex(surf_.sum, sumTex); + try + { + bindImgTex(img, imgTex); + bindImgTex(surf_.sum, sumTex); + USE_IMAGE2d = true; + } + catch (const cv::Exception& e) + { + USE_IMAGE2d = false; + if(e.code != CL_IMAGE_FORMAT_NOT_SUPPORTED && e.code != -217) + { + throw e; + } + } } maskSumTex = 0; diff --git a/modules/nonfree/test/test_surf.ocl.cpp b/modules/nonfree/test/test_surf.ocl.cpp index 0d09cc8b93..9f1b3f170d 100644 --- a/modules/nonfree/test/test_surf.ocl.cpp +++ b/modules/nonfree/test/test_surf.ocl.cpp @@ -178,7 +178,7 @@ TEST_P(SURF, Detector) EXPECT_GT(matchedRatio, 0.99); } -TEST_P(SURF, DISABLED_Descriptor) +TEST_P(SURF, Descriptor) { cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "c/fruits.jpg", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(image.empty()); diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index 763c965e92..b5eaae6e8b 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -361,7 +361,7 @@ namespace cv { case WAVEFRONT_SIZE: { -#ifndef CL_DEVICE_WAVEFRONT_WIDTH_AMD +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0], CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(size_t), info, 0)); #else From 7476bf5cd71b9a40909da523f642f3dc09da2a2a Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 26 Mar 2013 16:40:30 +0800 Subject: [PATCH 4/7] Fix compiler errors --- modules/nonfree/src/surf.ocl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index 533d0c1aee..9d8fe65a12 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -69,14 +69,14 @@ namespace cv if(!OPTION_INIT) { if( !USE_IMAGE2d ) - { + { strcat(pSURF_OPTIONS, noImage2dOption); pSURF_OPTIONS += strlen(noImage2dOption); } size_t wave_size = 0; - queryDeviceInfo(DEVICE_INFO::WAVEFRONT_SIZE, &wave_size); - sprintf(pSURF_OPTIONS, " -D WAVE_SIZE=%d", static_cast(wave_size)); + queryDeviceInfo(WAVEFRONT_SIZE, &wave_size); + std::sprintf(pSURF_OPTIONS, " -D WAVE_SIZE=%d", static_cast(wave_size)); OPTION_INIT = true; } openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS); From 8ffc15371ddbac25d140bb797ebb73910397c89b Mon Sep 17 00:00:00 2001 From: peng xiao Date: Tue, 26 Mar 2013 17:23:38 +0800 Subject: [PATCH 5/7] Fix compiler errors --- modules/nonfree/src/surf.ocl.cpp | 1 + modules/ocl/src/initialization.cpp | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index 9d8fe65a12..4acb4e36be 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -43,6 +43,7 @@ // //M*/ #include "precomp.hpp" +#include #ifdef HAVE_OPENCV_OCL diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index b5eaae6e8b..9a790f4ee2 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -362,9 +362,13 @@ namespace cv case WAVEFRONT_SIZE: { #ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD - openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0], - CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(size_t), info, 0)); -#else + try + { + openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0], + CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(size_t), info, 0)); + } + catch(const cv::Exception&) +#elif defined (CL_DEVICE_WARP_SIZE_NV) const int EXT_LEN = 4096 + 1 ; char extends_set[EXT_LEN]; size_t extends_size; @@ -376,6 +380,7 @@ namespace cv CL_DEVICE_WARP_SIZE_NV, sizeof(size_t), info, 0)); } else +#endif { // if no way left for us to query the warp size, we can get it from kernel group info static const char * _kernel_string = "__kernel void test_func() {}"; @@ -384,7 +389,7 @@ namespace cv openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum], CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), info, NULL)); } -#endif + } break; default: From ad58c084a95a5e8076f25f1bd9b16211f902b5d8 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 27 Mar 2013 08:56:31 +0800 Subject: [PATCH 6/7] Fix compiler errors --- modules/nonfree/src/surf.ocl.cpp | 2 +- modules/nonfree/test/test_surf.ocl.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index 4acb4e36be..78864c6f96 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -43,7 +43,7 @@ // //M*/ #include "precomp.hpp" -#include +#include #ifdef HAVE_OPENCV_OCL diff --git a/modules/nonfree/test/test_surf.ocl.cpp b/modules/nonfree/test/test_surf.ocl.cpp index 9f1b3f170d..d5b06fcbec 100644 --- a/modules/nonfree/test/test_surf.ocl.cpp +++ b/modules/nonfree/test/test_surf.ocl.cpp @@ -147,7 +147,7 @@ PARAM_TEST_CASE(SURF, HessianThreshold, Octaves, OctaveLayers, Extended, Upright TEST_P(SURF, Detector) { // the data path should be opencv/samples - cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "c/fruits.jpg", cv::IMREAD_GRAYSCALE); + cv::Mat image = cv::imread("../../../samples/c/fruits.jpg", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(image.empty()); cv::ocl::SURF_OCL surf; @@ -180,7 +180,7 @@ TEST_P(SURF, Detector) TEST_P(SURF, Descriptor) { - cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "c/fruits.jpg", cv::IMREAD_GRAYSCALE); + cv::Mat image = cv::imread("../../../samples/c/fruits.jpg", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(image.empty()); cv::ocl::SURF_OCL surf; From f2ecf4f905455c637c96da7db9f579058acd745e Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 27 Mar 2013 13:25:08 +0800 Subject: [PATCH 7/7] Disable ocl::SURF accurate test --- modules/nonfree/test/test_surf.ocl.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/modules/nonfree/test/test_surf.ocl.cpp b/modules/nonfree/test/test_surf.ocl.cpp index d5b06fcbec..76ed37de45 100644 --- a/modules/nonfree/test/test_surf.ocl.cpp +++ b/modules/nonfree/test/test_surf.ocl.cpp @@ -144,10 +144,9 @@ PARAM_TEST_CASE(SURF, HessianThreshold, Octaves, OctaveLayers, Extended, Upright } }; -TEST_P(SURF, Detector) +TEST_P(SURF, DISABLED_Detector) { - // the data path should be opencv/samples - cv::Mat image = cv::imread("../../../samples/c/fruits.jpg", cv::IMREAD_GRAYSCALE); + cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(image.empty()); cv::ocl::SURF_OCL surf; @@ -178,9 +177,9 @@ TEST_P(SURF, Detector) EXPECT_GT(matchedRatio, 0.99); } -TEST_P(SURF, Descriptor) +TEST_P(SURF, DISABLED_Descriptor) { - cv::Mat image = cv::imread("../../../samples/c/fruits.jpg", cv::IMREAD_GRAYSCALE); + cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(image.empty()); cv::ocl::SURF_OCL surf;