From 5c38519c65c1c0de59157805a56025d4af9285da Mon Sep 17 00:00:00 2001 From: Konstantin Matskevich Date: Fri, 31 Jan 2014 08:46:27 +0400 Subject: [PATCH] some changes --- .../objdetect/include/opencv2/objdetect.hpp | 4 +- .../objdetect/perf/opencl/perf_hogdetect.cpp | 2 +- modules/objdetect/src/hog.cpp | 103 +++++++++++------- .../test/opencl/test_hogdetector.cpp | 14 --- 4 files changed, 69 insertions(+), 54 deletions(-) diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index e811c8ba52..4857897031 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -327,7 +327,7 @@ public: Size padding = Size(), const std::vector& searchLocations=std::vector()) const; //ocl - virtual bool ocl_detect(const UMat& img, std::vector &hits, + virtual bool ocl_detect(InputArray img, std::vector &hits, double hitThreshold = 0, Size winStride = Size()) const; //with result weights output CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector& foundLocations, @@ -360,7 +360,7 @@ public: CV_PROP double L2HysThreshold; CV_PROP bool gammaCorrection; CV_PROP std::vector svmDetector; - CV_PROP std::vector oclSvmDetector; + UMat oclSvmDetector; CV_PROP int nlevels; diff --git a/modules/objdetect/perf/opencl/perf_hogdetect.cpp b/modules/objdetect/perf/opencl/perf_hogdetect.cpp index cdb9ed2eb9..5d1d095950 100644 --- a/modules/objdetect/perf/opencl/perf_hogdetect.cpp +++ b/modules/objdetect/perf/opencl/perf_hogdetect.cpp @@ -85,7 +85,7 @@ OCL_PERF_TEST(HOGFixture, HOG) OCL_TEST_CYCLE() hog.detectMultiScale(src, found_locations); std::sort(found_locations.begin(), found_locations.end(), RectLess()); -// SANITY_CHECK(found_locations, 1 + DBL_EPSILON); + SANITY_CHECK(found_locations, 1 + DBL_EPSILON); } } diff --git a/modules/objdetect/src/hog.cpp b/modules/objdetect/src/hog.cpp index ae24bdde24..3b3322543b 100644 --- a/modules/objdetect/src/hog.cpp +++ b/modules/objdetect/src/hog.cpp @@ -113,10 +113,7 @@ void HOGDescriptor::setSVMDetector(InputArray _svmDetector) { _svmDetector.getMat().convertTo(svmDetector, CV_32F); - std::vector detector; - _svmDetector.getMat().copyTo(detector); - - std::vector detector_reordered(detector.size()); + Mat detector_reordered(1, (int)svmDetector.size(), CV_32FC1); size_t block_hist_size = getBlockHistogramSize(blockSize, cellSize, nbins); cv::Size blocks_per_img = numPartsWithin(winSize, blockSize, blockStride); @@ -124,12 +121,12 @@ void HOGDescriptor::setSVMDetector(InputArray _svmDetector) for (int i = 0; i < blocks_per_img.height; ++i) for (int j = 0; j < blocks_per_img.width; ++j) { - const float *src = &detector[0] + (j * blocks_per_img.height + i) * block_hist_size; - float *dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size; + const float *src = &svmDetector[0] + (j * blocks_per_img.height + i) * block_hist_size; + float *dst = (float*)detector_reordered.data + (i * blocks_per_img.width + j) * block_hist_size; for (size_t k = 0; k < block_hist_size; ++k) dst[k] = src[k]; } - Mat(detector_reordered).convertTo(oclSvmDetector, CV_32F); + detector_reordered.copyTo(oclSvmDetector); CV_Assert(checkDetectorSize()); } @@ -1119,14 +1116,16 @@ static bool ocl_computeGradient(InputArray img, UMat grad, UMat qangle, int nbin static bool ocl_compute_hists(int nbins, int block_stride_x, int block_stride_y, int height, int width, UMat grad, UMat qangle, UMat gauss_w_lut, UMat block_hists, size_t block_hist_size) { + ocl::Kernel k("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc); + if(k.empty()) + return false; bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU; cv::String opts; if(is_cpu) opts = "-D CPU "; else - opts = cv::format("-D WAVE_SIZE=%d", 32); - - ocl::Kernel k("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts); + opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple()); + k.create("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts); if(k.empty()) return false; @@ -1177,13 +1176,6 @@ static int power_2up(unsigned int n) static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_y, int height, int width, UMat block_hists, float threshold) { - bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU; - cv::String opts; - if(is_cpu) - opts = "-D CPU "; - else - opts = cv::format("-D WAVE_SIZE=%d", 32); - int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; @@ -1194,12 +1186,22 @@ static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_ size_t localThreads[3] = { 1, 1, 1 }; int idx = 0; + bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU; + cv::String opts; ocl::Kernel k; if ( nbins == 9 ) { + k.create("normalize_hists_36_kernel", ocl::objdetect::objdetect_hog_oclsrc, ""); + if(k.empty()) + return false; + if(is_cpu) + opts = "-D CPU "; + else + opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple()); k.create("normalize_hists_36_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts); if(k.empty()) return false; + int blocks_in_group = NTHREADS / block_hist_size; nthreads = blocks_in_group * block_hist_size; int num_groups = (img_block_width * img_block_height + blocks_in_group - 1)/blocks_in_group; @@ -1208,9 +1210,17 @@ static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_ } else { + k.create("normalize_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, ""); + if(k.empty()) + return false; + if(is_cpu) + opts = "-D CPU "; + else + opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple()); k.create("normalize_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts); if(k.empty()) return false; + nthreads = power_2up(block_hist_size); globalThreads[0] = img_block_width * nthreads; globalThreads[1] = img_block_height; @@ -1377,9 +1387,8 @@ void HOGDescriptor::compute(InputArray _img, std::vector& descriptors, padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height); Size paddedImgSize(imgSize.width + padding.width*2, imgSize.height + padding.height*2); - if(ocl::useOpenCL() && _img.dims() <= 2 && _img.type() == CV_8UC1 && _img.isUMat() && + CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && _img.isUMat(), ocl_compute(_img, winStride, descriptors, DESCR_FORMAT_COL_BY_COL)) - return; Mat img = _img.getMat(); HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride); @@ -1605,16 +1614,12 @@ private: static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x, int height, int width, - const UMat& block_hists, const std::vector& _detector, + const UMat& block_hists, UMat detector, float free_coef, float threshold, UMat& labels, Size descr_size, int block_hist_size) { int nthreads; bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU; cv::String opts; - if(is_cpu) - opts = "-D CPU "; - else - opts = cv::format("-D WAVE_SIZE=%d", 32); ocl::Kernel k; int idx = 0; @@ -1622,22 +1627,45 @@ static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y { case 180: nthreads = 180; + k.create("classify_hists_180_kernel", ocl::objdetect::objdetect_hog_oclsrc, ""); + if(k.empty()) + return false; + if(is_cpu) + opts = "-D CPU "; + else + opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple()); k.create("classify_hists_180_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts); if(k.empty()) return false; idx = k.set(idx, descr_size.width); idx = k.set(idx, descr_size.height); break; + case 252: nthreads = 256; + k.create("classify_hists_252_kernel", ocl::objdetect::objdetect_hog_oclsrc, ""); + if(k.empty()) + return false; + if(is_cpu) + opts = "-D CPU "; + else + opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple()); k.create("classify_hists_252_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts); if(k.empty()) return false; idx = k.set(idx, descr_size.width); idx = k.set(idx, descr_size.height); break; + default: nthreads = 256; + k.create("classify_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, ""); + if(k.empty()) + return false; + if(is_cpu) + opts = "-D CPU "; + else + opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple()); k.create("classify_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts); if(k.empty()) return false; @@ -1655,8 +1683,6 @@ static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 }; size_t localThreads[3] = { nthreads, 1, 1 }; - UMat detector(_detector, true); - idx = k.set(idx, block_hist_size); idx = k.set(idx, img_win_width); idx = k.set(idx, img_block_width); @@ -1671,7 +1697,7 @@ static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y return k.run(2, globalThreads, localThreads, false); } -bool HOGDescriptor::ocl_detect(const UMat& img, std::vector &hits, +bool HOGDescriptor::ocl_detect(InputArray img, std::vector &hits, double hit_threshold, Size win_stride) const { hits.clear(); @@ -1743,20 +1769,21 @@ bool HOGDescriptor::ocl_detectMultiScale(InputArray _img, std::vector &fou { std::vector all_candidates; std::vector locations; - UMat img = _img.getUMat(), image_scale; - image_scale.create(img.size(), img.type()); + UMat image_scale; + Size imgSize = _img.size(); + image_scale.create(imgSize, _img.type()); for (size_t i = 0; i 1 && winStride.width % blockStride.width == 0 && - winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat() && - ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold)) - return; + if(winStride == Size()) + winStride = blockStride; + + CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && scale0 > 1 && winStride.width % blockStride.width == 0 && + winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat(), + ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold)); std::vector allCandidates; std::vector tempScales; diff --git a/modules/objdetect/test/opencl/test_hogdetector.cpp b/modules/objdetect/test/opencl/test_hogdetector.cpp index a9e4e9ffd7..8568352b69 100644 --- a/modules/objdetect/test/opencl/test_hogdetector.cpp +++ b/modules/objdetect/test/opencl/test_hogdetector.cpp @@ -110,20 +110,6 @@ OCL_TEST_P(HOG, Detect) OCL_OFF(hog.detectMultiScale(img, cpu_found, 0, Size(8, 8), Size(0, 0), 1.05, 6)); OCL_ON(hog.detectMultiScale(uimg, gpu_found, 0, Size(8, 8), Size(0, 0), 1.05, 6)); - for (size_t i = 0; i < cpu_found.size(); i++) - { - Rect r = cpu_found[i]; - rectangle(img, r.tl(), r.br(), Scalar(0, 255, 0), 3); - } - imshow("cpu", img); - Mat imgs(img); - for (size_t i = 0; i < gpu_found.size(); i++) - { - Rect r = cpu_found[i]; - rectangle(imgs, r.tl(), r.br(), Scalar(0, 255, 0), 3); - } - imshow("gpu", imgs); - waitKey(0); EXPECT_LT(checkRectSimilarity(img.size(), cpu_found, gpu_found), 1.0); }