diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index b5a0835110..660a6df73d 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1397,7 +1397,7 @@ public: }; ////////////////////////////////// CascadeClassifier_GPU ////////////////////////////////////////// -// The cascade classifier class for object detection. +// The cascade classifier class for object detection: supports old haar and new lbp xml formats and nvbin for haar cascades only. class CV_EXPORTS CascadeClassifier_GPU { public: @@ -1407,42 +1407,28 @@ public: bool empty() const; bool load(const std::string& filename); - void release(); - - /* returns number of detected objects */ - int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size()); - - bool findLargestObject; - bool visualizeInPlace; - - Size getClassifierSize() const; -private: - - struct CascadeClassifierImpl; - CascadeClassifierImpl* impl; -}; - -// The cascade classifier class for object detection. 
-class CV_EXPORTS CascadeClassifier_GPU_LBP -{ -public: - CascadeClassifier_GPU_LBP(cv::Size detectionFrameSize = cv::Size()); - ~CascadeClassifier_GPU_LBP(); - - bool empty() const; - bool load(const std::string& filename); - void release(); - - int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.1, int minNeighbors = 4, - cv::Size maxObjectSize = cv::Size()/*, Size minSize = Size()*/); - Size getClassifierSize() const; + void release(); -private: + /* returns number of detected objects */ + int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size()); + + bool findLargestObject; + bool visualizeInPlace; + + Size getClassifierSize() const; + +private: struct CascadeClassifierImpl; CascadeClassifierImpl* impl; -}; - -////////////////////////////////// SURF ////////////////////////////////////////// + struct HaarCascade; + struct LbpCascade; + friend class CascadeClassifier_GPU_LBP; + +public: + int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4); +}; + +////////////////////////////////// SURF ////////////////////////////////////////// class CV_EXPORTS SURF_GPU { diff --git a/modules/gpu/perf/perf_objdetect.cpp b/modules/gpu/perf/perf_objdetect.cpp index a9b3d7e547..c376586c9b 100644 --- a/modules/gpu/perf/perf_objdetect.cpp +++ b/modules/gpu/perf/perf_objdetect.cpp @@ -70,7 +70,7 @@ GPU_PERF_TEST_1(LBPClassifier, cv::gpu::DeviceInfo) cv::gpu::GpuMat img(img_host); cv::gpu::GpuMat gpu_rects; - cv::gpu::CascadeClassifier_GPU_LBP cascade(img.size()); + cv::gpu::CascadeClassifier_GPU cascade; ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml"))); cascade.detectMultiScale(img, gpu_rects); diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index 927b42249e..61f5c94310 100644 --- 
a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -46,439 +46,54 @@ using namespace cv; using namespace cv::gpu; -using namespace std; - -#if !defined (HAVE_CUDA) -// ============ old fashioned haar cascade ==============================================// -cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); } -cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string&) { throw_nogpu(); } -cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { throw_nogpu(); } - -bool cv::gpu::CascadeClassifier_GPU::empty() const { throw_nogpu(); return true; } -bool cv::gpu::CascadeClassifier_GPU::load(const string&) { throw_nogpu(); return true; } -Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const { throw_nogpu(); return Size(); } - -int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& , GpuMat& , double , int , Size) { throw_nogpu(); return 0; } - -// ============ LBP cascade ==============================================// -cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifier_GPU_LBP(cv::Size /*frameSize*/){ throw_nogpu(); } -cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP() { throw_nogpu(); } - -bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const { throw_nogpu(); return true; } -bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string&) { throw_nogpu(); return true; } -Size cv::gpu::CascadeClassifier_GPU_LBP::getClassifierSize() const { throw_nogpu(); return Size(); } -void cv::gpu::CascadeClassifier_GPU_LBP::allocateBuffers(cv::Size /*frame*/) { throw_nogpu();} - -int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const cv::gpu::GpuMat& /*image*/, cv::gpu::GpuMat& /*objectsBuf*/, -double /*scaleFactor*/, int /*minNeighbors*/, cv::Size /*maxObjectSize*/){ throw_nogpu(); return 0;} - -#else - -cv::Size operator -(const cv::Size& a, const cv::Size& b) -{ - return cv::Size(a.width - b.width, a.height - b.height); -} - -cv::Size operator +(const cv::Size& a, 
const int& i) -{ - return cv::Size(a.width + i, a.height + i); -} +using namespace std; -cv::Size operator *(const cv::Size& a, const float& f) -{ - return cv::Size(cvRound(a.width * f), cvRound(a.height * f)); -} +#if !defined (HAVE_CUDA) -cv::Size operator /(const cv::Size& a, const float& f) -{ - return cv::Size(cvRound(a.width / f), cvRound(a.height / f)); -} +cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); } +cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string&) { throw_nogpu(); } +cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { throw_nogpu(); } +bool cv::gpu::CascadeClassifier_GPU::empty() const { throw_nogpu(); return true; } +bool cv::gpu::CascadeClassifier_GPU::load(const string&) { throw_nogpu(); return true; } +Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const { throw_nogpu(); return Size();} +void cv::gpu::CascadeClassifier_GPU::release() { throw_nogpu(); } +int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat&, GpuMat&, double, int, Size) {throw_nogpu(); return -1;} +int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat&, GpuMat&, Size, Size, double, int) {throw_nogpu(); return -1;} -bool operator <=(const cv::Size& a, const cv::Size& b) -{ - return a.width <= b.width && a.height <= b.width; -} +#else -struct PyrLavel -{ - PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window) : order(_order) - { - scale = pow(_scale, order); - sFrame = frame / scale; - workArea = sFrame - window + 1; - sWindow = window * scale; - } - - bool isFeasible(cv::Size maxObj) - { - return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj; - } - - PyrLavel next(float factor, cv::Size frame, cv::Size window) - { - return PyrLavel(order + 1, factor, frame, window); - } - - int order; - float scale; - cv::Size sFrame; - cv::Size workArea; - cv::Size sWindow; -}; - -namespace cv { namespace gpu { namespace device -{ - namespace lbp - { - void classifyPyramid(int 
frameW, - int frameH, - int windowW, - int windowH, - float initalScale, - float factor, - int total, - const DevMem2Db& mstages, - const int nstages, - const DevMem2Di& mnodes, - const DevMem2Df& mleaves, - const DevMem2Di& msubsets, - const DevMem2Db& mfeatures, - const int subsetSize, - DevMem2D_ objects, - unsigned int* classified, - DevMem2Di integral); - - void connectedConmonents(DevMem2D_ candidates, int ncandidates, DevMem2D_ objects,int groupThreshold, float grouping_eps, unsigned int* nclasses); - } -}}} - -struct cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifierImpl +struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl { public: - struct Stage - { - int first; - int ntrees; - float threshold; - }; + CascadeClassifierImpl(){} + virtual ~CascadeClassifierImpl(){} - bool read(const FileNode &root); - void allocateBuffers(cv::Size frame = cv::Size()); - bool empty() const {return stage_mat.empty();} + virtual unsigned int process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors, + bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize, cv::Size maxObjectSize) = 0; - int process(const GpuMat& image, GpuMat& objects, double scaleFactor, int groupThreshold, cv::Size maxObjectSize); - -private: - - enum stage { BOOST = 0 }; - enum feature { LBP = 0 }; - - static const stage stageType = BOOST; - static const feature featureType = LBP; - - cv::Size NxM; - bool isStumps; - int ncategories; - int subsetSize; - int nodeStep; - - // gpu representation of classifier - GpuMat stage_mat; - GpuMat trees_mat; - GpuMat nodes_mat; - GpuMat leaves_mat; - GpuMat subsets_mat; - GpuMat features_mat; - - GpuMat integral; - GpuMat integralBuffer; - GpuMat resuzeBuffer; - - GpuMat candidates; - static const int integralFactor = 4; + virtual cv::Size getClassifierCvSize() const = 0; + virtual bool read(const string& classifierAsXml) = 0; }; -void cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifierImpl::allocateBuffers(cv::Size frame) 
-{ - if (frame == cv::Size()) - return; - - if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows) - { - resuzeBuffer.create(frame, CV_8UC1); - - integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1); - NcvSize32u roiSize; - roiSize.width = frame.width; - roiSize.height = frame.height; - - cudaDeviceProp prop; - cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); - - Ncv32u bufSize; - ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) ); - integralBuffer.create(1, bufSize, CV_8UC1); - - candidates.create(1 , frame.width >> 1, CV_32SC4); - } -} - -// currently only stump based boost classifiers are supported -bool CascadeClassifier_GPU_LBP::CascadeClassifierImpl::read(const FileNode &root) -{ - const char *GPU_CC_STAGE_TYPE = "stageType"; - const char *GPU_CC_FEATURE_TYPE = "featureType"; - const char *GPU_CC_BOOST = "BOOST"; - const char *GPU_CC_LBP = "LBP"; - const char *GPU_CC_MAX_CAT_COUNT = "maxCatCount"; - const char *GPU_CC_HEIGHT = "height"; - const char *GPU_CC_WIDTH = "width"; - const char *GPU_CC_STAGE_PARAMS = "stageParams"; - const char *GPU_CC_MAX_DEPTH = "maxDepth"; - const char *GPU_CC_FEATURE_PARAMS = "featureParams"; - const char *GPU_CC_STAGES = "stages"; - const char *GPU_CC_STAGE_THRESHOLD = "stageThreshold"; - const float GPU_THRESHOLD_EPS = 1e-5f; - const char *GPU_CC_WEAK_CLASSIFIERS = "weakClassifiers"; - const char *GPU_CC_INTERNAL_NODES = "internalNodes"; - const char *GPU_CC_LEAF_VALUES = "leafValues"; - const char *GPU_CC_FEATURES = "features"; - const char *GPU_CC_RECT = "rect"; - - std::string stageTypeStr = (string)root[GPU_CC_STAGE_TYPE]; - CV_Assert(stageTypeStr == GPU_CC_BOOST); - - string featureTypeStr = (string)root[GPU_CC_FEATURE_TYPE]; - CV_Assert(featureTypeStr == GPU_CC_LBP); - - NxM.width = (int)root[GPU_CC_WIDTH]; - NxM.height = (int)root[GPU_CC_HEIGHT]; - CV_Assert( NxM.height > 0 && NxM.width > 0 ); - - isStumps = 
((int)(root[GPU_CC_STAGE_PARAMS][GPU_CC_MAX_DEPTH]) == 1) ? true : false; - CV_Assert(isStumps); - - FileNode fn = root[GPU_CC_FEATURE_PARAMS]; - if (fn.empty()) - return false; - - ncategories = fn[GPU_CC_MAX_CAT_COUNT]; - - subsetSize = (ncategories + 31) / 32; - nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 ); - - fn = root[GPU_CC_STAGES]; - if (fn.empty()) - return false; - - std::vector stages; - stages.reserve(fn.size()); - - std::vector cl_trees; - std::vector cl_nodes; - std::vector cl_leaves; - std::vector subsets; - - FileNodeIterator it = fn.begin(), it_end = fn.end(); - for (size_t si = 0; it != it_end; si++, ++it ) - { - FileNode fns = *it; - Stage st; - st.threshold = (float)fns[GPU_CC_STAGE_THRESHOLD] - GPU_THRESHOLD_EPS; - - fns = fns[GPU_CC_WEAK_CLASSIFIERS]; - if (fns.empty()) - return false; - - st.ntrees = (int)fns.size(); - st.first = (int)cl_trees.size(); - - stages.push_back(st);// (int, int, float) - - cl_trees.reserve(stages[si].first + stages[si].ntrees); - - // weak trees - FileNodeIterator it1 = fns.begin(), it1_end = fns.end(); - for ( ; it1 != it1_end; ++it1 ) - { - FileNode fnw = *it1; - - FileNode internalNodes = fnw[GPU_CC_INTERNAL_NODES]; - FileNode leafValues = fnw[GPU_CC_LEAF_VALUES]; - if ( internalNodes.empty() || leafValues.empty() ) - return false; - - int nodeCount = (int)internalNodes.size()/nodeStep; - cl_trees.push_back(nodeCount); - - cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3); - cl_leaves.reserve(cl_leaves.size() + leafValues.size()); - - if( subsetSize > 0 ) - subsets.reserve(subsets.size() + nodeCount * subsetSize); - - // nodes - FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end(); - - for( ; iIt != iEnd; ) - { - cl_nodes.push_back((int)*(iIt++)); - cl_nodes.push_back((int)*(iIt++)); - cl_nodes.push_back((int)*(iIt++)); - - if( subsetSize > 0 ) - for( int j = 0; j < subsetSize; j++, ++iIt ) - subsets.push_back((int)*iIt); - } - - // leaves - iIt = leafValues.begin(), iEnd = 
leafValues.end(); - for( ; iIt != iEnd; ++iIt ) - cl_leaves.push_back((float)*iIt); - } - } - - fn = root[GPU_CC_FEATURES]; - if( fn.empty() ) - return false; - std::vector features; - features.reserve(fn.size() * 4); - FileNodeIterator f_it = fn.begin(), f_end = fn.end(); - for (; f_it != f_end; ++f_it) - { - FileNode rect = (*f_it)[GPU_CC_RECT]; - FileNodeIterator r_it = rect.begin(); - features.push_back(saturate_cast((int)*(r_it++))); - features.push_back(saturate_cast((int)*(r_it++))); - features.push_back(saturate_cast((int)*(r_it++))); - features.push_back(saturate_cast((int)*(r_it++))); - } - - // copy data structures on gpu - stage_mat.upload(cv::Mat(1, stages.size() * sizeof(Stage), CV_8UC1, (uchar*)&(stages[0]) )); - trees_mat.upload(cv::Mat(cl_trees).reshape(1,1)); - nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1)); - leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1)); - subsets_mat.upload(cv::Mat(subsets).reshape(1,1)); - features_mat.upload(cv::Mat(features).reshape(4,1)); - - return true; -} - -int cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifierImpl::process(const GpuMat& image, GpuMat& objects, double scaleFactor, int groupThreshold, cv::Size maxObjectSize) -{ - CV_Assert(!empty() && scaleFactor > 1 && image.depth() == CV_8U); - - const int defaultObjSearchNum = 100; - const float grouping_eps = 0.2f; - - if( !objects.empty() && objects.depth() == CV_32S) - objects.reshape(4, 1); - else - objects.create(1 , image.cols >> 4, CV_32SC4); - - // used for debug - // candidates.setTo(cv::Scalar::all(0)); - // objects.setTo(cv::Scalar::all(0)); - - if (maxObjectSize == cv::Size()) - maxObjectSize = image.size(); - - allocateBuffers(image.size()); - - unsigned int classified = 0; - GpuMat dclassified(1, 1, CV_32S); - cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) ); - - PyrLavel level(0, 1.0f, image.size(), NxM); - - while (level.isFeasible(maxObjectSize)) - { - int acc = level.sFrame.width + 1; - float 
iniScale = level.scale; - - cv::Size area = level.workArea; - int step = 1 + (level.scale <= 2.f); - - int total = 0, prev = 0; - - while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize)) - { - // create sutable matrix headers - GpuMat src = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height)); - GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1)); - GpuMat buff = integralBuffer; - - // generate integral for scale - gpu::resize(image, src, level.sFrame, 0, 0, CV_INTER_LINEAR); - gpu::integralBuffered(src, sint, buff); - - // calculate job - int totalWidth = level.workArea.width / step; - // totalWidth = ((totalWidth + WARP_MASK) / WARP_SIZE) << WARP_LOG; - - total += totalWidth * (level.workArea.height / step); - - // go to next pyramide level - level = level.next(scaleFactor, image.size(), NxM); - area = level.workArea; - - step = (1 + (level.scale <= 2.f)); - prev = acc; - acc += level.sFrame.width + 1; - } - - device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, - leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr(), integral); - } - - if (groupThreshold <= 0 || objects.empty()) - return 0; - - cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) ); - device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr()); - - // candidates.copyTo(objects); - cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) ); - cudaSafeCall( cudaDeviceSynchronize() ); - return classified; -} - -cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifier_GPU_LBP(cv::Size detectionFrameSize) : impl(new CascadeClassifierImpl()) { (*impl).allocateBuffers(detectionFrameSize); } 
-cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP(){ delete impl; } - - -bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const +struct cv::gpu::CascadeClassifier_GPU::HaarCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl { - return (*impl).empty(); -} +public: + HaarCascade() : lastAllocatedFrameSize(-1, -1) + { + ncvSetDebugOutputHandler(NCVDebugOutputHandler); + } -bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml) -{ - FileStorage fs(classifierAsXml, FileStorage::READ); - return fs.isOpened() ? (*impl).read(fs.getFirstTopLevelNode()) : false; -} + bool read(const string& filename) + { + ncvSafeCall( load(filename) ); + return true; + } -int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& objects, double scaleFactor, int groupThreshold, cv::Size maxObjectSize) -{ - return (*impl).process(image, objects, scaleFactor, groupThreshold, maxObjectSize); -} - -// ============ old fashioned haar cascade ==============================================// -struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl -{ - CascadeClassifierImpl(const string& filename) : lastAllocatedFrameSize(-1, -1) - { - ncvSetDebugOutputHandler(NCVDebugOutputHandler); - ncvSafeCall( load(filename) ); - } - - - NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors, - bool findLargestObject, bool visualizeInPlace, NcvSize32u ncvMinSize, - /*out*/unsigned int& numDetections) - { - calculateMemReqsAndAllocate(src.size()); + NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors, + bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize, + /*out*/unsigned int& numDetections) + { + calculateMemReqsAndAllocate(src.size()); NCVMemPtr src_beg; src_beg.ptr = (void*)src.ptr(); @@ -507,6 +122,8 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl roi.width = d_src.width(); roi.height = d_src.height(); + NcvSize32u 
winMinSize(ncvMinSize.width, ncvMinSize.height); + Ncv32u flags = 0; flags |= findLargestObject? NCVPipeObjDet_FindLargestObject : 0; flags |= visualizeInPlace ? NCVPipeObjDet_VisualizeInPlace : 0; @@ -514,7 +131,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl ncvStat = ncvDetectObjectsMultiScale_device( d_src, roi, d_rects, numDetections, haar, *h_haarStages, *d_haarStages, *d_haarNodes, *d_haarFeatures, - ncvMinSize, + winMinSize, minNeighbors, scaleStep, 1, flags, @@ -525,16 +142,35 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl return NCV_SUCCESS; } + unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors, + bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size maxObjectSize) + { + CV_Assert( scaleFactor > 1 && image.depth() == CV_8U); - NcvSize32u getClassifierSize() const { return haar.ClassifierSize; } - cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); } + const int defaultObjSearchNum = 100; + if (objectsBuf.empty()) + { + objectsBuf.create(1, defaultObjSearchNum, DataType::type); + } + cv::Size ncvMinSize = this->getClassifierCvSize(); -private: + if (ncvMinSize.width < (unsigned)minSize.width && ncvMinSize.height < (unsigned)minSize.height) + { + ncvMinSize.width = minSize.width; + ncvMinSize.height = minSize.height; + } + unsigned int numDetections; + ncvSafeCall(this->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections)); - static void NCVDebugOutputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError, msg.c_str()); } + return numDetections; + } + + cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); } +private: + static void NCVDebugOutputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError, msg.c_str()); } NCVStatus load(const string& 
classifierFile) { @@ -581,7 +217,6 @@ private: return NCV_SUCCESS; } - NCVStatus calculateMemReqsAndAllocate(const Size& frameSize) { if (lastAllocatedFrameSize == frameSize) @@ -623,7 +258,6 @@ private: return NCV_SUCCESS; } - cudaDeviceProp devProp; NCVStatus ncvStat; @@ -644,55 +278,448 @@ private: Ptr gpuAllocator; Ptr cpuAllocator; + + virtual ~HaarCascade(){} }; +cv::Size operator -(const cv::Size& a, const cv::Size& b) +{ + return cv::Size(a.width - b.width, a.height - b.height); +} -cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() : findLargestObject(false), visualizeInPlace(false), impl(0) {} -cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string& filename) : findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); } -cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { release(); } -bool cv::gpu::CascadeClassifier_GPU::empty() const { return impl == 0; } -void cv::gpu::CascadeClassifier_GPU::release() { if (impl) { delete impl; impl = 0; } } +cv::Size operator +(const cv::Size& a, const int& i) +{ + return cv::Size(a.width + i, a.height + i); +} +cv::Size operator *(const cv::Size& a, const float& f) +{ + return cv::Size(cvRound(a.width * f), cvRound(a.height * f)); +} -bool cv::gpu::CascadeClassifier_GPU::load(const string& filename) +cv::Size operator /(const cv::Size& a, const float& f) { - release(); - impl = new CascadeClassifierImpl(filename); - return !this->empty(); + return cv::Size(cvRound(a.width / f), cvRound(a.height / f)); +} + +bool operator <=(const cv::Size& a, const cv::Size& b) +{ + return a.width <= b.width && a.height <= b.width; } +struct PyrLavel +{ + PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize) + { + do + { + order = _order; + scale = pow(_scale, order); + sFrame = frame / scale; + workArea = sFrame - window + 1; + sWindow = window * scale; + _order++; + } while (sWindow <= minObjectSize); + } + + bool isFeasible(cv::Size maxObj) + { + 
return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj; + } + + PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize) + { + return PyrLavel(order + 1, factor, frame, window, minObjectSize); + } + + int order; + float scale; + cv::Size sFrame; + cv::Size workArea; + cv::Size sWindow; +}; + +namespace cv { namespace gpu { namespace device +{ + namespace lbp + { + void classifyPyramid(int frameW, + int frameH, + int windowW, + int windowH, + float initalScale, + float factor, + int total, + const DevMem2Db& mstages, + const int nstages, + const DevMem2Di& mnodes, + const DevMem2Df& mleaves, + const DevMem2Di& msubsets, + const DevMem2Db& mfeatures, + const int subsetSize, + DevMem2D_ objects, + unsigned int* classified, + DevMem2Di integral); + + void connectedConmonents(DevMem2D_ candidates, int ncandidates, DevMem2D_ objects,int groupThreshold, float grouping_eps, unsigned int* nclasses); + } +}}} + +struct cv::gpu::CascadeClassifier_GPU::LbpCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl +{ +public: + struct Stage + { + int first; + int ntrees; + float threshold; + }; + + LbpCascade(){} + virtual ~LbpCascade(){} + + virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool findLargestObject, + bool visualizeInPlace, cv::Size minObjectSize, cv::Size maxObjectSize) + { + CV_Assert(scaleFactor > 1 && image.depth() == CV_8U); + + const int defaultObjSearchNum = 100; + const float grouping_eps = 0.2f; + + if( !objects.empty() && objects.depth() == CV_32S) + objects.reshape(4, 1); + else + objects.create(1 , image.cols >> 4, CV_32SC4); + + // used for debug + // candidates.setTo(cv::Scalar::all(0)); + // objects.setTo(cv::Scalar::all(0)); + + if (maxObjectSize == cv::Size()) + maxObjectSize = image.size(); + + allocateBuffers(image.size()); + + unsigned int classified = 0; + GpuMat dclassified(1, 1, CV_32S); + cudaSafeCall( cudaMemcpy(dclassified.ptr(), 
&classified, sizeof(int), cudaMemcpyHostToDevice) ); + + PyrLavel level(0, 1.0f, image.size(), NxM, minObjectSize); + + while (level.isFeasible(maxObjectSize)) + { + int acc = level.sFrame.width + 1; + float iniScale = level.scale; + + cv::Size area = level.workArea; + int step = 1 + (level.scale <= 2.f); + + int total = 0, prev = 0; + + while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize)) + { + // create suitable matrix headers + GpuMat src = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height)); + GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1)); + GpuMat buff = integralBuffer; + + // generate integral for scale + gpu::resize(image, src, level.sFrame, 0, 0, CV_INTER_LINEAR); + gpu::integralBuffered(src, sint, buff); + + // calculate job + int totalWidth = level.workArea.width / step; + total += totalWidth * (level.workArea.height / step); + + // go to next pyramid level + level = level.next(scaleFactor, image.size(), NxM, minObjectSize); + area = level.workArea; + + step = (1 + (level.scale <= 2.f)); + prev = acc; + acc += level.sFrame.width + 1; + } + + device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, + leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr(), integral); + } + + if (groupThreshold <= 0 || objects.empty()) + return 0; + + cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) ); + device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr()); + + cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) ); + cudaSafeCall( cudaDeviceSynchronize() ); + return classified; + } + + virtual cv::Size getClassifierCvSize() const { return NxM; } + + bool read(const string& classifierAsXml) + { + 
FileStorage fs(classifierAsXml, FileStorage::READ); + return fs.isOpened() ? read(fs.getFirstTopLevelNode()) : false; + } + +private: + + void allocateBuffers(cv::Size frame) + { + if (frame == cv::Size()) + return; + + if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows) + { + resuzeBuffer.create(frame, CV_8UC1); + + integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1); + NcvSize32u roiSize; + roiSize.width = frame.width; + roiSize.height = frame.height; + + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); + + Ncv32u bufSize; + ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) ); + integralBuffer.create(1, bufSize, CV_8UC1); + + candidates.create(1 , frame.width >> 1, CV_32SC4); + } + } + + bool read(const FileNode &root) + { + const char *GPU_CC_STAGE_TYPE = "stageType"; + const char *GPU_CC_FEATURE_TYPE = "featureType"; + const char *GPU_CC_BOOST = "BOOST"; + const char *GPU_CC_LBP = "LBP"; + const char *GPU_CC_MAX_CAT_COUNT = "maxCatCount"; + const char *GPU_CC_HEIGHT = "height"; + const char *GPU_CC_WIDTH = "width"; + const char *GPU_CC_STAGE_PARAMS = "stageParams"; + const char *GPU_CC_MAX_DEPTH = "maxDepth"; + const char *GPU_CC_FEATURE_PARAMS = "featureParams"; + const char *GPU_CC_STAGES = "stages"; + const char *GPU_CC_STAGE_THRESHOLD = "stageThreshold"; + const float GPU_THRESHOLD_EPS = 1e-5f; + const char *GPU_CC_WEAK_CLASSIFIERS = "weakClassifiers"; + const char *GPU_CC_INTERNAL_NODES = "internalNodes"; + const char *GPU_CC_LEAF_VALUES = "leafValues"; + const char *GPU_CC_FEATURES = "features"; + const char *GPU_CC_RECT = "rect"; + + std::string stageTypeStr = (string)root[GPU_CC_STAGE_TYPE]; + CV_Assert(stageTypeStr == GPU_CC_BOOST); + + string featureTypeStr = (string)root[GPU_CC_FEATURE_TYPE]; + CV_Assert(featureTypeStr == GPU_CC_LBP); + + NxM.width = (int)root[GPU_CC_WIDTH]; + NxM.height = (int)root[GPU_CC_HEIGHT]; + 
CV_Assert( NxM.height > 0 && NxM.width > 0 ); + + isStumps = ((int)(root[GPU_CC_STAGE_PARAMS][GPU_CC_MAX_DEPTH]) == 1) ? true : false; + CV_Assert(isStumps); + + FileNode fn = root[GPU_CC_FEATURE_PARAMS]; + if (fn.empty()) + return false; + + ncategories = fn[GPU_CC_MAX_CAT_COUNT]; + + subsetSize = (ncategories + 31) / 32; + nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 ); + + fn = root[GPU_CC_STAGES]; + if (fn.empty()) + return false; + + std::vector stages; + stages.reserve(fn.size()); + + std::vector cl_trees; + std::vector cl_nodes; + std::vector cl_leaves; + std::vector subsets; + + FileNodeIterator it = fn.begin(), it_end = fn.end(); + for (size_t si = 0; it != it_end; si++, ++it ) + { + FileNode fns = *it; + Stage st; + st.threshold = (float)fns[GPU_CC_STAGE_THRESHOLD] - GPU_THRESHOLD_EPS; + + fns = fns[GPU_CC_WEAK_CLASSIFIERS]; + if (fns.empty()) + return false; + + st.ntrees = (int)fns.size(); + st.first = (int)cl_trees.size(); + + stages.push_back(st);// (int, int, float) + + cl_trees.reserve(stages[si].first + stages[si].ntrees); + + // weak trees + FileNodeIterator it1 = fns.begin(), it1_end = fns.end(); + for ( ; it1 != it1_end; ++it1 ) + { + FileNode fnw = *it1; + + FileNode internalNodes = fnw[GPU_CC_INTERNAL_NODES]; + FileNode leafValues = fnw[GPU_CC_LEAF_VALUES]; + if ( internalNodes.empty() || leafValues.empty() ) + return false; + + int nodeCount = (int)internalNodes.size()/nodeStep; + cl_trees.push_back(nodeCount); + + cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3); + cl_leaves.reserve(cl_leaves.size() + leafValues.size()); + + if( subsetSize > 0 ) + subsets.reserve(subsets.size() + nodeCount * subsetSize); + + // nodes + FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end(); + + for( ; iIt != iEnd; ) + { + cl_nodes.push_back((int)*(iIt++)); + cl_nodes.push_back((int)*(iIt++)); + cl_nodes.push_back((int)*(iIt++)); + + if( subsetSize > 0 ) + for( int j = 0; j < subsetSize; j++, ++iIt ) + subsets.push_back((int)*iIt); + 
} + + // leaves + iIt = leafValues.begin(), iEnd = leafValues.end(); + for( ; iIt != iEnd; ++iIt ) + cl_leaves.push_back((float)*iIt); + } + } + + fn = root[GPU_CC_FEATURES]; + if( fn.empty() ) + return false; + std::vector features; + features.reserve(fn.size() * 4); + FileNodeIterator f_it = fn.begin(), f_end = fn.end(); + for (; f_it != f_end; ++f_it) + { + FileNode rect = (*f_it)[GPU_CC_RECT]; + FileNodeIterator r_it = rect.begin(); + features.push_back(saturate_cast((int)*(r_it++))); + features.push_back(saturate_cast((int)*(r_it++))); + features.push_back(saturate_cast((int)*(r_it++))); + features.push_back(saturate_cast((int)*(r_it++))); + } + + // copy data structures on gpu + stage_mat.upload(cv::Mat(1, stages.size() * sizeof(Stage), CV_8UC1, (uchar*)&(stages[0]) )); + trees_mat.upload(cv::Mat(cl_trees).reshape(1,1)); + nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1)); + leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1)); + subsets_mat.upload(cv::Mat(subsets).reshape(1,1)); + features_mat.upload(cv::Mat(features).reshape(4,1)); + + return true; + } + + enum stage { BOOST = 0 }; + enum feature { LBP = 1, HAAR = 2 }; + static const stage stageType = BOOST; + static const feature featureType = LBP; + + cv::Size NxM; + bool isStumps; + int ncategories; + int subsetSize; + int nodeStep; + + // gpu representation of classifier + GpuMat stage_mat; + GpuMat trees_mat; + GpuMat nodes_mat; + GpuMat leaves_mat; + GpuMat subsets_mat; + GpuMat features_mat; + + GpuMat integral; + GpuMat integralBuffer; + GpuMat resuzeBuffer; + + GpuMat candidates; + static const int integralFactor = 4; +}; + +cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() +: findLargestObject(false), visualizeInPlace(false), impl(0) {} + +cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string& filename) +: findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); } + +cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { release(); } + +void 
cv::gpu::CascadeClassifier_GPU::release() { if (impl) { delete impl; impl = 0; } } + +bool cv::gpu::CascadeClassifier_GPU::empty() const { return impl == 0; } Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const { return this->empty() ? Size() : impl->getClassifierCvSize(); } - int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize) { - CV_Assert( scaleFactor > 1 && image.depth() == CV_8U); CV_Assert( !this->empty()); + return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, cv::Size()); +} - const int defaultObjSearchNum = 100; - if (objectsBuf.empty()) +int cv::gpu::CascadeClassifier_GPU::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize, double scaleFactor, int minNeighbors) +{ + CV_Assert( !this->empty()); + return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, maxObjectSize); +} + +bool cv::gpu::CascadeClassifier_GPU::load(const string& filename) +{ + release(); + + std::string fext = filename.substr(filename.find_last_of(".") + 1); + std::transform(fext.begin(), fext.end(), fext.begin(), ::tolower); + + if (fext == "nvbin") { - objectsBuf.create(1, defaultObjSearchNum, DataType::type); + impl = new HaarCascade(); + return impl->read(filename); } - NcvSize32u ncvMinSize = impl->getClassifierSize(); + FileStorage fs(filename, FileStorage::READ); - if (ncvMinSize.width < (unsigned)minSize.width && ncvMinSize.height < (unsigned)minSize.height) + if (!fs.isOpened()) { - ncvMinSize.width = minSize.width; - ncvMinSize.height = minSize.height; + impl = new HaarCascade(); + return impl->read(filename); } - unsigned int numDetections; - ncvSafeCall( impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections) ); + const char 
*GPU_CC_LBP = "LBP"; + string featureTypeStr = (string)fs.getFirstTopLevelNode()["featureType"]; + if (featureTypeStr == GPU_CC_LBP) + impl = new LbpCascade(); + else + impl = new HaarCascade(); - return numDetections; + impl->read(filename); + return !this->empty(); } +////////////////////////////////////////////////////////////////////////////////////////////////////// struct RectConvert { @@ -708,7 +735,6 @@ struct RectConvert } }; - void groupRectangles(std::vector &hypotheses, int groupThreshold, double eps, std::vector *weights) { vector rects(hypotheses.size()); diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu index bbbe0bf449..e96692cefb 100644 --- a/modules/gpu/src/cuda/lbp.cu +++ b/modules/gpu/src/cuda/lbp.cu @@ -290,6 +290,7 @@ namespace cv { namespace gpu { namespace device { const int block = 128; int grid = divUp(workAmount, block); + cudaFuncSetCacheConfig(lbp_cascade, cudaFuncCachePreferL1); Cascade cascade((Stage*)mstages.ptr(), nstages, (ClNode*)mnodes.ptr(), mleaves.ptr(), msubsets.ptr(), (uchar4*)mfeatures.ptr(), subsetSize); lbp_cascade<<>>(cascade, frameW, frameH, windowW, windowH, initialScale, factor, workAmount, integral.ptr(), integral.step / sizeof(int), objects, classified); } diff --git a/modules/gpu/test/test_objdetect.cpp b/modules/gpu/test/test_objdetect.cpp index a7db7667be..927762b2b0 100644 --- a/modules/gpu/test/test_objdetect.cpp +++ b/modules/gpu/test/test_objdetect.cpp @@ -302,13 +302,13 @@ PARAM_TEST_CASE(LBP_Read_classifier, cv::gpu::DeviceInfo, int) TEST_P(LBP_Read_classifier, Accuracy) { - cv::gpu::CascadeClassifier_GPU_LBP classifier; + cv::gpu::CascadeClassifier_GPU classifier; std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml"; ASSERT_TRUE(classifier.load(classifierXmlPath)); } -INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_Read_classifier, - testing::Combine(ALL_DEVICES, testing::Values(0))); 
+INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_Read_classifier, + testing::Combine(ALL_DEVICES, testing::Values(0))); PARAM_TEST_CASE(LBP_classify, cv::gpu::DeviceInfo, int) @@ -344,7 +344,7 @@ TEST_P(LBP_classify, Accuracy) for (; it != rects.end(); ++it) cv::rectangle(markedImage, *it, CV_RGB(0, 0, 255)); - cv::gpu::CascadeClassifier_GPU_LBP gpuClassifier; + cv::gpu::CascadeClassifier_GPU gpuClassifier; ASSERT_TRUE(gpuClassifier.load(classifierXmlPath)); cv::gpu::GpuMat gpu_rects; @@ -352,23 +352,23 @@ TEST_P(LBP_classify, Accuracy) int count = gpuClassifier.detectMultiScale(tested, gpu_rects); cv::Mat downloaded(gpu_rects); - const cv::Rect* faces = downloaded.ptr(); + const cv::Rect* faces = downloaded.ptr(); for (int i = 0; i < count; i++) { cv::Rect r = faces[i]; #if defined (LOG_CASCADE_STATISTIC) - std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl; -#endif + std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl; cv::rectangle(markedImage, r , CV_RGB(255, 0, 0)); +#endif } #if defined (LOG_CASCADE_STATISTIC) - cv::imshow("Res", markedImage); cv::waitKey(); + cv::imshow("Res", markedImage); cv::waitKey(); #endif } INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_classify, - testing::Combine(ALL_DEVICES, testing::Values(0))); + testing::Combine(ALL_DEVICES, testing::Values(0))); } // namespace