diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index db228a69b8..8362282b0d 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -1552,12 +1552,17 @@ public:
         enum {PEDESTRIAN = 0};
     };
 
+    // Criteria for rejecting overlapping detections, passed as rejCriteria.
+    // DEFAULT is an alias of NO_REJECT.
+    // NOTE(review): DOLLAR presumably names the suppression scheme of P. Dollar et al. -- confirm.
+    enum { NO_REJECT = 1, DOLLAR = 2, /*PASCAL = 4,*/ DEFAULT = NO_REJECT};
+
     // An empty cascade will be created.
-    // Param minScale is a minimum scale relative to the original size of the image on which cascade will be applyed.
-    // Param minScale is a maximum scale relative to the original size of the image on which cascade will be applyed.
+    // Param minScale is a minimum scale relative to the original size of the image on which cascade will be applied.
+    // Param maxScale is a maximum scale relative to the original size of the image on which cascade will be applied.
     // Param scales is a number of scales from minScale to maxScale.
-    // Param rejfactor is used for NMS.
-    SCascade(const double minScale = 0.4, const double maxScale = 5., const int scales = 55, const int rejfactor = 1);
+    // Param rejCriteria is one of the rejection criteria declared above and is used for NMS.
+    SCascade(const double minScale = 0.4, const double maxScale = 5., const int scales = 55, const int rejCriteria = 1);
 
     virtual ~SCascade();
 
@@ -1595,7 +1600,7 @@ private:
     double maxScale;
 
     int scales;
-    int rejfactor;
+    int rejCriteria;
 };
 
 ////////////////////////////////// SURF //////////////////////////////////////////
diff --git a/modules/gpu/src/cuda/icf-sc.cu b/modules/gpu/src/cuda/icf-sc.cu
index 27d60e6372..5334441d8d 100644
--- a/modules/gpu/src/cuda/icf-sc.cu
+++ b/modules/gpu/src/cuda/icf-sc.cu
@@ -41,9 +41,10 @@
 //M*/
 
 #include
+
 #include
-#include
 #include
+#include
 
 namespace cv { namespace gpu { namespace device {
 namespace icf {
@@ -79,6 +80,88 @@ namespace icf {
         }
     }
 
+    // Area of the intersection of the bounding boxes of a and b;
+    // 0 when the boxes do not intersect.
+    __device__ __forceinline__ float overlapArea(const Detection &a, const Detection &b)
+    {
+        int w = ::min(a.x + a.w, b.x + b.w) - ::max(a.x, b.x);
+        int h = ::min(a.y + a.h, b.y + b.h) - ::max(a.y, b.y);
+
+        return (w < 0 || h < 0)? 0.f : (float)(w * h);
+    }
+
+    // Flags overlapping detections among the first *n entries of detections.
+    // For each pair i < j whose intersection covers more than 65% of the smaller
+    // box, overlaps[j] is set to 1 when detection i is the more confident one and
+    // overlaps[i] otherwise. Flags are only ever set, so overlaps must be zeroed
+    // before the launch. Indexing uses threadIdx.x only: launch with one block.
+    // Every pair is tested: the earlier exit on __all(excluded) sat in a loop whose
+    // trip count differs between the lanes of a warp, so the pairs that were
+    // skipped depended on how the detections happened to be spread over warps.
+    __global__ void overlap(const uint* n, const Detection* detections, uchar* overlaps)
+    {
+        const int idx = threadIdx.x;
+        const int total = *n;
+
+        // stride by the actual block size rather than a hard-coded 192
+        for (int i = idx; i < total; i += blockDim.x)
+        {
+            const Detection& a = detections[i];
+
+            for (int j = i + 1; j < total; ++j)
+            {
+                const Detection& b = detections[j];
+                const float minArea = (float)::min(a.w * a.h, b.w * b.h);
+
+                // a box without area has no relative overlap; skip it instead of dividing by zero
+                if (minArea <= 0.f) continue;
+
+                if (overlapArea(a, b) / minArea > 0.65f)
+                {
+                    int suppressed = (a.confidence > b.confidence)? j : i;
+                    overlaps[suppressed] = 1;
+                }
+            }
+        }
+    }
+
+    // Visits the detections left unflagged by overlap(). The only statement in the
+    // loop body is a disabled debug print, so the kernel currently has no effect.
+    // Indexing uses threadIdx.x only: launch with one block.
+    __global__ void collect(const uint* n, const Detection* detections, uchar* overlaps)
+    {
+        const int idx = threadIdx.x;
+        const int total = *n;
+
+        for (int i = idx; i < total; i += blockDim.x)
+        {
+            if (!overlaps[i])
+            {
+                const Detection& det = detections[i];
+                // printf("%d: %d %d %d %d %f\n", i, det.x, det.y, det.w, det.h, det.confidence );
+            }
+        }
+    }
+
+    // Host entry point: flags overlapping detections.
+    // objects     - device buffer; its first row is read as an array of Detection.
+    // overlaps    - device byte buffer; its first row receives the flags and must be zeroed by the caller.
+    // ndetections - device buffer; its first element is read as the number of detections.
+    // Launches on the default stream and waits for the device before returning.
+    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections)
+    {
+        int block = 192;
+        int grid = 1;
+
+        overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
+        collect<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
+        // if (!stream)
+        {
+            cudaSafeCall( cudaGetLastError());
+            cudaSafeCall( cudaDeviceSynchronize());
+        }
+    }
+
     template
     struct PrefixSum
     {
diff --git a/modules/gpu/src/gpu_init.cpp b/modules/gpu/src/gpu_init.cpp
index f25bc2ceb0..773a8b64e9 100644
--- a/modules/gpu/src/gpu_init.cpp
+++ b/modules/gpu/src/gpu_init.cpp
@@ -46,10 +46,10 @@ namespace cv { namespace gpu {
 
 
 CV_INIT_ALGORITHM(SCascade, "CascadeDetector.SCascade",
-                  obj.info()->addParam(obj, "minScale", obj.minScale);
-                  obj.info()->addParam(obj, "maxScale", obj.maxScale);
-                  obj.info()->addParam(obj, "scales", obj.scales);
-                  obj.info()->addParam(obj, "rejfactor", obj.rejfactor));
+                  obj.info()->addParam(obj, "minScale",    obj.minScale);
+                  obj.info()->addParam(obj, "maxScale",    obj.maxScale);
+                  obj.info()->addParam(obj, "scales",      obj.scales);
+                  obj.info()->addParam(obj, "rejCriteria", obj.rejCriteria));
 
 bool initModule_gpu(void)
 {
diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp
index d5a8e84814..35bd72e552 100644
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@@ -85,6 +85,9 @@ namespace cv { namespace gpu { namespace device {
 namespace icf {
     void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
         const int fw, const int fh, const int bins, cudaStream_t stream);
+
+    // Flags mutually overlapping detections in overlaps (defined in icf-sc.cu).
+    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections);
 }
 
 namespace imgproc {
@@ -309,6 +312,9 @@ struct cv::gpu::SCascade::Fields
         hogluv.create((fh / shr) * HOG_LUV_BINS + 1, fw / shr + 1, CV_32SC1);
         hogluv.setTo(cv::Scalar::all(0));
 
+        // initial capacity only: suppress() re-checks the size with ensureSizeIsEnough on every call
+        overlaps.create(1, 5000, CV_8UC1);
+
         return true;
     }
 
@@ -437,7 +443,20 @@ private:
         }
     }
 
 public:
+    // Flags overlapping detections as the first step of non-maximum suppression.
+    // ndetections - device buffer whose first element is the number of detections.
+    // objects     - device buffer holding the detections.
+    // The flags are left in the overlaps member: 1 marks a rejected detection.
+    void suppress(GpuMat& ndetections, GpuMat& objects)
+    {
+        // NOTE(review): sized from the dimensions of objects; assumes that is
+        // at least the number of detections -- confirm.
+        ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
+        overlaps.setTo(0);
+        device::icf::suppress(objects, overlaps, ndetections);
+        // std::cout << cv::Mat(overlaps) << std::endl;
+    }
 
     // scales range
     float minScale;
@@ -469,6 +488,10 @@ public:
     // 161x121x10
     GpuMat hogluv;
 
+    // one flag per detection, filled by suppress():
+    // 1 marks a detection rejected as overlapping another one
+    GpuMat overlaps;
+
     // Cascade from xml
     GpuMat octaves;
     GpuMat stages;
@@ -478,6 +501,9 @@ public:
 
     GpuMat sobelBuf;
 
+    // NOTE(review): not referenced yet; presumably meant for the detections kept by suppression -- confirm
+    GpuMat collected;
+
     std::vector voctaves;
 
     DeviceInfo info;
@@ -494,7 +520,7 @@ public:
 };
 
 cv::gpu::SCascade::SCascade(const double mins, const double maxs, const int sc, const int rjf)
-: fields(0), minScale(mins), maxScale(maxs), scales(sc), rejfactor(rjf) {}
+: fields(0), minScale(mins), maxScale(maxs), scales(sc), rejCriteria(rjf) {}
 
 cv::gpu::SCascade::~SCascade() { delete fields; }
 
@@ -534,6 +560,10 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
 
     cudaStream_t stream = StreamAccessor::getStream(s);
     flds.detect(rois, tmp, objects, stream);
+
+    // run the blocking overlap pass only when a rejection criterion was requested
+    if (rejCriteria != NO_REJECT)
+        flds.suppress(tmp, objects);
 }
 
 void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const