From 34bfd7ef51b5d056ec35f9b60ced23b5a7a1811b Mon Sep 17 00:00:00 2001
From: Li Peng
Date: Wed, 3 Jan 2018 21:43:48 +0800
Subject: [PATCH 1/2] add ocl implementation of proposal layer

Signed-off-by: Li Peng
---
 modules/dnn/src/layers/proposal_layer.cpp | 102 +++++++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp
index 8da4c47cf3..8fee7fa727 100644
--- a/modules/dnn/src/layers/proposal_layer.cpp
+++ b/modules/dnn/src/layers/proposal_layer.cpp
@@ -148,11 +148,94 @@ public:
         deltasPermute->finalize(layerInputs, layerOutputs);
     }
 
+#ifdef HAVE_OPENCL
+    // OpenCL path of the layer, working on UMat blobs: prior boxes -> permuted
+    // scores and deltas -> DetectionOutputLayer -> boxes copied into outputs[0].
+    // inputs: [scores, bbox deltas, image info]; internals: [prior boxes,
+    // permuted scores, permuted deltas]. Always returns true.
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+        std::vector<UMat> internals;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+        internals_.getUMatVector(internals);
+
+        CV_Assert(inputs.size() == 3);
+        CV_Assert(internals.size() == 3);
+        const UMat& scores = inputs[0];
+        const UMat& bboxDeltas = inputs[1];
+        const UMat& imInfo = inputs[2];
+        UMat& priorBoxes = internals[0];
+        UMat& permuttedScores = internals[1];
+        UMat& permuttedDeltas = internals[2];
+
+        CV_Assert(imInfo.total() >= 2);
+        // Read the image size on the host (assumes imInfo stores CV_32F values).
+        Mat szMat;
+        imInfo.copyTo(szMat);
+        int rows = (int)szMat.at<float>(0);
+        int cols = (int)szMat.at<float>(1);
+        // We've chosen the smallest data type because we need just a shape from it.
+        umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
+        umat_fakeImageBlob.setTo(0);
+
+        // Generate prior boxes.
+        std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
+        layerInputs[0] = scores;
+        layerInputs[1] = umat_fakeImageBlob;
+        priorBoxLayer->forward(layerInputs, layerOutputs, internals);
+
+        // Permute scores.
+        layerInputs.assign(1, getObjectScores(scores));
+        layerOutputs.assign(1, permuttedScores);
+        scoresPermute->forward(layerInputs, layerOutputs, internals);
+
+        // Permute deltas.
+        layerInputs.assign(1, bboxDeltas);
+        layerOutputs.assign(1, permuttedDeltas);
+        deltasPermute->forward(layerInputs, layerOutputs, internals);
+
+        // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
+        // output internally because of different number of objects after NMS.
+        layerInputs.resize(4);
+        layerInputs[0] = permuttedDeltas;
+        layerInputs[1] = permuttedScores;
+        layerInputs[2] = priorBoxes;
+        layerInputs[3] = umat_fakeImageBlob;
+
+        layerOutputs[0] = UMat();
+        detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
+
+        // DetectionOutputLayer produces 1x1xNx7 output where N might be less than or
+        // equal to keepTopAfterNMS. We fill the rest with zeros.
+        const int numDets = layerOutputs[0].total() / 7;
+        CV_Assert(numDets <= keepTopAfterNMS);
+
+        MatShape s = shape(numDets, 7);
+        UMat src = layerOutputs[0].reshape(1, s.size(), &s[0]).colRange(3, 7);
+        UMat dst = outputs[0].rowRange(0, numDets);
+        src.copyTo(dst.colRange(1, 5));
+        dst.col(0).setTo(0);  // The first column holds batch ids. Keep it zero too.
+
+        if (numDets < keepTopAfterNMS)
+            outputs[0].rowRange(numDets, keepTopAfterNMS).setTo(0);
+
+        return true;
+    }
+#endif
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
     }
 
@@ -226,6 +309,22 @@ private:
         return slice(m, Range::all(), Range(channels / 2, channels));
     }
 
+#ifdef HAVE_OPENCL
+    // Returns the upper half of the channel axis of a 1xCxHxW UMat blob
+    // (C must be even) as a sub-matrix; per the name, these are the object scores.
+    static UMat getObjectScores(const UMat& m)
+    {
+        CV_Assert(m.dims == 4);
+        CV_Assert(m.size[0] == 1);
+        int channels = m.size[1];
+        CV_Assert((channels & 1) == 0);
+
+        Range r = Range(channels / 2, channels);
+        Range ranges[4] = { Range::all(), r, Range::all(), Range::all() };
+        return m(&ranges[0]);
+    }
+#endif
+
     Ptr<PriorBoxLayer> priorBoxLayer;
     Ptr<DetectionOutputLayer> detectionOutputLayer;
 
@@ -233,6 +332,9 @@ private:
     Ptr<PermuteLayer> scoresPermute;
     uint32_t keepTopAfterNMS;
     Mat fakeImageBlob;
+#ifdef HAVE_OPENCL
+    UMat umat_fakeImageBlob;
+#endif
 };
 
 

From 1073175c77885c6954ebfd96cfdaa3dc15cbc46f Mon Sep 17 00:00:00 2001
From: Li Peng
Date: Wed, 3 Jan 2018 23:54:11 +0800
Subject: [PATCH 2/2] add opencl option for resnet_ssd_face sample

Signed-off-by: Li Peng
---
 samples/dnn/resnet_ssd_face.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/samples/dnn/resnet_ssd_face.cpp b/samples/dnn/resnet_ssd_face.cpp
index 7a30768ba7..5fe8b8def1 100644
--- a/samples/dnn/resnet_ssd_face.cpp
+++ b/samples/dnn/resnet_ssd_face.cpp
@@ -30,6 +30,7 @@ const char* params
     "{ model | | model weights (res10_300x300_ssd_iter_140000.caffemodel) }"
     "{ camera_device | 0 | camera device number }"
     "{ video | | video or image for detection }"
+    "{ opencl | false | enable OpenCL }"
     "{ min_confidence | 0.5 | min confidence }";
 
 int main(int argc, char** argv)
@@ -62,6 +63,11 @@ int main(int argc, char** argv)
         exit(-1);
     }
 
+    if (parser.get<bool>("opencl"))
+    {
+        net.setPreferableTarget(DNN_TARGET_OPENCL);
+    }
+
     VideoCapture cap;
     if (parser.get<String>("video").empty())
     {