From 66feea6cacee0ecc97e18a18ded4fa28fa3448f4 Mon Sep 17 00:00:00 2001 From: Li Peng Date: Wed, 29 Nov 2017 22:35:02 +0800 Subject: [PATCH] region layer ocl implementation Signed-off-by: Li Peng --- modules/dnn/src/layers/region_layer.cpp | 73 ++++++++++++++++ modules/dnn/src/opencl/region.cl | 109 ++++++++++++++++++++++++ 2 files changed, 182 insertions(+) create mode 100644 modules/dnn/src/opencl/region.cl diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index bc12e8b1be..94993fa58f 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -44,6 +44,7 @@ #include #include #include +#include "opencl_kernels_dnn.hpp" namespace cv { @@ -114,11 +115,83 @@ public: } } +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) + { + std::vector inputs; + std::vector outputs; + + inps.getUMatVector(inputs); + outs.getUMatVector(outputs); + + if (useSoftmaxTree) { // Yolo 9000 + CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented"); + return false; + } + + CV_Assert(inputs.size() >= 1); + int const cell_size = classes + coords + 1; + UMat blob_umat = blobs[0].getUMat(ACCESS_READ); + + for (size_t ii = 0; ii < outputs.size(); ii++) + { + UMat& inpBlob = inputs[ii]; + UMat& outBlob = outputs[ii]; + + int rows = inpBlob.size[1]; + int cols = inpBlob.size[2]; + + ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc); + size_t global = rows*cols*anchors; + logistic_kernel.set(0, (int)global); + logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob)); + logistic_kernel.set(2, (int)cell_size); + logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob)); + logistic_kernel.run(1, &global, NULL, false); + + if (useSoftmax) + { + // Yolo v2 + // softmax activation for Probability, for each grid cell (X x Y x Anchor-index) + ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc); + size_t nthreads = rows*cols*anchors; + softmax_kernel.set(0, (int)nthreads); + softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob)); + softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat)); + softmax_kernel.set(3, (int)cell_size); + softmax_kernel.set(4, (int)classes); + softmax_kernel.set(5, (int)classfix); + softmax_kernel.set(6, (int)rows); + softmax_kernel.set(7, (int)cols); + softmax_kernel.set(8, (int)anchors); + softmax_kernel.set(9, (float)thresh); + softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob)); + if (!softmax_kernel.run(1, &nthreads, NULL, false)) + return false; + } + + if (nmsThreshold > 0) { + Mat mat = outBlob.getMat(ACCESS_WRITE); + float *dstData = mat.ptr(); + do_nms_sort(dstData, rows*cols*anchors, nmsThreshold); + //do_nms(dstData, rows*cols*anchors, nmsThreshold); + } + + } + + return true; + } +#endif + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } diff --git a/modules/dnn/src/opencl/region.cl b/modules/dnn/src/opencl/region.cl new file mode 100644 index 0000000000..d33ac782c4 --- /dev/null +++ b/modules/dnn/src/opencl/region.cl @@ -0,0 +1,109 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#define Dtype float + +__kernel void logistic_activ(const int count, + __global const Dtype* src, + const int cell_size, + __global Dtype* dst) +{ + for (int i = get_global_id(0); i < count; i += get_global_size(0)) + { + int index = cell_size * i; + Dtype x = src[index + 4]; + dst[index + 4] = 1.f / (1.f + exp(-x)); + } +} + +__kernel void softmax_activ(const int count, + __global const Dtype* src, + __global const Dtype* biasData, + const int cell_size, + const int classes, + const int classfix, + const int rows, + const int cols, + const int anchors, + const float thresh, + __global Dtype* dst) +{ + for (int index = get_global_id(0); index < count; index += get_global_size(0)) + { + int box_index = index * cell_size; + float largest = -FLT_MAX; + __global const Dtype *input = src + box_index + 5; + __global Dtype *output = dst + box_index + 5; + + for (int i = 0; i < classes; ++i) + largest = fmax(largest, input[i]); + + float sum = 0; + for (int i = 0; i < classes; ++i) + { + float e = exp((input[i] - largest)); + sum += e; + output[i] = e; + } + + int y = index / anchors / cols; + int x = index / anchors % cols; + int a = index - anchors * (x + y * cols); + float scale = dst[box_index + 4]; + if (classfix == -1 && scale < .5) scale = 0; + + float v1 = src[box_index + 0]; + float v2 = src[box_index + 1]; + float l1 = 1.f / (1.f + exp(-v1)); + float l2 = 1.f / (1.f + exp(-v2)); + + dst[box_index + 0] = (x + l1) / cols; + dst[box_index + 1] = (y + l2) / rows; + dst[box_index + 2] = exp(src[box_index + 2]) * biasData[2 * a] / cols; + dst[box_index + 3] = exp(src[box_index + 3]) * biasData[2 * a + 1] / rows; + + for (int i = 0; i < classes; ++i) + { + float prob = scale * output[i] / sum; + output[i] = (prob > thresh) ? prob : 0; + } + } +}