diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 18758b98bf..aba3af8b0d 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -43,6 +43,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" #include +#include "opencl_kernels_dnn.hpp" namespace cv { @@ -171,11 +172,59 @@ public: } } +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) + { + std::vector inputs; + std::vector outputs; + + inputs_.getUMatVector(inputs); + outputs_.getUMatVector(outputs); + + if (inputs[0].dims < 4) + return false; + + const UMat& inpMat = inputs[0]; + for (size_t i = 0; i < outputs.size(); i++) + { + int groups = outputs[i].size[0]; + int channels = outputs[i].size[1]; + int rows = outputs[i].size[2]; + int cols = outputs[i].size[3]; + + int number = (cols % 8 == 0) ? 8 : ((cols % 4 == 0) ? 4 : 1); + String buildopt = format("-DNUM=%d ", number); + String kname = format("slice%d", number); + ocl::Kernel kernel(kname.c_str(), ocl::dnn::slice_oclsrc, buildopt); + size_t global[] = { (size_t)groups * channels, (size_t)rows * cols / number }; + int idx = 0; + kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inpMat)); + kernel.set(idx++, (int)(inpMat.size[2] * inpMat.size[3])); + kernel.set(idx++, (int)inpMat.size[3]); + kernel.set(idx++, (int)global[0]); + kernel.set(idx++, (int)(rows * cols)); + kernel.set(idx++, (int)cols); + kernel.set(idx++, (int)sliceRanges[i][2].start); + kernel.set(idx++, (int)sliceRanges[i][3].start); + kernel.set(idx++, ocl::KernelArg::PtrWriteOnly(outputs[i])); + bool ret = kernel.run(2, global, NULL, false); + if (!ret) + return false; + } + + return true; + } +#endif + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } diff --git a/modules/dnn/src/opencl/slice.cl b/modules/dnn/src/opencl/slice.cl new file mode 100644 index 0000000000..81a71489be --- /dev/null +++ b/modules/dnn/src/opencl/slice.cl @@ -0,0 +1,87 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2017, Intel Corporation, all rights reserved. +// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#define Dtype float +#define Dtype4 float4 +#define Dtype8 float8 + +#if NUM == 8 + #define load(src, index) vload8(0, src + index) + #define store(vec, dst, index) vstore8(vec, 0, dst + index) + #define vec_type Dtype8 + #define SLICE slice8 +#elif NUM == 4 + #define load(src, index) vload4(0, src + index) + #define store(vec, dst, index) vstore4(vec, 0, dst + index) + #define vec_type Dtype4 + #define SLICE slice4 +#elif NUM == 1 + #define load(src, index) src[index] + #define store(vec, dst, index) dst[index] = vec + #define vec_type Dtype + #define SLICE slice1 +#endif + +__kernel void SLICE(__global const Dtype* src, + const int src_plane_size, + const int src_cols, + const int channels, + const int dst_plane_size, + const int dst_cols, + const int row_offset, + const int col_offset, + __global Dtype* dst) +{ + int x = get_global_id(0); + int y = get_global_id(1) * NUM; + + if ((x >= channels) || (y >= dst_plane_size)) + return; + + int row = y / dst_cols + row_offset; + int col = y % dst_cols + col_offset; + + int src_index = x * src_plane_size + row * src_cols + col; + int dst_index = x * dst_plane_size + y; + vec_type val = load(src, src_index); + store(val, dst, dst_index); +} diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 73f7c3594f..a178add7f8 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -367,11 +367,14 @@ OCL_TEST(Layer_Test_PReLU, Accuracy) // ); //} -static void test_Reshape_Split_Slice_layers() +static void test_Reshape_Split_Slice_layers(int targetId) { Net net = readNetFromCaffe(_tf("reshape_and_slice_routines.prototxt")); ASSERT_FALSE(net.empty()); + net.setPreferableBackend(DNN_BACKEND_DEFAULT); + net.setPreferableTarget(targetId); + Mat input(6, 12, CV_32F); RNG rng(0); rng.fill(input, RNG::UNIFORM, -1, 1); @@ -384,7 +387,12 @@ static void test_Reshape_Split_Slice_layers() TEST(Layer_Test_Reshape_Split_Slice, Accuracy) { - test_Reshape_Split_Slice_layers(); + test_Reshape_Split_Slice_layers(DNN_TARGET_CPU); +} + +OCL_TEST(Layer_Test_Reshape_Split_Slice, Accuracy) +{ + test_Reshape_Split_Slice_layers(DNN_TARGET_OPENCL); } TEST(Layer_Conv_Elu, Accuracy)