slice layer ocl support
Signed-off-by: Li Peng <peng.li@intel.com>
This commit is contained in:
parent
f1c52e426b
commit
7a4c5e9421
@ -43,6 +43,7 @@
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -171,11 +172,59 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
|
||||
if (inputs[0].dims < 4)
|
||||
return false;
|
||||
|
||||
const UMat& inpMat = inputs[0];
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
{
|
||||
int groups = outputs[i].size[0];
|
||||
int channels = outputs[i].size[1];
|
||||
int rows = outputs[i].size[2];
|
||||
int cols = outputs[i].size[3];
|
||||
|
||||
int number = (cols % 8 == 0) ? 8 : ((cols % 4 == 0) ? 4 : 1);
|
||||
String buildopt = format("-DNUM=%d ", number);
|
||||
String kname = format("slice%d", number);
|
||||
ocl::Kernel kernel(kname.c_str(), ocl::dnn::slice_oclsrc, buildopt);
|
||||
size_t global[] = { (size_t)groups * channels, (size_t)rows * cols / number };
|
||||
int idx = 0;
|
||||
kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||
kernel.set(idx++, (int)(inpMat.size[2] * inpMat.size[3]));
|
||||
kernel.set(idx++, (int)inpMat.size[3]);
|
||||
kernel.set(idx++, (int)global[0]);
|
||||
kernel.set(idx++, (int)(rows * cols));
|
||||
kernel.set(idx++, (int)cols);
|
||||
kernel.set(idx++, (int)sliceRanges[i][2].start);
|
||||
kernel.set(idx++, (int)sliceRanges[i][3].start);
|
||||
kernel.set(idx++, ocl::KernelArg::PtrWriteOnly(outputs[i]));
|
||||
bool ret = kernel.run(2, global, NULL, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
|
||||
87
modules/dnn/src/opencl/slice.cl
Normal file
87
modules/dnn/src/opencl/slice.cl
Normal file
@ -0,0 +1,87 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define Dtype float
|
||||
#define Dtype4 float4
|
||||
#define Dtype8 float8
|
||||
|
||||
#if NUM == 8
|
||||
#define load(src, index) vload8(0, src + index)
|
||||
#define store(vec, dst, index) vstore8(vec, 0, dst + index)
|
||||
#define vec_type Dtype8
|
||||
#define SLICE slice8
|
||||
#elif NUM == 4
|
||||
#define load(src, index) vload4(0, src + index)
|
||||
#define store(vec, dst, index) vstore4(vec, 0, dst + index)
|
||||
#define vec_type Dtype4
|
||||
#define SLICE slice4
|
||||
#elif NUM == 1
|
||||
#define load(src, index) src[index]
|
||||
#define store(vec, dst, index) dst[index] = vec
|
||||
#define vec_type Dtype
|
||||
#define SLICE slice1
|
||||
#endif
|
||||
|
||||
__kernel void SLICE(__global const Dtype* src,
|
||||
const int src_plane_size,
|
||||
const int src_cols,
|
||||
const int channels,
|
||||
const int dst_plane_size,
|
||||
const int dst_cols,
|
||||
const int row_offset,
|
||||
const int col_offset,
|
||||
__global Dtype* dst)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1) * NUM;
|
||||
|
||||
if ((x >= channels) || (y >= dst_plane_size))
|
||||
return;
|
||||
|
||||
int row = y / dst_cols + row_offset;
|
||||
int col = y % dst_cols + col_offset;
|
||||
|
||||
int src_index = x * src_plane_size + row * src_cols + col;
|
||||
int dst_index = x * dst_plane_size + y;
|
||||
vec_type val = load(src, src_index);
|
||||
store(val, dst, dst_index);
|
||||
}
|
||||
@ -367,11 +367,14 @@ OCL_TEST(Layer_Test_PReLU, Accuracy)
|
||||
// );
|
||||
//}
|
||||
|
||||
static void test_Reshape_Split_Slice_layers()
|
||||
static void test_Reshape_Split_Slice_layers(int targetId)
|
||||
{
|
||||
Net net = readNetFromCaffe(_tf("reshape_and_slice_routines.prototxt"));
|
||||
ASSERT_FALSE(net.empty());
|
||||
|
||||
net.setPreferableBackend(DNN_BACKEND_DEFAULT);
|
||||
net.setPreferableTarget(targetId);
|
||||
|
||||
Mat input(6, 12, CV_32F);
|
||||
RNG rng(0);
|
||||
rng.fill(input, RNG::UNIFORM, -1, 1);
|
||||
@ -384,7 +387,12 @@ static void test_Reshape_Split_Slice_layers()
|
||||
|
||||
TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
|
||||
{
|
||||
test_Reshape_Split_Slice_layers();
|
||||
test_Reshape_Split_Slice_layers(DNN_TARGET_CPU);
|
||||
}
|
||||
|
||||
OCL_TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
|
||||
{
|
||||
test_Reshape_Split_Slice_layers(DNN_TARGET_OPENCL);
|
||||
}
|
||||
|
||||
TEST(Layer_Conv_Elu, Accuracy)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user