slice layer ocl support
Signed-off-by: Li Peng <peng.li@intel.com>
This commit is contained in:
parent
f1c52e426b
commit
7a4c5e9421
@ -43,6 +43,7 @@
|
|||||||
#include "../precomp.hpp"
|
#include "../precomp.hpp"
|
||||||
#include "layers_common.hpp"
|
#include "layers_common.hpp"
|
||||||
#include <opencv2/dnn/shape_utils.hpp>
|
#include <opencv2/dnn/shape_utils.hpp>
|
||||||
|
#include "opencl_kernels_dnn.hpp"
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
@ -171,11 +172,59 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||||
|
{
|
||||||
|
std::vector<UMat> inputs;
|
||||||
|
std::vector<UMat> outputs;
|
||||||
|
|
||||||
|
inputs_.getUMatVector(inputs);
|
||||||
|
outputs_.getUMatVector(outputs);
|
||||||
|
|
||||||
|
if (inputs[0].dims < 4)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const UMat& inpMat = inputs[0];
|
||||||
|
for (size_t i = 0; i < outputs.size(); i++)
|
||||||
|
{
|
||||||
|
int groups = outputs[i].size[0];
|
||||||
|
int channels = outputs[i].size[1];
|
||||||
|
int rows = outputs[i].size[2];
|
||||||
|
int cols = outputs[i].size[3];
|
||||||
|
|
||||||
|
int number = (cols % 8 == 0) ? 8 : ((cols % 4 == 0) ? 4 : 1);
|
||||||
|
String buildopt = format("-DNUM=%d ", number);
|
||||||
|
String kname = format("slice%d", number);
|
||||||
|
ocl::Kernel kernel(kname.c_str(), ocl::dnn::slice_oclsrc, buildopt);
|
||||||
|
size_t global[] = { (size_t)groups * channels, (size_t)rows * cols / number };
|
||||||
|
int idx = 0;
|
||||||
|
kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||||
|
kernel.set(idx++, (int)(inpMat.size[2] * inpMat.size[3]));
|
||||||
|
kernel.set(idx++, (int)inpMat.size[3]);
|
||||||
|
kernel.set(idx++, (int)global[0]);
|
||||||
|
kernel.set(idx++, (int)(rows * cols));
|
||||||
|
kernel.set(idx++, (int)cols);
|
||||||
|
kernel.set(idx++, (int)sliceRanges[i][2].start);
|
||||||
|
kernel.set(idx++, (int)sliceRanges[i][3].start);
|
||||||
|
kernel.set(idx++, ocl::KernelArg::PtrWriteOnly(outputs[i]));
|
||||||
|
bool ret = kernel.run(2, global, NULL, false);
|
||||||
|
if (!ret)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
87
modules/dnn/src/opencl/slice.cl
Normal file
87
modules/dnn/src/opencl/slice.cl
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#define Dtype float
|
||||||
|
#define Dtype4 float4
|
||||||
|
#define Dtype8 float8
|
||||||
|
|
||||||
|
#if NUM == 8
|
||||||
|
#define load(src, index) vload8(0, src + index)
|
||||||
|
#define store(vec, dst, index) vstore8(vec, 0, dst + index)
|
||||||
|
#define vec_type Dtype8
|
||||||
|
#define SLICE slice8
|
||||||
|
#elif NUM == 4
|
||||||
|
#define load(src, index) vload4(0, src + index)
|
||||||
|
#define store(vec, dst, index) vstore4(vec, 0, dst + index)
|
||||||
|
#define vec_type Dtype4
|
||||||
|
#define SLICE slice4
|
||||||
|
#elif NUM == 1
|
||||||
|
#define load(src, index) src[index]
|
||||||
|
#define store(vec, dst, index) dst[index] = vec
|
||||||
|
#define vec_type Dtype
|
||||||
|
#define SLICE slice1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__kernel void SLICE(__global const Dtype* src,
|
||||||
|
const int src_plane_size,
|
||||||
|
const int src_cols,
|
||||||
|
const int channels,
|
||||||
|
const int dst_plane_size,
|
||||||
|
const int dst_cols,
|
||||||
|
const int row_offset,
|
||||||
|
const int col_offset,
|
||||||
|
__global Dtype* dst)
|
||||||
|
{
|
||||||
|
int x = get_global_id(0);
|
||||||
|
int y = get_global_id(1) * NUM;
|
||||||
|
|
||||||
|
if ((x >= channels) || (y >= dst_plane_size))
|
||||||
|
return;
|
||||||
|
|
||||||
|
int row = y / dst_cols + row_offset;
|
||||||
|
int col = y % dst_cols + col_offset;
|
||||||
|
|
||||||
|
int src_index = x * src_plane_size + row * src_cols + col;
|
||||||
|
int dst_index = x * dst_plane_size + y;
|
||||||
|
vec_type val = load(src, src_index);
|
||||||
|
store(val, dst, dst_index);
|
||||||
|
}
|
||||||
@ -367,11 +367,14 @@ OCL_TEST(Layer_Test_PReLU, Accuracy)
|
|||||||
// );
|
// );
|
||||||
//}
|
//}
|
||||||
|
|
||||||
static void test_Reshape_Split_Slice_layers()
|
static void test_Reshape_Split_Slice_layers(int targetId)
|
||||||
{
|
{
|
||||||
Net net = readNetFromCaffe(_tf("reshape_and_slice_routines.prototxt"));
|
Net net = readNetFromCaffe(_tf("reshape_and_slice_routines.prototxt"));
|
||||||
ASSERT_FALSE(net.empty());
|
ASSERT_FALSE(net.empty());
|
||||||
|
|
||||||
|
net.setPreferableBackend(DNN_BACKEND_DEFAULT);
|
||||||
|
net.setPreferableTarget(targetId);
|
||||||
|
|
||||||
Mat input(6, 12, CV_32F);
|
Mat input(6, 12, CV_32F);
|
||||||
RNG rng(0);
|
RNG rng(0);
|
||||||
rng.fill(input, RNG::UNIFORM, -1, 1);
|
rng.fill(input, RNG::UNIFORM, -1, 1);
|
||||||
@ -384,7 +387,12 @@ static void test_Reshape_Split_Slice_layers()
|
|||||||
|
|
||||||
TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
|
TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
|
||||||
{
|
{
|
||||||
test_Reshape_Split_Slice_layers();
|
test_Reshape_Split_Slice_layers(DNN_TARGET_CPU);
|
||||||
|
}
|
||||||
|
|
||||||
|
OCL_TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
|
||||||
|
{
|
||||||
|
test_Reshape_Split_Slice_layers(DNN_TARGET_OPENCL);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Layer_Conv_Elu, Accuracy)
|
TEST(Layer_Conv_Elu, Accuracy)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user