diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index a0103b3143..87a8b4defb 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1477,6 +1477,78 @@ Ptr Morph ::create(int op, int src_type, int dst_type, int max_width, int #define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n))) +static bool ocl_morph3x3_8UC1( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, + int op, int actual_op = -1, InputArray _extraMat = noArray()) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + Size ksize = _kernel.size(); + + Mat kernel8u; + String processing; + + bool haveExtraMat = !_extraMat.empty(); + CV_Assert(actual_op <= 3 || haveExtraMat); + + _kernel.getMat().convertTo(kernel8u, CV_8U); + for (int y = 0; y < kernel8u.rows; ++y) + for (int x = 0; x < kernel8u.cols; ++x) + if (kernel8u.at(y, x) != 0) + processing += format("PROCESS(%d,%d)", y, x); + + if (anchor.x < 0) + anchor.x = ksize.width / 2; + if (anchor.y < 0) + anchor.y = ksize.height / 2; + + if (actual_op < 0) + actual_op = op; + + if (type != CV_8UC1 || + !((_src.offset() == 0) && (_src.step() % 4 == 0)) || + !((_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) || + !(anchor.x == 1 && anchor.y == 1) || + !(ksize.width == 3 && ksize.height == 3)) + return false; + + Size size = _src.size(); + size_t globalsize[2] = { 0, 0 }; + size_t localsize[2] = { 0, 0 }; + + globalsize[0] = size.width / 16; + globalsize[1] = size.height / 2; + + static const char * const op2str[] = { "OP_ERODE", "OP_DILATE", NULL, NULL, "OP_GRADIENT", "OP_TOPHAT", "OP_BLACKHAT" }; + String opts = format("-D PROCESS_ELEM_=%s -D %s%s", processing.c_str(), op2str[op], + actual_op == op ? "" : cv::format(" -D %s", op2str[actual_op]).c_str()); + + ocl::Kernel k; + k.create("morph3x3_8UC1_cols16_rows2", cv::ocl::imgproc::morph3x3_oclsrc, opts); + + if (k.empty()) + return false; + + UMat src = _src.getUMat(); + _dst.create(size, CV_MAKETYPE(depth, cn)); + if (!(_dst.offset() == 0 && _dst.step() % 4 == 0)) + return false; + UMat dst = _dst.getUMat(); + UMat extraMat = _extraMat.getUMat(); + + int idxArg = k.set(0, ocl::KernelArg::PtrReadOnly(src)); + idxArg = k.set(idxArg, (int)src.step); + idxArg = k.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst)); + idxArg = k.set(idxArg, (int)dst.step); + idxArg = k.set(idxArg, (int)dst.rows); + idxArg = k.set(idxArg, (int)dst.cols); + + if (haveExtraMat) + { + idxArg = k.set(idxArg, ocl::KernelArg::ReadOnlyNoSize(extraMat)); + } + + return k.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false); +} + static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int borderType, int op, int actual_op = -1, InputArray _extraMat = noArray()) { @@ -1676,6 +1748,9 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, #endif ) { + if (ocl_morph3x3_8UC1(_src, _dst, kernel, anchor, op, actual_op, _extraMat)) + return true; + if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat)) return true; } diff --git a/modules/imgproc/src/opencl/morph3x3.cl b/modules/imgproc/src/opencl/morph3x3.cl new file mode 100644 index 0000000000..3dde505167 --- /dev/null +++ b/modules/imgproc/src/opencl/morph3x3.cl @@ -0,0 +1,119 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifdef OP_ERODE +#define OP(m1, m2) min(m1, m2) +#define VAL UCHAR_MAX +#endif + +#ifdef OP_DILATE +#define OP(m1, m2) max(m1, m2) +#define VAL 0 +#endif + +#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT +#define EXTRA_PARAMS , __global const uchar * matptr, int mat_step, int mat_offset +#else +#define EXTRA_PARAMS +#endif + +#define PROCESS(_y, _x) \ + line_out[0] = OP(line_out[0], arr[_x + 3 * _y]); \ + line_out[1] = OP(line_out[1], arr[_x + 3 * (_y + 1)]); + +#define PROCESS_ELEM \ + line_out[0] = (uchar16)VAL; \ + line_out[1] = (uchar16)VAL; \ + PROCESS_ELEM_ + +__kernel void morph3x3_8UC1_cols16_rows2(__global const uint* src, int src_step, + __global uint* dst, int dst_step, + int rows, int cols + EXTRA_PARAMS) +{ + int block_x = get_global_id(0); + int y = get_global_id(1) * 2; + int ssx = 1, dsx = 1; + + if ((block_x * 16) >= cols || y >= rows) return; + + uchar a; uchar16 b; uchar c; + uchar d; uchar16 e; uchar f; + uchar g; uchar16 h; uchar i; + uchar j; uchar16 k; uchar l; + + uchar16 line[4]; + uchar16 line_out[2]; + + int src_index = block_x * 4 * ssx + (y - 1) * (src_step / 4); + line[0] = (y == 0) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index)); + line[1] = as_uchar16(vload4(0, src + src_index + (src_step / 4))); + line[2] = as_uchar16(vload4(0, src + src_index + 2 * (src_step / 4))); + line[3] = (y == (rows - 2)) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index + 3 * (src_step / 4))); + + __global uchar *src_p = (__global uchar *)src; + bool line_end = ((block_x + 1) * 16 == cols); + + src_index = block_x * 16 * ssx + (y - 1) * src_step; + + a = (block_x == 0 || y == 0) ? VAL : src_p[src_index - 1]; + b = line[0]; + c = (line_end || y == 0) ? VAL : src_p[src_index + 16]; + + d = (block_x == 0) ? VAL : src_p[src_index + src_step - 1]; + e = line[1]; + f = line_end ? VAL : src_p[src_index + src_step + 16]; + + g = (block_x == 0) ? VAL : src_p[src_index + 2 * src_step - 1]; + h = line[2]; + i = line_end ? VAL : src_p[src_index + 2 * src_step + 16]; + + j = (block_x == 0 || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step - 1]; + k = line[3]; + l = (line_end || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step + 16]; + + uchar16 arr[12]; + arr[0] = (uchar16)(a, b.s01234567, b.s89ab, b.scde); + arr[1] = b; + arr[2] = (uchar16)(b.s12345678, b.s9abc, b.sdef, c); + arr[3] = (uchar16)(d, e.s01234567, e.s89ab, e.scde); + arr[4] = e; + arr[5] = (uchar16)(e.s12345678, e.s9abc, e.sdef, f); + arr[6] = (uchar16)(g, h.s01234567, h.s89ab, h.scde); + arr[7] = h; + arr[8] = (uchar16)(h.s12345678, h.s9abc, h.sdef, i); + arr[9] = (uchar16)(j, k.s01234567, k.s89ab, k.scde); + arr[10] = k; + arr[11] = (uchar16)(k.s12345678, k.s9abc, k.sdef, l); + + PROCESS_ELEM; + + int dst_index = block_x * 4 * dsx + y * (dst_step / 4); + +#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT + int mat_index = y * mat_step + block_x * 16 * ssx + mat_offset; + uchar16 val0 = vload16(0, matptr + mat_index); + uchar16 val1 = vload16(0, matptr + mat_index + mat_step); + +#ifdef OP_GRADIENT + line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0)); + line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1)); + vstore4(as_uint4(line_out[0]), 0, dst + dst_index); + vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4)); +#elif defined OP_TOPHAT + line_out[0] = convert_uchar16_sat(convert_int16(val0) - convert_int16(line_out[0])); + line_out[1] = convert_uchar16_sat(convert_int16(val1) - convert_int16(line_out[1])); + vstore4(as_uint4(line_out[0]), 0, dst + dst_index); + vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4)); +#elif defined OP_BLACKHAT + line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0)); + line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1)); + vstore4(as_uint4(line_out[0]), 0, dst + dst_index); + vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4)); +#endif +#else + vstore4(as_uint4(line_out[0]), 0, dst + dst_index); + vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4)); +#endif +} diff --git a/modules/imgproc/test/ocl/test_filters.cpp b/modules/imgproc/test/ocl/test_filters.cpp index f92cc785a1..f3eb3a8e51 100644 --- a/modules/imgproc/test/ocl/test_filters.cpp +++ b/modules/imgproc/test/ocl/test_filters.cpp @@ -273,6 +273,85 @@ OCL_TEST_P(Dilate, Mat) } } +PARAM_TEST_CASE(MorphFilter3x3_cols16_rows2_Base, MatType, + int, // kernel size + Size, // dx, dy + BorderType, // border type + double, // optional parameter + bool, // roi or not + int) // width multiplier +{ + int type, borderType, ksize; + Size size; + double param; + bool useRoi; + int widthMultiple; + + TEST_DECLARE_INPUT_PARAMETER(src); + TEST_DECLARE_OUTPUT_PARAMETER(dst); + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + size = GET_PARAM(2); + borderType = GET_PARAM(3); + param = GET_PARAM(4); + useRoi = GET_PARAM(5); + widthMultiple = GET_PARAM(6); + } + + void random_roi() + { + size = Size(3, 3); + + Size roiSize = randomSize(size.width, MAX_VALUE, size.height, MAX_VALUE); + roiSize.width = std::max(size.width + 13, roiSize.width & (~0xf)); + roiSize.height = std::max(size.height + 1, roiSize.height & (~0x1)); + + Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256); + + Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -60, 70); + + UMAT_UPLOAD_INPUT_PARAMETER(src); + UMAT_UPLOAD_OUTPUT_PARAMETER(dst); + } + + void Near() + { + Near(1, false); + } + + void Near(double threshold, bool relative) + { + if (relative) + OCL_EXPECT_MATS_NEAR_RELATIVE(dst, threshold); + else + OCL_EXPECT_MATS_NEAR(dst, threshold); + } +}; + +typedef MorphFilter3x3_cols16_rows2_Base MorphFilter3x3_cols16_rows2; + +OCL_TEST_P(MorphFilter3x3_cols16_rows2, Mat) +{ + Size kernelSize(ksize, ksize); + int iterations = (int)param; + + for (int j = 0; j < test_loop_times; j++) + { + random_roi(); + Mat kernel = ksize==0 ? Mat() : randomMat(kernelSize, CV_8UC1, 0, 3); + + OCL_OFF(cv::dilate(src_roi, dst_roi, kernel, Point(-1, -1), iterations) ); + OCL_ON(cv::dilate(usrc_roi, udst_roi, kernel, Point(-1, -1), iterations) ); + + Near(); + } +} + ///////////////////////////////////////////////////////////////////////////////////////////////// // MorphologyEx IMPLEMENT_PARAM_CLASS(MorphOp, int) @@ -429,6 +508,15 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine( Bool(), Values(1))); // not used +OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphFilter3x3_cols16_rows2, Combine( + Values((MatType)CV_8UC1), + Values(0, 3), // kernel size, 0 means kernel = Mat() + Values(Size(0, 0)), // not used + Values((BorderType)BORDER_CONSTANT), + Values(1.0, 2.0, 3.0), + Bool(), + Values(1))); // not used + OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine( Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(3, 5, 7), // kernel size