diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp index 60d66364da..27e3562340 100644 --- a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp +++ b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp @@ -1164,8 +1164,6 @@ GAPI_FLUID_KERNEL(GFluidFilter2D, cv::gapi::imgproc::GFilter2D, true) // //----------------------------- -enum Morphology { M_ERODE, M_DILATE }; - template static void run_morphology( Buffer& dst, const View & src, @@ -1175,6 +1173,10 @@ static void run_morphology( Buffer& dst, const cv::Point & /* anchor */, Morphology morphology) { + static_assert(std::is_same::value, "unsupported combination of types"); + + GAPI_Assert(M_ERODE == morphology || M_DILATE == morphology); + static const int maxLines = 9; GAPI_Assert(k_rows <= maxLines); @@ -1194,43 +1196,44 @@ static void run_morphology( Buffer& dst, int width = dst.length(); int chan = dst.meta().chan; - for (int w=0; w < width; w++) + // call optimized code, if 3x3 + if (3 == k_rows && 3 == k_cols) { - // TODO: make this cycle innermost - for (int c=0; c < chan; c++) - { - SRC result=0; - if (M_ERODE == morphology) - { - result = std::numeric_limits::max(); - } - else if (M_DILATE == morphology) - { - result = std::numeric_limits::min(); - } - else - CV_Error(cv::Error::StsBadArg, "unsupported morphology operation"); + run_morphology3x3_impl(out, in, width, chan, k, morphology); + return; + } - for (int i=0; i < k_rows; i++) - for (int j=0; j < k_cols; j++) + // reference: any size of k[] + int length = width * chan; + for (int l=0; l < length; l++) + { + SRC result; + if (M_ERODE == morphology) + { + result = std::numeric_limits::max(); + } + else // if (M_DILATE == morphology) + { + result = std::numeric_limits::min(); + } + + for (int i=0; i < k_rows; i++) + for (int j=0; j < k_cols; j++) + { + if ( k[k_cols*i + j] ) { - if ( k[k_cols*i + j] ) + if (M_ERODE == morphology) { - if (M_ERODE == morphology) - { - result = std::min(result, in[i][(w + j - border_x)*chan + c]); - } - else if (M_DILATE == morphology) - { - result = std::max(result, in[i][(w + j - border_x)*chan + c]); - } - else - CV_Error(cv::Error::StsBadArg, "unsupported morphology operation"); + result = (std::min)(result, in[i][l + (j - border_x)*chan]); + } + else // if (M_DILATE == morphology) + { + result = (std::max)(result, in[i][l + (j - border_x)*chan]); } } - - out[w*chan + c] = saturate(result, rintf); } + + out[l] = saturate(result, rintf); } } diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp index 0227a0a3c0..e9eebfac2a 100644 --- a/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp @@ -111,6 +111,28 @@ RUN_FILTER2D_3X3_IMPL( float, float) #undef RUN_FILTER2D_3X3_IMPL +//----------------------------- +// +// Fluid kernels: Erode, Dilate +// +//----------------------------- + +#define RUN_MORPHOLOGY3X3_IMPL(T) \ +void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \ + const uchar k[], Morphology morphology) \ +{ \ + CV_CPU_DISPATCH(run_morphology3x3_impl, \ + (out, in, width, chan, k, morphology), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +RUN_MORPHOLOGY3X3_IMPL(uchar ) +RUN_MORPHOLOGY3X3_IMPL(ushort) +RUN_MORPHOLOGY3X3_IMPL( short) +RUN_MORPHOLOGY3X3_IMPL( float) + +#undef RUN_MORPHOLOGY3X3_IMPL + } // namespace fliud } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp b/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp index db5aeda96b..6116c4b74e 100644 --- a/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp @@ -77,6 +77,25 @@ RUN_FILTER2D_3X3_IMPL( float, float) #undef RUN_FILTER2D_3X3_IMPL +//----------------------------- +// +// Fluid kernels: Erode, Dilate +// +//----------------------------- + +enum Morphology { M_ERODE, M_DILATE }; + +#define RUN_MORPHOLOGY3X3_IMPL(T) \ +void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \ + const uchar k[], Morphology morphology); + +RUN_MORPHOLOGY3X3_IMPL(uchar ) +RUN_MORPHOLOGY3X3_IMPL(ushort) +RUN_MORPHOLOGY3X3_IMPL( short) +RUN_MORPHOLOGY3X3_IMPL( float) + +#undef RUN_MORPHOLOGY3X3_IMPL + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp index 821a0ad5a9..cdd5e827ac 100644 --- a/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp @@ -19,6 +19,8 @@ #include #include +#include +#include #include #ifdef __GNUC__ @@ -97,6 +99,23 @@ RUN_FILTER2D_3X3_IMPL( float, float) #undef RUN_FILTER2D_3X3_IMPL +//----------------------------- +// +// Fluid kernels: Erode, Dilate +// +//----------------------------- + +#define RUN_MORPHOLOGY3X3_IMPL(T) \ +void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \ + const uchar k[], Morphology morphology); + +RUN_MORPHOLOGY3X3_IMPL(uchar ) +RUN_MORPHOLOGY3X3_IMPL(ushort) +RUN_MORPHOLOGY3X3_IMPL( short) +RUN_MORPHOLOGY3X3_IMPL( float) + +#undef RUN_MORPHOLOGY3X3_IMPL + //---------------------------------------------------------------------- #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY @@ -1099,6 +1118,196 @@ RUN_FILTER2D_3X3_IMPL( float, float) #undef RUN_FILTER2D_3X3_IMPL +//----------------------------- +// +// Fluid kernels: Erode, Dilate +// +//----------------------------- + +template +static void run_morphology3x3_reference(T out[], const T *in[], int width, int chan, + const uchar k[]) +{ + constexpr int k_size = 3; + constexpr int border = (k_size - 1) / 2; + + const uchar kernel[3][3] = {{k[0], k[1], k[2]}, {k[3], k[4], k[5]}, {k[6], k[7], k[8]}}; + + const int length = width * chan; + const int shift = border * chan; + + for (int l=0; l < length; l++) + { + T result = M_ERODE == morphology? std::numeric_limits::max(): + std::numeric_limits::min(); + + if (M_ERODE == morphology) + { + result = kernel[0][0]? (std::min)(result, in[0][l - shift]): result; + result = kernel[0][1]? (std::min)(result, in[0][l ]): result; + result = kernel[0][2]? (std::min)(result, in[0][l + shift]): result; + + result = kernel[1][0]? (std::min)(result, in[1][l - shift]): result; + result = kernel[1][1]? (std::min)(result, in[1][l ]): result; + result = kernel[1][2]? (std::min)(result, in[1][l + shift]): result; + + result = kernel[2][0]? (std::min)(result, in[2][l - shift]): result; + result = kernel[2][1]? (std::min)(result, in[2][l ]): result; + result = kernel[2][2]? (std::min)(result, in[2][l + shift]): result; + } + else // if (M_DILATE == morphology) + { + result = kernel[0][0]? (std::max)(result, in[0][l - shift]): result; + result = kernel[0][1]? (std::max)(result, in[0][l ]): result; + result = kernel[0][2]? (std::max)(result, in[0][l + shift]): result; + + result = kernel[1][0]? (std::max)(result, in[1][l - shift]): result; + result = kernel[1][1]? (std::max)(result, in[1][l ]): result; + result = kernel[1][2]? (std::max)(result, in[1][l + shift]): result; + + result = kernel[2][0]? (std::max)(result, in[2][l - shift]): result; + result = kernel[2][1]? (std::max)(result, in[2][l ]): result; + result = kernel[2][2]? (std::max)(result, in[2][l + shift]): result; + } + + out[l] = result; + } +} + +#if CV_SIMD +template +static void run_morphology3x3_simd(T out[], const T *in[], int width, int chan, + const uchar k[], S setall) +{ + constexpr int k_size = 3; + constexpr int border = (k_size - 1) / 2; + + const uchar kernel[3][3] = {{k[0], k[1], k[2]}, {k[3], k[4], k[5]}, {k[6], k[7], k[8]}}; + + const int length = width * chan; + const int shift = border * chan; + + for (int l=0; l < length;) + { + constexpr int nlanes = VT::nlanes; + + // main part of output row + for (; l <= length - nlanes; l += nlanes) + { + VT r = M_ERODE == morphology? setall(std::numeric_limits::max()): + setall(std::numeric_limits::min()); + + if (M_ERODE == morphology) + { + if (kernel[0][0]) r = v_min(r, vx_load(&in[0][l - shift])); + if (kernel[0][1]) r = v_min(r, vx_load(&in[0][l ])); + if (kernel[0][2]) r = v_min(r, vx_load(&in[0][l + shift])); + + if (kernel[1][0]) r = v_min(r, vx_load(&in[1][l - shift])); + if (kernel[1][1]) r = v_min(r, vx_load(&in[1][l ])); + if (kernel[1][2]) r = v_min(r, vx_load(&in[1][l + shift])); + + if (kernel[2][0]) r = v_min(r, vx_load(&in[2][l - shift])); + if (kernel[2][1]) r = v_min(r, vx_load(&in[2][l ])); + if (kernel[2][2]) r = v_min(r, vx_load(&in[2][l + shift])); + } + else // if (M_DILATE == morphology) + { + if (kernel[0][0]) r = v_max(r, vx_load(&in[0][l - shift])); + if (kernel[0][1]) r = v_max(r, vx_load(&in[0][l ])); + if (kernel[0][2]) r = v_max(r, vx_load(&in[0][l + shift])); + + if (kernel[1][0]) r = v_max(r, vx_load(&in[1][l - shift])); + if (kernel[1][1]) r = v_max(r, vx_load(&in[1][l ])); + if (kernel[1][2]) r = v_max(r, vx_load(&in[1][l + shift])); + + if (kernel[2][0]) r = v_max(r, vx_load(&in[2][l - shift])); + if (kernel[2][1]) r = v_max(r, vx_load(&in[2][l ])); + if (kernel[2][2]) r = v_max(r, vx_load(&in[2][l + shift])); + } + + v_store(&out[l], r); + } + + // tail (if any) + if (l < length) + { + GAPI_DbgAssert(length >= nlanes); + l = length - nlanes; + } + } +} +#endif + +template +static void run_morphology3x3_code(T out[], const T *in[], int width, int chan, + const uchar k[]) +{ +#if CV_SIMD + int length = width * chan; + + // length variable may be unused if types do not match at 'if' statements below + (void) length; + + if (std::is_same::value && length >= v_float32::nlanes) + { + run_morphology3x3_simd(reinterpret_cast(out), + reinterpret_cast(in), + width, chan, k, vx_setall_f32); + return; + } + + if (std::is_same::value && length >= v_int16::nlanes) + { + run_morphology3x3_simd(reinterpret_cast(out), + reinterpret_cast(in), + width, chan, k, vx_setall_s16); + return; + } + + if (std::is_same::value && length >= v_uint16::nlanes) + { + run_morphology3x3_simd(reinterpret_cast(out), + reinterpret_cast(in), + width, chan, k, vx_setall_u16); + return; + } + + if (std::is_same::value && length >= v_uint8::nlanes) + { + run_morphology3x3_simd(reinterpret_cast(out), + reinterpret_cast(in), + width, chan, k, vx_setall_u8); + return; + } +#endif // CV_SIMD + + run_morphology3x3_reference(out, in, width, chan, k); +} + +#define RUN_MORPHOLOGY3X3_IMPL(T) \ +void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \ + const uchar k[], Morphology morphology) \ +{ \ + if (M_ERODE == morphology) \ + { \ + run_morphology3x3_code(out, in, width, chan, k); \ + } \ + else if (M_DILATE == morphology) \ + { \ + run_morphology3x3_code(out, in, width, chan, k); \ + } \ + else \ + CV_Error(cv::Error::StsBadArg, "unsupported morphology operation"); \ +} + +RUN_MORPHOLOGY3X3_IMPL(uchar ) +RUN_MORPHOLOGY3X3_IMPL(ushort) +RUN_MORPHOLOGY3X3_IMPL( short) +RUN_MORPHOLOGY3X3_IMPL( float) + +#undef RUN_MORPHOLOGY3X3_IMPL + //------------------------------------------------------------------------------ #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY