From 4ecbcf0885472631b21a459b722403f6a7efed04 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 16 Jan 2020 15:06:34 +0300 Subject: [PATCH 1/4] imgproc: copy sumpixels.simd.hpp --- modules/imgproc/src/{sumpixels.cpp => sumpixels.simd.hpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename modules/imgproc/src/{sumpixels.cpp => sumpixels.simd.hpp} (100%) mode change 100755 => 100644 diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.simd.hpp old mode 100755 new mode 100644 similarity index 100% rename from modules/imgproc/src/sumpixels.cpp rename to modules/imgproc/src/sumpixels.simd.hpp From c6a622542d59b8f949812038b8b24f664e580729 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 16 Jan 2020 15:07:48 +0300 Subject: [PATCH 2/4] imgproc: copy sumpixels.dispatch.cpp --- modules/imgproc/src/{sumpixels.cpp => sumpixels.dispatch.cpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename modules/imgproc/src/{sumpixels.cpp => sumpixels.dispatch.cpp} (100%) diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.dispatch.cpp similarity index 100% rename from modules/imgproc/src/sumpixels.cpp rename to modules/imgproc/src/sumpixels.dispatch.cpp From b4316af83496e9e4dbfdf680244fa84bd0bc174e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 17 Jan 2020 16:49:46 +0300 Subject: [PATCH 3/4] imgproc: rename sumpixels.avx512_skx.{cpp,hpp} --- .../src/{sumpixels.avx512_skx.cpp => sumpixels.avx512_skx.hpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename modules/imgproc/src/{sumpixels.avx512_skx.cpp => sumpixels.avx512_skx.hpp} (100%) diff --git a/modules/imgproc/src/sumpixels.avx512_skx.cpp b/modules/imgproc/src/sumpixels.avx512_skx.hpp similarity index 100% rename from modules/imgproc/src/sumpixels.avx512_skx.cpp rename to modules/imgproc/src/sumpixels.avx512_skx.hpp From 09b3383a7e1072ac831eaf71fc56a33941de4db9 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 17 Jan 2020 16:54:08 +0300 Subject: [PATCH 4/4] imgproc: dispatch sumpixels (integral) --- modules/imgproc/CMakeLists.txt | 1 + modules/imgproc/src/sumpixels.avx512_skx.hpp | 19 +- modules/imgproc/src/sumpixels.dispatch.cpp | 479 ++++++------------- modules/imgproc/src/sumpixels.hpp | 25 - modules/imgproc/src/sumpixels.simd.hpp | 459 ++---------------- 5 files changed, 202 insertions(+), 781 deletions(-) delete mode 100644 modules/imgproc/src/sumpixels.hpp diff --git a/modules/imgproc/CMakeLists.txt b/modules/imgproc/CMakeLists.txt index f26ea0b3bf..a74c883cd3 100644 --- a/modules/imgproc/CMakeLists.txt +++ b/modules/imgproc/CMakeLists.txt @@ -9,5 +9,6 @@ ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(median_blur SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) +ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX) ocv_add_dispatched_file(undistort SSE2 AVX2) ocv_define_module(imgproc opencv_core WRAP java python js) diff --git a/modules/imgproc/src/sumpixels.avx512_skx.hpp b/modules/imgproc/src/sumpixels.avx512_skx.hpp index 804b48d8c5..3c9c90c658 100644 --- a/modules/imgproc/src/sumpixels.avx512_skx.hpp +++ b/modules/imgproc/src/sumpixels.avx512_skx.hpp @@ -2,14 +2,13 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2019, Intel Corporation, all rights reserved. -#include "precomp.hpp" -#include "sumpixels.hpp" +// Copyright (C) 2019-2020, Intel Corporation, all rights reserved. #include "opencv2/core/hal/intrin.hpp" +namespace cv { namespace hal { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN -namespace cv { namespace { // Anonymous namespace to avoid exposing the implementation classes // @@ -432,16 +431,14 @@ __m512d IntegralCalculator < 4 > ::calculate_integral(const __m512i src_longs, c } // end of anonymous namespace -namespace opt_AVX512_SKX { - -// This is the implementation for the external callers interface entry point. -// It should be the only function called into this file from outside -// Any new implementations should be directed from here +static void calculate_integral_avx512(const uchar *src, size_t _srcstep, double *sum, size_t _sumstep, double *sqsum, size_t _sqsumstep, int width, int height, int cn) { + CV_INSTRUMENT_REGION(); + switch(cn){ case 1: { IntegralCalculator< 1 > calculator; @@ -466,5 +463,5 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep, } -} // end namespace opt_AVX512_SXK -} // end namespace cv +CV_CPU_OPTIMIZATION_NAMESPACE_END +}} // end namespace cv::hal diff --git a/modules/imgproc/src/sumpixels.dispatch.cpp b/modules/imgproc/src/sumpixels.dispatch.cpp index 89337f3507..b828ec70c0 100755 --- a/modules/imgproc/src/sumpixels.dispatch.cpp +++ b/modules/imgproc/src/sumpixels.dispatch.cpp @@ -10,7 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved. +// Copyright (C) 2000-2020 Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2014, Itseez Inc., all rights reserved. // Third party copyrights are property of their respective owners. @@ -44,210 +44,157 @@ #include "precomp.hpp" #include "opencl_kernels_imgproc.hpp" #include "opencv2/core/hal/intrin.hpp" -#include "sumpixels.hpp" -namespace cv -{ +#include "sumpixels.simd.hpp" +#include "sumpixels.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content -template -struct Integral_SIMD + +namespace cv { + +#ifdef HAVE_OPENCL + +static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth ) { - bool operator()(const T *, size_t, - ST *, size_t, - QT *, size_t, - ST *, size_t, - int, int, int) const - { + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if ( (_src.type() != CV_8UC1) || + !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F))) return false; - } -}; + static const int tileSize = 16; -template <> -struct Integral_SIMD { - Integral_SIMD() {}; + String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s", + ocl::typeToStr(sdepth), tileSize, + doubleSupport ? " -D DOUBLE_SUPPORT" : ""); + ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); + if (kcols.empty()) + return false; - bool operator()(const uchar *src, size_t _srcstep, - double *sum, size_t _sumstep, - double *sqsum, size_t _sqsumstep, - double *tilted, size_t _tiltedstep, - int width, int height, int cn) const + UMat src = _src.getUMat(); + Size src_size = src.size(); + Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); + UMat buf(bufsize, sdepth); + kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf)); + size_t gt = src.cols, lt = tileSize; + if (!kcols.run(1, >, <, false)) + return false; + + ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); + if (krows.empty()) + return false; + + Size sumsize(src_size.width + 1, src_size.height + 1); + _sum.create(sumsize, sdepth); + UMat sum = _sum.getUMat(); + + krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum)); + gt = src.rows; + return krows.run(1, >, <, false); +} + +static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) +{ + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) + return false; + + static const int tileSize = 16; + + String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s", + ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), + tileSize, + doubleSupport ? " -D DOUBLE_SUPPORT" : ""); + + ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); + if (kcols.empty()) + return false; + + UMat src = _src.getUMat(); + Size src_size = src.size(); + Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); + UMat buf(bufsize, sdepth); + UMat buf_sq(bufsize, sqdepth); + kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq)); + size_t gt = src.cols, lt = tileSize; + if (!kcols.run(1, >, <, false)) + return false; + + ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); + if (krows.empty()) + return false; + + Size sumsize(src_size.width + 1, src_size.height + 1); + _sum.create(sumsize, sdepth); + UMat sum = _sum.getUMat(); + _sqsum.create(sumsize, sqdepth); + UMat sum_sq = _sqsum.getUMat(); + + krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq)); + gt = src.rows; + return krows.run(1, >, <, false); +} + +#endif // HAVE_OPENCL + +#ifdef HAVE_IPP + +static bool ipp_integral( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn) +{ + CV_INSTRUMENT_REGION_IPP(); + + IppiSize size = {width, height}; + + if(cn > 1) + return false; + if(tilted) { -#if CV_TRY_AVX512_SKX - CV_UNUSED(_tiltedstep); - // TODO: Add support for 1 channel input (WIP) - if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && (cn <= 4)){ - opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep, - sqsum, _sqsumstep, width, height, cn); - return true; - } -#else - // Avoid warnings in some builds - CV_UNUSED(src); CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep); - CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep); - CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn); -#endif + CV_UNUSED(tstep); return false; } -}; - -#if CV_SIMD && CV_SIMD_WIDTH <= 64 - -template <> -struct Integral_SIMD -{ - Integral_SIMD() {} - - bool operator()(const uchar * src, size_t _srcstep, - int * sum, size_t _sumstep, - double * sqsum, size_t, - int * tilted, size_t, - int width, int height, int cn) const + if(!sqsum) { - if (sqsum || tilted || cn != 1) + if(depth == CV_8U && sdepth == CV_32S) + return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0; + else if(depth == CV_8UC1 && sdepth == CV_32F) + return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0; + else if(depth == CV_32FC1 && sdepth == CV_32F) + return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0; + else return false; - - // the first iteration - memset(sum, 0, (width + 1) * sizeof(int)); - - // the others - for (int i = 0; i < height; ++i) - { - const uchar * src_row = src + _srcstep * i; - int * prev_sum_row = (int *)((uchar *)sum + _sumstep * i) + 1; - int * sum_row = (int *)((uchar *)sum + _sumstep * (i + 1)) + 1; - - sum_row[-1] = 0; - - v_int32 prev = vx_setzero_s32(); - int j = 0; - for ( ; j + v_uint16::nlanes <= width; j += v_uint16::nlanes) - { - v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j)); - v_int32 el4l, el4h; -#if CV_AVX2 && CV_SIMD_WIDTH == 32 - __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2)); - vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4)); - vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8)); - __m256i shmask = _mm256_set1_epi32(7); - el4l.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_low(vsum)), prev.val); - el4h.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_high(vsum)), _mm256_permutevar8x32_epi32(el4l.val, shmask)); - prev.val = _mm256_permutevar8x32_epi32(el4h.val, shmask); -#else - el8 += v_rotate_left<1>(el8); - el8 += v_rotate_left<2>(el8); -#if CV_SIMD_WIDTH >= 32 - el8 += v_rotate_left<4>(el8); -#if CV_SIMD_WIDTH == 64 - el8 += v_rotate_left<8>(el8); -#endif -#endif - v_expand(el8, el4l, el4h); - el4l += prev; - el4h += el4l; - - prev = v_broadcast_element(el4h); -#endif - v_store(sum_row + j , el4l + vx_load(prev_sum_row + j )); - v_store(sum_row + j + v_int32::nlanes, el4h + vx_load(prev_sum_row + j + v_int32::nlanes)); - } - - for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) - sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; - } - vx_cleanup(); - - return true; } -}; - -template <> -struct Integral_SIMD -{ - Integral_SIMD() {} - - bool operator()(const uchar * src, size_t _srcstep, - float * sum, size_t _sumstep, - double * sqsum, size_t, - float * tilted, size_t, - int width, int height, int cn) const + else { - if (sqsum || tilted || cn != 1) + if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S) + return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; + else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F) + return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; + else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F) + return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; + else return false; - - // the first iteration - memset(sum, 0, (width + 1) * sizeof(int)); - - // the others - for (int i = 0; i < height; ++i) - { - const uchar * src_row = src + _srcstep * i; - float * prev_sum_row = (float *)((uchar *)sum + _sumstep * i) + 1; - float * sum_row = (float *)((uchar *)sum + _sumstep * (i + 1)) + 1; - - sum_row[-1] = 0; - - v_float32 prev = vx_setzero_f32(); - int j = 0; - for (; j + v_uint16::nlanes <= width; j += v_uint16::nlanes) - { - v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j)); - v_float32 el4l, el4h; -#if CV_AVX2 && CV_SIMD_WIDTH == 32 - __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2)); - vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4)); - vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8)); - __m256i shmask = _mm256_set1_epi32(7); - el4l.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_low(vsum))), prev.val); - el4h.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_high(vsum))), _mm256_permutevar8x32_ps(el4l.val, shmask)); - prev.val = _mm256_permutevar8x32_ps(el4h.val, shmask); -#else - el8 += v_rotate_left<1>(el8); - el8 += v_rotate_left<2>(el8); -#if CV_SIMD_WIDTH >= 32 - el8 += v_rotate_left<4>(el8); -#if CV_SIMD_WIDTH == 64 - el8 += v_rotate_left<8>(el8); -#endif -#endif - v_int32 el4li, el4hi; - v_expand(el8, el4li, el4hi); - el4l = v_cvt_f32(el4li) + prev; - el4h = v_cvt_f32(el4hi) + el4l; - - prev = v_broadcast_element(el4h); -#endif - v_store(sum_row + j , el4l + vx_load(prev_sum_row + j )); - v_store(sum_row + j + v_float32::nlanes, el4h + vx_load(prev_sum_row + j + v_float32::nlanes)); - } - - for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) - sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; - } - vx_cleanup(); - - return true; } -}; +} -#endif +#endif // HAVE_IPP -template +namespace hal { + +template static void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep, QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep, int width, int height, int cn ) { int x, y, k; - if (Integral_SIMD()(src, _srcstep, - sum, _sumstep, - sqsum, _sqsumstep, - tilted, _tiltedstep, - width, height, cn)) - return; - int srcstep = (int)(_srcstep/sizeof(T)); int sumstep = (int)(_sumstep/sizeof(ST)); int tiltedstep = (int)(_tiltedstep/sizeof(ST)); @@ -401,157 +348,36 @@ void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep, } } - -#ifdef HAVE_OPENCL - -static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth ) +static bool integral_SIMD( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn) { - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + CV_INSTRUMENT_REGION(); - if ( (_src.type() != CV_8UC1) || - !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F))) - return false; - - static const int tileSize = 16; - - String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s", - ocl::typeToStr(sdepth), tileSize, - doubleSupport ? " -D DOUBLE_SUPPORT" : ""); - - ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (kcols.empty()) - return false; - - UMat src = _src.getUMat(); - Size src_size = src.size(); - Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); - UMat buf(bufsize, sdepth); - kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf)); - size_t gt = src.cols, lt = tileSize; - if (!kcols.run(1, >, <, false)) - return false; - - ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (krows.empty()) - return false; - - Size sumsize(src_size.width + 1, src_size.height + 1); - _sum.create(sumsize, sdepth); - UMat sum = _sum.getUMat(); - - krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum)); - gt = src.rows; - return krows.run(1, >, <, false); + CV_CPU_DISPATCH(integral_SIMD, (depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn), + CV_CPU_DISPATCH_MODES_ALL); } -static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) +void integral( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn) { - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + CV_INSTRUMENT_REGION(); - if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) - return false; - - static const int tileSize = 16; - - String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s", - ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), - tileSize, - doubleSupport ? " -D DOUBLE_SUPPORT" : ""); - - ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (kcols.empty()) - return false; - - UMat src = _src.getUMat(); - Size src_size = src.size(); - Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); - UMat buf(bufsize, sdepth); - UMat buf_sq(bufsize, sqdepth); - kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq)); - size_t gt = src.cols, lt = tileSize; - if (!kcols.run(1, >, <, false)) - return false; - - ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (krows.empty()) - return false; - - Size sumsize(src_size.width + 1, src_size.height + 1); - _sum.create(sumsize, sdepth); - UMat sum = _sum.getUMat(); - _sqsum.create(sumsize, sqdepth); - UMat sum_sq = _sqsum.getUMat(); - - krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq)); - gt = src.rows; - return krows.run(1, >, <, false); -} - -#endif - -} - -#if defined(HAVE_IPP) -namespace cv -{ -static bool ipp_integral( - int depth, int sdepth, int sqdepth, - const uchar* src, size_t srcstep, - uchar* sum, size_t sumstep, - uchar* sqsum, size_t sqsumstep, - uchar* tilted, size_t tstep, - int width, int height, int cn) -{ - CV_INSTRUMENT_REGION_IPP(); - - IppiSize size = {width, height}; - - if(cn > 1) - return false; - if(tilted) - { - CV_UNUSED(tstep); - return false; - } - - if(!sqsum) - { - if(depth == CV_8U && sdepth == CV_32S) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0; - else if(depth == CV_8UC1 && sdepth == CV_32F) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0; - else if(depth == CV_32FC1 && sdepth == CV_32F) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0; - else - return false; - } - else - { - if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else - return false; - } -} -} -#endif - -namespace cv { namespace hal { - -void integral(int depth, int sdepth, int sqdepth, - const uchar* src, size_t srcstep, - uchar* sum, size_t sumstep, - uchar* sqsum, size_t sqsumstep, - uchar* tilted, size_t tstep, - int width, int height, int cn) -{ CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn); CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn)); + if (integral_SIMD(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn)) + return; + #define ONE_CALL(A, B, C) integral_((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn) if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F ) @@ -579,14 +405,14 @@ void integral(int depth, int sdepth, int sqdepth, else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F ) ONE_CALL(double, double, double); else - CV_Error( CV_StsUnsupportedFormat, "" ); + CV_Error(Error::StsUnsupportedFormat, ""); #undef ONE_CALL } -}} // cv::hal:: +} // namespace hal -void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth ) +void integral(InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth ) { CV_INSTRUMENT_REGION(); @@ -624,20 +450,21 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output src.cols, src.rows, cn); } -void cv::integral( InputArray src, OutputArray sum, int sdepth ) +void integral( InputArray src, OutputArray sum, int sdepth ) { CV_INSTRUMENT_REGION(); integral( src, sum, noArray(), noArray(), sdepth ); } -void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth ) +void integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth ) { CV_INSTRUMENT_REGION(); integral( src, sum, sqsum, noArray(), sdepth, sqdepth ); } +} // namespace CV_IMPL void cvIntegral( const CvArr* image, CvArr* sumImage, diff --git a/modules/imgproc/src/sumpixels.hpp b/modules/imgproc/src/sumpixels.hpp deleted file mode 100644 index 8d5ab0a851..0000000000 --- a/modules/imgproc/src/sumpixels.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -// -// Copyright (C) 2019, Intel Corporation, all rights reserved. -#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP -#define OPENCV_IMGPROC_SUM_PIXELS_HPP - -namespace cv -{ - -namespace opt_AVX512_SKX -{ -#if CV_TRY_AVX512_SKX - void calculate_integral_avx512( - const uchar *src, size_t _srcstep, - double *sum, size_t _sumstep, - double *sqsum, size_t _sqsumstep, - int width, int height, int cn); - -#endif -} // end namespace opt_AVX512_SKX -} // end namespace cv - -#endif diff --git a/modules/imgproc/src/sumpixels.simd.hpp b/modules/imgproc/src/sumpixels.simd.hpp index 89337f3507..c8d60a0040 100644 --- a/modules/imgproc/src/sumpixels.simd.hpp +++ b/modules/imgproc/src/sumpixels.simd.hpp @@ -10,7 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved. +// Copyright (C) 2000-2020 Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2014, Itseez Inc., all rights reserved. // Third party copyrights are property of their respective owners. @@ -41,13 +41,26 @@ // //M*/ -#include "precomp.hpp" -#include "opencl_kernels_imgproc.hpp" #include "opencv2/core/hal/intrin.hpp" -#include "sumpixels.hpp" -namespace cv -{ +#if CV_AVX512_SKX +#include "sumpixels.avx512_skx.hpp" +#endif + +namespace cv { namespace hal { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +// forward declarations +bool integral_SIMD( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn); + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +namespace { template struct Integral_SIMD @@ -62,7 +75,7 @@ struct Integral_SIMD } }; - +#if CV_AVX512_SKX template <> struct Integral_SIMD { Integral_SIMD() {}; @@ -74,24 +87,19 @@ struct Integral_SIMD { double *tilted, size_t _tiltedstep, int width, int height, int cn) const { -#if CV_TRY_AVX512_SKX CV_UNUSED(_tiltedstep); // TODO: Add support for 1 channel input (WIP) - if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && (cn <= 4)){ - opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep, - sqsum, _sqsumstep, width, height, cn); + if (!tilted && (cn <= 4)) + { + calculate_integral_avx512(src, _srcstep, sum, _sumstep, + sqsum, _sqsumstep, width, height, cn); return true; } -#else - // Avoid warnings in some builds - CV_UNUSED(src); CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep); - CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep); - CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn); -#endif return false; } }; +#endif #if CV_SIMD && CV_SIMD_WIDTH <= 64 @@ -157,8 +165,6 @@ struct Integral_SIMD for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; } - vx_cleanup(); - return true; } }; @@ -226,333 +232,26 @@ struct Integral_SIMD for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; } - vx_cleanup(); - return true; } }; #endif -template -void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep, - QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep, - int width, int height, int cn ) +} // namespace anon + +bool integral_SIMD( + int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn) { - int x, y, k; + CV_INSTRUMENT_REGION(); - if (Integral_SIMD()(src, _srcstep, - sum, _sumstep, - sqsum, _sqsumstep, - tilted, _tiltedstep, - width, height, cn)) - return; - - int srcstep = (int)(_srcstep/sizeof(T)); - int sumstep = (int)(_sumstep/sizeof(ST)); - int tiltedstep = (int)(_tiltedstep/sizeof(ST)); - int sqsumstep = (int)(_sqsumstep/sizeof(QT)); - - width *= cn; - - memset( sum, 0, (width+cn)*sizeof(sum[0])); - sum += sumstep + cn; - - if( sqsum ) - { - memset( sqsum, 0, (width+cn)*sizeof(sqsum[0])); - sqsum += sqsumstep + cn; - } - - if( tilted ) - { - memset( tilted, 0, (width+cn)*sizeof(tilted[0])); - tilted += tiltedstep + cn; - } - - if( sqsum == 0 && tilted == 0 ) - { - for( y = 0; y < height; y++, src += srcstep - cn, sum += sumstep - cn ) - { - for( k = 0; k < cn; k++, src++, sum++ ) - { - ST s = sum[-cn] = 0; - for( x = 0; x < width; x += cn ) - { - s += src[x]; - sum[x] = sum[x - sumstep] + s; - } - } - } - } - else if( tilted == 0 ) - { - for( y = 0; y < height; y++, src += srcstep - cn, - sum += sumstep - cn, sqsum += sqsumstep - cn ) - { - for( k = 0; k < cn; k++, src++, sum++, sqsum++ ) - { - ST s = sum[-cn] = 0; - QT sq = sqsum[-cn] = 0; - for( x = 0; x < width; x += cn ) - { - T it = src[x]; - s += it; - sq += (QT)it*it; - ST t = sum[x - sumstep] + s; - QT tq = sqsum[x - sqsumstep] + sq; - sum[x] = t; - sqsum[x] = tq; - } - } - } - } - else - { - AutoBuffer _buf(width+cn); - ST* buf = _buf.data(); - ST s; - QT sq; - for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ ) - { - sum[-cn] = tilted[-cn] = 0; - - for( x = 0, s = 0, sq = 0; x < width; x += cn ) - { - T it = src[x]; - buf[x] = tilted[x] = it; - s += it; - sq += (QT)it*it; - sum[x] = s; - if( sqsum ) - sqsum[x] = sq; - } - - if( width == cn ) - buf[cn] = 0; - - if( sqsum ) - { - sqsum[-cn] = 0; - sqsum++; - } - } - - for( y = 1; y < height; y++ ) - { - src += srcstep - cn; - sum += sumstep - cn; - tilted += tiltedstep - cn; - buf += -cn; - - if( sqsum ) - sqsum += sqsumstep - cn; - - for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ ) - { - T it = src[0]; - ST t0 = s = it; - QT tq0 = sq = (QT)it*it; - - sum[-cn] = 0; - if( sqsum ) - sqsum[-cn] = 0; - tilted[-cn] = tilted[-tiltedstep]; - - sum[0] = sum[-sumstep] + t0; - if( sqsum ) - sqsum[0] = sqsum[-sqsumstep] + tq0; - tilted[0] = tilted[-tiltedstep] + t0 + buf[cn]; - - for( x = cn; x < width - cn; x += cn ) - { - ST t1 = buf[x]; - buf[x - cn] = t1 + t0; - t0 = it = src[x]; - tq0 = (QT)it*it; - s += t0; - sq += tq0; - sum[x] = sum[x - sumstep] + s; - if( sqsum ) - sqsum[x] = sqsum[x - sqsumstep] + sq; - t1 += buf[x + cn] + t0 + tilted[x - tiltedstep - cn]; - tilted[x] = t1; - } - - if( width > cn ) - { - ST t1 = buf[x]; - buf[x - cn] = t1 + t0; - t0 = it = src[x]; - tq0 = (QT)it*it; - s += t0; - sq += tq0; - sum[x] = sum[x - sumstep] + s; - if( sqsum ) - sqsum[x] = sqsum[x - sqsumstep] + sq; - tilted[x] = t0 + t1 + tilted[x - tiltedstep - cn]; - buf[x] = t0; - } - - if( sqsum ) - sqsum++; - } - } - } -} - - -#ifdef HAVE_OPENCL - -static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth ) -{ - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - - if ( (_src.type() != CV_8UC1) || - !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F))) - return false; - - static const int tileSize = 16; - - String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s", - ocl::typeToStr(sdepth), tileSize, - doubleSupport ? " -D DOUBLE_SUPPORT" : ""); - - ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (kcols.empty()) - return false; - - UMat src = _src.getUMat(); - Size src_size = src.size(); - Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); - UMat buf(bufsize, sdepth); - kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf)); - size_t gt = src.cols, lt = tileSize; - if (!kcols.run(1, >, <, false)) - return false; - - ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (krows.empty()) - return false; - - Size sumsize(src_size.width + 1, src_size.height + 1); - _sum.create(sumsize, sdepth); - UMat sum = _sum.getUMat(); - - krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum)); - gt = src.rows; - return krows.run(1, >, <, false); -} - -static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth ) -{ - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - - if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) ) - return false; - - static const int tileSize = 16; - - String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s", - ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), - tileSize, - doubleSupport ? " -D DOUBLE_SUPPORT" : ""); - - ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (kcols.empty()) - return false; - - UMat src = _src.getUMat(); - Size src_size = src.size(); - Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize); - UMat buf(bufsize, sdepth); - UMat buf_sq(bufsize, sqdepth); - kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq)); - size_t gt = src.cols, lt = tileSize; - if (!kcols.run(1, >, <, false)) - return false; - - ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt); - if (krows.empty()) - return false; - - Size sumsize(src_size.width + 1, src_size.height + 1); - _sum.create(sumsize, sdepth); - UMat sum = _sum.getUMat(); - _sqsum.create(sumsize, sqdepth); - UMat sum_sq = _sqsum.getUMat(); - - krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq)); - gt = src.rows; - return krows.run(1, >, <, false); -} - -#endif - -} - -#if defined(HAVE_IPP) -namespace cv -{ -static bool ipp_integral( - int depth, int sdepth, int sqdepth, - const uchar* src, size_t srcstep, - uchar* sum, size_t sumstep, - uchar* sqsum, size_t sqsumstep, - uchar* tilted, size_t tstep, - int width, int height, int cn) -{ - CV_INSTRUMENT_REGION_IPP(); - - IppiSize size = {width, height}; - - if(cn > 1) - return false; - if(tilted) - { - CV_UNUSED(tstep); - return false; - } - - if(!sqsum) - { - if(depth == CV_8U && sdepth == CV_32S) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0; - else if(depth == CV_8UC1 && sdepth == CV_32F) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0; - else if(depth == CV_32FC1 && sdepth == CV_32F) - return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0; - else - return false; - } - else - { - if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F) - return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0; - else - return false; - } -} -} -#endif - -namespace cv { namespace hal { - -void integral(int depth, int sdepth, int sqdepth, - const uchar* src, size_t srcstep, - uchar* sum, size_t sumstep, - uchar* sqsum, size_t sqsumstep, - uchar* tilted, size_t tstep, - int width, int height, int cn) -{ - CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn); - CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn)); - -#define ONE_CALL(A, B, C) integral_((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn) +#define ONE_CALL(T, ST, QT) \ + return Integral_SIMD()((const T*)src, srcstep, (ST*)sum, sumstep, (QT*)sqsum, sqsumstep, (ST*)tilted, tstep, width, height, cn) if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F ) ONE_CALL(uchar, int, double); @@ -579,89 +278,11 @@ void integral(int depth, int sdepth, int sqdepth, else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F ) ONE_CALL(double, double, double); else - CV_Error( CV_StsUnsupportedFormat, "" ); + return false; #undef ONE_CALL } +#endif +CV_CPU_OPTIMIZATION_NAMESPACE_END }} // cv::hal:: - -void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth ) -{ - CV_INSTRUMENT_REGION(); - - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - if( sdepth <= 0 ) - sdepth = depth == CV_8U ? CV_32S : CV_64F; - if ( sqdepth <= 0 ) - sqdepth = CV_64F; - sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth); - - CV_OCL_RUN(_sum.isUMat() && !_tilted.needed(), - (_sqsum.needed() ? ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth) : ocl_integral(_src, _sum, sdepth))); - - Size ssize = _src.size(), isize(ssize.width + 1, ssize.height + 1); - _sum.create( isize, CV_MAKETYPE(sdepth, cn) ); - Mat src = _src.getMat(), sum =_sum.getMat(), sqsum, tilted; - - if( _sqsum.needed() ) - { - _sqsum.create( isize, CV_MAKETYPE(sqdepth, cn) ); - sqsum = _sqsum.getMat(); - }; - - if( _tilted.needed() ) - { - _tilted.create( isize, CV_MAKETYPE(sdepth, cn) ); - tilted = _tilted.getMat(); - } - - hal::integral(depth, sdepth, sqdepth, - src.ptr(), src.step, - sum.ptr(), sum.step, - sqsum.ptr(), sqsum.step, - tilted.ptr(), tilted.step, - src.cols, src.rows, cn); -} - -void cv::integral( InputArray src, OutputArray sum, int sdepth ) -{ - CV_INSTRUMENT_REGION(); - - integral( src, sum, noArray(), noArray(), sdepth ); -} - -void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth ) -{ - CV_INSTRUMENT_REGION(); - - integral( src, sum, sqsum, noArray(), sdepth, sqdepth ); -} - - -CV_IMPL void -cvIntegral( const CvArr* image, CvArr* sumImage, - CvArr* sumSqImage, CvArr* tiltedSumImage ) -{ - cv::Mat src = cv::cvarrToMat(image), sum = cv::cvarrToMat(sumImage), sum0 = sum; - cv::Mat sqsum0, sqsum, tilted0, tilted; - cv::Mat *psqsum = 0, *ptilted = 0; - - if( sumSqImage ) - { - sqsum0 = sqsum = cv::cvarrToMat(sumSqImage); - psqsum = &sqsum; - } - - if( tiltedSumImage ) - { - tilted0 = tilted = cv::cvarrToMat(tiltedSumImage); - ptilted = &tilted; - } - cv::integral( src, sum, psqsum ? cv::_OutputArray(*psqsum) : cv::_OutputArray(), - ptilted ? cv::_OutputArray(*ptilted) : cv::_OutputArray(), sum.depth() ); - - CV_Assert( sum.data == sum0.data && sqsum.data == sqsum0.data && tilted.data == tilted0.data ); -} - -/* End of file. */