From 9f1c641199f639fda3bf91c1338696c25e6604f7 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Thu, 18 Jun 2015 17:42:32 +0200 Subject: [PATCH 01/25] spatialGradient: Add test class and Sobel proxy method --- modules/imgproc/include/opencv2/imgproc.hpp | 9 +++ modules/imgproc/src/spatialgradient.cpp | 56 +++++++++++++++++++ modules/imgproc/test/test_filter.cpp | 61 +++++++++++++++++++++ 3 files changed, 126 insertions(+) create mode 100644 modules/imgproc/src/spatialgradient.cpp diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index a9fa7ae21e..c93975eeb8 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1369,6 +1369,15 @@ CV_EXPORTS_W void Sobel( InputArray src, OutputArray dst, int ddepth, double scale = 1, double delta = 0, int borderType = BORDER_DEFAULT ); +/** @brief TODO + +TODO + + */ + +CV_EXPORTS_W void spatialGradient( InputArray src, OutputArray dx, + OutputArray dy, int ksize ); + /** @brief Calculates the first x- or y- image derivative using Scharr operator. The function computes the first x- or y- spatial image derivative using the Scharr operator. The diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp new file mode 100644 index 0000000000..9b1ac89b6f --- /dev/null +++ b/modules/imgproc/src/spatialgradient.cpp @@ -0,0 +1,56 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 
+// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +namespace cv +{ + +void spatialGradient( InputArray src, OutputArray dx, OutputArray dy, int ksize ) +{ + + // TODO: Vectorize using hal intrinsics + Sobel( src, dx, CV_16S, 1, 0, 3 ); + Sobel( src, dy, CV_16S, 0, 1, 3 ); +} + +} diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp index 9253132186..e618c1b588 100644 --- a/modules/imgproc/test/test_filter.cpp +++ b/modules/imgproc/test/test_filter.cpp @@ -552,6 +552,66 @@ void CV_SobelTest::prepare_to_validation( int /*test_case_idx*/ ) } +/////////////// spatialGradient /////////////// + +class CV_SpatialGradientTest : public CV_DerivBaseTest +{ +public: + CV_SpatialGradientTest(); + +protected: + void prepare_to_validation( int test_case_idx ); + void run_func(); + void get_test_array_types_and_sizes( int test_case_idx, + vector<vector<Size> >& sizes, vector<vector<int> >& types ); + int ksize; +}; + +CV_SpatialGradientTest::CV_SpatialGradientTest() { + test_array[OUTPUT].push_back(NULL); + test_array[REF_OUTPUT].push_back(NULL); + inplace = false; +} + + +void CV_SpatialGradientTest::get_test_array_types_and_sizes( int test_case_idx, + vector<vector<Size> >& sizes, + vector<vector<int> >& types ) +{ + CV_DerivBaseTest::get_test_array_types_and_sizes( test_case_idx, sizes, types ); + + sizes[OUTPUT][1] = sizes[REF_OUTPUT][1] = sizes[OUTPUT][0]; + + // Only CV_16S1 for now + types[INPUT][0] = types[OUTPUT][0] = types[OUTPUT][1] = types[REF_OUTPUT][0] + = types[REF_OUTPUT][1] = CV_MAKETYPE(CV_16S, 1); + + ksize = 3; +} + + +void CV_SpatialGradientTest::run_func() +{ + spatialGradient( cvarrToMat(test_array[INPUT][0]), + cvarrToMat(test_array[OUTPUT][0]), + cvarrToMat(test_array[OUTPUT][1]), + ksize + ); +} + + +void CV_SpatialGradientTest::prepare_to_validation( int /*test_case_idx*/ ) +{ + int dx, dy; + + dx = 1; dy = 0; + Sobel( test_mat[INPUT][0], test_mat[REF_OUTPUT][0], CV_16SC1, dx, dy, ksize ); + + dx = 0; dy = 1; + Sobel( test_mat[INPUT][0], test_mat[REF_OUTPUT][1], CV_16SC1, dx, 
dy, ksize ); +} + + /////////////// laplace /////////////// class CV_LaplaceTest : public CV_DerivBaseTest @@ -1773,6 +1833,7 @@ TEST(Imgproc_Dilate, accuracy) { CV_DilateTest test; test.safe_run(); } TEST(Imgproc_MorphologyEx, accuracy) { CV_MorphExTest test; test.safe_run(); } TEST(Imgproc_Filter2D, accuracy) { CV_FilterTest test; test.safe_run(); } TEST(Imgproc_Sobel, accuracy) { CV_SobelTest test; test.safe_run(); } +TEST(Imgproc_SpatialGradient, accuracy) { CV_SpatialGradientTest test; test.safe_run(); } TEST(Imgproc_Laplace, accuracy) { CV_LaplaceTest test; test.safe_run(); } TEST(Imgproc_Blur, accuracy) { CV_BlurTest test; test.safe_run(); } TEST(Imgproc_GaussianBlur, accuracy) { CV_GaussianBlurTest test; test.safe_run(); } From 11fb1f74cc7b7444833ef808b675cdecc98b7d1e Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 19 Jun 2015 01:23:01 +0200 Subject: [PATCH 02/25] spatialGradient: Add asserts --- modules/imgproc/src/spatialgradient.cpp | 18 +++++++++++++++--- modules/imgproc/test/test_filter.cpp | 19 +++++++++++-------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 9b1ac89b6f..86068813bf 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -45,12 +45,24 @@ namespace cv { -void spatialGradient( InputArray src, OutputArray dx, OutputArray dy, int ksize ) +void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksize ) { + Mat src = _src.getMat(); + CV_Assert(!src.empty()); + CV_Assert(src.isContinuous()); + CV_Assert(src.type() == CV_8UC1); + + _dx.create(src.size(), CV_16SC1); + _dy.create(src.size(), CV_16SC1); + Mat dx = _dx.getMat(), + dy = _dy.getMat(); + CV_Assert(dx.isContinuous()); + CV_Assert(dy.isContinuous()); + // TODO: Vectorize using hal intrinsics - Sobel( src, dx, CV_16S, 1, 0, 3 ); - Sobel( src, dy, CV_16S, 0, 1, 3 ); + Sobel( src, dx, CV_16SC1, 1, 0, ksize ); 
+ Sobel( src, dy, CV_16SC1, 0, 1, ksize ); } } diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp index e618c1b588..968d01eda9 100644 --- a/modules/imgproc/test/test_filter.cpp +++ b/modules/imgproc/test/test_filter.cpp @@ -582,9 +582,12 @@ void CV_SpatialGradientTest::get_test_array_types_and_sizes( int test_case_idx, sizes[OUTPUT][1] = sizes[REF_OUTPUT][1] = sizes[OUTPUT][0]; - // Only CV_16S1 for now - types[INPUT][0] = types[OUTPUT][0] = types[OUTPUT][1] = types[REF_OUTPUT][0] - = types[REF_OUTPUT][1] = CV_MAKETYPE(CV_16S, 1); + // Inputs are only CV_8UC1 for now + types[INPUT][0] = CV_8UC1; + + // Outputs are only CV_16SC1 for now + types[OUTPUT][0] = types[OUTPUT][1] = types[REF_OUTPUT][0] + = types[REF_OUTPUT][1] = CV_16SC1; ksize = 3; } @@ -592,11 +595,11 @@ void CV_SpatialGradientTest::get_test_array_types_and_sizes( int test_case_idx, void CV_SpatialGradientTest::run_func() { - spatialGradient( cvarrToMat(test_array[INPUT][0]), - cvarrToMat(test_array[OUTPUT][0]), - cvarrToMat(test_array[OUTPUT][1]), - ksize - ); + Mat dx, dy; + spatialGradient( test_mat[INPUT][0].clone(), dx, dy, ksize ); + + test_mat[OUTPUT][0] = dx; + test_mat[OUTPUT][1] = dy; } From 770e742e04b2f2e78b73ef05faf173decfa95da7 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 19 Jun 2015 02:56:51 +0200 Subject: [PATCH 03/25] spatialGradient: Add non-SSE version --- modules/imgproc/src/spatialgradient.cpp | 135 ++++++++++++++++++++++-- 1 file changed, 125 insertions(+), 10 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 86068813bf..b75b0a59f2 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -48,21 +48,136 @@ namespace cv void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksize ) { + // Prepare InputArray src Mat src = _src.getMat(); - CV_Assert(!src.empty()); - CV_Assert(src.isContinuous()); - 
CV_Assert(src.type() == CV_8UC1); + CV_Assert( !src.empty() ); + CV_Assert( src.isContinuous() ); + CV_Assert( src.type() == CV_8UC1 ); - _dx.create(src.size(), CV_16SC1); - _dy.create(src.size(), CV_16SC1); + // Prepare OutputArrays dx, dy + _dx.create( src.size(), CV_16SC1 ); + _dy.create( src.size(), CV_16SC1 ); Mat dx = _dx.getMat(), dy = _dy.getMat(); - CV_Assert(dx.isContinuous()); - CV_Assert(dy.isContinuous()); + CV_Assert( dx.isContinuous() ); + CV_Assert( dy.isContinuous() ); + + // TODO: Allow for other kernel sizes + CV_Assert(ksize == 3); + + // Reference + //Sobel( src, dx, CV_16SC1, 1, 0, ksize ); + //Sobel( src, dy, CV_16SC1, 0, 1, ksize ); + + // Get dimensions + int H = src.rows, + W = src.cols, + N = H * W; + + // Get raw pointers to input/output data + uchar* p_src = src.ptr<uchar>(0); + short* p_dx = dx.ptr<short>(0); + short* p_dy = dy.ptr<short>(0); + + // Row, column indices + int i, j; + + /* NOTE: + * + * Sobel-x: -1 0 1 + * -2 0 2 + * -1 0 1 + * + * Sobel-y: -1 -2 -1 + * 0 0 0 + * 1 2 1 + */ + + // No-SSE + int idx; + + + p_dx[0] = 0; // Top-left corner + p_dy[0] = 0; + p_dx[W-1] = 0; // Top-right corner + p_dy[W-1] = 0; + p_dx[N-1] = 0; // Bottom-right corner + p_dy[N-1] = 0; + p_dx[N-W] = 0; // Bottom-left corner + p_dy[N-W] = 0; + + // Handle special case: column matrix + if ( W == 1 ) + { + for ( i = 1; i < H - 1; i++ ) + { + p_dx[i] = 0; + p_dy[i] = 4*(p_src[i + 1] - p_src[i - 1]); // Should be 2?! 4 makes tests pass + } + return; + } + + // Handle special case: row matrix + if ( H == 1 ) + { + for ( j = 1; j < W - 1; j++ ) + { + p_dx[j] = 4*(p_src[j + 1] - p_src[j - 1]); // Should be 2?! 
4 makes tests pass + p_dy[j] = 0; + } + return; + } + + // Do top row + for ( j = 1; j < W - 1; j++ ) + { + idx = j; + p_dx[idx] = -(p_src[idx+W-1] + 2*p_src[idx-1] + p_src[idx+W-1]) + + (p_src[idx+W+1] + 2*p_src[idx+1] + p_src[idx+W+1]); + p_dy[idx] = 0; + } + + // Do right column + idx = 2*W - 1; + for ( i = 1; i < H - 1; i++ ) + { + p_dx[idx] = 0; + p_dy[idx] = -(p_src[idx-W-1] + 2*p_src[idx-W] + p_src[idx-W-1]) + + (p_src[idx+W-1] + 2*p_src[idx+W] + p_src[idx+W-1]); + idx += W; + } + + // Do bottom row + idx = N - W + 1; + for ( j = 1; j < W - 1; j++ ) + { + p_dx[idx] = -(p_src[idx-W-1] + 2*p_src[idx-1] + p_src[idx-W-1]) + + (p_src[idx-W+1] + 2*p_src[idx+1] + p_src[idx-W+1]); + p_dy[idx] = 0; + idx++; + } + + // Do left column + idx = W; + for ( i = 1; i < H - 1; i++ ) + { + p_dx[idx] = 0; + p_dy[idx] = -(p_src[idx-W+1] + 2*p_src[idx-W] + p_src[idx-W+1]) + + (p_src[idx+W+1] + 2*p_src[idx+W] + p_src[idx+W+1]); + idx += W; + } + + // Do Inner area + for ( i = 1; i < H - 1; i++ ) + for ( j = 1; j < W - 1; j++ ) + { + idx = i*W + j; + p_dx[idx] = -(p_src[idx-W-1] + 2*p_src[idx-1] + p_src[idx+W-1]) + + (p_src[idx-W+1] + 2*p_src[idx+1] + p_src[idx+W+1]); + p_dy[idx] = -(p_src[idx-W-1] + 2*p_src[idx-W] + p_src[idx-W+1]) + + (p_src[idx+W-1] + 2*p_src[idx+W] + p_src[idx+W+1]); + } - // TODO: Vectorize using hal intrinsics - Sobel( src, dx, CV_16SC1, 1, 0, ksize ); - Sobel( src, dy, CV_16SC1, 0, 1, ksize ); } } From 88bc88125a6574501aac6f5536e8a2f8b6f37408 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 19 Jun 2015 03:36:49 +0200 Subject: [PATCH 04/25] spatialGradient: Vectorise inner area --- modules/imgproc/src/precomp.hpp | 1 + modules/imgproc/src/spatialgradient.cpp | 97 ++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/modules/imgproc/src/precomp.hpp b/modules/imgproc/src/precomp.hpp index e71a0356c0..7a0cece2f2 100644 --- a/modules/imgproc/src/precomp.hpp +++ b/modules/imgproc/src/precomp.hpp @@ -49,6 +49,7 @@ #include 
"opencv2/imgproc/imgproc_c.h" #include "opencv2/core/private.hpp" #include "opencv2/core/ocl.hpp" +#include "opencv2/hal.hpp" #include #include diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index b75b0a59f2..6e8c8400eb 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -41,6 +41,7 @@ //M*/ #include "precomp.hpp" +#include "opencv2/hal/intrin.hpp" namespace cv { @@ -96,7 +97,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi // No-SSE int idx; - p_dx[0] = 0; // Top-left corner p_dy[0] = 0; p_dx[W-1] = 0; // Top-right corner @@ -168,6 +168,100 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi } // Do Inner area +#if CV_SIMD128 + // Characters in variable names have the following meanings: + // u: unsigned char + // s: signed int + // + // [row][column] + // m: offset -1 + // n: offset 0 + // p: offset 1 + // Example: umn is offset -1 in row and offset 0 in column + v_uint8x16 v_umm, v_umn, v_ump, + v_unm, v_unn, v_unp, + v_upm, v_upn, v_upp; + v_uint16x8 v_umm1, v_umm2, v_umn1, v_umn2, v_ump1, v_ump2, + v_unm1, v_unm2, v_unn1, v_unn2, v_unp1, v_unp2, + v_upm1, v_upm2, v_upn1, v_upn2, v_upp1, v_upp2; + v_int16x8 v_smm1, v_smm2, v_smn1, v_smn2, v_smp1, v_smp2, + v_snm1, v_snm2, v_snn1, v_snn2, v_snp1, v_snp2, + v_spm1, v_spm2, v_spn1, v_spn2, v_spp1, v_spp2, + v_two = v_setall_s16(2), + v_sdx1, v_sdx2, v_sdy1, v_sdy2; + for ( i = 1; i < H - 1; i++ ) + for ( j = 1; j < W - 1 - 15; j += 16 ) + { + // Load + idx = i*W + j; + v_umm = v_load(&p_src[idx - W - 1]); + v_umn = v_load(&p_src[idx - W]); + v_ump = v_load(&p_src[idx - W + 1]); + v_unm = v_load(&p_src[idx - 1]); + v_unn = v_load(&p_src[idx]); + v_unp = v_load(&p_src[idx + 1]); + v_upm = v_load(&p_src[idx + W - 1]); + v_upn = v_load(&p_src[idx + W]); + v_upp = v_load(&p_src[idx + W + 1]); + + // Expand to uint + v_expand(v_umm, v_umm1, v_umm2); + 
v_expand(v_umn, v_umn1, v_umn2); + v_expand(v_ump, v_ump1, v_ump2); + v_expand(v_unm, v_unm1, v_unm2); + v_expand(v_unn, v_unn1, v_unn2); + v_expand(v_unp, v_unp1, v_unp2); + v_expand(v_upm, v_upm1, v_upm2); + v_expand(v_upn, v_upn1, v_upn2); + v_expand(v_upp, v_upp1, v_upp2); + + // Convert to int + v_smm1 = v_reinterpret_as_s16(v_umm1); + v_smm2 = v_reinterpret_as_s16(v_umm2); + v_smn1 = v_reinterpret_as_s16(v_umn1); + v_smn2 = v_reinterpret_as_s16(v_umn2); + v_smp1 = v_reinterpret_as_s16(v_ump1); + v_smp2 = v_reinterpret_as_s16(v_ump2); + v_snm1 = v_reinterpret_as_s16(v_unm1); + v_snm2 = v_reinterpret_as_s16(v_unm2); + v_snn1 = v_reinterpret_as_s16(v_unn1); + v_snn2 = v_reinterpret_as_s16(v_unn2); + v_snp1 = v_reinterpret_as_s16(v_unp1); + v_snp2 = v_reinterpret_as_s16(v_unp2); + v_spm1 = v_reinterpret_as_s16(v_upm1); + v_spm2 = v_reinterpret_as_s16(v_upm2); + v_spn1 = v_reinterpret_as_s16(v_upn1); + v_spn2 = v_reinterpret_as_s16(v_upn2); + v_spp1 = v_reinterpret_as_s16(v_upp1); + v_spp2 = v_reinterpret_as_s16(v_upp2); + + // dx + v_sdx1 = (v_smp1 - v_smm1) + v_two*(v_snp1 - v_snm1) + (v_spp1 - v_spm1); + v_sdx2 = (v_smp2 - v_smm2) + v_two*(v_snp2 - v_snm2) + (v_spp2 - v_spm2); + + // dy + v_sdy1 = (v_spm1 - v_smm1) + v_two*(v_spn1 - v_smn1) + (v_spp1 - v_smp1); + v_sdy2 = (v_spm2 - v_smm2) + v_two*(v_spn2 - v_smn2) + (v_spp2 - v_smp2); + + // Store + v_store(&p_dx[idx], v_sdx1); + v_store(&p_dx[idx+8], v_sdx2); + v_store(&p_dy[idx], v_sdy1); + v_store(&p_dy[idx+8], v_sdy2); + } + + // Cleanup + int end_j = j; + for ( i = 1; i < H - 1; i++ ) + for ( j = end_j; j < W - 1; j++ ) + { + idx = i*W + j; + p_dx[idx] = -(p_src[idx-W-1] + 2*p_src[idx-1] + p_src[idx+W-1]) + + (p_src[idx-W+1] + 2*p_src[idx+1] + p_src[idx+W+1]); + p_dy[idx] = -(p_src[idx-W-1] + 2*p_src[idx-W] + p_src[idx-W+1]) + + (p_src[idx+W-1] + 2*p_src[idx+W] + p_src[idx+W+1]); + } +#else for ( i = 1; i < H - 1; i++ ) for ( j = 1; j < W - 1; j++ ) { @@ -177,6 +271,7 @@ void spatialGradient( InputArray 
_src, OutputArray _dx, OutputArray _dy, int ksi p_dy[idx] = -(p_src[idx-W-1] + 2*p_src[idx-W] + p_src[idx-W+1]) + (p_src[idx+W-1] + 2*p_src[idx+W] + p_src[idx+W+1]); } +#endif } From a2dbd2f10e70b11b48cf2b903a452d31730cd6fa Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 19 Jun 2015 04:07:18 +0200 Subject: [PATCH 05/25] spatialGradient: Less vector loads --- modules/imgproc/src/spatialgradient.cpp | 77 ++++++++++++++++--------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 6e8c8400eb..e0db8cd985 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -189,20 +189,18 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi v_spm1, v_spm2, v_spn1, v_spn2, v_spp1, v_spp2, v_two = v_setall_s16(2), v_sdx1, v_sdx2, v_sdy1, v_sdy2; - for ( i = 1; i < H - 1; i++ ) + + // Go through 16-column chunks at a time for ( j = 1; j < W - 1 - 15; j += 16 ) { - // Load - idx = i*W + j; + // Load top two rows for 3x3 Sobel filter + idx = W + j; v_umm = v_load(&p_src[idx - W - 1]); v_umn = v_load(&p_src[idx - W]); v_ump = v_load(&p_src[idx - W + 1]); v_unm = v_load(&p_src[idx - 1]); v_unn = v_load(&p_src[idx]); v_unp = v_load(&p_src[idx + 1]); - v_upm = v_load(&p_src[idx + W - 1]); - v_upn = v_load(&p_src[idx + W]); - v_upp = v_load(&p_src[idx + W + 1]); // Expand to uint v_expand(v_umm, v_umm1, v_umm2); @@ -211,9 +209,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi v_expand(v_unm, v_unm1, v_unm2); v_expand(v_unn, v_unn1, v_unn2); v_expand(v_unp, v_unp1, v_unp2); - v_expand(v_upm, v_upm1, v_upm2); - v_expand(v_upn, v_upn1, v_upn2); - v_expand(v_upp, v_upp1, v_upp2); // Convert to int v_smm1 = v_reinterpret_as_s16(v_umm1); @@ -228,26 +223,56 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi v_snn2 = v_reinterpret_as_s16(v_unn2); 
v_snp1 = v_reinterpret_as_s16(v_unp1); v_snp2 = v_reinterpret_as_s16(v_unp2); - v_spm1 = v_reinterpret_as_s16(v_upm1); - v_spm2 = v_reinterpret_as_s16(v_upm2); - v_spn1 = v_reinterpret_as_s16(v_upn1); - v_spn2 = v_reinterpret_as_s16(v_upn2); - v_spp1 = v_reinterpret_as_s16(v_upp1); - v_spp2 = v_reinterpret_as_s16(v_upp2); - // dx - v_sdx1 = (v_smp1 - v_smm1) + v_two*(v_snp1 - v_snm1) + (v_spp1 - v_spm1); - v_sdx2 = (v_smp2 - v_smm2) + v_two*(v_snp2 - v_snm2) + (v_spp2 - v_spm2); + for ( i = 1; i < H - 1; i++ ) + { + // Load last row for 3x3 Sobel filter + idx = i*W + j; + v_upm = v_load(&p_src[idx + W - 1]); + v_upn = v_load(&p_src[idx + W]); + v_upp = v_load(&p_src[idx + W + 1]); - // dy - v_sdy1 = (v_spm1 - v_smm1) + v_two*(v_spn1 - v_smn1) + (v_spp1 - v_smp1); - v_sdy2 = (v_spm2 - v_smm2) + v_two*(v_spn2 - v_smn2) + (v_spp2 - v_smp2); + // Expand to uint + v_expand(v_upm, v_upm1, v_upm2); + v_expand(v_upn, v_upn1, v_upn2); + v_expand(v_upp, v_upp1, v_upp2); - // Store - v_store(&p_dx[idx], v_sdx1); - v_store(&p_dx[idx+8], v_sdx2); - v_store(&p_dy[idx], v_sdy1); - v_store(&p_dy[idx+8], v_sdy2); + // Convert to int + v_spm1 = v_reinterpret_as_s16(v_upm1); + v_spm2 = v_reinterpret_as_s16(v_upm2); + v_spn1 = v_reinterpret_as_s16(v_upn1); + v_spn2 = v_reinterpret_as_s16(v_upn2); + v_spp1 = v_reinterpret_as_s16(v_upp1); + v_spp2 = v_reinterpret_as_s16(v_upp2); + + // dx + v_sdx1 = (v_smp1 - v_smm1) + v_two*(v_snp1 - v_snm1) + (v_spp1 - v_spm1); + v_sdx2 = (v_smp2 - v_smm2) + v_two*(v_snp2 - v_snm2) + (v_spp2 - v_spm2); + + // dy + v_sdy1 = (v_spm1 - v_smm1) + v_two*(v_spn1 - v_smn1) + (v_spp1 - v_smp1); + v_sdy2 = (v_spm2 - v_smm2) + v_two*(v_spn2 - v_smn2) + (v_spp2 - v_smp2); + + // Store + v_store(&p_dx[idx], v_sdx1); + v_store(&p_dx[idx+8], v_sdx2); + v_store(&p_dy[idx], v_sdy1); + v_store(&p_dy[idx+8], v_sdy2); + + // Shift loaded rows up one + v_smm1 = v_snm1; + v_smm2 = v_snm2; + v_smn1 = v_snn1; + v_smn2 = v_snn2; + v_smp1 = v_snp1; + v_smp2 = v_snp2; + v_snm1 
= v_spm1; + v_snm2 = v_spm2; + v_snn1 = v_spn1; + v_snn2 = v_spn2; + v_snp1 = v_spp1; + v_snp2 = v_spp2; + } } // Cleanup From f9c4c96663b029587220f0b8c324ea5ff7b6410e Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 19 Jun 2015 04:29:07 +0200 Subject: [PATCH 06/25] spatialGradient: Reduce temporary vectors --- modules/imgproc/src/spatialgradient.cpp | 87 +++++++++++-------------- 1 file changed, 38 insertions(+), 49 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index e0db8cd985..7b35e249d9 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -178,12 +178,8 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi // n: offset 0 // p: offset 1 // Example: umn is offset -1 in row and offset 0 in column - v_uint8x16 v_umm, v_umn, v_ump, - v_unm, v_unn, v_unp, - v_upm, v_upn, v_upp; - v_uint16x8 v_umm1, v_umm2, v_umn1, v_umn2, v_ump1, v_ump2, - v_unm1, v_unm2, v_unn1, v_unn2, v_unp1, v_unp2, - v_upm1, v_upm2, v_upn1, v_upn2, v_upp1, v_upp2; + v_uint8x16 v_um, v_un, v_up; + v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2; v_int16x8 v_smm1, v_smm2, v_smn1, v_smn2, v_smp1, v_smp2, v_snm1, v_snm2, v_snn1, v_snn2, v_snp1, v_snp2, v_spm1, v_spm2, v_spn1, v_spn2, v_spp1, v_spp2, @@ -195,55 +191,48 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi { // Load top two rows for 3x3 Sobel filter idx = W + j; - v_umm = v_load(&p_src[idx - W - 1]); - v_umn = v_load(&p_src[idx - W]); - v_ump = v_load(&p_src[idx - W + 1]); - v_unm = v_load(&p_src[idx - 1]); - v_unn = v_load(&p_src[idx]); - v_unp = v_load(&p_src[idx + 1]); + v_um = v_load(&p_src[idx - W - 1]); + v_un = v_load(&p_src[idx - W]); + v_up = v_load(&p_src[idx - W + 1]); + v_expand(v_um, v_um1, v_um2); + v_expand(v_un, v_un1, v_un2); + v_expand(v_up, v_up1, v_up2); + v_smm1 = v_reinterpret_as_s16(v_um1); + v_smm2 = v_reinterpret_as_s16(v_um2); + 
v_smn1 = v_reinterpret_as_s16(v_un1); + v_smn2 = v_reinterpret_as_s16(v_un2); + v_smp1 = v_reinterpret_as_s16(v_up1); + v_smp2 = v_reinterpret_as_s16(v_up2); - // Expand to uint - v_expand(v_umm, v_umm1, v_umm2); - v_expand(v_umn, v_umn1, v_umn2); - v_expand(v_ump, v_ump1, v_ump2); - v_expand(v_unm, v_unm1, v_unm2); - v_expand(v_unn, v_unn1, v_unn2); - v_expand(v_unp, v_unp1, v_unp2); - - // Convert to int - v_smm1 = v_reinterpret_as_s16(v_umm1); - v_smm2 = v_reinterpret_as_s16(v_umm2); - v_smn1 = v_reinterpret_as_s16(v_umn1); - v_smn2 = v_reinterpret_as_s16(v_umn2); - v_smp1 = v_reinterpret_as_s16(v_ump1); - v_smp2 = v_reinterpret_as_s16(v_ump2); - v_snm1 = v_reinterpret_as_s16(v_unm1); - v_snm2 = v_reinterpret_as_s16(v_unm2); - v_snn1 = v_reinterpret_as_s16(v_unn1); - v_snn2 = v_reinterpret_as_s16(v_unn2); - v_snp1 = v_reinterpret_as_s16(v_unp1); - v_snp2 = v_reinterpret_as_s16(v_unp2); + v_um = v_load(&p_src[idx - 1]); + v_un = v_load(&p_src[idx]); + v_up = v_load(&p_src[idx + 1]); + v_expand(v_um, v_um1, v_um2); + v_expand(v_un, v_un1, v_un2); + v_expand(v_up, v_up1, v_up2); + v_snm1 = v_reinterpret_as_s16(v_um1); + v_snm2 = v_reinterpret_as_s16(v_um2); + v_snn1 = v_reinterpret_as_s16(v_un1); + v_snn2 = v_reinterpret_as_s16(v_un2); + v_snp1 = v_reinterpret_as_s16(v_up1); + v_snp2 = v_reinterpret_as_s16(v_up2); for ( i = 1; i < H - 1; i++ ) { // Load last row for 3x3 Sobel filter idx = i*W + j; - v_upm = v_load(&p_src[idx + W - 1]); - v_upn = v_load(&p_src[idx + W]); - v_upp = v_load(&p_src[idx + W + 1]); - - // Expand to uint - v_expand(v_upm, v_upm1, v_upm2); - v_expand(v_upn, v_upn1, v_upn2); - v_expand(v_upp, v_upp1, v_upp2); - - // Convert to int - v_spm1 = v_reinterpret_as_s16(v_upm1); - v_spm2 = v_reinterpret_as_s16(v_upm2); - v_spn1 = v_reinterpret_as_s16(v_upn1); - v_spn2 = v_reinterpret_as_s16(v_upn2); - v_spp1 = v_reinterpret_as_s16(v_upp1); - v_spp2 = v_reinterpret_as_s16(v_upp2); + v_um = v_load(&p_src[idx + W - 1]); + v_un = v_load(&p_src[idx + 
W]); + v_up = v_load(&p_src[idx + W + 1]); + v_expand(v_um, v_um1, v_um2); + v_expand(v_un, v_un1, v_un2); + v_expand(v_up, v_up1, v_up2); + v_spm1 = v_reinterpret_as_s16(v_um1); + v_spm2 = v_reinterpret_as_s16(v_um2); + v_spn1 = v_reinterpret_as_s16(v_un1); + v_spn2 = v_reinterpret_as_s16(v_un2); + v_spp1 = v_reinterpret_as_s16(v_up1); + v_spp2 = v_reinterpret_as_s16(v_up2); // dx v_sdx1 = (v_smp1 - v_smm1) + v_two*(v_snp1 - v_snm1) + (v_spp1 - v_spm1); From b5c4355c13c35de242c23d284886fc9e17589b9e Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 19 Jun 2015 04:29:28 +0200 Subject: [PATCH 07/25] spatialGradient: Add basic perf test --- modules/imgproc/perf/perf_spatialgradient.cpp | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 modules/imgproc/perf/perf_spatialgradient.cpp diff --git a/modules/imgproc/perf/perf_spatialgradient.cpp b/modules/imgproc/perf/perf_spatialgradient.cpp new file mode 100644 index 0000000000..31219a3d92 --- /dev/null +++ b/modules/imgproc/perf/perf_spatialgradient.cpp @@ -0,0 +1,34 @@ +#include "perf_precomp.hpp" + +using namespace std; +using namespace cv; +using namespace perf; +using namespace testing; +using std::tr1::make_tuple; +using std::tr1::get; + +typedef std::tr1::tuple<Size, int> Size_Ksize_t; +typedef perf::TestBaseWithParam<Size_Ksize_t> Size_Ksize; + +PERF_TEST_P( Size_Ksize, spatialGradient, + Combine( + SZ_ALL_HD, + Values( 3 ) + ) +) +{ + Size size = std::tr1::get<0>(GetParam()); + int ksize = std::tr1::get<1>(GetParam()); + + Mat src(size, CV_8UC1); + Mat dx(size, CV_16SC1); + Mat dy(size, CV_16SC1); + + declare.in(src, WARMUP_RNG).out(dx, dy); + + TEST_CYCLE() spatialGradient(src, dx, dy, ksize); + + SANITY_CHECK(dx); + SANITY_CHECK(dy); +} + From 815cd8970d653cd1fcfd54e84af172d0701def14 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 19 Jun 2015 04:46:17 +0200 Subject: [PATCH 08/25] spatialGradient: Remove unnecessary index calculation --- modules/imgproc/src/spatialgradient.cpp | 3 ++- 
modules/imgproc/test/test_filter.cpp | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 7b35e249d9..7afcda52b2 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -220,7 +220,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi for ( i = 1; i < H - 1; i++ ) { // Load last row for 3x3 Sobel filter - idx = i*W + j; v_um = v_load(&p_src[idx + W - 1]); v_un = v_load(&p_src[idx + W]); v_up = v_load(&p_src[idx + W + 1]); @@ -261,6 +260,8 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi v_snn2 = v_spn2; v_snp1 = v_spp1; v_snp2 = v_spp2; + + idx += W; } } diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp index 968d01eda9..0c98ed35c5 100644 --- a/modules/imgproc/test/test_filter.cpp +++ b/modules/imgproc/test/test_filter.cpp @@ -602,7 +602,6 @@ void CV_SpatialGradientTest::run_func() test_mat[OUTPUT][1] = dy; } - void CV_SpatialGradientTest::prepare_to_validation( int /*test_case_idx*/ ) { int dx, dy; From e633c991b0ab03a5fbbcb6ab58089fd34be17fbc Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 19 Jun 2015 04:57:01 +0200 Subject: [PATCH 09/25] spatialGradient: Doc, fix dangling newline error --- modules/imgproc/include/opencv2/imgproc.hpp | 17 ++++++++++++++--- modules/imgproc/perf/perf_spatialgradient.cpp | 1 - 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index c93975eeb8..947dfc3029 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1369,14 +1369,25 @@ CV_EXPORTS_W void Sobel( InputArray src, OutputArray dst, int ddepth, double scale = 1, double delta = 0, int borderType = BORDER_DEFAULT ); -/** @brief TODO +/** @brief Calculates the first 
order image derivative in both x and y using a Sobel operator -TODO +Equivalent to calling: +@code +Sobel( src, dx, CV_16SC1, 1, 0, 3 ); +Sobel( src, dy, CV_16SC1, 0, 1, 3 ); +@endcode + +@param src input image. +@param dx output image with first-order derivative in x. +@param dy output image with first-order derivative in y. +@param ksize size of Sobel kernel. It must be 3. + +@sa Sobel */ CV_EXPORTS_W void spatialGradient( InputArray src, OutputArray dx, - OutputArray dy, int ksize ); + OutputArray dy, int ksize = 3 ); /** @brief Calculates the first x- or y- image derivative using Scharr operator. diff --git a/modules/imgproc/perf/perf_spatialgradient.cpp b/modules/imgproc/perf/perf_spatialgradient.cpp index 31219a3d92..87456146de 100644 --- a/modules/imgproc/perf/perf_spatialgradient.cpp +++ b/modules/imgproc/perf/perf_spatialgradient.cpp @@ -31,4 +31,3 @@ PERF_TEST_P( Size_Ksize, spatialGradient, SANITY_CHECK(dx); SANITY_CHECK(dy); } - From 2ff614dfabc872261969b46e655aa15223ab66d8 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 19 Jun 2015 05:30:05 +0200 Subject: [PATCH 10/25] spatialGradient: Per row in outer loop --- modules/imgproc/src/spatialgradient.cpp | 99 ++++++++++--------------- 1 file changed, 41 insertions(+), 58 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 7afcda52b2..e87869a4ea 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -186,39 +186,40 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi v_two = v_setall_s16(2), v_sdx1, v_sdx2, v_sdy1, v_sdy2; - // Go through 16-column chunks at a time - for ( j = 1; j < W - 1 - 15; j += 16 ) + for ( i = 1; i < H - 1; i++ ) { - // Load top two rows for 3x3 Sobel filter - idx = W + j; - v_um = v_load(&p_src[idx - W - 1]); - v_un = v_load(&p_src[idx - W]); - v_up = v_load(&p_src[idx - W + 1]); - v_expand(v_um, v_um1, v_um2); - v_expand(v_un, v_un1, 
v_un2); - v_expand(v_up, v_up1, v_up2); - v_smm1 = v_reinterpret_as_s16(v_um1); - v_smm2 = v_reinterpret_as_s16(v_um2); - v_smn1 = v_reinterpret_as_s16(v_un1); - v_smn2 = v_reinterpret_as_s16(v_un2); - v_smp1 = v_reinterpret_as_s16(v_up1); - v_smp2 = v_reinterpret_as_s16(v_up2); - - v_um = v_load(&p_src[idx - 1]); - v_un = v_load(&p_src[idx]); - v_up = v_load(&p_src[idx + 1]); - v_expand(v_um, v_um1, v_um2); - v_expand(v_un, v_un1, v_un2); - v_expand(v_up, v_up1, v_up2); - v_snm1 = v_reinterpret_as_s16(v_um1); - v_snm2 = v_reinterpret_as_s16(v_um2); - v_snn1 = v_reinterpret_as_s16(v_un1); - v_snn2 = v_reinterpret_as_s16(v_un2); - v_snp1 = v_reinterpret_as_s16(v_up1); - v_snp2 = v_reinterpret_as_s16(v_up2); - - for ( i = 1; i < H - 1; i++ ) + // 16-column chunks at a time + for ( j = 1; j < W - 1 - 15; j += 16 ) { + // Load top row for 3x3 Sobel filter + idx = i*W + j; + v_um = v_load(&p_src[idx - W - 1]); + v_un = v_load(&p_src[idx - W]); + v_up = v_load(&p_src[idx - W + 1]); + v_expand(v_um, v_um1, v_um2); + v_expand(v_un, v_un1, v_un2); + v_expand(v_up, v_up1, v_up2); + v_smm1 = v_reinterpret_as_s16(v_um1); + v_smm2 = v_reinterpret_as_s16(v_um2); + v_smn1 = v_reinterpret_as_s16(v_un1); + v_smn2 = v_reinterpret_as_s16(v_un2); + v_smp1 = v_reinterpret_as_s16(v_up1); + v_smp2 = v_reinterpret_as_s16(v_up2); + + // Load second row for 3x3 Sobel filter + v_um = v_load(&p_src[idx - 1]); + v_un = v_load(&p_src[idx]); + v_up = v_load(&p_src[idx + 1]); + v_expand(v_um, v_um1, v_um2); + v_expand(v_un, v_un1, v_un2); + v_expand(v_up, v_up1, v_up2); + v_snm1 = v_reinterpret_as_s16(v_um1); + v_snm2 = v_reinterpret_as_s16(v_um2); + v_snn1 = v_reinterpret_as_s16(v_un1); + v_snn2 = v_reinterpret_as_s16(v_un2); + v_snp1 = v_reinterpret_as_s16(v_up1); + v_snp2 = v_reinterpret_as_s16(v_up2); + // Load last row for 3x3 Sobel filter v_um = v_load(&p_src[idx + W - 1]); v_un = v_load(&p_src[idx + W]); @@ -246,35 +247,17 @@ void spatialGradient( InputArray _src, OutputArray _dx, 
OutputArray _dy, int ksi v_store(&p_dx[idx+8], v_sdx2); v_store(&p_dy[idx], v_sdy1); v_store(&p_dy[idx+8], v_sdy2); - - // Shift loaded rows up one - v_smm1 = v_snm1; - v_smm2 = v_snm2; - v_smn1 = v_snn1; - v_smn2 = v_snn2; - v_smp1 = v_snp1; - v_smp2 = v_snp2; - v_snm1 = v_spm1; - v_snm2 = v_spm2; - v_snn1 = v_spn1; - v_snn2 = v_spn2; - v_snp1 = v_spp1; - v_snp2 = v_spp2; - - idx += W; } - } - // Cleanup - int end_j = j; - for ( i = 1; i < H - 1; i++ ) - for ( j = end_j; j < W - 1; j++ ) - { - idx = i*W + j; - p_dx[idx] = -(p_src[idx-W-1] + 2*p_src[idx-1] + p_src[idx+W-1]) + - (p_src[idx-W+1] + 2*p_src[idx+1] + p_src[idx+W+1]); - p_dy[idx] = -(p_src[idx-W-1] + 2*p_src[idx-W] + p_src[idx-W+1]) + - (p_src[idx+W-1] + 2*p_src[idx+W] + p_src[idx+W+1]); + // Cleanup + for ( ; j < W - 1; j++ ) + { + idx = i*W + j; + p_dx[idx] = -(p_src[idx-W-1] + 2*p_src[idx-1] + p_src[idx+W-1]) + + (p_src[idx-W+1] + 2*p_src[idx+1] + p_src[idx+W+1]); + p_dy[idx] = -(p_src[idx-W-1] + 2*p_src[idx-W] + p_src[idx-W+1]) + + (p_src[idx+W-1] + 2*p_src[idx+W] + p_src[idx+W+1]); + } } #else for ( i = 1; i < H - 1; i++ ) From 6803d1ed2808344b810c0ca4d4cd5e79f1a9db3d Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 26 Jun 2015 14:49:31 +0200 Subject: [PATCH 11/25] Support non continuous, BORDER_REPLICATE TODO: HAL-accelerated code --- modules/imgproc/include/opencv2/imgproc.hpp | 4 +- modules/imgproc/perf/perf_spatialgradient.cpp | 12 +- modules/imgproc/src/spatialgradient.cpp | 160 +++++++----------- modules/imgproc/test/test_filter.cpp | 16 +- 4 files changed, 82 insertions(+), 110 deletions(-) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 947dfc3029..baa81cf0ba 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1382,12 +1382,14 @@ Sobel( src, dy, CV_16SC1, 0, 1, 3 ); @param dx output image with first-order derivative in x. 
@param dy output image with first-order derivative in y. @param ksize size of Sobel kernel. It must be 3. +@param borderType pixel extrapolation method, see cv::BorderTypes @sa Sobel */ CV_EXPORTS_W void spatialGradient( InputArray src, OutputArray dx, - OutputArray dy, int ksize = 3 ); + OutputArray dy, int ksize = 3, + int borderType = BORDER_DEFAULT ); /** @brief Calculates the first x- or y- image derivative using Scharr operator. diff --git a/modules/imgproc/perf/perf_spatialgradient.cpp b/modules/imgproc/perf/perf_spatialgradient.cpp index 87456146de..84d41e1dc2 100644 --- a/modules/imgproc/perf/perf_spatialgradient.cpp +++ b/modules/imgproc/perf/perf_spatialgradient.cpp @@ -7,18 +7,20 @@ using namespace testing; using std::tr1::make_tuple; using std::tr1::get; -typedef std::tr1::tuple Size_Ksize_t; -typedef perf::TestBaseWithParam Size_Ksize; +typedef std::tr1::tuple Size_Ksize_BorderType_t; +typedef perf::TestBaseWithParam Size_Ksize_BorderType; -PERF_TEST_P( Size_Ksize, spatialGradient, +PERF_TEST_P( Size_Ksize_BorderType, spatialGradient, Combine( SZ_ALL_HD, - Values( 3 ) + Values( 3 ), + Values( BORDER_DEFAULT ) ) ) { Size size = std::tr1::get<0>(GetParam()); int ksize = std::tr1::get<1>(GetParam()); + int borderType = std::tr1::get<2>(GetParam()); Mat src(size, CV_8UC1); Mat dx(size, CV_16SC1); @@ -26,7 +28,7 @@ PERF_TEST_P( Size_Ksize, spatialGradient, declare.in(src, WARMUP_RNG).out(dx, dy); - TEST_CYCLE() spatialGradient(src, dx, dy, ksize); + TEST_CYCLE() spatialGradient(src, dx, dy, ksize, borderType); SANITY_CHECK(dx); SANITY_CHECK(dy); diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index e87869a4ea..ac209af17c 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -46,128 +46,73 @@ namespace cv { -void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksize ) +void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, 
+ int ksize, int borderType ) { // Prepare InputArray src Mat src = _src.getMat(); CV_Assert( !src.empty() ); - CV_Assert( src.isContinuous() ); CV_Assert( src.type() == CV_8UC1 ); + CV_Assert( borderType == BORDER_DEFAULT || borderType == BORDER_REPLICATE ); // Prepare OutputArrays dx, dy _dx.create( src.size(), CV_16SC1 ); _dy.create( src.size(), CV_16SC1 ); Mat dx = _dx.getMat(), dy = _dy.getMat(); - CV_Assert( dx.isContinuous() ); - CV_Assert( dy.isContinuous() ); // TODO: Allow for other kernel sizes CV_Assert(ksize == 3); - // Reference - //Sobel( src, dx, CV_16SC1, 1, 0, ksize ); - //Sobel( src, dy, CV_16SC1, 0, 1, ksize ); - // Get dimensions - int H = src.rows, - W = src.cols, - N = H * W; - - // Get raw pointers to input/output data - uchar* p_src = src.ptr(0); - short* p_dx = dx.ptr(0); - short* p_dy = dy.ptr(0); + const int H = src.rows, + W = src.cols; // Row, column indices int i, j; - /* NOTE: - * - * Sobel-x: -1 0 1 - * -2 0 2 - * -1 0 1 - * - * Sobel-y: -1 -2 -1 - * 0 0 0 - * 1 2 1 - */ + // Store pointers to rows of input/output data + // Padded by two rows for border handling + uchar* P_src[H+2]; + short* P_dx [H+2]; + short* P_dy [H+2]; - // No-SSE - int idx; + int i_top = 0, // Case for H == 1 && W == 1 && BORDER_REPLICATE + i_bottom = H - 1, + j_offl = 0, // j offset from 0th pixel to reach -1st pixel + j_offr = 0; // j offset from W-1th pixel to reach Wth pixel - p_dx[0] = 0; // Top-left corner - p_dy[0] = 0; - p_dx[W-1] = 0; // Top-right corner - p_dy[W-1] = 0; - p_dx[N-1] = 0; // Bottom-right corner - p_dy[N-1] = 0; - p_dx[N-W] = 0; // Bottom-left corner - p_dy[N-W] = 0; - - // Handle special case: column matrix - if ( W == 1 ) + if ( borderType == BORDER_DEFAULT ) // Equiv. to BORDER_REFLECT_101 { - for ( i = 1; i < H - 1; i++ ) + if ( H > 1 ) { - p_dx[i] = 0; - p_dy[i] = 4*(p_src[i + 1] - p_src[i - 1]); // Should be 2?! 
4 makes tests pass + i_top = 1; + i_bottom = H - 2; } - return; - } - - // Handle special case: row matrix - if ( H == 1 ) - { - for ( j = 1; j < W - 1; j++ ) + if ( W > 1 ) { - p_dx[j] = 4*(p_src[j + 1] - p_src[j - 1]); // Should be 2?! 4 makes tests pass - p_dy[j] = 0; + j_offl = 1; + j_offr = -1; } - return; } - // Do top row - for ( j = 1; j < W - 1; j++ ) + P_src[0] = src.ptr(i_top); // Mirrored top border + P_src[H+1] = src.ptr(i_bottom); // Mirrored bottom border + + for ( i = 0; i < H; i++ ) { - idx = j; - p_dx[idx] = -(p_src[idx+W-1] + 2*p_src[idx-1] + p_src[idx+W-1]) + - (p_src[idx+W+1] + 2*p_src[idx+1] + p_src[idx+W+1]); - p_dy[idx] = 0; + P_src[i+1] = src.ptr(i); + P_dx [i] = dx.ptr(i); + P_dy [i] = dy.ptr(i); } - // Do right column - idx = 2*W - 1; - for ( i = 1; i < H - 1; i++ ) - { - p_dx[idx] = 0; - p_dy[idx] = -(p_src[idx-W-1] + 2*p_src[idx-W] + p_src[idx-W-1]) + - (p_src[idx+W-1] + 2*p_src[idx+W] + p_src[idx+W-1]); - idx += W; - } + // Pointer to row vectors + uchar *p_src, *c_src, *n_src; // previous, current, next row + short *c_dx, *c_dy; - // Do bottom row - idx = N - W + 1; - for ( j = 1; j < W - 1; j++ ) - { - p_dx[idx] = -(p_src[idx-W-1] + 2*p_src[idx-1] + p_src[idx-W-1]) + - (p_src[idx-W+1] + 2*p_src[idx+1] + p_src[idx-W+1]); - p_dy[idx] = 0; - idx++; - } - - // Do left column - idx = W; - for ( i = 1; i < H - 1; i++ ) - { - p_dx[idx] = 0; - p_dy[idx] = -(p_src[idx-W+1] + 2*p_src[idx-W] + p_src[idx-W+1]) + - (p_src[idx+W+1] + 2*p_src[idx+W] + p_src[idx+W+1]); - idx += W; - } - - // Do Inner area + int j_start = 0; +/* #if CV_SIMD128 // Characters in variable names have the following meanings: // u: unsigned char @@ -260,16 +205,39 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksi } } #else - for ( i = 1; i < H - 1; i++ ) - for ( j = 1; j < W - 1; j++ ) +*/ + + /* NOTE: + * + * Sobel-x: -1 0 1 + * -2 0 2 + * -1 0 1 + * + * Sobel-y: -1 -2 -1 + * 0 0 0 + * 1 2 1 + */ + int j_p, j_n; + for ( i = 0; i < H; i++ 
) { - idx = i*W + j; - p_dx[idx] = -(p_src[idx-W-1] + 2*p_src[idx-1] + p_src[idx+W-1]) + - (p_src[idx-W+1] + 2*p_src[idx+1] + p_src[idx+W+1]); - p_dy[idx] = -(p_src[idx-W-1] + 2*p_src[idx-W] + p_src[idx-W+1]) + - (p_src[idx+W-1] + 2*p_src[idx+W] + p_src[idx+W+1]); + p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; + c_dx = P_dx [i]; + c_dy = P_dy [i]; + + for ( j = j_start; j < W; j++ ) + { + j_p = j - 1; + j_n = j + 1; + if ( j_p < 0 ) j_p = j + j_offl; + if ( j_n >= W ) j_n = j + j_offr; + + c_dx[j] = -(p_src[j_p] + c_src[j_p] + c_src[j_p] + n_src[j_p]) + + (p_src[j_n] + c_src[j_n] + c_src[j_n] + n_src[j_n]); + c_dy[j] = -(p_src[j_p] + p_src[j] + p_src[j] + p_src[j_n]) + + (n_src[j_p] + n_src[j] + n_src[j] + n_src[j_n]); + } } -#endif +//#endif } diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp index 0c98ed35c5..5994b1b11b 100644 --- a/modules/imgproc/test/test_filter.cpp +++ b/modules/imgproc/test/test_filter.cpp @@ -587,19 +587,17 @@ void CV_SpatialGradientTest::get_test_array_types_and_sizes( int test_case_idx, // Outputs are only CV_16SC1 for now types[OUTPUT][0] = types[OUTPUT][1] = types[REF_OUTPUT][0] - = types[REF_OUTPUT][1] = CV_16SC1; + = types[REF_OUTPUT][1] = CV_16SC1; ksize = 3; + border = BORDER_DEFAULT; // TODO: Add BORDER_REPLICATE } void CV_SpatialGradientTest::run_func() { - Mat dx, dy; - spatialGradient( test_mat[INPUT][0].clone(), dx, dy, ksize ); - - test_mat[OUTPUT][0] = dx; - test_mat[OUTPUT][1] = dy; + spatialGradient( test_mat[INPUT][0], test_mat[OUTPUT][0], + test_mat[OUTPUT][1], ksize, border ); } void CV_SpatialGradientTest::prepare_to_validation( int /*test_case_idx*/ ) @@ -607,10 +605,12 @@ void CV_SpatialGradientTest::prepare_to_validation( int /*test_case_idx*/ ) int dx, dy; dx = 1; dy = 0; - Sobel( test_mat[INPUT][0], test_mat[REF_OUTPUT][0], CV_16SC1, dx, dy, ksize ); + Sobel( test_mat[INPUT][0], test_mat[REF_OUTPUT][0], CV_16SC1, dx, dy, ksize, + 1, 0, border ); dx = 0; dy = 1; - 
Sobel( test_mat[INPUT][0], test_mat[REF_OUTPUT][1], CV_16SC1, dx, dy, ksize ); + Sobel( test_mat[INPUT][0], test_mat[REF_OUTPUT][1], CV_16SC1, dx, dy, ksize, + 1, 0, border ); } From 7b01e32fe8aee1a28e249e377689cd662305614a Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 26 Jun 2015 16:41:00 +0200 Subject: [PATCH 12/25] spatialGradient: HAL-accelerated TODO: Make nosse ver faster than Sobel Make sse ver faster than Sobel for BORDER_REPLICATE --- modules/imgproc/perf/perf_spatialgradient.cpp | 2 +- modules/imgproc/src/spatialgradient.cpp | 189 ++++++++++++------ 2 files changed, 128 insertions(+), 63 deletions(-) diff --git a/modules/imgproc/perf/perf_spatialgradient.cpp b/modules/imgproc/perf/perf_spatialgradient.cpp index 84d41e1dc2..0f9479abd9 100644 --- a/modules/imgproc/perf/perf_spatialgradient.cpp +++ b/modules/imgproc/perf/perf_spatialgradient.cpp @@ -14,7 +14,7 @@ PERF_TEST_P( Size_Ksize_BorderType, spatialGradient, Combine( SZ_ALL_HD, Values( 3 ), - Values( BORDER_DEFAULT ) + Values( BORDER_DEFAULT, BORDER_REPLICATE ) ) ) { diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index ac209af17c..056adc5752 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -111,8 +111,8 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, uchar *p_src, *c_src, *n_src; // previous, current, next row short *c_dx, *c_dy; + int i_start = 0; int j_start = 0; -/* #if CV_SIMD128 // Characters in variable names have the following meanings: // u: unsigned char @@ -123,90 +123,156 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, // n: offset 0 // p: offset 1 // Example: umn is offset -1 in row and offset 0 in column + uchar tmp; v_uint8x16 v_um, v_un, v_up; v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2; - v_int16x8 v_smm1, v_smm2, v_smn1, v_smn2, v_smp1, v_smp2, - v_snm1, v_snm2, v_snn1, v_snn2, v_snp1, v_snp2, - v_spm1, v_spm2, 
v_spn1, v_spn2, v_spp1, v_spp2, - v_two = v_setall_s16(2), - v_sdx1, v_sdx2, v_sdy1, v_sdy2; + v_int16x8 v_s1m1, v_s1m2, v_s1n1, v_s1n2, v_s1p1, v_s1p2, + v_s2m1, v_s2m2, v_s2n1, v_s2n2, v_s2p1, v_s2p2, + v_s3m1, v_s3m2, v_s3n1, v_s3n2, v_s3p1, v_s3p2, + v_s4m1, v_s4m2, v_s4n1, v_s4n2, v_s4p1, v_s4p2, + v_tmp, v_sdx1, v_sdx2, v_sdy1, v_sdy2; - for ( i = 1; i < H - 1; i++ ) + uchar *m_src; + short *c_dx1, *c_dx2, *c_dy1, *c_dy2; + for ( i = 0; i < H - 2; i += 2 ) { + p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; m_src = P_src[i+3]; + c_dx1 = P_dx [i]; + c_dy1 = P_dy [i]; + c_dx2 = P_dx [i+1]; + c_dy2 = P_dy [i+1]; + // 16-column chunks at a time - for ( j = 1; j < W - 1 - 15; j += 16 ) + for ( j = 0; j < W - 15; j += 16 ) { + bool left = false, right = false; + if ( j == 0 ) left = true; + if ( j == W - 16 ) right = true; + // Load top row for 3x3 Sobel filter - idx = i*W + j; - v_um = v_load(&p_src[idx - W - 1]); - v_un = v_load(&p_src[idx - W]); - v_up = v_load(&p_src[idx - W + 1]); + if ( left ) { tmp = p_src[j-1]; p_src[j-1] = p_src[j+j_offl]; } + v_um = v_load(&p_src[j-1]); + if ( left ) p_src[j-1] = tmp; + + v_un = v_load(&p_src[j]); + + if ( right ) { tmp = p_src[j+16]; p_src[j+16] = p_src[j+15+j_offr]; } + v_up = v_load(&p_src[j+1]); + if ( right ) p_src[j+16] = tmp; + v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); - v_smm1 = v_reinterpret_as_s16(v_um1); - v_smm2 = v_reinterpret_as_s16(v_um2); - v_smn1 = v_reinterpret_as_s16(v_un1); - v_smn2 = v_reinterpret_as_s16(v_un2); - v_smp1 = v_reinterpret_as_s16(v_up1); - v_smp2 = v_reinterpret_as_s16(v_up2); + v_s1m1 = v_reinterpret_as_s16(v_um1); + v_s1m2 = v_reinterpret_as_s16(v_um2); + v_s1n1 = v_reinterpret_as_s16(v_un1); + v_s1n2 = v_reinterpret_as_s16(v_un2); + v_s1p1 = v_reinterpret_as_s16(v_up1); + v_s1p2 = v_reinterpret_as_s16(v_up2); // Load second row for 3x3 Sobel filter - v_um = v_load(&p_src[idx - 1]); - v_un = v_load(&p_src[idx]); - v_up = 
v_load(&p_src[idx + 1]); - v_expand(v_um, v_um1, v_um2); - v_expand(v_un, v_un1, v_un2); - v_expand(v_up, v_up1, v_up2); - v_snm1 = v_reinterpret_as_s16(v_um1); - v_snm2 = v_reinterpret_as_s16(v_um2); - v_snn1 = v_reinterpret_as_s16(v_un1); - v_snn2 = v_reinterpret_as_s16(v_un2); - v_snp1 = v_reinterpret_as_s16(v_up1); - v_snp2 = v_reinterpret_as_s16(v_up2); + if ( left ) { tmp = c_src[j-1]; c_src[j-1] = c_src[j+j_offl]; } + v_um = v_load(&c_src[j-1]); + if ( left ) c_src[j-1] = tmp; + + v_un = v_load(&c_src[j]); + + if ( right ) { tmp = c_src[j+16]; c_src[j+16] = c_src[j+15+j_offr]; } + v_up = v_load(&c_src[j+1]); + if ( right ) c_src[j+16] = tmp; - // Load last row for 3x3 Sobel filter - v_um = v_load(&p_src[idx + W - 1]); - v_un = v_load(&p_src[idx + W]); - v_up = v_load(&p_src[idx + W + 1]); v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); - v_spm1 = v_reinterpret_as_s16(v_um1); - v_spm2 = v_reinterpret_as_s16(v_um2); - v_spn1 = v_reinterpret_as_s16(v_un1); - v_spn2 = v_reinterpret_as_s16(v_un2); - v_spp1 = v_reinterpret_as_s16(v_up1); - v_spp2 = v_reinterpret_as_s16(v_up2); + v_s2m1 = v_reinterpret_as_s16(v_um1); + v_s2m2 = v_reinterpret_as_s16(v_um2); + v_s2n1 = v_reinterpret_as_s16(v_un1); + v_s2n2 = v_reinterpret_as_s16(v_un2); + v_s2p1 = v_reinterpret_as_s16(v_up1); + v_s2p2 = v_reinterpret_as_s16(v_up2); + + // Load third row for 3x3 Sobel filter + if ( left ) { tmp = n_src[j-1]; n_src[j-1] = n_src[j+j_offl]; } + v_um = v_load(&n_src[j-1]); + if ( left ) n_src[j-1] = tmp; + + v_un = v_load(&n_src[j]); + + if ( right ) { tmp = n_src[j+16]; n_src[j+16] = n_src[j+15+j_offr]; } + v_up = v_load(&n_src[j+1]); + if ( right ) n_src[j+16] = tmp; + + v_expand(v_um, v_um1, v_um2); + v_expand(v_un, v_un1, v_un2); + v_expand(v_up, v_up1, v_up2); + v_s3m1 = v_reinterpret_as_s16(v_um1); + v_s3m2 = v_reinterpret_as_s16(v_um2); + v_s3n1 = v_reinterpret_as_s16(v_un1); + v_s3n2 = v_reinterpret_as_s16(v_un2); + v_s3p1 = 
v_reinterpret_as_s16(v_up1); + v_s3p2 = v_reinterpret_as_s16(v_up2); + + // Load fourth row for 3x3 Sobel filter + if ( left ) { tmp = m_src[j-1]; m_src[j-1] = m_src[j+j_offl]; } + v_um = v_load(&m_src[j-1]); + if ( left ) m_src[j-1] = tmp; + + v_un = v_load(&m_src[j]); + + if ( right ) { tmp = m_src[j+16]; m_src[j+16] = m_src[j+15+j_offr]; } + v_up = v_load(&m_src[j+1]); + if ( right ) m_src[j+16] = tmp; + + v_expand(v_um, v_um1, v_um2); + v_expand(v_un, v_un1, v_un2); + v_expand(v_up, v_up1, v_up2); + v_s4m1 = v_reinterpret_as_s16(v_um1); + v_s4m2 = v_reinterpret_as_s16(v_um2); + v_s4n1 = v_reinterpret_as_s16(v_un1); + v_s4n2 = v_reinterpret_as_s16(v_un2); + v_s4p1 = v_reinterpret_as_s16(v_up1); + v_s4p2 = v_reinterpret_as_s16(v_up2); // dx - v_sdx1 = (v_smp1 - v_smm1) + v_two*(v_snp1 - v_snm1) + (v_spp1 - v_spm1); - v_sdx2 = (v_smp2 - v_smm2) + v_two*(v_snp2 - v_snm2) + (v_spp2 - v_spm2); + v_tmp = v_s2p1 - v_s2m1; + v_sdx1 = (v_s1p1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s3m1); + v_tmp = v_s2p2 - v_s2m2; + v_sdx2 = (v_s1p2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s3m2); // dy - v_sdy1 = (v_spm1 - v_smm1) + v_two*(v_spn1 - v_smn1) + (v_spp1 - v_smp1); - v_sdy2 = (v_spm2 - v_smm2) + v_two*(v_spn2 - v_smn2) + (v_spp2 - v_smp2); + v_tmp = v_s3n1 - v_s1n1; + v_sdy1 = (v_s3m1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s1p1); + v_tmp = v_s3n2 - v_s1n2; + v_sdy2 = (v_s3m2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s1p2); // Store - v_store(&p_dx[idx], v_sdx1); - v_store(&p_dx[idx+8], v_sdx2); - v_store(&p_dy[idx], v_sdy1); - v_store(&p_dy[idx+8], v_sdy2); - } + v_store(&c_dx1[j], v_sdx1); + v_store(&c_dx1[j+8], v_sdx2); + v_store(&c_dy1[j], v_sdy1); + v_store(&c_dy1[j+8], v_sdy2); - // Cleanup - for ( ; j < W - 1; j++ ) - { - idx = i*W + j; - p_dx[idx] = -(p_src[idx-W-1] + 2*p_src[idx-1] + p_src[idx+W-1]) + - (p_src[idx-W+1] + 2*p_src[idx+1] + p_src[idx+W+1]); - p_dy[idx] = -(p_src[idx-W-1] + 2*p_src[idx-W] + p_src[idx-W+1]) + - (p_src[idx+W-1] + 2*p_src[idx+W] + 
p_src[idx+W+1]); + // dx + v_tmp = v_s3p1 - v_s3m1; + v_sdx1 = (v_s2p1 - v_s2m1) + (v_tmp + v_tmp) + (v_s4p1 - v_s4m1); + v_tmp = v_s3p2 - v_s3m2; + v_sdx2 = (v_s2p2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s4m2); + + // dy + v_tmp = v_s4n1 - v_s2n1; + v_sdy1 = (v_s4m1 - v_s2m1) + (v_tmp + v_tmp) + (v_s4p1 - v_s2p1); + v_tmp = v_s4n2 - v_s2n2; + v_sdy2 = (v_s4m2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s2p2); + + // Store + v_store(&c_dx2[j], v_sdx1); + v_store(&c_dx2[j+8], v_sdx2); + v_store(&c_dy2[j], v_sdy1); + v_store(&c_dy2[j+8], v_sdy2); } } -#else -*/ - + i_start = i; + j_start = j; +#endif /* NOTE: * * Sobel-x: -1 0 1 @@ -224,7 +290,7 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, c_dx = P_dx [i]; c_dy = P_dy [i]; - for ( j = j_start; j < W; j++ ) + for ( j = i >= i_start ? 0 : j_start; j < W; j++ ) { j_p = j - 1; j_n = j + 1; @@ -237,7 +303,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, (n_src[j_p] + n_src[j] + n_src[j] + n_src[j_n]); } } -//#endif } From f92e2ed57a0d08d697784c7b0efb4b82f8d44a27 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 26 Jun 2015 17:13:23 +0200 Subject: [PATCH 13/25] spatialGradient: Make nosse version faster --- modules/imgproc/src/spatialgradient.cpp | 26 ++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 056adc5752..644f1a3043 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -284,23 +284,39 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, * 1 2 1 */ int j_p, j_n; + uchar v00, v01, v02, v10, v11, v12, v20, v21, v22; for ( i = 0; i < H; i++ ) { p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; c_dx = P_dx [i]; c_dy = P_dy [i]; - for ( j = i >= i_start ? 0 : j_start; j < W; j++ ) + // Pre-load 2 columns + j = i >= i_start ? 
0 : j_start; + j_p = j - 1; + if ( j_p < 0 ) j_p = j + j_offl; + v00 = p_src[j_p]; v01 = p_src[j]; + v10 = c_src[j_p]; v11 = c_src[j]; + v20 = n_src[j_p]; v21 = n_src[j]; + + for ( ; j < W; j++ ) { j_p = j - 1; j_n = j + 1; if ( j_p < 0 ) j_p = j + j_offl; if ( j_n >= W ) j_n = j + j_offr; - c_dx[j] = -(p_src[j_p] + c_src[j_p] + c_src[j_p] + n_src[j_p]) + - (p_src[j_n] + c_src[j_n] + c_src[j_n] + n_src[j_n]); - c_dy[j] = -(p_src[j_p] + p_src[j] + p_src[j] + p_src[j_n]) + - (n_src[j_p] + n_src[j] + n_src[j] + n_src[j_n]); + // Get values for next column + v02 = p_src[j_n]; + v12 = c_src[j_n]; + v22 = n_src[j_n]; + + c_dx[j] = -(v00 + v10 + v10 + v20) + (v02 + v12 + v12 + v22); + c_dy[j] = -(v00 + v01 + v01 + v02) + (v20 + v21 + v21 + v22); + + // Move values back one column for next iteration + v00 = v01; v10 = v11; v20 = v21; + v01 = v02; v11 = v12; v21 = v22; } } From febd0f14c692fc381ee60cab070d589a60b0bfdb Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 26 Jun 2015 17:15:44 +0200 Subject: [PATCH 14/25] spatialGradient: Don't dynamically alloc C array, use vector --- modules/imgproc/src/spatialgradient.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 644f1a3043..9a33cac33d 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -74,9 +74,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, // Store pointers to rows of input/output data // Padded by two rows for border handling - uchar* P_src[H+2]; - short* P_dx [H+2]; - short* P_dy [H+2]; + std::vector P_src(H+2); + std::vector P_dx (H+2); + std::vector P_dy (H+2); int i_top = 0, // Case for H == 1 && W == 1 && BORDER_REPLICATE i_bottom = H - 1, From 8a21726ae5943421733b5b0bc6caa52f7f9bc7cc Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 26 Jun 2015 17:26:21 +0200 Subject: [PATCH 15/25] spatialGradient: Remove an 
unnecessary branch in nosse code --- modules/imgproc/src/spatialgradient.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 9a33cac33d..24e8571b00 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -301,9 +301,7 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, for ( ; j < W; j++ ) { - j_p = j - 1; j_n = j + 1; - if ( j_p < 0 ) j_p = j + j_offl; if ( j_n >= W ) j_n = j + j_offr; // Get values for next column From 62cad09c64389581131ebe83d714b0c7be55c4ce Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Fri, 26 Jun 2015 17:35:17 +0200 Subject: [PATCH 16/25] spatialGradient: Process 1 row at a time in SSE --- modules/imgproc/src/spatialgradient.cpp | 68 +++++-------------------- 1 file changed, 12 insertions(+), 56 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 24e8571b00..f33c28d819 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -129,18 +129,13 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_int16x8 v_s1m1, v_s1m2, v_s1n1, v_s1n2, v_s1p1, v_s1p2, v_s2m1, v_s2m2, v_s2n1, v_s2n2, v_s2p1, v_s2p2, v_s3m1, v_s3m2, v_s3n1, v_s3n2, v_s3p1, v_s3p2, - v_s4m1, v_s4m2, v_s4n1, v_s4n2, v_s4p1, v_s4p2, - v_tmp, v_sdx1, v_sdx2, v_sdy1, v_sdy2; + v_tmp, v_sdx, v_sdy; - uchar *m_src; - short *c_dx1, *c_dx2, *c_dy1, *c_dy2; for ( i = 0; i < H - 2; i += 2 ) { - p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; m_src = P_src[i+3]; - c_dx1 = P_dx [i]; - c_dy1 = P_dy [i]; - c_dx2 = P_dx [i+1]; - c_dy2 = P_dy [i+1]; + p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; + c_dx = P_dx [i]; + c_dy = P_dy [i]; // 16-column chunks at a time for ( j = 0; j < W - 15; j += 16 ) @@ -212,62 +207,23 @@ void spatialGradient( InputArray _src, OutputArray _dx, 
OutputArray _dy, v_s3p1 = v_reinterpret_as_s16(v_up1); v_s3p2 = v_reinterpret_as_s16(v_up2); - // Load fourth row for 3x3 Sobel filter - if ( left ) { tmp = m_src[j-1]; m_src[j-1] = m_src[j+j_offl]; } - v_um = v_load(&m_src[j-1]); - if ( left ) m_src[j-1] = tmp; - - v_un = v_load(&m_src[j]); - - if ( right ) { tmp = m_src[j+16]; m_src[j+16] = m_src[j+15+j_offr]; } - v_up = v_load(&m_src[j+1]); - if ( right ) m_src[j+16] = tmp; - - v_expand(v_um, v_um1, v_um2); - v_expand(v_un, v_un1, v_un2); - v_expand(v_up, v_up1, v_up2); - v_s4m1 = v_reinterpret_as_s16(v_um1); - v_s4m2 = v_reinterpret_as_s16(v_um2); - v_s4n1 = v_reinterpret_as_s16(v_un1); - v_s4n2 = v_reinterpret_as_s16(v_un2); - v_s4p1 = v_reinterpret_as_s16(v_up1); - v_s4p2 = v_reinterpret_as_s16(v_up2); - // dx v_tmp = v_s2p1 - v_s2m1; - v_sdx1 = (v_s1p1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s3m1); + v_sdx = (v_s1p1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s3m1); v_tmp = v_s2p2 - v_s2m2; - v_sdx2 = (v_s1p2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s3m2); + v_sdx = (v_s1p2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s3m2); // dy v_tmp = v_s3n1 - v_s1n1; - v_sdy1 = (v_s3m1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s1p1); + v_sdy = (v_s3m1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s1p1); v_tmp = v_s3n2 - v_s1n2; - v_sdy2 = (v_s3m2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s1p2); + v_sdy = (v_s3m2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s1p2); // Store - v_store(&c_dx1[j], v_sdx1); - v_store(&c_dx1[j+8], v_sdx2); - v_store(&c_dy1[j], v_sdy1); - v_store(&c_dy1[j+8], v_sdy2); - - // dx - v_tmp = v_s3p1 - v_s3m1; - v_sdx1 = (v_s2p1 - v_s2m1) + (v_tmp + v_tmp) + (v_s4p1 - v_s4m1); - v_tmp = v_s3p2 - v_s3m2; - v_sdx2 = (v_s2p2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s4m2); - - // dy - v_tmp = v_s4n1 - v_s2n1; - v_sdy1 = (v_s4m1 - v_s2m1) + (v_tmp + v_tmp) + (v_s4p1 - v_s2p1); - v_tmp = v_s4n2 - v_s2n2; - v_sdy2 = (v_s4m2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s2p2); - - // Store - v_store(&c_dx2[j], v_sdx1); 
- v_store(&c_dx2[j+8], v_sdx2); - v_store(&c_dy2[j], v_sdy1); - v_store(&c_dy2[j+8], v_sdy2); + v_store(&c_dx[j], v_sdx); + v_store(&c_dx[j+8], v_sdx); + v_store(&c_dy[j], v_sdy); + v_store(&c_dy[j+8], v_sdy); } } i_start = i; From f958f29c55723b8f3e160ba9ddb802b22d08d2ac Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Sat, 27 Jun 2015 01:03:43 +0200 Subject: [PATCH 17/25] spatialGradient: Suppress uninitialised j warnings --- modules/imgproc/src/spatialgradient.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index f33c28d819..5119fa08ff 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -70,7 +70,8 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, W = src.cols; // Row, column indices - int i, j; + int i = 0, + j = 0; // Store pointers to rows of input/output data // Padded by two rows for border handling From db0cc56c52e09747d6f97c490a47be04bd090d98 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Sat, 27 Jun 2015 09:53:42 +0200 Subject: [PATCH 18/25] spatialGradient: Re-introduce 2-rows at a time --- modules/imgproc/src/spatialgradient.cpp | 68 ++++++++++++++++++++----- 1 file changed, 55 insertions(+), 13 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 5119fa08ff..986178246c 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -130,13 +130,16 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_int16x8 v_s1m1, v_s1m2, v_s1n1, v_s1n2, v_s1p1, v_s1p2, v_s2m1, v_s2m2, v_s2n1, v_s2n2, v_s2p1, v_s2p2, v_s3m1, v_s3m2, v_s3n1, v_s3n2, v_s3p1, v_s3p2, - v_tmp, v_sdx, v_sdy; + v_s4m1, v_s4m2, v_s4n1, v_s4n2, v_s4p1, v_s4p2, + v_tmp, v_sdx1, v_sdx2, v_sdy1, v_sdy2; - for ( i = 0; i < H - 2; i += 2 ) + uchar *m_src; + short *n_dx, *n_dy; + + for ( i 
= 0; i < H - 1; i += 2 ) { - p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; - c_dx = P_dx [i]; - c_dy = P_dy [i]; + p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; m_src = P_src[i+3]; + c_dx = P_dx[i]; c_dy = P_dy[i]; n_dx = P_dx[i+1]; n_dy = P_dy[i+1]; // 16-column chunks at a time for ( j = 0; j < W - 15; j += 16 ) @@ -208,23 +211,62 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_s3p1 = v_reinterpret_as_s16(v_up1); v_s3p2 = v_reinterpret_as_s16(v_up2); + // Load fourth row for 3x3 Sobel filter + if ( left ) { tmp = m_src[j-1]; m_src[j-1] = m_src[j+j_offl]; } + v_um = v_load(&m_src[j-1]); + if ( left ) m_src[j-1] = tmp; + + v_un = v_load(&m_src[j]); + + if ( right ) { tmp = m_src[j+16]; m_src[j+16] = m_src[j+15+j_offr]; } + v_up = v_load(&m_src[j+1]); + if ( right ) m_src[j+16] = tmp; + + v_expand(v_um, v_um1, v_um2); + v_expand(v_un, v_un1, v_un2); + v_expand(v_up, v_up1, v_up2); + v_s4m1 = v_reinterpret_as_s16(v_um1); + v_s4m2 = v_reinterpret_as_s16(v_um2); + v_s4n1 = v_reinterpret_as_s16(v_un1); + v_s4n2 = v_reinterpret_as_s16(v_un2); + v_s4p1 = v_reinterpret_as_s16(v_up1); + v_s4p2 = v_reinterpret_as_s16(v_up2); + // dx v_tmp = v_s2p1 - v_s2m1; - v_sdx = (v_s1p1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s3m1); + v_sdx1 = (v_s1p1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s3m1); v_tmp = v_s2p2 - v_s2m2; - v_sdx = (v_s1p2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s3m2); + v_sdx2 = (v_s1p2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s3m2); // dy v_tmp = v_s3n1 - v_s1n1; - v_sdy = (v_s3m1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s1p1); + v_sdy1 = (v_s3m1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s1p1); v_tmp = v_s3n2 - v_s1n2; - v_sdy = (v_s3m2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s1p2); + v_sdy2 = (v_s3m2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s1p2); // Store - v_store(&c_dx[j], v_sdx); - v_store(&c_dx[j+8], v_sdx); - v_store(&c_dy[j], v_sdy); - v_store(&c_dy[j+8], v_sdy); + v_store(&c_dx[j], v_sdx1); + 
v_store(&c_dx[j+8], v_sdx2); + v_store(&c_dy[j], v_sdy1); + v_store(&c_dy[j+8], v_sdy2); + + // dx + v_tmp = v_s3p1 - v_s3m1; + v_sdx1 = (v_s2p1 - v_s2m1) + (v_tmp + v_tmp) + (v_s4p1 - v_s4m1); + v_tmp = v_s3p2 - v_s3m2; + v_sdx2 = (v_s2p2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s4m2); + + // dy + v_tmp = v_s4n1 - v_s2n1; + v_sdy1 = (v_s4m1 - v_s2m1) + (v_tmp + v_tmp) + (v_s4p1 - v_s2p1); + v_tmp = v_s4n2 - v_s2n2; + v_sdy2 = (v_s4m2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s2p2); + + // Store + v_store(&n_dx[j], v_sdx1); + v_store(&n_dx[j+8], v_sdx2); + v_store(&n_dy[j], v_sdy1); + v_store(&n_dy[j+8], v_sdy2); } } i_start = i; From 15ea4010877418c2648406ec63f93d560c708926 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Sat, 27 Jun 2015 10:01:47 +0200 Subject: [PATCH 19/25] spatialGradient: Move vector decl into loop --- modules/imgproc/src/spatialgradient.cpp | 81 +++++++++++-------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 986178246c..ad8f6dd617 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -115,6 +115,10 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int i_start = 0; int j_start = 0; #if CV_SIMD128 + uchar tmp; + uchar *m_src; + short *n_dx, *n_dy; + // Characters in variable names have the following meanings: // u: unsigned char // s: signed int @@ -124,18 +128,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, // n: offset 0 // p: offset 1 // Example: umn is offset -1 in row and offset 0 in column - uchar tmp; - v_uint8x16 v_um, v_un, v_up; - v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2; - v_int16x8 v_s1m1, v_s1m2, v_s1n1, v_s1n2, v_s1p1, v_s1p2, - v_s2m1, v_s2m2, v_s2n1, v_s2n2, v_s2p1, v_s2p2, - v_s3m1, v_s3m2, v_s3n1, v_s3n2, v_s3p1, v_s3p2, - v_s4m1, v_s4m2, v_s4n1, v_s4n2, v_s4p1, v_s4p2, - v_tmp, v_sdx1, v_sdx2, 
v_sdy1, v_sdy2; - - uchar *m_src; - short *n_dx, *n_dy; - for ( i = 0; i < H - 1; i += 2 ) { p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; m_src = P_src[i+3]; @@ -150,24 +142,25 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, // Load top row for 3x3 Sobel filter if ( left ) { tmp = p_src[j-1]; p_src[j-1] = p_src[j+j_offl]; } - v_um = v_load(&p_src[j-1]); + v_uint8x16 v_um = v_load(&p_src[j-1]); if ( left ) p_src[j-1] = tmp; - v_un = v_load(&p_src[j]); + v_uint8x16 v_un = v_load(&p_src[j]); if ( right ) { tmp = p_src[j+16]; p_src[j+16] = p_src[j+15+j_offr]; } - v_up = v_load(&p_src[j+1]); + v_uint8x16 v_up = v_load(&p_src[j+1]); if ( right ) p_src[j+16] = tmp; + v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2; v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); - v_s1m1 = v_reinterpret_as_s16(v_um1); - v_s1m2 = v_reinterpret_as_s16(v_um2); - v_s1n1 = v_reinterpret_as_s16(v_un1); - v_s1n2 = v_reinterpret_as_s16(v_un2); - v_s1p1 = v_reinterpret_as_s16(v_up1); - v_s1p2 = v_reinterpret_as_s16(v_up2); + v_int16x8 v_s1m1 = v_reinterpret_as_s16(v_um1); + v_int16x8 v_s1m2 = v_reinterpret_as_s16(v_um2); + v_int16x8 v_s1n1 = v_reinterpret_as_s16(v_un1); + v_int16x8 v_s1n2 = v_reinterpret_as_s16(v_un2); + v_int16x8 v_s1p1 = v_reinterpret_as_s16(v_up1); + v_int16x8 v_s1p2 = v_reinterpret_as_s16(v_up2); // Load second row for 3x3 Sobel filter if ( left ) { tmp = c_src[j-1]; c_src[j-1] = c_src[j+j_offl]; } @@ -183,12 +176,12 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); - v_s2m1 = v_reinterpret_as_s16(v_um1); - v_s2m2 = v_reinterpret_as_s16(v_um2); - v_s2n1 = v_reinterpret_as_s16(v_un1); - v_s2n2 = v_reinterpret_as_s16(v_un2); - v_s2p1 = v_reinterpret_as_s16(v_up1); - v_s2p2 = v_reinterpret_as_s16(v_up2); + v_int16x8 v_s2m1 = v_reinterpret_as_s16(v_um1); + v_int16x8 v_s2m2 = 
v_reinterpret_as_s16(v_um2); + v_int16x8 v_s2n1 = v_reinterpret_as_s16(v_un1); + v_int16x8 v_s2n2 = v_reinterpret_as_s16(v_un2); + v_int16x8 v_s2p1 = v_reinterpret_as_s16(v_up1); + v_int16x8 v_s2p2 = v_reinterpret_as_s16(v_up2); // Load third row for 3x3 Sobel filter if ( left ) { tmp = n_src[j-1]; n_src[j-1] = n_src[j+j_offl]; } @@ -204,12 +197,12 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); - v_s3m1 = v_reinterpret_as_s16(v_um1); - v_s3m2 = v_reinterpret_as_s16(v_um2); - v_s3n1 = v_reinterpret_as_s16(v_un1); - v_s3n2 = v_reinterpret_as_s16(v_un2); - v_s3p1 = v_reinterpret_as_s16(v_up1); - v_s3p2 = v_reinterpret_as_s16(v_up2); + v_int16x8 v_s3m1 = v_reinterpret_as_s16(v_um1); + v_int16x8 v_s3m2 = v_reinterpret_as_s16(v_um2); + v_int16x8 v_s3n1 = v_reinterpret_as_s16(v_un1); + v_int16x8 v_s3n2 = v_reinterpret_as_s16(v_un2); + v_int16x8 v_s3p1 = v_reinterpret_as_s16(v_up1); + v_int16x8 v_s3p2 = v_reinterpret_as_s16(v_up2); // Load fourth row for 3x3 Sobel filter if ( left ) { tmp = m_src[j-1]; m_src[j-1] = m_src[j+j_offl]; } @@ -225,24 +218,24 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); - v_s4m1 = v_reinterpret_as_s16(v_um1); - v_s4m2 = v_reinterpret_as_s16(v_um2); - v_s4n1 = v_reinterpret_as_s16(v_un1); - v_s4n2 = v_reinterpret_as_s16(v_un2); - v_s4p1 = v_reinterpret_as_s16(v_up1); - v_s4p2 = v_reinterpret_as_s16(v_up2); + v_int16x8 v_s4m1 = v_reinterpret_as_s16(v_um1); + v_int16x8 v_s4m2 = v_reinterpret_as_s16(v_um2); + v_int16x8 v_s4n1 = v_reinterpret_as_s16(v_un1); + v_int16x8 v_s4n2 = v_reinterpret_as_s16(v_un2); + v_int16x8 v_s4p1 = v_reinterpret_as_s16(v_up1); + v_int16x8 v_s4p2 = v_reinterpret_as_s16(v_up2); // dx - v_tmp = v_s2p1 - v_s2m1; - v_sdx1 = (v_s1p1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s3m1); + v_int16x8 
v_tmp = v_s2p1 - v_s2m1; + v_int16x8 v_sdx1 = (v_s1p1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s3m1); v_tmp = v_s2p2 - v_s2m2; - v_sdx2 = (v_s1p2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s3m2); + v_int16x8 v_sdx2 = (v_s1p2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s3m2); // dy v_tmp = v_s3n1 - v_s1n1; - v_sdy1 = (v_s3m1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s1p1); + v_int16x8 v_sdy1 = (v_s3m1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s1p1); v_tmp = v_s3n2 - v_s1n2; - v_sdy2 = (v_s3m2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s1p2); + v_int16x8 v_sdy2 = (v_s3m2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s1p2); // Store v_store(&c_dx[j], v_sdx1); From 658f96b447005ee592e3fd6a62e46843e3df50c4 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Mon, 29 Jun 2015 23:12:33 +0900 Subject: [PATCH 20/25] spatialGradient: L/R border handling outside. Kernelize. --- modules/imgproc/src/spatialgradient.cpp | 155 ++++++++++++------------ 1 file changed, 78 insertions(+), 77 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index ad8f6dd617..d53b4fdfc4 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -43,9 +43,38 @@ #include "precomp.hpp" #include "opencv2/hal/intrin.hpp" +#include namespace cv { +/* NOTE: + * + * Sobel-x: -1 0 1 + * -2 0 2 + * -1 0 1 + * + * Sobel-y: -1 -2 -1 + * 0 0 0 + * 1 2 1 + */ +template +static inline void spatialGradientKernel( T& vx, T& vy, + T v00, T v01, T v02, + T v10, T v12, + T v20, T v21, T v22 ) +{ + // vx = (v22 - v00) + (v02 - v20) + 2 * (v12 - v10) + // vy = (v22 - v00) + (v20 - v02) + 2 * (v21 - v01) + + T tmp_add = v22 - v00, + tmp_sub = v02 - v20, + tmp_x = v12 - v10, + tmp_y = v21 - v01; + + vx = tmp_add + tmp_sub + tmp_x + tmp_x; + vy = tmp_add - tmp_sub + tmp_y + tmp_y; +} + void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int ksize, int borderType ) { @@ -115,7 +144,6 @@ void spatialGradient( 
InputArray _src, OutputArray _dx, OutputArray _dy, int i_start = 0; int j_start = 0; #if CV_SIMD128 - uchar tmp; uchar *m_src; short *n_dx, *n_dy; @@ -133,24 +161,13 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; m_src = P_src[i+3]; c_dx = P_dx[i]; c_dy = P_dy[i]; n_dx = P_dx[i+1]; n_dy = P_dy[i+1]; - // 16-column chunks at a time - for ( j = 0; j < W - 15; j += 16 ) + // Process rest of columns 16-column chunks at a time + for ( j = 1; j < W - 16; j += 16 ) { - bool left = false, right = false; - if ( j == 0 ) left = true; - if ( j == W - 16 ) right = true; - // Load top row for 3x3 Sobel filter - if ( left ) { tmp = p_src[j-1]; p_src[j-1] = p_src[j+j_offl]; } v_uint8x16 v_um = v_load(&p_src[j-1]); - if ( left ) p_src[j-1] = tmp; - v_uint8x16 v_un = v_load(&p_src[j]); - - if ( right ) { tmp = p_src[j+16]; p_src[j+16] = p_src[j+15+j_offr]; } v_uint8x16 v_up = v_load(&p_src[j+1]); - if ( right ) p_src[j+16] = tmp; - v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2; v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); @@ -163,16 +180,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_int16x8 v_s1p2 = v_reinterpret_as_s16(v_up2); // Load second row for 3x3 Sobel filter - if ( left ) { tmp = c_src[j-1]; c_src[j-1] = c_src[j+j_offl]; } v_um = v_load(&c_src[j-1]); - if ( left ) c_src[j-1] = tmp; - v_un = v_load(&c_src[j]); - - if ( right ) { tmp = c_src[j+16]; c_src[j+16] = c_src[j+15+j_offr]; } v_up = v_load(&c_src[j+1]); - if ( right ) c_src[j+16] = tmp; - v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); @@ -184,16 +194,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_int16x8 v_s2p2 = v_reinterpret_as_s16(v_up2); // Load third row for 3x3 Sobel filter - if ( left ) { tmp = n_src[j-1]; n_src[j-1] = n_src[j+j_offl]; } v_um = v_load(&n_src[j-1]); - if ( left ) n_src[j-1] = tmp; 
- v_un = v_load(&n_src[j]); - - if ( right ) { tmp = n_src[j+16]; n_src[j+16] = n_src[j+15+j_offr]; } v_up = v_load(&n_src[j+1]); - if ( right ) n_src[j+16] = tmp; - v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); @@ -205,15 +208,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_int16x8 v_s3p2 = v_reinterpret_as_s16(v_up2); // Load fourth row for 3x3 Sobel filter - if ( left ) { tmp = m_src[j-1]; m_src[j-1] = m_src[j+j_offl]; } v_um = v_load(&m_src[j-1]); - if ( left ) m_src[j-1] = tmp; - v_un = v_load(&m_src[j]); - - if ( right ) { tmp = m_src[j+16]; m_src[j+16] = m_src[j+15+j_offr]; } v_up = v_load(&m_src[j+1]); - if ( right ) m_src[j+16] = tmp; v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); @@ -225,17 +222,18 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_int16x8 v_s4p1 = v_reinterpret_as_s16(v_up1); v_int16x8 v_s4p2 = v_reinterpret_as_s16(v_up2); - // dx - v_int16x8 v_tmp = v_s2p1 - v_s2m1; - v_int16x8 v_sdx1 = (v_s1p1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s3m1); - v_tmp = v_s2p2 - v_s2m2; - v_int16x8 v_sdx2 = (v_s1p2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s3m2); + // dx & dy for rows 1, 2, 3 + v_int16x8 v_sdx1, v_sdy1; + spatialGradientKernel( v_sdx1, v_sdy1, + v_s1m1, v_s1n1, v_s1p1, + v_s2m1, v_s2p1, + v_s3m1, v_s3n1, v_s3p1 ); - // dy - v_tmp = v_s3n1 - v_s1n1; - v_int16x8 v_sdy1 = (v_s3m1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s1p1); - v_tmp = v_s3n2 - v_s1n2; - v_int16x8 v_sdy2 = (v_s3m2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s1p2); + v_int16x8 v_sdx2, v_sdy2; + spatialGradientKernel( v_sdx2, v_sdy2, + v_s1m2, v_s1n2, v_s1p2, + v_s2m2, v_s2p2, + v_s3m2, v_s3n2, v_s3p2 ); // Store v_store(&c_dx[j], v_sdx1); @@ -243,17 +241,16 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_store(&c_dy[j], v_sdy1); v_store(&c_dy[j+8], v_sdy2); - // dx - v_tmp = v_s3p1 - v_s3m1; - v_sdx1 = (v_s2p1 - v_s2m1) + (v_tmp + 
v_tmp) + (v_s4p1 - v_s4m1); - v_tmp = v_s3p2 - v_s3m2; - v_sdx2 = (v_s2p2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s4m2); + // dx & dy for rows 2, 3, 4 + spatialGradientKernel( v_sdx1, v_sdy1, + v_s2m1, v_s2n1, v_s2p1, + v_s3m1, v_s3p1, + v_s4m1, v_s4n1, v_s4p1 ); - // dy - v_tmp = v_s4n1 - v_s2n1; - v_sdy1 = (v_s4m1 - v_s2m1) + (v_tmp + v_tmp) + (v_s4p1 - v_s2p1); - v_tmp = v_s4n2 - v_s2n2; - v_sdy2 = (v_s4m2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s2p2); + spatialGradientKernel( v_sdx2, v_sdy2, + v_s2m2, v_s2n2, v_s2p2, + v_s3m2, v_s3p2, + v_s4m2, v_s4n2, v_s4p2 ); // Store v_store(&n_dx[j], v_sdx1); @@ -265,16 +262,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, i_start = i; j_start = j; #endif - /* NOTE: - * - * Sobel-x: -1 0 1 - * -2 0 2 - * -1 0 1 - * - * Sobel-y: -1 -2 -1 - * 0 0 0 - * 1 2 1 - */ int j_p, j_n; uchar v00, v01, v02, v10, v11, v12, v20, v21, v22; for ( i = 0; i < H; i++ ) @@ -283,31 +270,45 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, c_dx = P_dx [i]; c_dy = P_dy [i]; - // Pre-load 2 columns - j = i >= i_start ? 0 : j_start; + // Process left-most column + j = 0; + j_p = j + j_offl; + j_n = 1; + if ( j_n >= W ) j_n = j + j_offr; + v00 = p_src[j_p]; v01 = p_src[j]; v02 = p_src[j_n]; + v10 = c_src[j_p]; v11 = c_src[j]; v12 = c_src[j_n]; + v20 = n_src[j_p]; v21 = n_src[j]; v22 = n_src[j_n]; + spatialGradientKernel( c_dx[0], c_dy[0], v00, v01, v02, v10, + v12, v20, v21, v22 ); + v00 = v01; v10 = v11; v20 = v21; + v01 = v02; v11 = v12; v21 = v22; + + // Process middle columns + j = i >= i_start ? 
1 : j_start; j_p = j - 1; - if ( j_p < 0 ) j_p = j + j_offl; v00 = p_src[j_p]; v01 = p_src[j]; v10 = c_src[j_p]; v11 = c_src[j]; v20 = n_src[j_p]; v21 = n_src[j]; - for ( ; j < W; j++ ) + for ( ; j < W - 1; j++ ) { - j_n = j + 1; - if ( j_n >= W ) j_n = j + j_offr; - // Get values for next column - v02 = p_src[j_n]; - v12 = c_src[j_n]; - v22 = n_src[j_n]; - - c_dx[j] = -(v00 + v10 + v10 + v20) + (v02 + v12 + v12 + v22); - c_dy[j] = -(v00 + v01 + v01 + v02) + (v20 + v21 + v21 + v22); + j_n = j + 1; v02 = p_src[j_n]; v12 = c_src[j_n]; v22 = n_src[j_n]; + spatialGradientKernel( c_dx[j], c_dy[j], v00, v01, v02, v10, + v12, v20, v21, v22 ); // Move values back one column for next iteration v00 = v01; v10 = v11; v20 = v21; v01 = v02; v11 = v12; v21 = v22; } + + // Process right-most column + if ( j < W ) + { + j_n = j + j_offr; v02 = p_src[j_n]; v12 = c_src[j_n]; v22 = n_src[j_n]; + spatialGradientKernel( c_dx[j], c_dy[j], v00, v01, v02, v10, + v12, v20, v21, v22 ); + } } } From cf0fdfa2bb9e42145832136be4f55edb3403fd98 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Mon, 29 Jun 2015 23:50:05 +0900 Subject: [PATCH 21/25] spatialGradient: Change ordering of vector loads --- modules/imgproc/src/spatialgradient.cpp | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index d53b4fdfc4..178e20cec0 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -207,21 +207,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_int16x8 v_s3p1 = v_reinterpret_as_s16(v_up1); v_int16x8 v_s3p2 = v_reinterpret_as_s16(v_up2); - // Load fourth row for 3x3 Sobel filter - v_um = v_load(&m_src[j-1]); - v_un = v_load(&m_src[j]); - v_up = v_load(&m_src[j+1]); - - v_expand(v_um, v_um1, v_um2); - v_expand(v_un, v_un1, v_un2); - v_expand(v_up, v_up1, v_up2); - v_int16x8 v_s4m1 = 
v_reinterpret_as_s16(v_um1); - v_int16x8 v_s4m2 = v_reinterpret_as_s16(v_um2); - v_int16x8 v_s4n1 = v_reinterpret_as_s16(v_un1); - v_int16x8 v_s4n2 = v_reinterpret_as_s16(v_un2); - v_int16x8 v_s4p1 = v_reinterpret_as_s16(v_up1); - v_int16x8 v_s4p2 = v_reinterpret_as_s16(v_up2); - // dx & dy for rows 1, 2, 3 v_int16x8 v_sdx1, v_sdy1; spatialGradientKernel( v_sdx1, v_sdy1, @@ -241,6 +226,21 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, v_store(&c_dy[j], v_sdy1); v_store(&c_dy[j+8], v_sdy2); + // Load fourth row for 3x3 Sobel filter + v_um = v_load(&m_src[j-1]); + v_un = v_load(&m_src[j]); + v_up = v_load(&m_src[j+1]); + + v_expand(v_um, v_um1, v_um2); + v_expand(v_un, v_un1, v_un2); + v_expand(v_up, v_up1, v_up2); + v_int16x8 v_s4m1 = v_reinterpret_as_s16(v_um1); + v_int16x8 v_s4m2 = v_reinterpret_as_s16(v_um2); + v_int16x8 v_s4n1 = v_reinterpret_as_s16(v_un1); + v_int16x8 v_s4n2 = v_reinterpret_as_s16(v_un2); + v_int16x8 v_s4p1 = v_reinterpret_as_s16(v_up1); + v_int16x8 v_s4p2 = v_reinterpret_as_s16(v_up2); + // dx & dy for rows 2, 3, 4 spatialGradientKernel( v_sdx1, v_sdy1, v_s2m1, v_s2n1, v_s2p1, From 5dddb478635999c7b33cc53426562d1ff1ee6d4e Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Tue, 30 Jun 2015 10:51:10 +0900 Subject: [PATCH 22/25] spatialGradient: Remove pointers caching --- modules/imgproc/src/spatialgradient.cpp | 45 +++++++++++++------------ 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index 178e20cec0..c00ec1d67e 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -102,12 +102,7 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int i = 0, j = 0; - // Store pointers to rows of input/output data - // Padded by two rows for border handling - std::vector P_src(H+2); - std::vector P_dx (H+2); - std::vector P_dy (H+2); - + // Handle border types 
int i_top = 0, // Case for H == 1 && W == 1 && BORDER_REPLICATE i_bottom = H - 1, j_offl = 0, // j offset from 0th pixel to reach -1st pixel @@ -127,16 +122,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, } } - P_src[0] = src.ptr(i_top); // Mirrored top border - P_src[H+1] = src.ptr(i_bottom); // Mirrored bottom border - - for ( i = 0; i < H; i++ ) - { - P_src[i+1] = src.ptr(i); - P_dx [i] = dx.ptr(i); - P_dy [i] = dy.ptr(i); - } - // Pointer to row vectors uchar *p_src, *c_src, *n_src; // previous, current, next row short *c_dx, *c_dy; @@ -158,8 +143,19 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, // Example: umn is offset -1 in row and offset 0 in column for ( i = 0; i < H - 1; i += 2 ) { - p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; m_src = P_src[i+3]; - c_dx = P_dx[i]; c_dy = P_dy[i]; n_dx = P_dx[i+1]; n_dy = P_dy[i+1]; + if ( i == 0 ) p_src = src.ptr(i_top); + else p_src = src.ptr(i-1); + + c_src = src.ptr(i); + n_src = src.ptr(i+1); + + if ( i == H - 2 ) m_src = src.ptr(i_bottom); + else m_src = src.ptr(i+2); + + c_dx = dx.ptr(i); + c_dy = dy.ptr(i); + n_dx = dx.ptr(i+1); + n_dy = dy.ptr(i+1); // Process rest of columns 16-column chunks at a time for ( j = 1; j < W - 16; j += 16 ) @@ -266,9 +262,16 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, uchar v00, v01, v02, v10, v11, v12, v20, v21, v22; for ( i = 0; i < H; i++ ) { - p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; - c_dx = P_dx [i]; - c_dy = P_dy [i]; + if ( i == 0 ) p_src = src.ptr(i_top); + else p_src = src.ptr(i-1); + + c_src = src.ptr(i); + + if ( i == H - 1 ) n_src = src.ptr(i_bottom); + else n_src = src.ptr(i+1); + + c_dx = dx.ptr(i); + c_dy = dy.ptr(i); // Process left-most column j = 0; From ed38ca51794af4732357654eeefd1bfa6d768c8c Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Tue, 30 Jun 2015 16:08:15 +0900 Subject: [PATCH 23/25] spatialGradient: Remove 4 loads in inner loop 
--- modules/imgproc/src/spatialgradient.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index c00ec1d67e..fee4ab270b 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -157,13 +157,18 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, n_dx = dx.ptr(i+1); n_dy = dy.ptr(i+1); + v_uint8x16 v_select_m = v_uint8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0xFF); + // Process rest of columns 16-column chunks at a time for ( j = 1; j < W - 16; j += 16 ) { // Load top row for 3x3 Sobel filter v_uint8x16 v_um = v_load(&p_src[j-1]); - v_uint8x16 v_un = v_load(&p_src[j]); v_uint8x16 v_up = v_load(&p_src[j+1]); + // TODO: Replace _mm_slli_si128 with hal method + v_uint8x16 v_un = v_select(v_select_m, v_uint8x16(_mm_slli_si128(v_up.val, 1)), + v_uint8x16(_mm_srli_si128(v_um.val, 1))); v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2; v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); @@ -177,8 +182,10 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, // Load second row for 3x3 Sobel filter v_um = v_load(&c_src[j-1]); - v_un = v_load(&c_src[j]); v_up = v_load(&c_src[j+1]); + // TODO: Replace _mm_slli_si128 with hal method + v_un = v_select(v_select_m, v_uint8x16(_mm_slli_si128(v_up.val, 1)), + v_uint8x16(_mm_srli_si128(v_um.val, 1))); v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); @@ -191,8 +198,10 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, // Load third row for 3x3 Sobel filter v_um = v_load(&n_src[j-1]); - v_un = v_load(&n_src[j]); v_up = v_load(&n_src[j+1]); + // TODO: Replace _mm_slli_si128 with hal method + v_un = v_select(v_select_m, v_uint8x16(_mm_slli_si128(v_up.val, 1)), + v_uint8x16(_mm_srli_si128(v_um.val, 1))); v_expand(v_um, v_um1, v_um2); 
v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); @@ -224,9 +233,10 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, // Load fourth row for 3x3 Sobel filter v_um = v_load(&m_src[j-1]); - v_un = v_load(&m_src[j]); v_up = v_load(&m_src[j+1]); - + // TODO: Replace _mm_slli_si128 with hal method + v_un = v_select(v_select_m, v_uint8x16(_mm_slli_si128(v_up.val, 1)), + v_uint8x16(_mm_srli_si128(v_um.val, 1))); v_expand(v_um, v_um1, v_um2); v_expand(v_un, v_un1, v_un2); v_expand(v_up, v_up1, v_up2); From 90c398ea673f8252a74c843e897013f368ed37d9 Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Wed, 1 Jul 2015 00:42:08 +0900 Subject: [PATCH 24/25] spatialGradient: Add CV_SSE2 check --- modules/imgproc/src/spatialgradient.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index fee4ab270b..df88a7b1bf 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -128,7 +128,7 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, int i_start = 0; int j_start = 0; -#if CV_SIMD128 +#if CV_SIMD128 && CV_SSE2 uchar *m_src; short *n_dx, *n_dy; From 20bf88bad1eb8348b08482c0dafba97d9635364a Mon Sep 17 00:00:00 2001 From: Seon-Wook Park Date: Wed, 1 Jul 2015 09:34:27 +0900 Subject: [PATCH 25/25] spatialGradient: Make kern args const& --- modules/imgproc/src/spatialgradient.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/imgproc/src/spatialgradient.cpp b/modules/imgproc/src/spatialgradient.cpp index df88a7b1bf..b4dc032acb 100644 --- a/modules/imgproc/src/spatialgradient.cpp +++ b/modules/imgproc/src/spatialgradient.cpp @@ -59,9 +59,9 @@ namespace cv */ template <typename T> static inline void spatialGradientKernel( T& vx, T& vy, - T v00, T v01, T v02, - T v10, T v12, - T v20, T v21, T v22 ) + const T& v00, const T& v01, const T& v02, + const T& v10, const T& v12, 
+ const T& v20, const T& v21, const T& v22 ) { // vx = (v22 - v00) + (v02 - v20) + 2 * (v12 - v10) // vy = (v22 - v00) + (v20 - v02) + 2 * (v21 - v01)