diff --git a/modules/calib3d/CMakeLists.txt b/modules/calib3d/CMakeLists.txt index e9d1ccb7cf..9af6570b09 100644 --- a/modules/calib3d/CMakeLists.txt +++ b/modules/calib3d/CMakeLists.txt @@ -1,4 +1,7 @@ set(the_description "Camera Calibration and 3D Reconstruction") + +ocv_add_dispatched_file(undistort SSE2 AVX2) + set(debug_modules "") if(DEBUG_opencv_calib3d) list(APPEND debug_modules opencv_highgui) diff --git a/modules/calib3d/perf/perf_undistort.cpp b/modules/calib3d/perf/perf_undistort.cpp new file mode 100644 index 0000000000..6381a15c92 --- /dev/null +++ b/modules/calib3d/perf/perf_undistort.cpp @@ -0,0 +1,19 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html +#include "perf_precomp.hpp" + +namespace opencv_test { + +PERF_TEST(Undistort, InitUndistortMap) +{ + Size size_w_h(512 + 3, 512); + Mat k(3, 3, CV_32FC1); + Mat d(1, 14, CV_64FC1); + Mat dst(size_w_h, CV_32FC2); + declare.in(k, d, WARMUP_RNG).out(dst); + TEST_CYCLE() initUndistortRectifyMap(k, d, noArray(), k, size_w_h, CV_32FC2, dst, noArray()); + SANITY_CHECK_NOTHING(); +} + +} diff --git a/modules/calib3d/src/undistort.avx2.cpp b/modules/calib3d/src/undistort.avx2.cpp deleted file mode 100644 index 9b6608a783..0000000000 --- a/modules/calib3d/src/undistort.avx2.cpp +++ /dev/null @@ -1,194 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "precomp.hpp" -#include "undistort.hpp" - -namespace cv -{ - -int initUndistortRectifyMapLine_AVX(float* m1f, float* m2f, short* m1, ushort* m2, double* matTilt, const double* ir, - double& _x, double& _y, double& _w, int width, int m1type, - double k1, double k2, double k3, double k4, double k5, double k6, - double p1, double p2, double s1, double s2, double s3, double s4, - double u0, double v0, double fx, double fy) -{ - int j = 0; - - static const __m256d __one = _mm256_set1_pd(1.0); - static const __m256d __two = _mm256_set1_pd(2.0); - - const __m256d __matTilt_00 = _mm256_set1_pd(matTilt[0]); - const __m256d __matTilt_10 = _mm256_set1_pd(matTilt[3]); - const __m256d __matTilt_20 = _mm256_set1_pd(matTilt[6]); - - const __m256d __matTilt_01 = _mm256_set1_pd(matTilt[1]); - const __m256d __matTilt_11 = _mm256_set1_pd(matTilt[4]); - const __m256d __matTilt_21 = _mm256_set1_pd(matTilt[7]); - - const __m256d __matTilt_02 = _mm256_set1_pd(matTilt[2]); - const __m256d __matTilt_12 = _mm256_set1_pd(matTilt[5]); - const __m256d __matTilt_22 = _mm256_set1_pd(matTilt[8]); - - for (; j <= width - 4; j += 4, _x += 4 * ir[0], _y += 4 * ir[3], _w += 4 * ir[6]) - { - // Question: Should we load the constants first? 
- __m256d __w = _mm256_div_pd(__one, _mm256_set_pd(_w + 3 * ir[6], _w + 2 * ir[6], _w + ir[6], _w)); - __m256d __x = _mm256_mul_pd(_mm256_set_pd(_x + 3 * ir[0], _x + 2 * ir[0], _x + ir[0], _x), __w); - __m256d __y = _mm256_mul_pd(_mm256_set_pd(_y + 3 * ir[3], _y + 2 * ir[3], _y + ir[3], _y), __w); - __m256d __x2 = _mm256_mul_pd(__x, __x); - __m256d __y2 = _mm256_mul_pd(__y, __y); - __m256d __r2 = _mm256_add_pd(__x2, __y2); - __m256d __2xy = _mm256_mul_pd(__two, _mm256_mul_pd(__x, __y)); - __m256d __kr = _mm256_div_pd( -#if CV_FMA3 - _mm256_fmadd_pd(_mm256_fmadd_pd(_mm256_fmadd_pd(_mm256_set1_pd(k3), __r2, _mm256_set1_pd(k2)), __r2, _mm256_set1_pd(k1)), __r2, __one), - _mm256_fmadd_pd(_mm256_fmadd_pd(_mm256_fmadd_pd(_mm256_set1_pd(k6), __r2, _mm256_set1_pd(k5)), __r2, _mm256_set1_pd(k4)), __r2, __one) -#else - _mm256_add_pd(__one, _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set1_pd(k3), __r2), _mm256_set1_pd(k2)), __r2), _mm256_set1_pd(k1)), __r2)), - _mm256_add_pd(__one, _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set1_pd(k6), __r2), _mm256_set1_pd(k5)), __r2), _mm256_set1_pd(k4)), __r2)) -#endif - ); - __m256d __r22 = _mm256_mul_pd(__r2, __r2); -#if CV_FMA3 - __m256d __xd = _mm256_fmadd_pd(__x, __kr, - _mm256_add_pd( - _mm256_fmadd_pd(_mm256_set1_pd(p1), __2xy, _mm256_mul_pd(_mm256_set1_pd(p2), _mm256_fmadd_pd(__two, __x2, __r2))), - _mm256_fmadd_pd(_mm256_set1_pd(s1), __r2, _mm256_mul_pd(_mm256_set1_pd(s2), __r22)))); - __m256d __yd = _mm256_fmadd_pd(__y, __kr, - _mm256_add_pd( - _mm256_fmadd_pd(_mm256_set1_pd(p1), _mm256_fmadd_pd(__two, __y2, __r2), _mm256_mul_pd(_mm256_set1_pd(p2), __2xy)), - _mm256_fmadd_pd(_mm256_set1_pd(s3), __r2, _mm256_mul_pd(_mm256_set1_pd(s4), __r22)))); - - __m256d __vecTilt2 = _mm256_fmadd_pd(__matTilt_20, __xd, _mm256_fmadd_pd(__matTilt_21, __yd, __matTilt_22)); -#else - __m256d __xd = _mm256_add_pd( - _mm256_mul_pd(__x, __kr), - _mm256_add_pd( - _mm256_add_pd( - 
_mm256_mul_pd(_mm256_set1_pd(p1), __2xy), - _mm256_mul_pd(_mm256_set1_pd(p2), _mm256_add_pd(__r2, _mm256_mul_pd(__two, __x2)))), - _mm256_add_pd( - _mm256_mul_pd(_mm256_set1_pd(s1), __r2), - _mm256_mul_pd(_mm256_set1_pd(s2), __r22)))); - __m256d __yd = _mm256_add_pd( - _mm256_mul_pd(__y, __kr), - _mm256_add_pd( - _mm256_add_pd( - _mm256_mul_pd(_mm256_set1_pd(p1), _mm256_add_pd(__r2, _mm256_mul_pd(__two, __y2))), - _mm256_mul_pd(_mm256_set1_pd(p2), __2xy)), - _mm256_add_pd( - _mm256_mul_pd(_mm256_set1_pd(s3), __r2), - _mm256_mul_pd(_mm256_set1_pd(s4), __r22)))); - - __m256d __vecTilt2 = _mm256_add_pd(_mm256_add_pd( - _mm256_mul_pd(__matTilt_20, __xd), _mm256_mul_pd(__matTilt_21, __yd)), __matTilt_22); -#endif - __m256d __invProj = _mm256_blendv_pd( - _mm256_div_pd(__one, __vecTilt2), __one, - _mm256_cmp_pd(__vecTilt2, _mm256_setzero_pd(), _CMP_EQ_OQ)); - -#if CV_FMA3 - __m256d __u = _mm256_fmadd_pd(__matTilt_00, __xd, _mm256_fmadd_pd(__matTilt_01, __yd, __matTilt_02)); - __u = _mm256_fmadd_pd(_mm256_mul_pd(_mm256_set1_pd(fx), __invProj), __u, _mm256_set1_pd(u0)); - - __m256d __v = _mm256_fmadd_pd(__matTilt_10, __xd, _mm256_fmadd_pd(__matTilt_11, __yd, __matTilt_12)); - __v = _mm256_fmadd_pd(_mm256_mul_pd(_mm256_set1_pd(fy), __invProj), __v, _mm256_set1_pd(v0)); -#else - __m256d __u = _mm256_add_pd(_mm256_add_pd( - _mm256_mul_pd(__matTilt_00, __xd), _mm256_mul_pd(__matTilt_01, __yd)), __matTilt_02); - __u = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_set1_pd(fx), __invProj), __u), _mm256_set1_pd(u0)); - - __m256d __v = _mm256_add_pd(_mm256_add_pd( - _mm256_mul_pd(__matTilt_10, __xd), _mm256_mul_pd(__matTilt_11, __yd)), __matTilt_12); - __v = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_set1_pd(fy), __invProj), __v), _mm256_set1_pd(v0)); -#endif - - if (m1type == CV_32FC1) - { - _mm_storeu_ps(&m1f[j], _mm256_cvtpd_ps(__u)); - _mm_storeu_ps(&m2f[j], _mm256_cvtpd_ps(__v)); - } - else if (m1type == CV_32FC2) - { - __m128 __u_float = _mm256_cvtpd_ps(__u); - 
__m128 __v_float = _mm256_cvtpd_ps(__v); - - _mm_storeu_ps(&m1f[j * 2], _mm_unpacklo_ps(__u_float, __v_float)); - _mm_storeu_ps(&m1f[j * 2 + 4], _mm_unpackhi_ps(__u_float, __v_float)); - } - else // m1type == CV_16SC2 - { - __u = _mm256_mul_pd(__u, _mm256_set1_pd(INTER_TAB_SIZE)); - __v = _mm256_mul_pd(__v, _mm256_set1_pd(INTER_TAB_SIZE)); - - __m128i __iu = _mm256_cvtpd_epi32(__u); - __m128i __iv = _mm256_cvtpd_epi32(__v); - - static const __m128i __INTER_TAB_SIZE_m1 = _mm_set1_epi32(INTER_TAB_SIZE - 1); - __m128i __m2 = _mm_add_epi32( - _mm_mullo_epi32(_mm_and_si128(__iv, __INTER_TAB_SIZE_m1), _mm_set1_epi32(INTER_TAB_SIZE)), - _mm_and_si128(__iu, __INTER_TAB_SIZE_m1)); - __m2 = _mm_packus_epi32(__m2, __m2); - _mm_maskstore_epi64((long long int*) &m2[j], _mm_set_epi32(0, 0, 0xFFFFFFFF, 0xFFFFFFFF), __m2); - - // gcc4.9 does not support _mm256_set_m128 - // __m256i __m1 = _mm256_set_m128i(__iv, __iu); - __m256i __m1 = _mm256_setzero_si256(); - __m1 = _mm256_inserti128_si256(__m1, __iu, 0); - __m1 = _mm256_inserti128_si256(__m1, __iv, 1); - __m1 = _mm256_srai_epi32(__m1, INTER_BITS); // v3 v2 v1 v0 u3 u2 u1 u0 (int32_t) - static const __m256i __permute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); - __m1 = _mm256_permutevar8x32_epi32(__m1, __permute_mask); // v3 u3 v2 u2 v1 u1 v0 u0 (int32_t) - __m1 = _mm256_packs_epi32(__m1, __m1); // x x x x v3 u3 v2 u2 x x x x v1 u1 v0 u0 (int16_t) - _mm_storeu_si128((__m128i*) &m1[j * 2], _mm256_extracti128_si256(_mm256_permute4x64_epi64(__m1, (2 << 2) + 0), 0)); - } - } - - _mm256_zeroupper(); - - return j; -} - -} - -/* End of file */ diff --git a/modules/calib3d/src/undistort.cpp b/modules/calib3d/src/undistort.dispatch.cpp similarity index 83% rename from modules/calib3d/src/undistort.cpp rename to modules/calib3d/src/undistort.dispatch.cpp index 913786d07c..2dd52037a9 100644 --- a/modules/calib3d/src/undistort.cpp +++ b/modules/calib3d/src/undistort.dispatch.cpp @@ -42,11 +42,16 @@ #include "precomp.hpp" #include 
"distortion_model.hpp" -#include "undistort.hpp" #include "calib3d_c_api.h" -cv::Mat cv::getDefaultNewCameraMatrix( InputArray _cameraMatrix, Size imgsize, +#include "undistort.simd.hpp" +#include "undistort.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +namespace cv +{ + +Mat getDefaultNewCameraMatrix( InputArray _cameraMatrix, Size imgsize, bool centerPrincipalPoint ) { Mat cameraMatrix = _cameraMatrix.getMat(); @@ -63,134 +68,22 @@ cv::Mat cv::getDefaultNewCameraMatrix( InputArray _cameraMatrix, Size imgsize, return newCameraMatrix; } -class initUndistortRectifyMapComputer : public cv::ParallelLoopBody +namespace { +Ptr getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type, + const double* _ir, Matx33d &_matTilt, + double _u0, double _v0, double _fx, double _fy, + double _k1, double _k2, double _p1, double _p2, + double _k3, double _k4, double _k5, double _k6, + double _s1, double _s2, double _s3, double _s4) { -public: - initUndistortRectifyMapComputer( - cv::Size _size, cv::Mat &_map1, cv::Mat &_map2, int _m1type, - const double* _ir, cv::Matx33d &_matTilt, - double _u0, double _v0, double _fx, double _fy, - double _k1, double _k2, double _p1, double _p2, - double _k3, double _k4, double _k5, double _k6, - double _s1, double _s2, double _s3, double _s4) - : size(_size), - map1(_map1), - map2(_map2), - m1type(_m1type), - ir(_ir), - matTilt(_matTilt), - u0(_u0), - v0(_v0), - fx(_fx), - fy(_fy), - k1(_k1), - k2(_k2), - p1(_p1), - p2(_p2), - k3(_k3), - k4(_k4), - k5(_k5), - k6(_k6), - s1(_s1), - s2(_s2), - s3(_s3), - s4(_s4) { -#if CV_TRY_AVX2 - useAVX2 = cv::checkHardwareSupport(CV_CPU_AVX2); -#endif - } + CV_INSTRUMENT_REGION(); - void operator()( const cv::Range& range ) const CV_OVERRIDE - { - const int begin = range.start; - const int end = range.end; + CV_CPU_DISPATCH(getInitUndistortRectifyMapComputer, (_size, _map1, _map2, _m1type, _ir, _matTilt, _u0, _v0, _fx, 
_fy, _k1, _k2, _p1, _p2, _k3, _k4, _k5, _k6, _s1, _s2, _s3, _s4), + CV_CPU_DISPATCH_MODES_ALL); +} +} - for( int i = begin; i < end; i++ ) - { - float* m1f = map1.ptr(i); - float* m2f = map2.empty() ? 0 : map2.ptr(i); - short* m1 = (short*)m1f; - ushort* m2 = (ushort*)m2f; - double _x = i*ir[1] + ir[2], _y = i*ir[4] + ir[5], _w = i*ir[7] + ir[8]; - - int j = 0; - - if (m1type == CV_16SC2) - CV_Assert(m1 != NULL && m2 != NULL); - else if (m1type == CV_32FC1) - CV_Assert(m1f != NULL && m2f != NULL); - else - CV_Assert(m1 != NULL); - - #if CV_TRY_AVX2 - if( useAVX2 ) - j = cv::initUndistortRectifyMapLine_AVX(m1f, m2f, m1, m2, - matTilt.val, ir, _x, _y, _w, size.width, m1type, - k1, k2, k3, k4, k5, k6, p1, p2, s1, s2, s3, s4, u0, v0, fx, fy); - #endif - for( ; j < size.width; j++, _x += ir[0], _y += ir[3], _w += ir[6] ) - { - double w = 1./_w, x = _x*w, y = _y*w; - double x2 = x*x, y2 = y*y; - double r2 = x2 + y2, _2xy = 2*x*y; - double kr = (1 + ((k3*r2 + k2)*r2 + k1)*r2)/(1 + ((k6*r2 + k5)*r2 + k4)*r2); - double xd = (x*kr + p1*_2xy + p2*(r2 + 2*x2) + s1*r2+s2*r2*r2); - double yd = (y*kr + p1*(r2 + 2*y2) + p2*_2xy + s3*r2+s4*r2*r2); - cv::Vec3d vecTilt = matTilt*cv::Vec3d(xd, yd, 1); - double invProj = vecTilt(2) ? 
1./vecTilt(2) : 1; - double u = fx*invProj*vecTilt(0) + u0; - double v = fy*invProj*vecTilt(1) + v0; - if( m1type == CV_16SC2 ) - { - int iu = cv::saturate_cast(u*cv::INTER_TAB_SIZE); - int iv = cv::saturate_cast(v*cv::INTER_TAB_SIZE); - m1[j*2] = (short)(iu >> cv::INTER_BITS); - m1[j*2+1] = (short)(iv >> cv::INTER_BITS); - m2[j] = (ushort)((iv & (cv::INTER_TAB_SIZE-1))*cv::INTER_TAB_SIZE + (iu & (cv::INTER_TAB_SIZE-1))); - } - else if( m1type == CV_32FC1 ) - { - m1f[j] = (float)u; - m2f[j] = (float)v; - } - else - { - m1f[j*2] = (float)u; - m1f[j*2+1] = (float)v; - } - } - } - } - -private: - cv::Size size; - cv::Mat &map1; - cv::Mat &map2; - int m1type; - const double* ir; - cv::Matx33d &matTilt; - double u0; - double v0; - double fx; - double fy; - double k1; - double k2; - double p1; - double p2; - double k3; - double k4; - double k5; - double k6; - double s1; - double s2; - double s3; - double s4; -#if CV_TRY_AVX2 - bool useAVX2; -#endif -}; - -void cv::initUndistortRectifyMap( InputArray _cameraMatrix, InputArray _distCoeffs, +void initUndistortRectifyMap( InputArray _cameraMatrix, InputArray _distCoeffs, InputArray _matR, InputArray _newCameraMatrix, Size size, int m1type, OutputArray _map1, OutputArray _map2 ) { @@ -263,17 +156,17 @@ void cv::initUndistortRectifyMap( InputArray _cameraMatrix, InputArray _distCoef double tauY = distCoeffs.cols + distCoeffs.rows - 1 >= 14 ? 
distPtr[13] : 0.; // Matrix for trapezoidal distortion of tilted image sensor - cv::Matx33d matTilt = cv::Matx33d::eye(); - cv::detail::computeTiltProjectionMatrix(tauX, tauY, &matTilt); + Matx33d matTilt = Matx33d::eye(); + detail::computeTiltProjectionMatrix(tauX, tauY, &matTilt); - parallel_for_(Range(0, size.height), initUndistortRectifyMapComputer( + parallel_for_(Range(0, size.height), *getInitUndistortRectifyMapComputer( size, map1, map2, m1type, ir, matTilt, u0, v0, fx, fy, k1, k2, p1, p2, k3, k4, k5, k6, s1, s2, s3, s4)); } -void cv::undistort( InputArray _src, OutputArray _dst, InputArray _cameraMatrix, - InputArray _distCoeffs, InputArray _newCameraMatrix ) +void undistort( InputArray _src, OutputArray _dst, InputArray _cameraMatrix, + InputArray _distCoeffs, InputArray _newCameraMatrix ) { CV_INSTRUMENT_REGION(); @@ -319,6 +212,7 @@ void cv::undistort( InputArray _src, OutputArray _dst, InputArray _cameraMatrix, } } +} CV_IMPL void cvUndistort2( const CvArr* srcarr, CvArr* dstarr, const CvMat* Aarr, const CvMat* dist_coeffs, const CvMat* newAarr ) diff --git a/modules/calib3d/src/undistort.hpp b/modules/calib3d/src/undistort.hpp deleted file mode 100644 index 26633d0fb5..0000000000 --- a/modules/calib3d/src/undistort.hpp +++ /dev/null @@ -1,59 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#ifndef OPENCV_CALIB3D_UNDISTORT_HPP -#define OPENCV_CALIB3D_UNDISTORT_HPP - -namespace cv -{ -#if CV_TRY_AVX2 - int initUndistortRectifyMapLine_AVX(float* m1f, float* m2f, short* m1, ushort* m2, double* matTilt, const double* ir, - double& _x, double& _y, double& _w, int width, int m1type, - double k1, double k2, double k3, double k4, double k5, double k6, - double p1, double p2, double s1, double s2, double s3, double s4, - double u0, double v0, double fx, double fy); -#endif -} - -#endif // OPENCV_CALIB3D_UNDISTORT_HPP - -/* End of file */ diff --git a/modules/calib3d/src/undistort.simd.hpp b/modules/calib3d/src/undistort.simd.hpp new file mode 100644 index 0000000000..20ca545fdb --- /dev/null +++ b/modules/calib3d/src/undistort.simd.hpp @@ -0,0 +1,324 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "opencv2/core/hal/intrin.hpp" + +namespace cv { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// forward declarations +Ptr getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type, + const double* _ir, Matx33d &_matTilt, + double _u0, double _v0, double _fx, double _fy, + double _k1, double _k2, double _p1, double _p2, + double _k3, double _k4, double _k5, double _k6, + double _s1, double _s2, double _s3, double _s4); + + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +namespace +{ +class initUndistortRectifyMapComputer : public ParallelLoopBody +{ +public: + initUndistortRectifyMapComputer( + Size _size, Mat &_map1, Mat &_map2, int _m1type, + const double* _ir, Matx33d &_matTilt, + double _u0, double _v0, double _fx, double _fy, + double _k1, double _k2, double _p1, double _p2, + double _k3, double _k4, double _k5, double _k6, + double _s1, double _s2, double _s3, double _s4) + : size(_size), + map1(_map1), + map2(_map2), + m1type(_m1type), + ir(_ir), + 
matTilt(_matTilt), + u0(_u0), + v0(_v0), + fx(_fx), + fy(_fy), + k1(_k1), + k2(_k2), + p1(_p1), + p2(_p2), + k3(_k3), + k4(_k4), + k5(_k5), + k6(_k6), + s1(_s1), + s2(_s2), + s3(_s3), + s4(_s4) { +#if CV_SIMD_64F + for (int i = 0; i < 2 * v_float64::nlanes; ++i) + { + s_x[i] = ir[0] * i; + s_y[i] = ir[3] * i; + s_w[i] = ir[6] * i; + } +#endif + } + + void operator()( const cv::Range& range ) const CV_OVERRIDE + { + CV_INSTRUMENT_REGION(); + + const int begin = range.start; + const int end = range.end; + + for( int i = begin; i < end; i++ ) + { + float* m1f = map1.ptr(i); + float* m2f = map2.empty() ? 0 : map2.ptr(i); + short* m1 = (short*)m1f; + ushort* m2 = (ushort*)m2f; + double _x = i*ir[1] + ir[2], _y = i*ir[4] + ir[5], _w = i*ir[7] + ir[8]; + + int j = 0; + + if (m1type == CV_16SC2) + CV_Assert(m1 != NULL && m2 != NULL); + else if (m1type == CV_32FC1) + CV_Assert(m1f != NULL && m2f != NULL); + else + CV_Assert(m1 != NULL); + +#if CV_SIMD_64F + const v_float64 v_one = vx_setall_f64(1.0); + for (; j <= size.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes, _x += 2*v_float64::nlanes * ir[0], _y += 2*v_float64::nlanes * ir[3], _w += 2*v_float64::nlanes * ir[6]) + { + v_float64 m_0, m_1, m_2, m_3; + m_2 = v_one / (vx_setall_f64(_w) + vx_load(s_w)); + m_3 = v_one / (vx_setall_f64(_w) + vx_load(s_w + v_float64::nlanes)); + m_0 = vx_setall_f64(_x); m_1 = vx_setall_f64(_y); + v_float64 x_0 = (m_0 + vx_load(s_x)) * m_2; + v_float64 x_1 = (m_0 + vx_load(s_x + v_float64::nlanes)) * m_3; + v_float64 y_0 = (m_1 + vx_load(s_y)) * m_2; + v_float64 y_1 = (m_1 + vx_load(s_y + v_float64::nlanes)) * m_3; + + v_float64 xd_0 = x_0 * x_0; + v_float64 yd_0 = y_0 * y_0; + v_float64 xd_1 = x_1 * x_1; + v_float64 yd_1 = y_1 * y_1; + + v_float64 r2_0 = xd_0 + yd_0; + v_float64 r2_1 = xd_1 + yd_1; + + m_1 = vx_setall_f64(k3); + m_2 = vx_setall_f64(k2); + m_3 = vx_setall_f64(k1); + m_0 = v_muladd(v_muladd(v_muladd(m_1, r2_0, m_2), r2_0, m_3), r2_0, v_one); + m_1 = 
v_muladd(v_muladd(v_muladd(m_1, r2_1, m_2), r2_1, m_3), r2_1, v_one); + m_3 = vx_setall_f64(k6); + m_2 = vx_setall_f64(k5); + m_0 /= v_muladd(v_muladd(v_muladd(m_3, r2_0, m_2), r2_0, vx_setall_f64(k4)), r2_0, v_one); + m_1 /= v_muladd(v_muladd(v_muladd(m_3, r2_1, m_2), r2_1, vx_setall_f64(k4)), r2_1, v_one); + x_0 *= m_0; y_0 *= m_0; x_1 *= m_1; y_1 *= m_1; + + m_0 = vx_setall_f64(p1); + m_1 = vx_setall_f64(p2); + m_2 = vx_setall_f64(2.0); + xd_0 = v_muladd(v_muladd(m_2, xd_0, r2_0), m_1, x_0); + yd_0 = v_muladd(v_muladd(m_2, yd_0, r2_0), m_0, y_0); + xd_1 = v_muladd(v_muladd(m_2, xd_1, r2_1), m_1, x_1); + yd_1 = v_muladd(v_muladd(m_2, yd_1, r2_1), m_0, y_1); + + m_0 *= m_2; m_1 *= m_2; + m_2 = x_0 * y_0; + m_3 = x_1 * y_1; + xd_0 = v_muladd(m_0, m_2, xd_0); + yd_0 = v_muladd(m_1, m_2, yd_0); + xd_1 = v_muladd(m_0, m_3, xd_1); + yd_1 = v_muladd(m_1, m_3, yd_1); + + m_0 = r2_0 * r2_0; + m_1 = r2_1 * r2_1; + m_2 = vx_setall_f64(s2); + m_3 = vx_setall_f64(s1); + xd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, xd_0)); + xd_1 = v_muladd(m_3, r2_1, v_muladd(m_2, m_1, xd_1)); + m_2 = vx_setall_f64(s4); + m_3 = vx_setall_f64(s3); + yd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, yd_0)); + yd_1 = v_muladd(m_3, r2_1, v_muladd(m_2, m_1, yd_1)); + + m_0 = vx_setall_f64(matTilt.val[0]); + m_1 = vx_setall_f64(matTilt.val[1]); + m_2 = vx_setall_f64(matTilt.val[2]); + x_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2)); + x_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2)); + m_0 = vx_setall_f64(matTilt.val[3]); + m_1 = vx_setall_f64(matTilt.val[4]); + m_2 = vx_setall_f64(matTilt.val[5]); + y_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2)); + y_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2)); + m_0 = vx_setall_f64(matTilt.val[6]); + m_1 = vx_setall_f64(matTilt.val[7]); + m_2 = vx_setall_f64(matTilt.val[8]); + r2_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2)); + r2_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2)); + m_0 = vx_setzero_f64(); + r2_0 = v_select(r2_0 == m_0, v_one, 
v_one / r2_0); + r2_1 = v_select(r2_1 == m_0, v_one, v_one / r2_1); + + m_0 = vx_setall_f64(fx); + m_1 = vx_setall_f64(u0); + m_2 = vx_setall_f64(fy); + m_3 = vx_setall_f64(v0); + x_0 = v_muladd(m_0 * r2_0, x_0, m_1); + y_0 = v_muladd(m_2 * r2_0, y_0, m_3); + x_1 = v_muladd(m_0 * r2_1, x_1, m_1); + y_1 = v_muladd(m_2 * r2_1, y_1, m_3); + + if (m1type == CV_32FC1) + { + v_store(&m1f[j], v_cvt_f32(x_0, x_1)); + v_store(&m2f[j], v_cvt_f32(y_0, y_1)); + } + else if (m1type == CV_32FC2) + { + v_float32 mf0, mf1; + v_zip(v_cvt_f32(x_0, x_1), v_cvt_f32(y_0, y_1), mf0, mf1); + v_store(&m1f[j * 2], mf0); + v_store(&m1f[j * 2 + v_float32::nlanes], mf1); + } + else // m1type == CV_16SC2 + { + m_0 = vx_setall_f64(INTER_TAB_SIZE); + x_0 *= m_0; x_1 *= m_0; y_0 *= m_0; y_1 *= m_0; + + v_int32 mask = vx_setall_s32(INTER_TAB_SIZE - 1); + v_int32 iu = v_round(x_0, x_1); + v_int32 iv = v_round(y_0, y_1); + + v_pack_u_store(&m2[j], (iu & mask) + (iv & mask) * vx_setall_s32(INTER_TAB_SIZE)); + v_int32 out0, out1; + v_zip(iu >> INTER_BITS, iv >> INTER_BITS, out0, out1); + v_store(&m1[j * 2], v_pack(out0, out1)); + } + } + + vx_cleanup(); +#endif + for( ; j < size.width; j++, _x += ir[0], _y += ir[3], _w += ir[6] ) + { + double w = 1./_w, x = _x*w, y = _y*w; + double x2 = x*x, y2 = y*y; + double r2 = x2 + y2, _2xy = 2*x*y; + double kr = (1 + ((k3*r2 + k2)*r2 + k1)*r2)/(1 + ((k6*r2 + k5)*r2 + k4)*r2); + double xd = (x*kr + p1*_2xy + p2*(r2 + 2*x2) + s1*r2+s2*r2*r2); + double yd = (y*kr + p1*(r2 + 2*y2) + p2*_2xy + s3*r2+s4*r2*r2); + Vec3d vecTilt = matTilt*cv::Vec3d(xd, yd, 1); + double invProj = vecTilt(2) ? 
1./vecTilt(2) : 1; + double u = fx*invProj*vecTilt(0) + u0; + double v = fy*invProj*vecTilt(1) + v0; + if( m1type == CV_16SC2 ) + { + int iu = saturate_cast(u*INTER_TAB_SIZE); + int iv = saturate_cast(v*INTER_TAB_SIZE); + m1[j*2] = (short)(iu >> INTER_BITS); + m1[j*2+1] = (short)(iv >> INTER_BITS); + m2[j] = (ushort)((iv & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (iu & (INTER_TAB_SIZE-1))); + } + else if( m1type == CV_32FC1 ) + { + m1f[j] = (float)u; + m2f[j] = (float)v; + } + else + { + m1f[j*2] = (float)u; + m1f[j*2+1] = (float)v; + } + } + } + } + +private: + Size size; + Mat &map1; + Mat &map2; + int m1type; + const double* ir; + Matx33d &matTilt; + double u0; + double v0; + double fx; + double fy; + double k1; + double k2; + double p1; + double p2; + double k3; + double k4; + double k5; + double k6; + double s1; + double s2; + double s3; + double s4; +#if CV_SIMD_64F + double s_x[2*v_float64::nlanes]; + double s_y[2*v_float64::nlanes]; + double s_w[2*v_float64::nlanes]; +#endif +}; +} + +Ptr getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type, + const double* _ir, Matx33d &_matTilt, + double _u0, double _v0, double _fx, double _fy, + double _k1, double _k2, double _p1, double _p2, + double _k3, double _k4, double _k5, double _k6, + double _s1, double _s2, double _s3, double _s4) +{ + CV_INSTRUMENT_REGION(); + + return Ptr(new initUndistortRectifyMapComputer(_size, _map1, _map2, _m1type, _ir, _matTilt, _u0, _v0, _fx, _fy, + _k1, _k2, _p1, _p2, _k3, _k4, _k5, _k6, _s1, _s2, _s3, _s4)); +} + +#endif +CV_CPU_OPTIMIZATION_NAMESPACE_END +} +/* End of file */ diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index c1844d8b8c..299b415d5e 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -366,6 +366,7 @@ CV__DNN_INLINE_NS_BEGIN */ std::vector > sliceRanges; int axis; + int num_split; static Ptr create(const LayerParams 
&params); }; diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 9bc017cf1f..a12cfb68e5 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -383,7 +383,7 @@ CV__DNN_INLINE_NS_BEGIN /** @brief Dump net to String * @returns String with structure, hyperparameters, backend, target and fusion - * To see correct backend, target and fusion run after forward(). + * Call method after setInput(). To see correct backend, target and fusion run after forward(). */ CV_WRAP String dump(); /** @brief Dump net structure, hyperparameters, backend, target and fusion to dot file diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index a4733307f4..611b9f7fcb 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2979,6 +2979,13 @@ String parseLayerParams(const String& name, const LayerParams& lp) { String Net::dump() { CV_Assert(!empty()); + + if (impl->netInputLayer->inputsData.empty()) + CV_Error(Error::StsError, "Requested set input"); + + if (!impl->netWasAllocated) + impl->setUpNet(); + std::ostringstream out; std::map& map = impl->layers; int prefBackend = impl->preferableBackend; diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 73d6a301ae..7640d4637e 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -61,6 +61,7 @@ public: { setParamsFrom(params); axis = params.get("axis", 1); + num_split = params.get("num_split", 0); if (params.has("slice_point")) { CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end")); @@ -141,9 +142,10 @@ public: else // Divide input blob on equal parts by axis. { CV_Assert(0 <= axis && axis < inpShape.size()); - CV_Assert(requiredOutputs > 0 && inpShape[axis] % requiredOutputs == 0); - inpShape[axis] /= requiredOutputs; - outputs.resize(requiredOutputs, inpShape); + int splits = num_split ? 
num_split : requiredOutputs; + CV_Assert(splits > 0 && inpShape[axis] % splits == 0); + inpShape[axis] /= splits; + outputs.resize(splits, inpShape); } return false; } diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index cc45c3f425..f85bd59218 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1410,6 +1410,9 @@ void TFImporter::populateNet(Net dstNet) axis = toNCHW(axis); layerParams.set("axis", axis); + if (hasLayerAttr(layer, "num_split")) + layerParams.set("num_split", getLayerAttr(layer, "num_split").i()); + int id = dstNet.addLayer(name, "Slice", layerParams); layer_id[name] = id; diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 545575d2eb..74e2c1cf40 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -78,6 +78,26 @@ TEST(readNet, Regression) EXPECT_FALSE(net.empty()); } +typedef testing::TestWithParam > dump; +TEST_P(dump, Regression) +{ + const int backend = get<0>(GetParam()); + const int target = get<1>(GetParam()); + Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"), + findDataFile("dnn/squeezenet_v1.1.caffemodel", false)); + + int size[] = {1, 3, 227, 227}; + Mat input = cv::Mat::ones(4, size, CV_32F); + net.setInput(input); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + EXPECT_FALSE(net.dump().empty()); + net.forward(); + EXPECT_FALSE(net.dump().empty()); +} + +INSTANTIATE_TEST_CASE_P(/**/, dump, dnnBackendsAndTargets()); + class FirstCustomLayer CV_FINAL : public Layer { public: diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 7ae94db3f3..ce77e867a2 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -605,7 +605,7 @@ TEST_P(Test_ONNX_nets, Resnet34_kinetics) if (target != DNN_TARGET_CPU) throw SkipTestException("Only CPU is supported"); 
- String onnxmodel = findDataFile("dnn/resnet-34_kinetics.onnx"); + String onnxmodel = findDataFile("dnn/resnet-34_kinetics.onnx", false); Mat image0 = imread(findDataFile("dnn/dog416.png")); Mat image1 = imread(findDataFile("dnn/street.png")); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 2dae678403..0357b8ecc5 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -350,6 +350,11 @@ TEST_P(Test_TensorFlow_layers, l2_normalize_3d) runTensorFlowNet("l2_normalize_3d"); } +TEST_P(Test_TensorFlow_layers, Split) +{ + runTensorFlowNet("split"); +} + class Test_TensorFlow_nets : public DNNTestLayer {}; TEST_P(Test_TensorFlow_nets, MobileNet_SSD) diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index 157a83b603..6574f557fa 100644 --- a/modules/imgproc/src/thresh.cpp +++ b/modules/imgproc/src/thresh.cpp @@ -1142,6 +1142,9 @@ getThreshVal_Otsu_8u( const Mat& _src ) const int N = 256; int i, j, h[N] = {0}; + #if CV_ENABLE_UNROLLED + int h_unrolled[3][N] = {}; + #endif for( i = 0; i < size.height; i++ ) { const uchar* src = _src.ptr() + step*i; @@ -1150,9 +1153,9 @@ getThreshVal_Otsu_8u( const Mat& _src ) for( ; j <= size.width - 4; j += 4 ) { int v0 = src[j], v1 = src[j+1]; - h[v0]++; h[v1]++; + h[v0]++; h_unrolled[0][v1]++; v0 = src[j+2]; v1 = src[j+3]; - h[v0]++; h[v1]++; + h_unrolled[1][v0]++; h_unrolled[2][v1]++; } #endif for( ; j < size.width; j++ ) @@ -1161,7 +1164,12 @@ getThreshVal_Otsu_8u( const Mat& _src ) double mu = 0, scale = 1./(size.width*size.height); for( i = 0; i < N; i++ ) + { + #if CV_ENABLE_UNROLLED + h[i] += h_unrolled[0][i] + h_unrolled[1][i] + h_unrolled[2][i]; + #endif mu += i*(double)h[i]; + } mu *= scale; double mu1 = 0, q1 = 0; @@ -1206,6 +1214,9 @@ getThreshVal_Triangle_8u( const Mat& _src ) const int N = 256; int i, j, h[N] = {0}; + #if CV_ENABLE_UNROLLED + int h_unrolled[3][N] = {}; + #endif for( i = 0; i < size.height; i++ ) 
{ const uchar* src = _src.ptr() + step*i; @@ -1214,9 +1225,9 @@ getThreshVal_Triangle_8u( const Mat& _src ) for( ; j <= size.width - 4; j += 4 ) { int v0 = src[j], v1 = src[j+1]; - h[v0]++; h[v1]++; + h[v0]++; h_unrolled[0][v1]++; v0 = src[j+2]; v1 = src[j+3]; - h[v0]++; h[v1]++; + h_unrolled[1][v0]++; h_unrolled[2][v1]++; } #endif for( ; j < size.width; j++ ) @@ -1227,6 +1238,13 @@ getThreshVal_Triangle_8u( const Mat& _src ) int temp; bool isflipped = false; + #if CV_ENABLE_UNROLLED + for( i = 0; i < N; i++ ) + { + h[i] += h_unrolled[0][i] + h_unrolled[1][i] + h_unrolled[2][i]; + } + #endif + for( i = 0; i < N; i++ ) { if( h[i] > 0 )