diff --git a/cmake/OpenCVFindLibsPerf.cmake b/cmake/OpenCVFindLibsPerf.cmake index b9b1a95799..7b3874ff0e 100644 --- a/cmake/OpenCVFindLibsPerf.cmake +++ b/cmake/OpenCVFindLibsPerf.cmake @@ -51,7 +51,12 @@ endif(WITH_CUDA) # --- Eigen --- if(WITH_EIGEN AND NOT HAVE_EIGEN) - find_package(Eigen3 QUIET) + if(NOT OPENCV_SKIP_EIGEN_FIND_PACKAGE_CONFIG) + find_package(Eigen3 CONFIG QUIET) # Ceres 2.0.0 CMake scripts doesn't work with CMake's FindEigen3.cmake module (due to missing EIGEN3_VERSION_STRING) + endif() + if(NOT Eigen3_FOUND) + find_package(Eigen3 QUIET) + endif() if(Eigen3_FOUND) if(TARGET Eigen3::Eigen) diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index bfa61d0502..b307703a32 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -122,6 +122,33 @@ static const double DFTTab[][2] = { 1.00000000000000000, 0.00000000292583616 } }; +namespace { +template +struct Constants { + static const T sin_120; + static const T fft5_2; + static const T fft5_3; + static const T fft5_4; + static const T fft5_5; +}; + +template +const T Constants::sin_120 = (T)0.86602540378443864676372317075294; + +template +const T Constants::fft5_2 = (T)0.559016994374947424102293417182819; + +template +const T Constants::fft5_3 = (T)-0.951056516295153572116439333379382; + +template +const T Constants::fft5_4 = (T)-1.538841768587626701285145288018455; + +template +const T Constants::fft5_5 = (T)0.363271264002680442947733378740309; + +} //namespace + #define BitRev(i,shift) \ ((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \ ((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \ @@ -372,6 +399,149 @@ DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wa } } +// Reference radix-2 implementation. +template struct DFT_R2 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const int nx = n/2; + for(int i = 0 ; i < c_n; i += n) + { + Complex* v = dst + i; + T r0 = v[0].re + v[nx].re; + T i0 = v[0].im + v[nx].im; + T r1 = v[0].re - v[nx].re; + T i1 = v[0].im - v[nx].im; + v[0].re = r0; v[0].im = i0; + v[nx].re = r1; v[nx].im = i1; + + for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 ) + { + v = dst + i + j; + r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; + i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im; + r0 = v[0].re; i0 = v[0].im; + + v[0].re = r0 + r1; v[0].im = i0 + i1; + v[nx].re = r0 - r1; v[nx].im = i0 - i1; + } + } + } +}; + +// Reference radix-3 implementation. +template struct DFT_R3 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const int nx = n / 3; + for(int i = 0; i < c_n; i += n ) + { + { + Complex* v = dst + i; + T r1 = v[nx].re + v[nx*2].re; + T i1 = v[nx].im + v[nx*2].im; + T r0 = v[0].re; + T i0 = v[0].im; + T r2 = Constants::sin_120*(v[nx].im - v[nx*2].im); + T i2 = Constants::sin_120*(v[nx*2].re - v[nx].re); + v[0].re = r0 + r1; v[0].im = i0 + i1; + r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; + v[nx].re = r0 + r2; v[nx].im = i0 + i2; + v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; + } + + for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 ) + { + Complex* v = dst + i + j; + T r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; + T i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re; + T i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im; + T r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re; + T r1 = r0 + i2; T i1 = i0 + r2; + + r2 = Constants::sin_120*(i0 - r2); i2 = Constants::sin_120*(i2 - r0); + r0 = v[0].re; i0 = v[0].im; + v[0].re = r0 + r1; v[0].im = i0 + i1; + r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; + v[nx].re = r0 + r2; v[nx].im = i0 + i2; + v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; + } + } + } +}; + +// Reference radix-5 implementation. +template struct DFT_R5 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const int nx = n / 5; + for(int i = 0; i < c_n; i += n ) + { + for(int j = 0, dw = 0; j < nx; j++, dw += dw0 ) + { + Complex* v0 = dst + i + j; + Complex* v1 = v0 + nx*2; + Complex* v2 = v1 + nx*2; + + T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5; + + r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im; + i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re; + r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im; + i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re; + + r1 = r3 + r2; i1 = i3 + i2; + r3 -= r2; i3 -= i2; + + r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im; + i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re; + r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im; + i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re; + + r2 = r4 + r0; i2 = i4 + i0; + r4 -= r0; i4 -= i0; + + r0 = v0[0].re; i0 = v0[0].im; + r5 = r1 + r2; i5 = i1 + i2; + + v0[0].re = r0 + r5; v0[0].im = i0 + i5; + + r0 -= (T)0.25*r5; i0 -= (T)0.25*i5; + r1 = Constants::fft5_2*(r1 - r2); i1 = Constants::fft5_2*(i1 - i2); + r2 = -Constants::fft5_3*(i3 + i4); i2 = Constants::fft5_3*(r3 + r4); + + i3 *= -Constants::fft5_5; r3 *= Constants::fft5_5; + i4 *= -Constants::fft5_4; r4 *= Constants::fft5_4; + + r5 = r2 + i3; i5 = i2 + r3; + r2 -= i4; i2 -= r4; + + r3 = r0 + r1; i3 = i0 + i1; + r0 -= r1; i0 -= i1; + + v0[nx].re = r3 + r2; v0[nx].im = i3 + i2; + v2[0].re = r3 - r2; v2[0].im = i3 - i2; + + v1[0].re = r0 + r5; v1[0].im = i0 + i5; + v1[nx].re = r0 - r5; v1[nx].im = i0 - i5; + } + } + } +}; + +template struct DFT_VecR2 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + return DFT_R2()(dst, c_n, n, dw0, wave); + } +}; + +template struct DFT_VecR3 +{ + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + return DFT_R3()(dst, c_n, n, dw0, wave); + } +}; + template struct DFT_VecR4 { int operator()(Complex*, int, int, int&, const Complex*) const { return 1; } @@ -379,6 +549,98 @@ template struct DFT_VecR4 #if CV_SSE3 +// multiplies *a and *b: +// r_re + i*r_im = (a_re + i*a_im)*(b_re + i*b_im) +// r_re and r_im are placed respectively in bits 31:0 and 63:32 of the resulting +// vector register. +inline __m128 complexMul(const Complex* const a, const Complex* const b) { + const __m128 z = _mm_setzero_ps(); + const __m128 neg_elem0 = _mm_set_ps(0.0f,0.0f,0.0f,-0.0f); + // v_a[31:0] is a->re and v_a[63:32] is a->im. + const __m128 v_a = _mm_loadl_pi(z, (const __m64*)a); + const __m128 v_b = _mm_loadl_pi(z, (const __m64*)b); + // x_1 = v[nx] * wave[dw]. + const __m128 v_a_riri = _mm_shuffle_ps(v_a, v_a, _MM_SHUFFLE(0, 1, 0, 1)); + const __m128 v_b_irri = _mm_shuffle_ps(v_b, v_b, _MM_SHUFFLE(1, 0, 0, 1)); + const __m128 mul = _mm_mul_ps(v_a_riri, v_b_irri); + const __m128 xored = _mm_xor_ps(mul, neg_elem0); + return _mm_hadd_ps(xored, z); +} + +// optimized radix-2 transform +template<> struct DFT_VecR2 { + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const __m128 z = _mm_setzero_ps(); + const int nx = n/2; + for(int i = 0 ; i < c_n; i += n) + { + { + Complex* v = dst + i; + float r0 = v[0].re + v[nx].re; + float i0 = v[0].im + v[nx].im; + float r1 = v[0].re - v[nx].re; + float i1 = v[0].im - v[nx].im; + v[0].re = r0; v[0].im = i0; + v[nx].re = r1; v[nx].im = i1; + } + + for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 ) + { + Complex* v = dst + i + j; + const __m128 x_1 = complexMul(&v[nx], &wave[dw]); + const __m128 v_0 = _mm_loadl_pi(z, (const __m64*)&v[0]); + _mm_storel_pi((__m64*)&v[0], _mm_add_ps(v_0, x_1)); + _mm_storel_pi((__m64*)&v[nx], _mm_sub_ps(v_0, x_1)); + } + } + } +}; + +// Optimized radix-3 implementation. +template<> struct DFT_VecR3 { + void operator()(Complex* dst, const int c_n, const int n, const int dw0, const Complex* wave) const { + const int nx = n / 3; + const __m128 z = _mm_setzero_ps(); + const __m128 neg_elem1 = _mm_set_ps(0.0f,0.0f,-0.0f,0.0f); + const __m128 sin_120 = _mm_set1_ps(Constants::sin_120); + const __m128 one_half = _mm_set1_ps(0.5f); + for(int i = 0; i < c_n; i += n ) + { + { + Complex* v = dst + i; + + float r1 = v[nx].re + v[nx*2].re; + float i1 = v[nx].im + v[nx*2].im; + float r0 = v[0].re; + float i0 = v[0].im; + float r2 = Constants::sin_120*(v[nx].im - v[nx*2].im); + float i2 = Constants::sin_120*(v[nx*2].re - v[nx].re); + v[0].re = r0 + r1; v[0].im = i0 + i1; + r0 -= (float)0.5*r1; i0 -= (float)0.5*i1; + v[nx].re = r0 + r2; v[nx].im = i0 + i2; + v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; + } + + for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 ) + { + Complex* v = dst + i + j; + const __m128 x_0 = complexMul(&v[nx], &wave[dw]); + const __m128 x_2 = complexMul(&v[nx*2], &wave[dw*2]); + const __m128 x_1 = _mm_add_ps(x_0, x_2); + + const __m128 v_0 = _mm_loadl_pi(z, (const __m64*)&v[0]); + _mm_storel_pi((__m64*)&v[0], _mm_add_ps(v_0, x_1)); + + const __m128 x_3 = _mm_mul_ps(sin_120, _mm_xor_ps(_mm_sub_ps(x_2, x_0), neg_elem1)); + const __m128 x_3s = _mm_shuffle_ps(x_3, x_3, _MM_SHUFFLE(0, 1, 0, 1)); + const __m128 x_4 = _mm_sub_ps(v_0, _mm_mul_ps(one_half, x_1)); + _mm_storel_pi((__m64*)&v[nx], _mm_add_ps(x_4, x_3s)); + _mm_storel_pi((__m64*)&v[nx*2], _mm_sub_ps(x_4, x_3s)); + } + } + } +}; + // optimized radix-4 transform template<> struct DFT_VecR4 { @@ -573,12 +835,6 @@ struct OcvDftOptions { template static void DFT(const OcvDftOptions & c, const Complex* src, Complex* dst) { - static const T sin_120 = (T)0.86602540378443864676372317075294; - static const T fft5_2 = (T)0.559016994374947424102293417182819; - static const T fft5_3 = (T)-0.951056516295153572116439333379382; - static const T fft5_4 = (T)-1.538841768587626701285145288018455; - static const T fft5_5 = (T)0.363271264002680442947733378740309; - const Complex* wave = (Complex*)c.wave; const int * itab = c.itab; @@ -775,30 +1031,18 @@ DFT(const OcvDftOptions & c, const Complex* src, Complex* dst) for( ; n < c.factors[0]; ) { // do the remaining radix-2 transform - nx = n; n *= 2; dw0 /= 2; - for( i = 0; i < c.n; i += n ) + if(c.haveSSE3) { - Complex* v = dst + i; - T r0 = v[0].re + v[nx].re; - T i0 = v[0].im + v[nx].im; - T r1 = v[0].re - v[nx].re; - T i1 = v[0].im - v[nx].im; - v[0].re = r0; v[0].im = i0; - v[nx].re = r1; v[nx].im = i1; - - for( j = 1, dw = dw0; j < nx; j++, dw += dw0 ) - { - v = dst + i + j; - r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; - i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im; - r0 = v[0].re; i0 = v[0].im; - - v[0].re = r0 + r1; v[0].im = i0 + i1; - v[nx].re = r0 - r1; v[nx].im = i0 - i1; - } + DFT_VecR2 vr2; + vr2(dst, c.n, n, dw0, wave); + } + else + { + DFT_R2 vr2; + vr2(dst, c.n, n, dw0, wave); } } } @@ -813,94 +1057,21 @@ DFT(const OcvDftOptions & c, const Complex* src, Complex* dst) if( factor == 3 ) { - // radix-3 - for( i = 0; i < c.n; i += n ) + if(c.haveSSE3) { - Complex* v = dst + i; - - T r1 = v[nx].re + v[nx*2].re; - T i1 = v[nx].im + v[nx*2].im; - T r0 = v[0].re; - T i0 = v[0].im; - T r2 = sin_120*(v[nx].im - v[nx*2].im); - T i2 = sin_120*(v[nx*2].re - v[nx].re); - v[0].re = r0 + r1; v[0].im = i0 + i1; - r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; - v[nx].re = r0 + r2; v[nx].im = i0 + i2; - v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; - - for( j = 1, dw = dw0; j < nx; j++, dw += dw0 ) - { - v = dst + i + j; - r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; - i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re; - i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im; - r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re; - r1 = r0 + i2; i1 = i0 + r2; - - r2 = sin_120*(i0 - r2); i2 = sin_120*(i2 - r0); - r0 = v[0].re; i0 = v[0].im; - v[0].re = r0 + r1; v[0].im = i0 + i1; - r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; - v[nx].re = r0 + r2; v[nx].im = i0 + i2; - v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; - } + DFT_VecR3 vr3; + vr3(dst, c.n, n, dw0, wave); + } + else + { + DFT_R3 vr3; + vr3(dst, c.n, n, dw0, wave); } } else if( factor == 5 ) { - // radix-5 - for( i = 0; i < c.n; i += n ) - { - for( j = 0, dw = 0; j < nx; j++, dw += dw0 ) - { - Complex* v0 = dst + i + j; - Complex* v1 = v0 + nx*2; - Complex* v2 = v1 + nx*2; - - T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5; - - r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im; - i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re; - r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im; - i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re; - - r1 = r3 + r2; i1 = i3 + i2; - r3 -= r2; i3 -= i2; - - r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im; - i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re; - r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im; - i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re; - - r2 = r4 + r0; i2 = i4 + i0; - r4 -= r0; i4 -= i0; - - r0 = v0[0].re; i0 = v0[0].im; - r5 = r1 + r2; i5 = i1 + i2; - - v0[0].re = r0 + r5; v0[0].im = i0 + i5; - - r0 -= (T)0.25*r5; i0 -= (T)0.25*i5; - r1 = fft5_2*(r1 - r2); i1 = fft5_2*(i1 - i2); - r2 = -fft5_3*(i3 + i4); i2 = fft5_3*(r3 + r4); - - i3 *= -fft5_5; r3 *= fft5_5; - i4 *= -fft5_4; r4 *= fft5_4; - - r5 = r2 + i3; i5 = i2 + r3; - r2 -= i4; i2 -= r4; - - r3 = r0 + r1; i3 = i0 + i1; - r0 -= r1; i0 -= i1; - - v0[nx].re = r3 + r2; v0[nx].im = i3 + i2; - v2[0].re = r3 - r2; v2[0].im = i3 - i2; - - v1[0].re = r0 + r5; v1[0].im = i0 + i5; - v1[nx].re = r0 - r5; v1[nx].im = i0 - i5; - } - } + DFT_R5 vr5; + vr5(dst, c.n, n, dw0, wave); } else { diff --git a/modules/features2d/src/keypoint.cpp b/modules/features2d/src/keypoint.cpp index 219634e5b4..bab1e22b45 100644 --- a/modules/features2d/src/keypoint.cpp +++ b/modules/features2d/src/keypoint.cpp @@ -44,9 +44,9 @@ namespace cv { -struct KeypointResponseGreaterThanThreshold +struct KeypointResponseGreaterThanOrEqualToThreshold { - KeypointResponseGreaterThanThreshold(float _value) : + KeypointResponseGreaterThanOrEqualToThreshold(float _value) : value(_value) { } @@ -83,7 +83,7 @@ void KeyPointsFilter::retainBest(std::vector& keypoints, int n_points) //use std::partition to grab all of the keypoints with the boundary response. std::vector::const_iterator new_end = std::partition(keypoints.begin() + n_points, keypoints.end(), - KeypointResponseGreaterThanThreshold(ambiguous_response)); + KeypointResponseGreaterThanOrEqualToThreshold(ambiguous_response)); //resize the keypoints, given this new end point. nth_element and partition reordered the points inplace keypoints.resize(new_end - keypoints.begin()); } diff --git a/modules/videoio/src/cap_mfx_reader.cpp b/modules/videoio/src/cap_mfx_reader.cpp index f178909913..330f9bd340 100644 --- a/modules/videoio/src/cap_mfx_reader.cpp +++ b/modules/videoio/src/cap_mfx_reader.cpp @@ -112,6 +112,7 @@ VideoCapture_IntelMFX::VideoCapture_IntelMFX(const cv::String &filename) return; } + frameSize = Size(params.mfx.FrameInfo.CropW, params.mfx.FrameInfo.CropH); good = true; } @@ -127,10 +128,23 @@ VideoCapture_IntelMFX::~VideoCapture_IntelMFX() cleanup(deviceHandler); } -double VideoCapture_IntelMFX::getProperty(int) const +double VideoCapture_IntelMFX::getProperty(int prop) const { - MSG(cerr << "MFX: getProperty() is not implemented" << endl); - return 0; + if (!good) + { + MSG(cerr << "MFX: can not call getProperty(), backend has not been initialized" << endl); + return 0; + } + switch (prop) + { + case CAP_PROP_FRAME_WIDTH: + return frameSize.width; + case CAP_PROP_FRAME_HEIGHT: + return frameSize.height; + default: + MSG(cerr << "MFX: unsupported property" << endl); + return 0; + } } bool VideoCapture_IntelMFX::setProperty(int, double) diff --git a/modules/videoio/src/cap_mfx_reader.hpp b/modules/videoio/src/cap_mfx_reader.hpp index 26e8f77e3b..122bc50763 100644 --- a/modules/videoio/src/cap_mfx_reader.hpp +++ b/modules/videoio/src/cap_mfx_reader.hpp @@ -34,6 +34,7 @@ private: MFXVideoDECODE *decoder; SurfacePool *pool; void *outSurface; + cv::Size frameSize; bool good; }; diff --git a/modules/videoio/test/test_mfx.cpp b/modules/videoio/test/test_mfx.cpp index 363345635d..2048fe5af9 100644 --- a/modules/videoio/test/test_mfx.cpp +++ b/modules/videoio/test/test_mfx.cpp @@ -118,6 +118,8 @@ TEST_P(videoio_mfx, read_write_raw) VideoCapture cap; cap.open(filename, CAP_INTEL_MFX); ASSERT_TRUE(cap.isOpened()); + EXPECT_EQ(FRAME_SIZE.width, cap.get(CAP_PROP_FRAME_WIDTH)); + EXPECT_EQ(FRAME_SIZE.height, cap.get(CAP_PROP_FRAME_HEIGHT)); for (int i = 0; i < FRAME_COUNT; ++i) { ASSERT_TRUE(cap.read(frame));