From 41678fe3d3034683d6a8c6872295573af78ec2ae Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 7 Jul 2020 13:26:43 +0300 Subject: [PATCH 01/10] Fixed checkMasks in DescriptorMatcher with train descs in UMats --- modules/features2d/src/matchers.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/modules/features2d/src/matchers.cpp b/modules/features2d/src/matchers.cpp index d39afe1ade..ee6e15bb31 100644 --- a/modules/features2d/src/matchers.cpp +++ b/modules/features2d/src/matchers.cpp @@ -622,15 +622,20 @@ void DescriptorMatcher::checkMasks( InputArrayOfArrays _masks, int queryDescript if( isMaskSupported() && !masks.empty() ) { // Check masks - size_t imageCount = std::max(trainDescCollection.size(), utrainDescCollection.size() ); + const size_t imageCount = std::max(trainDescCollection.size(), utrainDescCollection.size() ); CV_Assert( masks.size() == imageCount ); for( size_t i = 0; i < imageCount; i++ ) { - if( !masks[i].empty() && (!trainDescCollection[i].empty() || !utrainDescCollection[i].empty() ) ) + if (masks[i].empty()) + continue; + const bool hasTrainDesc = !trainDescCollection.empty() && !trainDescCollection[i].empty(); + const bool hasUTrainDesc = !utrainDescCollection.empty() && !utrainDescCollection[i].empty(); + if (hasTrainDesc || hasUTrainDesc) { - int rows = trainDescCollection[i].empty() ? utrainDescCollection[i].rows : trainDescCollection[i].rows; - CV_Assert( masks[i].rows == queryDescriptorsCount && - masks[i].cols == rows && masks[i].type() == CV_8UC1); + const int rows = hasTrainDesc ? trainDescCollection[i].rows : utrainDescCollection[i].rows; + CV_Assert(masks[i].type() == CV_8UC1 + && masks[i].rows == queryDescriptorsCount + && masks[i].cols == rows); } } } From 4dd9a36a3c03b13848c7b2f9cd0c8aad7d65ef3a Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Thu, 9 Jul 2020 16:03:04 +0300 Subject: [PATCH 02/10] Added test for checkMasks with UMat train descs --- .../features2d/test/test_matchers_algorithmic.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/modules/features2d/test/test_matchers_algorithmic.cpp b/modules/features2d/test/test_matchers_algorithmic.cpp index a7116e9bc3..01d08ffe02 100644 --- a/modules/features2d/test/test_matchers_algorithmic.cpp +++ b/modules/features2d/test/test_matchers_algorithmic.cpp @@ -565,7 +565,6 @@ TEST(Features2d_DMatch, issue_11855) 1, 1, 1); Mat targets = (Mat_(2, 3) << 1, 1, 1, 0, 0, 0); - Ptr bf = BFMatcher::create(NORM_HAMMING, true); vector > match; bf->knnMatch(sources, targets, match, 1, noArray(), true); @@ -577,4 +576,18 @@ TEST(Features2d_DMatch, issue_11855) EXPECT_EQ(0.0f, match[0][0].distance); } +TEST(Features2d_DMatch, issue_17771) +{ + Mat sources = (Mat_(2, 3) << 1, 1, 0, + 1, 1, 1); + Mat targets = (Mat_(2, 3) << 1, 1, 1, + 0, 0, 0); + UMat usources = sources.getUMat(ACCESS_READ); + UMat utargets = targets.getUMat(ACCESS_READ); + vector > match; + Ptr ubf = BFMatcher::create(NORM_HAMMING); + Mat mask = (Mat_(2, 2) << 1, 0, 0, 1); + EXPECT_NO_THROW(ubf->knnMatch(usources, utargets, match, 1, mask, true)); +} + }} // namespace From 0df8fb70b4cadb5aa9babe023075cd3db221830a Mon Sep 17 00:00:00 2001 From: Yosshi999 Date: Thu, 9 Jul 2020 16:50:20 +0000 Subject: [PATCH 03/10] use bufferarea for allocating buffer --- modules/features2d/src/sift.simd.hpp | 29 ++++++++++++---------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/modules/features2d/src/sift.simd.hpp b/modules/features2d/src/sift.simd.hpp index fefed638c5..9899862931 100644 --- a/modules/features2d/src/sift.simd.hpp +++ b/modules/features2d/src/sift.simd.hpp @@ -73,6 +73,7 @@ #include #include "opencv2/core/hal/intrin.hpp" +#include namespace cv { @@ -167,23 +168,17 @@ float calcOrientationHist( int i, j, k, len = (radius*2+1)*(radius*2+1); float expf_scale = -1.f/(2.f * sigma * sigma); -#if CV_SIMD - AutoBuffer bufX(len + v_float32::nlanes); - AutoBuffer bufY(len + v_float32::nlanes); - AutoBuffer bufO(len + v_float32::nlanes); - AutoBuffer bufW(len + v_float32::nlanes); - AutoBuffer bufT(n+4 + v_float32::nlanes); - float *X = alignPtr(bufX.data(), CV_SIMD_WIDTH); - float *Y = alignPtr(bufY.data(), CV_SIMD_WIDTH); - float *Mag = X; - float *Ori = alignPtr(bufO.data(), CV_SIMD_WIDTH); - float *W = alignPtr(bufW.data(), CV_SIMD_WIDTH); - float *temphist = alignPtr(bufT.data(), CV_SIMD_WIDTH)+2; -#else - AutoBuffer buf(len*4 + n+4); - float *X = buf.data(), *Y = X + len, *Mag = X, *Ori = Y + len, *W = Ori + len; - float* temphist = W + len + 2; -#endif + + cv::utils::BufferArea area; + float *X = 0, *Y = 0, *Mag, *Ori = 0, *W = 0, *temphist = 0; + area.allocate(X, len, CV_SIMD_WIDTH); + area.allocate(Y, len, CV_SIMD_WIDTH); + area.allocate(Ori, len, CV_SIMD_WIDTH); + area.allocate(W, len, CV_SIMD_WIDTH); + area.allocate(temphist, n+4, CV_SIMD_WIDTH); + area.commit(); + temphist += 2; + Mag = X; for( i = 0; i < n; i++ ) temphist[i] = 0.f; From 476094ad5af304375bb557e447b129a6bb615cb4 Mon Sep 17 00:00:00 2001 From: jsxyhelu Date: Tue, 23 Jun 2020 20:39:55 +0800 Subject: [PATCH 04/10] =?UTF-8?q?Use=E2=80=9C=20moms=E2=80=9D=20replace=20?= =?UTF-8?q?"contourArea"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit double area = moms.m00; is same as double area = contourArea(contours[contourIdx]); Not to mention "moms" already calculated here,"contourArea" should not apply --- modules/features2d/src/blobdetector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/features2d/src/blobdetector.cpp b/modules/features2d/src/blobdetector.cpp index c973b09764..d07e8bae83 100644 --- a/modules/features2d/src/blobdetector.cpp +++ b/modules/features2d/src/blobdetector.cpp @@ -257,7 +257,7 @@ void SimpleBlobDetectorImpl::findBlobs(InputArray _image, InputArray _binaryImag { std::vector < Point > hull; convexHull(contours[contourIdx], hull); - double area = contourArea(contours[contourIdx]); + double area = moms.m00; double hullArea = contourArea(hull); if (fabs(hullArea) < DBL_EPSILON) continue; From 5cb8619eca7d7bd1d9144c76fae6f951a21f5c49 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 10 Jul 2020 14:29:21 +0000 Subject: [PATCH 05/10] dnn(ie): enable KEY_CPU_THREADS_NUM for Windows --- modules/dnn/src/op_inf_engine.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index 7020e3b0a9..43fb5999d9 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -831,18 +831,18 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::CNNNetwork& net) CV_LOG_INFO(NULL, "DNN-IE: Can't register OpenCV custom layers extension: " << e.what()); } #endif -#ifndef _WIN32 // Limit the number of CPU threads. #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) +#ifndef _WIN32 enginePtr->SetConfig({{ InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, format("%d", getNumThreads()), }}, 0); +#endif // _WIN32 #else if (device_name == "CPU") ie.SetConfig({{ InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, format("%d", getNumThreads()), }}, device_name); -#endif #endif } #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) From e54040d5408070a071c0a98390e06ecfd9f5ef10 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 12 Jul 2020 11:53:46 +0000 Subject: [PATCH 06/10] core: use lazy on-demand initialization for param_traceEnable --- modules/core/src/trace.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/modules/core/src/trace.cpp b/modules/core/src/trace.cpp index 4245ab2080..c316737b13 100644 --- a/modules/core/src/trace.cpp +++ b/modules/core/src/trace.cpp @@ -72,9 +72,13 @@ static int64 getTimestamp() return (int64)((t - g_zero_timestamp) * tick_to_ns); } -// TODO lazy configuration flags -static bool param_traceEnable = utils::getConfigurationParameterBool("OPENCV_TRACE", false); +static bool getParameterTraceEnable() +{ + static bool param_traceEnable = utils::getConfigurationParameterBool("OPENCV_TRACE", false); + return param_traceEnable; +} +// TODO lazy configuration flags static int param_maxRegionDepthOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_DEPTH_OPENCV", 1); static int param_maxRegionChildrenOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN_OPENCV", 1000); static int param_maxRegionChildren = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN", 10000); @@ -841,7 +845,7 @@ TraceManager::TraceManager() CV_LOG("TraceManager ctor: " << (void*)this); CV_LOG("TraceManager configure()"); - activated = param_traceEnable; + activated = getParameterTraceEnable(); if (activated) trace_storage.reset(new SyncTraceStorage(std::string(param_traceLocation) + ".txt")); From 269b81060106c6cff77b1ebe5cfba74cba6466d4 Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Sun, 12 Jul 2020 21:22:12 +0900 Subject: [PATCH 07/10] re-enable automatic CC detection on Jetson * treat both CMAKE_C_COMPILER and c_compiler_realpath as candidate --- cmake/OpenCVDetectCUDA.cmake | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index ef3e0184a0..670bfebd2f 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -106,7 +106,18 @@ if(CUDA_FOUND) if(OPENCV_CUDA_DETECTION_NVCC_FLAGS MATCHES "-ccbin") # already specified by user elseif(CUDA_HOST_COMPILER AND EXISTS "${CUDA_HOST_COMPILER}") - LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${CUDA_HOST_COMPILER}") + get_filename_component(c_compiler_realpath "${CMAKE_C_COMPILER}" REALPATH) + # C compiler doesn't work with --run option, forcing C++ compiler instead + if(CUDA_HOST_COMPILER STREQUAL c_compiler_realpath OR CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER) + if(DEFINED CMAKE_CXX_COMPILER) + get_filename_component(cxx_compiler_realpath "${CMAKE_CXX_COMPILER}" REALPATH) + LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${cxx_compiler_realpath}") + else() + message(STATUS "CUDA: CMAKE_CXX_COMPILER is not available. You may need to specify CUDA_HOST_COMPILER.") + endif() + else() + LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${CUDA_HOST_COMPILER}") + endif() elseif(WIN32 AND CMAKE_LINKER) # Workaround for VS cl.exe not being in the env. path get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY) LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${host_compiler_bindir}") From 749bd8009108547ed5fef4f7498e67ff72287698 Mon Sep 17 00:00:00 2001 From: jasonKercher Date: Mon, 13 Jul 2020 08:29:54 -0400 Subject: [PATCH 08/10] Merge pull request #17770 from jasonKercher:3.4_triggered 3.4 Allow first capture to return false * fix first capture timeout * fix first capture timeout --- modules/videoio/src/cap_v4l.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp index dc82e84571..555af5a1a2 100644 --- a/modules/videoio/src/cap_v4l.cpp +++ b/modules/videoio/src/cap_v4l.cpp @@ -1033,15 +1033,16 @@ bool CvCaptureCAM_V4L::grabFrame() return false; } + // No need to skip this if the first read returns false + /* preparation is ok */ + FirstCapture = false; + #if defined(V4L_ABORT_BADJPEG) // skip first frame. it is often bad -- this is unnotied in traditional apps, // but could be fatal if bad jpeg is enabled if (!read_frame_v4l2()) return false; #endif - - /* preparation is ok */ - FirstCapture = false; } // In the case that the grab frame was without retrieveFrame if (bufferIndex >= 0) From c90e8243426f26ebf4e4bd539f46d2dbc1ab14c2 Mon Sep 17 00:00:00 2001 From: pemmanuelviel Date: Mon, 13 Jul 2020 14:59:10 +0200 Subject: [PATCH 09/10] Merge pull request #17639 from pemmanuelviel:pev--binary-kmeans Pev binary kmeans * Ongoing work transposing kmeans clustering method for bitfields: the computeClustering method Ongoing work transposing kmeans clustering method for bitfields: interface computeBitfieldClustering Fix genericity of computeNodeStatistics Ongoing work transposing kmeans clustering method for bitfields: adapt computeNodeStatistics() Ongoing work transposing kmeans clustering method for bitfields: adapt findNN() method Ongoing work transposing kmeans clustering method for bitfields: allow kmeans with Hamming distance Ongoing work transposing kmeans clustering method for bitfields: adapt distances code Ongoing work transposing kmeans clustering method for bitfields: adapt load/save code Ongoing work transposing kmeans clustering method for bitfields: adapt kmeans hierarchicalClustring() PivotType -> CentersType Renaming Fix type casting for ARM SIMD implementation of Hamming Fix warnings with Win32 compilation Fix warnings with Win64 compilation Fix wrong parenthesis position on rounding * Ensure proper rounding when CentersType is integral --- modules/flann/include/opencv2/flann.hpp | 8 +- .../flann/include/opencv2/flann/all_indices.h | 3 + modules/flann/include/opencv2/flann/dist.h | 316 ++++++++++++--- .../include/opencv2/flann/flann_base.hpp | 2 +- .../include/opencv2/flann/kmeans_index.h | 370 ++++++++++++++++-- 5 files changed, 605 insertions(+), 94 deletions(-) diff --git a/modules/flann/include/opencv2/flann.hpp b/modules/flann/include/opencv2/flann.hpp index 887759e643..674e6583c5 100644 --- a/modules/flann/include/opencv2/flann.hpp +++ b/modules/flann/include/opencv2/flann.hpp @@ -536,7 +536,7 @@ private: @param features The points to be clustered. The matrix must have elements of type Distance::ElementType. @param centers The centers of the clusters obtained. The matrix must have type -Distance::ResultType. The number of rows in this matrix represents the number of clusters desired, +Distance::CentersType. The number of rows in this matrix represents the number of clusters desired, however, because of the way the cut in the hierarchical tree is chosen, the number of clusters computed will be the highest number of the form (branching-1)\*k+1 that's lower than the number of clusters desired, where branching is the tree's branching factor (see description of the @@ -553,15 +553,15 @@ int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::K Distance d = Distance()) { typedef typename Distance::ElementType ElementType; - typedef typename Distance::ResultType DistanceType; + typedef typename Distance::CentersType CentersType; CV_Assert(features.type() == CvType::type()); CV_Assert(features.isContinuous()); ::cvflann::Matrix m_features((ElementType*)features.ptr(0), features.rows, features.cols); - CV_Assert(centers.type() == CvType::type()); + CV_Assert(centers.type() == CvType::type()); CV_Assert(centers.isContinuous()); - ::cvflann::Matrix m_centers((DistanceType*)centers.ptr(0), centers.rows, centers.cols); + ::cvflann::Matrix m_centers((CentersType*)centers.ptr(0), centers.rows, centers.cols); return ::cvflann::hierarchicalClustering(m_features, m_centers, params, d); } diff --git a/modules/flann/include/opencv2/flann/all_indices.h b/modules/flann/include/opencv2/flann/all_indices.h index ba5a2f2dde..2de18af24a 100644 --- a/modules/flann/include/opencv2/flann/all_indices.h +++ b/modules/flann/include/opencv2/flann/all_indices.h @@ -130,6 +130,9 @@ struct index_creator case FLANN_INDEX_LINEAR: nnIndex = new LinearIndex(dataset, params, distance); break; + case FLANN_INDEX_KMEANS: + nnIndex = new KMeansIndex(dataset, params, distance); + break; case FLANN_INDEX_HIERARCHICAL: nnIndex = new HierarchicalClusteringIndex(dataset, params, distance); break; diff --git a/modules/flann/include/opencv2/flann/dist.h b/modules/flann/include/opencv2/flann/dist.h index 4cf32d5987..e41b994d7e 100644 --- a/modules/flann/include/opencv2/flann/dist.h +++ b/modules/flann/include/opencv2/flann/dist.h @@ -1,4 +1,4 @@ -/*********************************************************************** +/*********************************************************************** * Software License Agreement (BSD License) * * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. @@ -68,6 +68,63 @@ inline float abs(float x) { return fabsf(x); } template<> inline double abs(double x) { return fabs(x); } + +template +inline TargetType round(float x) { return static_cast(x); } + +template<> +inline unsigned int round(float x) { return static_cast(x + 0.5f); } + +template<> +inline unsigned short round(float x) { return static_cast(x + 0.5f); } + +template<> +inline unsigned char round(float x) { return static_cast(x + 0.5f); } + +template<> +inline long long round(float x) { return static_cast(x + 0.5f); } + +template<> +inline long round(float x) { return static_cast(x + 0.5f); } + +template<> +inline int round(float x) { return static_cast(x + 0.5f) - (x<0); } + +template<> +inline short round(float x) { return static_cast(x + 0.5f) - (x<0); } + +template<> +inline char round(float x) { return static_cast(x + 0.5f) - (x<0); } + + +template +inline TargetType round(double x) { return static_cast(x); } + +template<> +inline unsigned int round(double x) { return static_cast(x + 0.5); } + +template<> +inline unsigned short round(double x) { return static_cast(x + 0.5); } + +template<> +inline unsigned char round(double x) { return static_cast(x + 0.5); } + +template<> +inline long long round(double x) { return static_cast(x + 0.5); } + +template<> +inline long round(double x) { return static_cast(x + 0.5); } + +template<> +inline int round(double x) { return static_cast(x + 0.5) - (x<0); } + +template<> +inline short round(double x) { return static_cast(x + 0.5) - (x<0); } + +template<> +inline char round(double x) { return static_cast(x + 0.5) - (x<0); } + + template struct Accumulator { typedef T Type; }; template<> @@ -88,13 +145,57 @@ struct Accumulator { typedef float Type; }; class True { +public: + static const bool val = true; }; class False { +public: + static const bool val = false; }; +/* + * This is a "zero iterator". It basically behaves like a zero filled + * array to all algorithms that use arrays as iterators (STL style). + * It's useful when there's a need to compute the distance between feature + * and origin it and allows for better compiler optimisation than using a + * zero-filled array. + */ +template +struct ZeroIterator +{ + + T operator*() + { + return 0; + } + + T operator[](int) + { + return 0; + } + + const ZeroIterator& operator ++() + { + return *this; + } + + ZeroIterator operator ++(int) + { + return *this; + } + + ZeroIterator& operator+=(int) + { + return *this; + } + +}; + + + /** * Squared Euclidean distance functor. * @@ -109,6 +210,7 @@ struct L2_Simple typedef T ElementType; typedef typename Accumulator::Type ResultType; + typedef ResultType CentersType; template ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const @@ -142,6 +244,7 @@ struct L2 typedef T ElementType; typedef typename Accumulator::Type ResultType; + typedef ResultType CentersType; /** * Compute the squared Euclidean distance between two vectors. @@ -207,6 +310,7 @@ struct L1 typedef T ElementType; typedef typename Accumulator::Type ResultType; + typedef ResultType CentersType; /** * Compute the Manhattan (L_1) distance between two vectors. @@ -264,6 +368,7 @@ struct MinkowskiDistance typedef T ElementType; typedef typename Accumulator::Type ResultType; + typedef ResultType CentersType; int order; @@ -328,6 +433,7 @@ struct MaxDistance typedef T ElementType; typedef typename Accumulator::Type ResultType; + typedef ResultType CentersType; /** * Compute the max distance (L_infinity) between two vectors. @@ -385,10 +491,12 @@ struct HammingLUT typedef unsigned char ElementType; typedef int ResultType; + typedef ElementType CentersType; /** this will count the bits in a ^ b */ - ResultType operator()(const unsigned char* a, const unsigned char* b, size_t size) const + template + ResultType operator()(const unsigned char* a, const Iterator2 b, size_t size) const { static const uchar popCountTable[] = { @@ -402,8 +510,31 @@ struct HammingLUT 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; ResultType result = 0; + const unsigned char* b2 = reinterpret_cast (b); for (size_t i = 0; i < size; i++) { - result += popCountTable[a[i] ^ b[i]]; + result += popCountTable[a[i] ^ b2[i]]; + } + return result; + } + + + ResultType operator()(const unsigned char* a, const ZeroIterator b, size_t size) const + { + (void)b; + static const uchar popCountTable[] = + { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 + }; + ResultType result = 0; + for (size_t i = 0; i < size; i++) { + result += popCountTable[a[i]]; } return result; } @@ -422,17 +553,20 @@ struct Hamming typedef T ElementType; typedef int ResultType; + typedef ElementType CentersType; template - ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const + ResultType operator()(const Iterator1 a, const Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const { ResultType result = 0; #if defined(__ARM_NEON__) && !defined(__CUDACC__) { + const unsigned char* a2 = reinterpret_cast (a); + const unsigned char* b2 = reinterpret_cast (b); uint32x4_t bits = vmovq_n_u32(0); for (size_t i = 0; i < size; i += 16) { - uint8x16_t A_vec = vld1q_u8 (a + i); - uint8x16_t B_vec = vld1q_u8 (b + i); + uint8x16_t A_vec = vld1q_u8 (a2 + i); + uint8x16_t B_vec = vld1q_u8 (b2 + i); uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); uint8x16_t bitsSet = vcntq_u8 (AxorB); uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); @@ -470,6 +604,52 @@ struct Hamming #endif return result; } + + + template + ResultType operator()(const Iterator1 a, ZeroIterator b, size_t size, ResultType /*worst_dist*/ = -1) const + { + (void)b; + ResultType result = 0; +#if defined(__ARM_NEON__) && !defined(__CUDACC__) + { + const unsigned char* a2 = reinterpret_cast (a); + uint32x4_t bits = vmovq_n_u32(0); + for (size_t i = 0; i < size; i += 16) { + uint8x16_t A_vec = vld1q_u8 (a2 + i); + uint8x16_t bitsSet = vcntq_u8 (A_vec); + uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); + uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); + bits = vaddq_u32(bits, bitSet4); + } + uint64x2_t bitSet2 = vpaddlq_u32 (bits); + result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); + result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); + } +#elif defined(__GNUC__) + { + //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll) + typedef unsigned long long pop_t; + const size_t modulo = size % sizeof(pop_t); + const pop_t* a2 = reinterpret_cast (a); + const pop_t* a2_end = a2 + (size / sizeof(pop_t)); + + for (; a2 != a2_end; ++a2) result += __builtin_popcountll(*a2); + + if (modulo) { + //in the case where size is not dividable by sizeof(size_t) + //need to mask off the bits at the end + pop_t a_final = 0; + memcpy(&a_final, a2, modulo); + result += __builtin_popcountll(a_final); + } + } +#else // NO NEON and NOT GNUC + HammingLUT lut; + result = lut(reinterpret_cast (a), b, size); +#endif + return result; + } }; template @@ -480,6 +660,7 @@ struct Hamming2 typedef T ElementType; typedef int ResultType; + typedef ElementType CentersType; /** This is popcount_3() from: * http://en.wikipedia.org/wiki/Hamming_weight */ @@ -500,7 +681,7 @@ struct Hamming2 #endif template - ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const + ResultType operator()(const Iterator1 a, const Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const { #ifdef FLANN_PLATFORM_64_BIT const uint64_t* pa = reinterpret_cast(a); @@ -526,6 +707,31 @@ struct Hamming2 return result; } + + template + ResultType operator()(const Iterator1 a, ZeroIterator b, size_t size, ResultType /*worst_dist*/ = -1) const + { + (void)b; +#ifdef FLANN_PLATFORM_64_BIT + const uint64_t* pa = reinterpret_cast(a); + ResultType result = 0; + size /= long_word_size_; + for(size_t i = 0; i < size; ++i ) { + result += popcnt64(*pa); + ++pa; + } +#else + const uint32_t* pa = reinterpret_cast(a); + ResultType result = 0; + size /= long_word_size_; + for(size_t i = 0; i < size; ++i ) { + result += popcnt32(*pa); + ++pa; + } +#endif + return result; + } + private: #ifdef FLANN_PLATFORM_64_BIT static const size_t long_word_size_ = sizeof(uint64_t)/sizeof(unsigned char); @@ -546,6 +752,7 @@ struct HistIntersectionDistance typedef T ElementType; typedef typename Accumulator::Type ResultType; + typedef ResultType CentersType; /** * Compute the histogram intersection distance @@ -601,6 +808,7 @@ struct HellingerDistance typedef T ElementType; typedef typename Accumulator::Type ResultType; + typedef ResultType CentersType; /** * Compute the Hellinger distance @@ -650,6 +858,7 @@ struct ChiSquareDistance typedef T ElementType; typedef typename Accumulator::Type ResultType; + typedef ResultType CentersType; /** * Compute the chi-square distance @@ -704,6 +913,7 @@ struct KL_Divergence typedef T ElementType; typedef typename Accumulator::Type ResultType; + typedef ResultType CentersType; /** * Compute the Kullback-Leibler divergence @@ -749,46 +959,6 @@ struct KL_Divergence }; - -/* - * This is a "zero iterator". It basically behaves like a zero filled - * array to all algorithms that use arrays as iterators (STL style). - * It's useful when there's a need to compute the distance between feature - * and origin it and allows for better compiler optimisation than using a - * zero-filled array. - */ -template -struct ZeroIterator -{ - - T operator*() - { - return 0; - } - - T operator[](int) - { - return 0; - } - - const ZeroIterator& operator ++() - { - return *this; - } - - ZeroIterator operator ++(int) - { - return *this; - } - - ZeroIterator& operator+=(int) - { - return *this; - } - -}; - - /* * Depending on processed distances, some of them are already squared (e.g. L2) * and some are not (e.g.Hamming). In KMeans++ for instance we want to be sure @@ -849,6 +1019,58 @@ typename Distance::ResultType ensureSquareDistance( typename Distance::ResultTyp } +/* + * ...a template to tell the user if the distance he is working with is actually squared + */ + +template +struct isSquareDist +{ + bool operator()() { return false; } +}; + + +template +struct isSquareDist, ElementType> +{ + bool operator()() { return true; } +}; + +template +struct isSquareDist, ElementType> +{ + bool operator()() { return true; } +}; + + +template +struct isSquareDist, ElementType> +{ + bool operator()() { return true; } +}; + +template +struct isSquareDist, ElementType> +{ + bool operator()() { return true; } +}; + +template +struct isSquareDist, ElementType> +{ + bool operator()() { return true; } +}; + + +template +bool isSquareDistance() +{ + typedef typename Distance::ElementType ElementType; + + isSquareDist dummy; + return dummy(); +} + /* * ...and a template to ensure the user that he will process the normal distance, * and not squared distance, without losing processing time calling sqrt(ensureSquareDistance) diff --git a/modules/flann/include/opencv2/flann/flann_base.hpp b/modules/flann/include/opencv2/flann/flann_base.hpp index 641fdb01e2..0f23930024 100644 --- a/modules/flann/include/opencv2/flann/flann_base.hpp +++ b/modules/flann/include/opencv2/flann/flann_base.hpp @@ -282,7 +282,7 @@ private: * of the form (branching-1)*K+1 smaller than clusters.rows). */ template -int hierarchicalClustering(const Matrix& points, Matrix& centers, +int hierarchicalClustering(const Matrix& points, Matrix& centers, const KMeansIndexParams& params, Distance d = Distance()) { KMeansIndex kmeans(points, params, d); diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h index e290fc5a26..a50e0cdf8d 100644 --- a/modules/flann/include/opencv2/flann/kmeans_index.h +++ b/modules/flann/include/opencv2/flann/kmeans_index.h @@ -49,6 +49,8 @@ #include "saving.h" #include "logger.h" +#define BITS_PER_CHAR 8 + namespace cvflann { @@ -83,6 +85,10 @@ class KMeansIndex : public NNIndex public: typedef typename Distance::ElementType ElementType; typedef typename Distance::ResultType DistanceType; + typedef typename Distance::CentersType CentersType; + + typedef typename Distance::is_kdtree_distance is_kdtree_distance; + typedef typename Distance::is_vector_space_distance is_vector_space_distance; @@ -272,12 +278,14 @@ public: return FLANN_INDEX_KMEANS; } + template class KMeansDistanceComputer : public cv::ParallelLoopBody { public: KMeansDistanceComputer(Distance _distance, const Matrix& _dataset, - const int _branching, const int* _indices, const Matrix& _dcenters, const size_t _veclen, - std::vector &_new_centroids, std::vector &_sq_dists) + const int _branching, const int* _indices, const CentersContainerType& _dcenters, + const size_t _veclen, std::vector &_new_centroids, + std::vector &_sq_dists) : distance(_distance) , dataset(_dataset) , branching(_branching) @@ -315,7 +323,7 @@ public: const Matrix& dataset; const int branching; const int* indices; - const Matrix& dcenters; + const CentersContainerType& dcenters; const size_t veclen; std::vector &new_centroids; std::vector &sq_dists; @@ -429,8 +437,16 @@ public: root_ = pool_.allocate(); std::memset(root_, 0, sizeof(KMeansNode)); - computeNodeStatistics(root_, indices_, (int)size_); - computeClustering(root_, indices_, (int)size_, branching_,0); + if(is_kdtree_distance::val || is_vector_space_distance::val) + { + computeNodeStatistics(root_, indices_, (unsigned int)size_); + computeClustering(root_, indices_, (int)size_, branching_,0); + } + else + { + computeBitfieldNodeStatistics(root_, indices_, (unsigned int)size_); + computeBitfieldClustering(root_, indices_, (int)size_, branching_,0); + } } @@ -515,7 +531,7 @@ public: * numClusters = number of clusters to have in the clustering computed * Returns: number of cluster centers */ - int getClusterCenters(Matrix& centers) + int getClusterCenters(Matrix& centers) { int numClusters = centers.rows; if (numClusters<1) { @@ -530,7 +546,7 @@ public: Logger::info("Clusters requested: %d, returning %d\n",numClusters, clusterCount); for (int i=0; ipivot; + CentersType* center = clusters[i]->pivot; for (size_t j=0; j(); load_value(stream, *node); - node->pivot = new DistanceType[veclen_]; + node->pivot = new CentersType[veclen_]; load_value(stream, *(node->pivot), (int)veclen_); if (node->childs==NULL) { int indices_offset; @@ -652,32 +668,31 @@ private: * indices = array of indices of the points belonging to the node * indices_length = number of indices in the array */ - void computeNodeStatistics(KMeansNodePtr node, int* indices, int indices_length) + void computeNodeStatistics(KMeansNodePtr node, int* indices, unsigned int indices_length) { - - DistanceType radius = 0; DistanceType variance = 0; - DistanceType* mean = new DistanceType[veclen_]; - memoryCounter_ += int(veclen_*sizeof(DistanceType)); + CentersType* mean = new CentersType[veclen_]; + memoryCounter_ += int(veclen_*sizeof(CentersType)); - memset(mean,0,veclen_*sizeof(DistanceType)); + memset(mean,0,veclen_*sizeof(CentersType)); - for (int i=0; i(), veclen_); } + float length = static_cast(indices_length); for (size_t j=0; j( mean[j] / static_cast(indices_length) ); } - variance /= indices_length; + variance /= static_cast( length ); variance -= distance_(mean, ZeroIterator(), veclen_); - DistanceType tmp = 0; - for (int i=0; iradius) { radius = tmp; } @@ -689,6 +704,70 @@ private: } + void computeBitfieldNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length) + { + const unsigned int accumulator_veclen = static_cast( + veclen_*sizeof(CentersType)*BITS_PER_CHAR); + + unsigned long long variance = 0ull; + CentersType* mean = new CentersType[veclen_]; + memoryCounter_ += int(veclen_*sizeof(CentersType)); + unsigned int* mean_accumulator = new unsigned int[accumulator_veclen]; + + memset(mean_accumulator, 0, accumulator_veclen); + + for (unsigned int i=0; i( ensureSquareDistance( + distance_(dataset_[indices[i]], ZeroIterator(), veclen_))); + unsigned char* vec = (unsigned char*)dataset_[indices[i]]; + for (size_t k=0, l=0; k>1) & 0x01; + mean_accumulator[k+2] += (vec[l]>>2) & 0x01; + mean_accumulator[k+3] += (vec[l]>>3) & 0x01; + mean_accumulator[k+4] += (vec[l]>>4) & 0x01; + mean_accumulator[k+5] += (vec[l]>>5) & 0x01; + mean_accumulator[k+6] += (vec[l]>>6) & 0x01; + mean_accumulator[k+7] += (vec[l]>>7) & 0x01; + } + } + double cnt = static_cast(indices_length); + unsigned char* char_mean = (unsigned char*)mean; + for (size_t k=0, l=0; k( + (((int)(0.5 + (double)(mean_accumulator[k]) / cnt))) + | (((int)(0.5 + (double)(mean_accumulator[k+1]) / cnt))<<1) + | (((int)(0.5 + (double)(mean_accumulator[k+2]) / cnt))<<2) + | (((int)(0.5 + (double)(mean_accumulator[k+3]) / cnt))<<3) + | (((int)(0.5 + (double)(mean_accumulator[k+4]) / cnt))<<4) + | (((int)(0.5 + (double)(mean_accumulator[k+5]) / cnt))<<5) + | (((int)(0.5 + (double)(mean_accumulator[k+6]) / cnt))<<6) + | (((int)(0.5 + (double)(mean_accumulator[k+7]) / cnt))<<7)); + } + variance = static_cast( + 0.5 + static_cast(variance) / static_cast(indices_length)); + variance -= static_cast( + ensureSquareDistance( + distance_(mean, ZeroIterator(), veclen_))); + + DistanceType radius = 0; + for (unsigned int i=0; iradius) { + radius = tmp; + } + } + + node->variance = static_cast(variance); + node->radius = radius; + node->pivot = mean; + + delete[] mean_accumulator; + } + + + /** * The method responsible with actually doing the recursive hierarchical * clustering @@ -737,7 +816,6 @@ private: cv::AutoBuffer belongs_to_buf(indices_length); int* belongs_to = belongs_to_buf.data(); for (int i=0; i sq_dists(indices_length); // reassign points to clusters - KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists); + KMeansDistanceComputer > invoker(distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists); parallel_for_(cv::Range(0, (int)indices_length), invoker); for (int i=0; i < (int)indices_length; ++i) { @@ -834,13 +912,13 @@ private: } - DistanceType** centers = new DistanceType*[branching]; + CentersType** centers = new CentersType*[branching]; for (int i=0; i(), veclen_); variance += d; - mean_radius += sqrt(d); + mean_radius += static_cast( sqrt(d) ); std::swap(indices[i],indices[end]); std::swap(belongs_to[i],belongs_to[end]); end++; @@ -883,6 +961,204 @@ private: + void computeBitfieldClustering(KMeansNodePtr node, int* indices, + int indices_length, int branching, int level) + { + node->size = indices_length; + node->level = level; + + if (indices_length < branching) { + node->indices = indices; + std::sort(node->indices,node->indices+indices_length); + node->childs = NULL; + return; + } + + cv::AutoBuffer centers_idx_buf(branching); + int* centers_idx = centers_idx_buf.data(); + int centers_length; + (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length); + + if (centers_lengthindices = indices; + std::sort(node->indices,node->indices+indices_length); + node->childs = NULL; + return; + } + + const unsigned int accumulator_veclen = static_cast( + veclen_*sizeof(ElementType)*BITS_PER_CHAR); + cv::AutoBuffer dcenters_buf(branching*accumulator_veclen); + Matrix dcenters(dcenters_buf.data(), branching, accumulator_veclen); + + CentersType** centers = new CentersType*[branching]; + + for (int i=0; i radiuses(branching); + cv::AutoBuffer count_buf(branching); + int* count = count_buf.data(); + for (int i=0; i belongs_to_buf(indices_length); + int* belongs_to = belongs_to_buf.data(); + for (int i=0; inew_dist) { + belongs_to[i] = j; + dist = new_dist; + } + } + if (dist>radiuses[belongs_to[i]]) { + radiuses[belongs_to[i]] = dist; + } + count[belongs_to[i]]++; + } + + bool converged = false; + int iteration = 0; + while (!converged && iteration>1) & 0x01; + dcenter[k+2] += (vec[l]>>2) & 0x01; + dcenter[k+3] += (vec[l]>>3) & 0x01; + dcenter[k+4] += (vec[l]>>4) & 0x01; + dcenter[k+5] += (vec[l]>>5) & 0x01; + dcenter[k+6] += (vec[l]>>6) & 0x01; + dcenter[k+7] += (vec[l]>>7) & 0x01; + } + } + for (int i=0; i(count[i]); + unsigned int* dcenter = dcenters[i]; + unsigned char* charCenter = (unsigned char*)centers[i]; + for (size_t k=0, l=0; k( + (((int)(0.5 + (double)(dcenter[k]) / cnt))) + | (((int)(0.5 + (double)(dcenter[k+1]) / cnt))<<1) + | (((int)(0.5 + (double)(dcenter[k+2]) / cnt))<<2) + | (((int)(0.5 + (double)(dcenter[k+3]) / cnt))<<3) + | (((int)(0.5 + (double)(dcenter[k+4]) / cnt))<<4) + | (((int)(0.5 + (double)(dcenter[k+5]) / cnt))<<5) + | (((int)(0.5 + (double)(dcenter[k+6]) / cnt))<<6) + | (((int)(0.5 + (double)(dcenter[k+7]) / cnt))<<7)); + } + } + + std::vector new_centroids(indices_length); + std::vector dists(indices_length); + + // reassign points to clusters + KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists); + parallel_for_(cv::Range(0, (int)indices_length), invoker); + + for (int i=0; i < indices_length; ++i) { + DistanceType dist(dists[i]); + int new_centroid(new_centroids[i]); + if (dist > radiuses[new_centroid]) { + radiuses[new_centroid] = dist; + } + if (new_centroid != belongs_to[i]) { + count[belongs_to[i]]--; + count[new_centroid]++; + belongs_to[i] = new_centroid; + converged = false; + } + } + + for (int i=0; ichilds = pool_.allocate(branching); + int start = 0; + int end = start; + for (int c=0; c(), veclen_); + variance += static_cast( ensureSquareDistance(d) ); + mean_radius += ensureSimpleDistance(d); + std::swap(indices[i],indices[end]); + std::swap(belongs_to[i],belongs_to[end]); + end++; + } + } + mean_radius = static_cast( + 0.5f + static_cast(mean_radius) / static_cast(s)); + variance = static_cast( + 0.5 + static_cast(variance) / static_cast(s)); + variance -= static_cast( + ensureSquareDistance( + distance_(centers[c], ZeroIterator(), veclen_))); + + node->childs[c] = pool_.allocate(); + std::memset(node->childs[c], 0, sizeof(KMeansNode)); + node->childs[c]->radius = radiuses[c]; + node->childs[c]->pivot = centers[c]; + node->childs[c]->variance = static_cast(variance); + node->childs[c]->mean_radius = mean_radius; + computeBitfieldClustering(node->childs[c],indices+start, end-start, branching, level+1); + start=end; + } + + delete[] centers; + } + + + + /** * Performs one descent in the hierarchical k-means tree. The branches not * visited are stored in a priority queue. @@ -905,12 +1181,16 @@ private: DistanceType rsq = node->radius; DistanceType wsq = result.worstDist(); - DistanceType val = bsq-rsq-wsq; - DistanceType val2 = val*val-4*rsq*wsq; - - //if (val>0) { - if ((val>0)&&(val2>0)) { - return; + if (isSquareDistance()) + { + DistanceType val = bsq-rsq-wsq; + if ((val>0) && (val*val > 4*rsq*wsq)) + return; + } + else + { + if (bsq-rsq > wsq) + return; } } @@ -956,7 +1236,8 @@ private: // float* best_center = node->childs[best_index]->pivot; for (int i=0; ichilds[i]->variance; + domain_distances[i] -= cvflann::round( + cb_index_*node->childs[i]->variance ); // float dist_to_border = getDistanceToBorder(node.childs[i].pivot,best_center,q); // if (domain_distances[i]radius; DistanceType wsq = result.worstDist(); - DistanceType val = bsq-rsq-wsq; - DistanceType val2 = val*val-4*rsq*wsq; - - // if (val>0) { - if ((val>0)&&(val2>0)) { - return; + if (isSquareDistance()) + { + DistanceType val = bsq-rsq-wsq; + if ((val>0) && (val*val > 4*rsq*wsq)) + return; + } + else + { + if (bsq-rsq > wsq) + return; } } @@ -1024,7 +1309,8 @@ private: DistanceType dist = distance_(q, node->childs[i]->pivot, veclen_); int j=0; - while (domain_distances[j]j; --k) { domain_distances[k] = domain_distances[k-1]; sort_indices[k] = sort_indices[k-1]; From 36da867caf32ed9628673aa1a79b441c5e1c83d6 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 13 Jul 2020 17:15:41 +0000 Subject: [PATCH 10/10] features2d: v_fma => v_muladd for integers --- modules/features2d/src/sift.simd.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/features2d/src/sift.simd.hpp b/modules/features2d/src/sift.simd.hpp index fefed638c5..9b869f25c9 100644 --- a/modules/features2d/src/sift.simd.hpp +++ b/modules/features2d/src/sift.simd.hpp @@ -656,7 +656,7 @@ void calcSIFTDescriptor( v_float32 v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011; v_float32 v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001; - v_int32 idx = v_fma(v_fma(r0+__1, __d_plus_2, c0+__1), __n_plus_2, o0); + v_int32 idx = v_muladd(v_muladd(r0+__1, __d_plus_2, c0+__1), __n_plus_2, o0); v_store_aligned(idx_buf, idx); v_store_aligned(rco_buf, v_rco000);