From 2241bfb0dfa788e0b516a7da9a1f70733f5431d3 Mon Sep 17 00:00:00 2001 From: Namgoo Lee Date: Thu, 30 Jul 2020 01:03:34 +0900 Subject: [PATCH 01/12] Use "src" not "*this" for source GpuMat --- modules/core/src/cuda/gpu_mat.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu index e1b0c1b22d..f31f78a87a 100644 --- a/modules/core/src/cuda/gpu_mat.cu +++ b/modules/core/src/cuda/gpu_mat.cu @@ -561,7 +561,7 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) co {convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, 0} }; - funcs[sdepth][ddepth](reshape(1), dst.reshape(1), stream); + funcs[sdepth][ddepth](src.reshape(1), dst.reshape(1), stream); } void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& stream) const @@ -591,7 +591,7 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, doub {convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale} }; - funcs[sdepth][ddepth](reshape(1), dst.reshape(1), alpha, beta, stream); + funcs[sdepth][ddepth](src.reshape(1), dst.reshape(1), alpha, beta, stream); } void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream) From 11ac26bfb43ad6e10c2bd50bbba0489e2bc26a79 Mon Sep 17 00:00:00 2001 From: Namgoo Lee Date: Thu, 30 Jul 2020 01:24:25 +0900 Subject: [PATCH 02/12] test code --- modules/cudaarithm/test/test_gpumat.cpp | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/modules/cudaarithm/test/test_gpumat.cpp b/modules/cudaarithm/test/test_gpumat.cpp index e2fed16ad5..b4d59b1644 100644 --- a/modules/cudaarithm/test/test_gpumat.cpp +++ b/modules/cudaarithm/test/test_gpumat.cpp @@ -320,6 +320,65 @@ CUDA_TEST_P(GpuMat_ConvertTo, WithScaling) } } +CUDA_TEST_P(GpuMat_ConvertTo, InplaceWithOutScaling) +{ + cv::Mat src = randomMat(size, depth1); + + if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE)) + { + try + { + cv::cuda::GpuMat d_srcDst = loadMat(src); + d_srcDst.convertTo(d_srcDst, depth2); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::cuda::GpuMat d_srcDst = loadMat(src, useRoi); + d_srcDst.convertTo(d_srcDst, depth2); + + cv::Mat dst_gold; + src.convertTo(dst_gold, depth2); + + EXPECT_MAT_NEAR(dst_gold, d_srcDst, depth2 < CV_32F ? 1.0 : 1e-4); + } +} + + +CUDA_TEST_P(GpuMat_ConvertTo, InplaceWithScaling) +{ + cv::Mat src = randomMat(size, depth1); + double a = randomDouble(0.0, 1.0); + double b = randomDouble(-10.0, 10.0); + + if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE)) + { + try + { + cv::cuda::GpuMat d_srcDst = loadMat(src); + d_srcDst.convertTo(d_srcDst, depth2, a, b); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::cuda::GpuMat d_srcDst = loadMat(src, useRoi); + d_srcDst.convertTo(d_srcDst, depth2, a, b); + + cv::Mat dst_gold; + src.convertTo(dst_gold, depth2, a, b); + + EXPECT_MAT_NEAR(dst_gold, d_srcDst, depth2 < CV_32F ? 
1.0 : 1e-4);
+    }
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA, GpuMat_ConvertTo, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,

From 6226ea00859eb49b8b83ea80db42aa6a2f58fa9a Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Thu, 6 Aug 2020 15:47:34 +0300
Subject: [PATCH 03/12] Fix bug in ONNX Gather op

---
 modules/dnn/src/onnx/onnx_importer.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 407dcdc570..f6dc285fad 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -1395,6 +1395,7 @@ void ONNXImporter::populateNet(Net dstNet)
                 inpShape.erase(inpShape.begin() + axis);

                 layerParams.type = "Reshape";
+                layerParams.set("axis", 0);
                 layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
                 node_proto.set_input(0, sliceLp.name);
             }

From fe9ff64d641be19ce38fd3c36489615704b339c2 Mon Sep 17 00:00:00 2001
From: pemmanuelviel
Date: Mon, 10 Aug 2020 15:26:40 +0200
Subject: [PATCH 04/12] Merge pull request #17643 from
 pemmanuelviel:pev--new-flann-demo

* Add a FLANN example showing how to search a query image in a dataset

* Clean: remove warning

* Replace dependency to boost::filesystem by calls to core/utils/filesystem

* Wait for escape key to exit

* Add an example of binary descriptors support

* Add program options for saving and loading the flann structure

* Fix warnings on Win64

* Fix warnings on 3.4 branch still relying on C++03

* Add ctor to img_info structure

* Comments modification

* * Demo file of FLANN moved and renamed

* Fix distances type when using binary vectors in the FLANN example

* Rename FLANN example file

* Remove dependency of the flann example to opencv_contrib's SURF.

* Remove mention of FLANN and other descriptors that aimed at giving hint
  on the other options

* Cleaner program options management

* Make waitKey usage minimal in FLANN example

* Fix the conditions order

* Use cv::Ptr
---
 samples/cpp/flann_search_dataset.cpp | 250 +++++++++++++++++++++
 1 file changed, 250 insertions(+)
 create mode 100644 samples/cpp/flann_search_dataset.cpp

diff --git a/samples/cpp/flann_search_dataset.cpp b/samples/cpp/flann_search_dataset.cpp
new file mode 100644
index 0000000000..01ef93f821
--- /dev/null
+++ b/samples/cpp/flann_search_dataset.cpp
@@ -0,0 +1,250 @@
+// flann_search_dataset.cpp
+// Naive program to search a query picture in a dataset illustrating usage of FLANN
+
+#include <iostream>
+#include <vector>
+#include "opencv2/core.hpp"
+#include "opencv2/core/utils/filesystem.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/features2d.hpp"
+#include "opencv2/flann.hpp"
+
+using namespace cv;
+using std::cout;
+using std::endl;
+
+#define _ORB_
+
+const char* keys =
+    "{ help h | | Print help message. }"
+    "{ dataset | | Path to the images folder used as dataset. }"
+    "{ image | | Path to the image to search for in the dataset. }"
+    "{ save | | Path and filename where to save the flann structure to. }"
+    "{ load | | Path and filename where to load the flann structure from. }";
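+// Example invocation of this sample (binary name and paths are illustrative):
+//   ./example_flann_search_dataset --dataset=./images/ --image=./query.png --save=./flann_index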
+
+struct img_info {
+    int img_index;
+    unsigned int nbr_of_matches;
+
+    img_info(int _img_index, unsigned int _nbr_of_matches)
+        : img_index(_img_index)
+        , nbr_of_matches(_nbr_of_matches)
+    {}
+};
+
+
+int main( int argc, char* argv[] )
+{
+    //-- Test the program options
+    CommandLineParser parser( argc, argv, keys );
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return -1;
+    }
+
+    const cv::String img_path = parser.get<String>("image");
+    Mat img = imread( samples::findFile( img_path ), IMREAD_GRAYSCALE );
+    if (img.empty() )
+    {
+        cout << "Could not open the image "<< img_path << endl;
+        return -1;
+    }
+
+    const cv::String db_path = parser.get<String>("dataset");
+    if (!utils::fs::isDirectory(db_path))
+    {
+        cout << "Dataset folder "<< db_path.c_str() <<" doesn't exist!" << endl;
+        return -1;
+    }
+
+    const cv::String load_db_path = parser.get<String>("load");
+    if ((load_db_path != String()) && (!utils::fs::exists(load_db_path)))
+    {
+        cout << "File " << load_db_path.c_str()
+             << " where to load the flann structure from doesn't exist!" << endl;
+        return -1;
+    }
+
+    const cv::String save_db_path = parser.get<String>("save");
+
+    //-- Step 1: Detect the keypoints using a detector, compute the descriptors
+    //   in the folder containing the images of the dataset
+#ifdef _SIFT_
+    int minHessian = 400;
+    Ptr<Feature2D> detector = SIFT::create( minHessian );
+#elif defined(_ORB_)
+    Ptr<Feature2D> detector = ORB::create();
+#else
+    cout << "Missing or unknown defined descriptor. "
+            "Only SIFT and ORB are currently interfaced here" << endl;
+    return -1;
+#endif
+
+    std::vector<KeyPoint> db_keypoints;
+    Mat db_descriptors;
+    std::vector<unsigned int> db_images_indice_range; //store the range of indices per image
+    std::vector<int> db_indice_2_image_lut;           //match descriptor indice to its image
+
+    db_images_indice_range.push_back(0);
+    std::vector<cv::String> files;
+    utils::fs::glob(db_path, cv::String(), files);
+    for (std::vector<cv::String>::iterator itr = files.begin(); itr != files.end(); ++itr)
+    {
+        Mat tmp_img = imread( *itr, IMREAD_GRAYSCALE );
+        if (!tmp_img.empty())
+        {
+            std::vector<KeyPoint> kpts;
+            Mat descriptors;
+            detector->detectAndCompute( tmp_img, noArray(), kpts, descriptors );
+
+            db_keypoints.insert( db_keypoints.end(), kpts.begin(), kpts.end() );
+            db_descriptors.push_back( descriptors );
+            db_images_indice_range.push_back( db_images_indice_range.back()
+                                              + static_cast<unsigned int>(kpts.size()) );
+        }
+    }
+
+    //-- Set the LUT
+    db_indice_2_image_lut.resize( db_images_indice_range.back() );
+    const int nbr_of_imgs = static_cast<int>( db_images_indice_range.size()-1 );
+    for (int i = 0; i < nbr_of_imgs; ++i)
+    {
+        const unsigned int first_indice = db_images_indice_range[i];
+        const unsigned int last_indice = db_images_indice_range[i+1];
+        std::fill( db_indice_2_image_lut.begin() + first_indice,
+                   db_indice_2_image_lut.begin() + last_indice,
+                   i );
+    }
+
+    //-- Step 2: build the structure storing the descriptors
+#if defined(_SIFT_)
+    cv::Ptr<flann::GenericIndex<cvflann::L2<float> > > index;
+    if (load_db_path != String())
+        index = cv::makePtr<flann::GenericIndex<cvflann::L2<float> > >(db_descriptors,
+                cvflann::SavedIndexParams(load_db_path));
+    else
+        index = cv::makePtr<flann::GenericIndex<cvflann::L2<float> > >(db_descriptors,
+                cvflann::KDTreeIndexParams(4));
+
+#elif defined(_ORB_)
+    cv::Ptr<flann::GenericIndex<cvflann::Hamming<unsigned char> > > index;
+    if (load_db_path != String())
+        index = cv::makePtr<flann::GenericIndex<cvflann::Hamming<unsigned char> > >
+                (db_descriptors, cvflann::SavedIndexParams(load_db_path));
+    else
+        index = cv::makePtr<flann::GenericIndex<cvflann::Hamming<unsigned char> > >
+                (db_descriptors, cvflann::LshIndexParams());
+#else
+    cout<< "Descriptor not listed. Set the proper FLANN distance for this descriptor" <<endl;
+    return -1;
+#endif
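+
+    // KD-trees only make sense for L2 distances on float descriptors (SIFT),
+    // while LSH is the usual choice for binary descriptors compared with Hamming (ORB).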
+
+    if (save_db_path != String())
+        index->save(save_db_path);
+
+
+    // Return if no query image was set
+    if (img_path == String())
+        return 0;
+
+    //-- Detect the keypoints and compute the descriptors for the query image
+    std::vector<KeyPoint> img_keypoints;
+    Mat img_descriptors;
+    detector->detectAndCompute( img, noArray(), img_keypoints, img_descriptors );
+
+
+    //-- Step 3: retrieve the descriptors in the dataset matching the ones of the query image
+    // /!\ knnSearch doesn't follow OpenCV standards by not initialising empty Mat properties
+    const int knn = 2;
+    Mat indices(img_descriptors.rows, knn, CV_32S);
+#if defined(_SIFT_)
+#define DIST_TYPE float
+    Mat dists(img_descriptors.rows, knn, CV_32F);
+#elif defined(_ORB_)
+#define DIST_TYPE int
+    Mat dists(img_descriptors.rows, knn, CV_32S);
+#endif
+    index->knnSearch( img_descriptors, indices, dists, knn, cvflann::SearchParams(32) );
+
+    //-- Filter matches using the Lowe's ratio test
+    const float ratio_thresh = 0.7f;
+    std::vector<DMatch> good_matches; //contains
+    std::vector<unsigned int> matches_per_img_histogram( nbr_of_imgs, 0 );
+    for (int i = 0; i < dists.rows; ++i)
+    {
+        if (dists.at<DIST_TYPE>(i,0) < ratio_thresh * dists.at<DIST_TYPE>(i,1))
+        {
+            const int indice_in_db = indices.at<int>(i,0);
+            DMatch dmatch(i, indice_in_db, db_indice_2_image_lut[indice_in_db],
+                          static_cast<float>(dists.at<DIST_TYPE>(i,0)));
+            good_matches.push_back( dmatch );
+            matches_per_img_histogram[ db_indice_2_image_lut[indice_in_db] ]++;
+        }
+    }
+
+
+    //-- Step 4: find the dataset image with the highest proportion of matches
+    std::multimap<float, img_info> images_infos;
+    for (int i = 0; i < nbr_of_imgs; ++i)
+    {
+        const unsigned int nbr_of_matches = matches_per_img_histogram[i];
+        if (nbr_of_matches < 4) //we need at least 4 points for a homography
+            continue;
+
+        const unsigned int nbr_of_kpts = db_images_indice_range[i+1] - db_images_indice_range[i];
+        const float inverse_proportion_of_retrieved_kpts =
+                static_cast<float>(nbr_of_kpts) / static_cast<float>(nbr_of_matches);
+
+        img_info info(i, nbr_of_matches);
+        images_infos.insert( std::pair<float, img_info>(inverse_proportion_of_retrieved_kpts,
+                                                        info) );
+    }
+
+    if (images_infos.begin() == images_infos.end())
+    {
+        cout<<"No good match could be found."<<endl;
+        return 0;
+    }
+
+    float best_matches_proportion = images_infos.begin()->first;
+    float new_matches_proportion = best_matches_proportion;
+    img_info best_img = images_infos.begin()->second;
+
+    std::multimap<float, img_info>::iterator it = images_infos.begin();
+    ++it;
+    while ((it!=images_infos.end()) && (it->first < 1.1*best_matches_proportion))
+    {
+        const float ratio = new_matches_proportion / it->first;
+        if( it->second.nbr_of_matches * (ratio * ratio) > best_img.nbr_of_matches)
+        {
+            new_matches_proportion = it->first;
+            best_img = it->second;
+        }
+        ++it;
+    }
+
+    //-- Step 5: filter good matches that belong to the best image match of the dataset
+    std::vector<DMatch> filtered_good_matches;
+    for (std::vector<DMatch>::iterator itr(good_matches.begin()); itr != good_matches.end(); ++itr)
+    {
+        if (itr->imgIdx == best_img.img_index)
+            filtered_good_matches.push_back(*itr);
+    }
+
+    //-- Retrieve the best image match from the dataset
+    Mat db_img = imread( files[best_img.img_index], IMREAD_GRAYSCALE );
+
+    //-- Draw matches
+    Mat img_matches;
+    drawMatches( img, img_keypoints, db_img, db_keypoints, filtered_good_matches, img_matches, Scalar::all(-1),
+                 Scalar::all(-1), std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
+
+    //-- Show detected matches
+    imshow("Good Matches", img_matches );
+    waitKey();
+
+    return 0;
+}

From 7ec221e73487dde351b9add3ebf33ae607ae14ef Mon Sep 17 00:00:00 2001
From: Elizarov Ilya <48130864+ieliz@users.noreply.github.com>
Date: Tue, 11 Aug 2020 11:46:47 +0300 Subject: [PATCH 05/12] Merge pull request #18033 from ieliz:dasiamrpn Improving DaSiamRPN tracker sample * changed layerBlobs in dnn.cpp and added DaSiamRPN tracker * Improving DaSiamRPN tracker sample * Docs fix * Removed outdated changes * Trying to reinitialize tracker without reloading models. Worked with LaSOT-based benchmark with reinit rate=250 frames * Trying to reverse changes * Moving the model in the constructor * Fixing some issues with names * Variable name changed * Reverse parser arguments changes --- samples/dnn/dasiamrpn_tracker.py | 122 +++++++++++++++++-------------- 1 file changed, 66 insertions(+), 56 deletions(-) diff --git a/samples/dnn/dasiamrpn_tracker.py b/samples/dnn/dasiamrpn_tracker.py index df734645db..03e99d6dbf 100644 --- a/samples/dnn/dasiamrpn_tracker.py +++ b/samples/dnn/dasiamrpn_tracker.py @@ -14,8 +14,8 @@ import argparse import sys class DaSiamRPNTracker: - #initialization of used values, initial bounding box, used network - def __init__(self, im, target_pos, target_sz, net, kernel_r1, kernel_cls1): + # Initialization of used values, initial bounding box, used network + def __init__(self, net="dasiamrpn_model.onnx", kernel_r1="dasiamrpn_kernel_r1.onnx", kernel_cls1="dasiamrpn_kernel_cls1.onnx"): self.windowing = "cosine" self.exemplar_size = 127 self.instance_size = 271 @@ -28,42 +28,52 @@ class DaSiamRPNTracker: self.penalty_k = 0.055 self.window_influence = 0.42 self.lr = 0.295 - self.im_h = im.shape[0] - self.im_w = im.shape[1] - self.target_pos = target_pos - self.target_sz = target_sz - self.avg_chans = np.mean(im, axis=(0, 1)) - self.net = net self.score = [] - - if ((self.target_sz[0] * self.target_sz[1]) / float(self.im_h * self.im_w)) < 0.004: - raise AssertionError("Initializing BB is too small-try to restart tracker with larger BB") - - self.anchor = self.__generate_anchor() - wc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz) - hc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz) - s_z = round(np.sqrt(wc_z * hc_z)) - - z_crop = self.__get_subwindow_tracking(im, self.exemplar_size, s_z) - z_crop = z_crop.transpose(2, 0, 1).reshape(1, 3, 127, 127).astype(np.float32) - self.net.setInput(z_crop) - z_f = self.net.forward('63') - kernel_r1.setInput(z_f) - r1 = kernel_r1.forward() - kernel_cls1.setInput(z_f) - cls1 = kernel_cls1.forward() - r1 = r1.reshape(20, 256, 4, 4) - cls1 = cls1.reshape(10, 256 , 4, 4) - self.net.setParam(self.net.getLayerId('65'), 0, r1) - self.net.setParam(self.net.getLayerId('68'), 0, cls1) - if self.windowing == "cosine": self.window = np.outer(np.hanning(self.score_size), np.hanning(self.score_size)) elif self.windowing == "uniform": self.window = np.ones((self.score_size, self.score_size)) self.window = np.tile(self.window.flatten(), self.anchor_num) + # Loading network`s and kernel`s models + self.net = cv.dnn.readNet(net) + self.kernel_r1 = cv.dnn.readNet(kernel_r1) + self.kernel_cls1 = cv.dnn.readNet(kernel_cls1) - #creating anchor for tracking bounding box + def init(self, im, init_bb): + target_pos, target_sz = np.array([init_bb[0], init_bb[1]]), np.array([init_bb[2], init_bb[3]]) + self.im_h = im.shape[0] + self.im_w = im.shape[1] + self.target_pos = target_pos + self.target_sz = target_sz + self.avg_chans = np.mean(im, axis=(0, 1)) + + # When we trying to generate ONNX model from the pre-trained .pth model + # we are using only one state of the network. 
In our case used state + # with big bounding box, so we were forced to add assertion for + # too small bounding boxes - current state of the network can not + # work properly with such small bounding boxes + if ((self.target_sz[0] * self.target_sz[1]) / float(self.im_h * self.im_w)) < 0.004: + raise AssertionError( + "Initializing BB is too small-try to restart tracker with larger BB") + + self.anchor = self.__generate_anchor() + wc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz) + hc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz) + s_z = round(np.sqrt(wc_z * hc_z)) + z_crop = self.__get_subwindow_tracking(im, self.exemplar_size, s_z) + z_crop = z_crop.transpose(2, 0, 1).reshape(1, 3, 127, 127).astype(np.float32) + self.net.setInput(z_crop) + z_f = self.net.forward('63') + self.kernel_r1.setInput(z_f) + r1 = self.kernel_r1.forward() + self.kernel_cls1.setInput(z_f) + cls1 = self.kernel_cls1.forward() + r1 = r1.reshape(20, 256, 4, 4) + cls1 = cls1.reshape(10, 256 , 4, 4) + self.net.setParam(self.net.getLayerId('65'), 0, r1) + self.net.setParam(self.net.getLayerId('68'), 0, cls1) + + # Сreating anchor for tracking bounding box def __generate_anchor(self): self.anchor = np.zeros((self.anchor_num, 4), dtype = np.float32) size = self.total_stride * self.total_stride @@ -86,8 +96,8 @@ class DaSiamRPNTracker: self.anchor[:, 0], self.anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) return self.anchor - #track function - def track(self, im): + # Function for updating tracker state + def update(self, im): wc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz) hc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz) s_z = np.sqrt(wc_z * hc_z) @@ -96,7 +106,7 @@ class DaSiamRPNTracker: pad = d_search / scale_z s_x = round(s_z + 2 * pad) - #region preprocessing + # Region preprocessing part x_crop = self.__get_subwindow_tracking(im, self.instance_size, s_x) x_crop = x_crop.transpose(2, 0, 1).reshape(1, 3, 271, 271).astype(np.float32) self.score = self.__tracker_eval(x_crop, scale_z) @@ -105,7 +115,12 @@ class DaSiamRPNTracker: self.target_sz[0] = max(10, min(self.im_w, self.target_sz[0])) self.target_sz[1] = max(10, min(self.im_h, self.target_sz[1])) - #update bounding box position + cx, cy = self.target_pos + w, h = self.target_sz + updated_bb = (cx, cy, w, h) + return True, updated_bb + + # Function for updating position of the bounding box def __tracker_eval(self, x_crop, scale_z): target_size = self.target_sz * scale_z self.net.setInput(x_crop) @@ -160,7 +175,7 @@ class DaSiamRPNTracker: y = e_x / e_x.sum(axis = 0) return y - #evaluations with cropped image + # Reshaping cropped image for using in the model def __get_subwindow_tracking(self, im, model_size, original_sz): im_sz = im.shape c = (original_sz + 1) / 2 @@ -171,19 +186,20 @@ class DaSiamRPNTracker: left_pad = int(max(0., -context_xmin)) top_pad = int(max(0., -context_ymin)) right_pad = int(max(0., context_xmax - im_sz[1] + 1)) - bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) + bot_pad = int(max(0., context_ymax - im_sz[0] + 1)) context_xmin += left_pad context_xmax += left_pad context_ymin += top_pad context_ymax += top_pad r, c, k = im.shape - if any([top_pad, bottom_pad, left_pad, right_pad]): - te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8) + if any([top_pad, bot_pad, left_pad, right_pad]): + te_im = np.zeros(( + r + top_pad + bot_pad, c + left_pad + right_pad, k), np.uint8) te_im[top_pad:top_pad + r, left_pad:left_pad 
+ c, :] = im if top_pad: te_im[0:top_pad, left_pad:left_pad + c, :] = self.avg_chans - if bottom_pad: + if bot_pad: te_im[r + top_pad:, left_pad:left_pad + c, :] = self.avg_chans if left_pad: te_im[:, 0:left_pad, :] = self.avg_chans @@ -195,23 +211,22 @@ class DaSiamRPNTracker: if not np.array_equal(model_size, original_sz): im_patch_original = cv.resize(im_patch_original, (model_size, model_size)) - return im_patch_original -#function for reading paths, bounding box drawing, showing results +# Sample for using DaSiamRPN tracker def main(): parser = argparse.ArgumentParser(description="Run tracker") + parser.add_argument("--input", type=str, help="Full path to input (empty for camera)") parser.add_argument("--net", type=str, default="dasiamrpn_model.onnx", help="Full path to onnx model of net") parser.add_argument("--kernel_r1", type=str, default="dasiamrpn_kernel_r1.onnx", help="Full path to onnx model of kernel_r1") parser.add_argument("--kernel_cls1", type=str, default="dasiamrpn_kernel_cls1.onnx", help="Full path to onnx model of kernel_cls1") - parser.add_argument("--input", type=str, help="Full path to input. Do not use if input is camera") args = parser.parse_args() point1 = () point2 = () mark = True drawing = False cx, cy, w, h = 0.0, 0.0, 0, 0 - + # Fucntion for drawing during videostream def get_bb(event, x, y, flag, param): nonlocal point1, point2, cx, cy, w, h, drawing, mark @@ -233,12 +248,7 @@ def main(): h = abs(point1[1] - point2[1]) mark = False - #loading network`s and kernel`s models - net = cv.dnn.readNet(args.net) - kernel_r1 = cv.dnn.readNet(args.kernel_r1) - kernel_cls1 = cv.dnn.readNet(args.kernel_cls1) - - #initializing bounding box + # Creating window for visualization cap = cv.VideoCapture(args.input if args.input else 0) cv.namedWindow("DaSiamRPN") cv.setMouseCallback("DaSiamRPN", get_bb) @@ -257,17 +267,17 @@ def main(): cv.imshow("DaSiamRPN", twin) cv.waitKey(40) - target_pos, target_sz = np.array([cx, cy]), np.array([w, h]) - tracker = DaSiamRPNTracker(frame, target_pos, target_sz, net, kernel_r1, kernel_cls1) + init_bb = (cx, cy, w, h) + tracker = DaSiamRPNTracker(args.net, args.kernel_r1, args.kernel_cls1) + tracker.init(frame, init_bb) - #tracking loop + # Tracking loop while cap.isOpened(): has_frame, frame = cap.read() if not has_frame: sys.exit(0) - tracker.track(frame) - w, h = tracker.target_sz - cx, cy = tracker.target_pos + _, new_bb = tracker.update(frame) + cx, cy, w, h = new_bb cv.rectangle(frame, (int(cx - w // 2), int(cy - h // 2)), (int(cx - w // 2) + int(w), int(cy - h // 2) + int(h)),(0, 255, 255), 3) cv.imshow("DaSiamRPN", frame) key = cv.waitKey(1) From 98de57c6c459765a085f50c06982a1cb60b36505 Mon Sep 17 00:00:00 2001 From: Pierre-Emmanuel Viel Date: Fri, 26 Jun 2020 23:08:04 +0200 Subject: [PATCH 06/12] Refactoring to prepare for other vector types while mutualizing some methods --- .../include/opencv2/flann/kmeans_index.h | 781 ++++++++++-------- 1 file changed, 414 insertions(+), 367 deletions(-) diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h index a823986e09..98ec68a87b 100644 --- a/modules/flann/include/opencv2/flann/kmeans_index.h +++ b/modules/flann/include/opencv2/flann/kmeans_index.h @@ -463,14 +463,10 @@ public: root_[i] = pool_.allocate(); std::memset(root_[i], 0, sizeof(KMeansNode)); - if(is_kdtree_distance::val || is_vector_space_distance::val) { - computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_); - computeClustering(root_[i], indices_[i], 
(int)size_, branching_,0); - } - else { - computeBitfieldNodeStatistics(root_[i], indices_[i], (unsigned int)size_); - computeBitfieldClustering(root_[i], indices_[i], (int)size_, branching_,0); - } + Distance* dummy = NULL; + computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_, dummy); + + computeClustering(root_[i], indices_[i], (int)size_, branching_,0); } } @@ -829,6 +825,413 @@ private: } + template + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const DistType* identifier) + { + (void)identifier; + computeNodeStatistics(node, indices, indices_length); + } + + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const cvflann::HammingLUT* identifier) + { + (void)identifier; + computeBitfieldNodeStatistics(node, indices, indices_length); + } + + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const cvflann::Hamming* identifier) + { + (void)identifier; + computeBitfieldNodeStatistics(node, indices, indices_length); + } + + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const cvflann::Hamming2* identifier) + { + (void)identifier; + computeBitfieldNodeStatistics(node, indices, indices_length); + } + + + void refineClustering(int* indices, int indices_length, int branching, CentersType** centers, + std::vector& radiuses, int* belongs_to, int* count) + { + cv::AutoBuffer dcenters_buf(branching*veclen_); + Matrix dcenters(dcenters_buf.data(), branching, veclen_); + + bool converged = false; + int iteration = 0; + while (!converged && iteration new_centroids(indices_length); + std::vector sq_dists(indices_length); + + // reassign points to clusters + KMeansDistanceComputer > invoker( + distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists); + parallel_for_(cv::Range(0, (int)indices_length), invoker); + + for (int i=0; i < (int)indices_length; ++i) { + DistanceType sq_dist(sq_dists[i]); + int new_centroid(new_centroids[i]); + if (sq_dist > radiuses[new_centroid]) { + radiuses[new_centroid] = sq_dist; + } + if (new_centroid != belongs_to[i]) { + count[belongs_to[i]]--; + count[new_centroid]++; + belongs_to[i] = new_centroid; + converged = false; + } + } + + for (int i=0; i& radiuses, int* belongs_to, int* count) + { + for (int i=0; i( + veclen_*sizeof(ElementType)*BITS_PER_CHAR); + cv::AutoBuffer dcenters_buf(branching*accumulator_veclen); + Matrix dcenters(dcenters_buf.data(), branching, accumulator_veclen); + + bool converged = false; + int iteration = 0; + while (!converged && iteration>1) & 0x01; + dcenter[k+2] += (vec[l]>>2) & 0x01; + dcenter[k+3] += (vec[l]>>3) & 0x01; + dcenter[k+4] += (vec[l]>>4) & 0x01; + dcenter[k+5] += (vec[l]>>5) & 0x01; + dcenter[k+6] += (vec[l]>>6) & 0x01; + dcenter[k+7] += (vec[l]>>7) & 0x01; + } + } + for (int i=0; i(count[i]); + unsigned int* dcenter = dcenters[i]; + unsigned char* charCenter = (unsigned char*)centers[i]; + for (size_t k=0, l=0; k( + (((int)(0.5 + (double)(dcenter[k]) / cnt))) + | (((int)(0.5 + (double)(dcenter[k+1]) / cnt))<<1) + | (((int)(0.5 + (double)(dcenter[k+2]) / cnt))<<2) + | (((int)(0.5 + (double)(dcenter[k+3]) / cnt))<<3) + | (((int)(0.5 + (double)(dcenter[k+4]) / cnt))<<4) + | (((int)(0.5 + (double)(dcenter[k+5]) / cnt))<<5) + | (((int)(0.5 + (double)(dcenter[k+6]) / cnt))<<6) + | (((int)(0.5 + (double)(dcenter[k+7]) / cnt))<<7)); + } + } + + std::vector new_centroids(indices_length); + std::vector 
dists(indices_length); + + // reassign points to clusters + KMeansDistanceComputer invoker( + distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists); + parallel_for_(cv::Range(0, (int)indices_length), invoker); + + for (int i=0; i < indices_length; ++i) { + DistanceType dist(dists[i]); + int new_centroid(new_centroids[i]); + if (dist > radiuses[new_centroid]) { + radiuses[new_centroid] = dist; + } + if (new_centroid != belongs_to[i]) { + count[belongs_to[i]]--; + count[new_centroid]++; + belongs_to[i] = new_centroid; + converged = false; + } + } + + for (int i=0; i& radiuses, int* belongs_to, int* count) + { + // compute kmeans clustering for each of the resulting clusters + node->childs = pool_.allocate(branching); + int start = 0; + int end = start; + for (int c=0; c(), veclen_); + variance += d; + mean_radius += static_cast( sqrt(d) ); + std::swap(indices[i],indices[end]); + std::swap(belongs_to[i],belongs_to[end]); + end++; + } + } + variance /= s; + mean_radius /= s; + variance -= distance_(centers[c], ZeroIterator(), veclen_); + + node->childs[c] = pool_.allocate(); + std::memset(node->childs[c], 0, sizeof(KMeansNode)); + node->childs[c]->radius = radiuses[c]; + node->childs[c]->pivot = centers[c]; + node->childs[c]->variance = variance; + node->childs[c]->mean_radius = mean_radius; + computeClustering(node->childs[c],indices+start, end-start, branching, level+1); + start=end; + } + } + + + void computeAnyBitfieldSubClustering(KMeansNodePtr node, int* indices, int indices_length, + int branching, int level, CentersType** centers, + std::vector& radiuses, int* belongs_to, int* count) + { + // compute kmeans clustering for each of the resulting clusters + node->childs = pool_.allocate(branching); + int start = 0; + int end = start; + for (int c=0; c(), veclen_); + variance += static_cast( ensureSquareDistance(d) ); + mean_radius += ensureSimpleDistance(d); + std::swap(indices[i],indices[end]); + std::swap(belongs_to[i],belongs_to[end]); + end++; + } + } + mean_radius = static_cast( + 0.5f + static_cast(mean_radius) / static_cast(s)); + variance = static_cast( + 0.5 + static_cast(variance) / static_cast(s)); + variance -= static_cast( + ensureSquareDistance( + distance_(centers[c], ZeroIterator(), veclen_))); + + node->childs[c] = pool_.allocate(); + std::memset(node->childs[c], 0, sizeof(KMeansNode)); + node->childs[c]->radius = radiuses[c]; + node->childs[c]->pivot = centers[c]; + node->childs[c]->variance = static_cast(variance); + node->childs[c]->mean_radius = mean_radius; + computeClustering(node->childs[c],indices+start, end-start, branching, level+1); + start=end; + } + } + + + template + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const DistType* identifier) + { + (void)identifier; + refineClustering(indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + + + /** + * The methods responsible with doing the recursive hierarchical clustering on + * binary vectors. + * As some might have heared that KMeans on binary data doesn't make sense, + * it's worth a little explanation why it actually fairly works. As + * with the Hierarchical Clustering algortihm, we seed several centers for the + * current node by picking some of its points. 
Then in a first pass each point + * of the node is then related to its closest center. Now let's have a look at + * the 5 central dimensions of the 9 following points: + * + * xxxxxx11100xxxxx (1) + * xxxxxx11010xxxxx (2) + * xxxxxx11001xxxxx (3) + * xxxxxx10110xxxxx (4) + * xxxxxx10101xxxxx (5) + * xxxxxx10011xxxxx (6) + * xxxxxx01110xxxxx (7) + * xxxxxx01101xxxxx (8) + * xxxxxx01011xxxxx (9) + * sum _____ + * of 1: 66555 + * + * Even if the barycenter notion doesn't apply, we can set a center + * xxxxxx11111xxxxx that will better fit the five dimensions we are focusing + * on for these points. + * + * Note that convergence isn't ensured anymore. In practice, using Gonzales + * as seeding algorithm should be fine for getting convergence ("iterations" + * value can be set to -1). But with KMeans++ seeding you should definitely + * set a maximum number of iterations (but make it higher than the "iterations" + * default value of 11). + * + * Params: + * node = the node to cluster + * indices = indices of the points belonging to the current node + * indices_length = number of points in the current node + * branching = the branching factor to use in the clustering + * level = 0 for the root node, it increases with the subdivision levels + * centers = clusters centers to compute + * radiuses = radiuses of clusters + * belongs_to = LookUp Table returning, for a given indice id, the center id it belongs to + * count = array storing the number of indices for a given center id + * identifier = dummy pointer on an instance of Distance (use to branch correctly among templates) + */ + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const cvflann::HammingLUT* identifier) + { + (void)identifier; + refineBitfieldClustering( + indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeAnyBitfieldSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + + + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const cvflann::Hamming* identifier) + { + (void)identifier; + refineBitfieldClustering( + indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeAnyBitfieldSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + + + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const cvflann::Hamming2* identifier) + { + (void)identifier; + refineBitfieldClustering( + indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeAnyBitfieldSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + /** * The method responsible with actually doing the recursive hierarchical @@ -893,372 +1296,16 @@ private: count[belongs_to[i]]++; } - cv::AutoBuffer dcenters_buf(branching*veclen_); - Matrix dcenters(dcenters_buf.data(), branching, veclen_); - for (int i=0; i new_centroids(indices_length); - std::vector sq_dists(indices_length); - - // reassign points to clusters - KMeansDistanceComputer > invoker(distance_, dataset_, branching, indices, dcenters, veclen_, 
new_centroids, sq_dists); - parallel_for_(cv::Range(0, (int)indices_length), invoker); - - for (int i=0; i < (int)indices_length; ++i) { - DistanceType sq_dist(sq_dists[i]); - int new_centroid(new_centroids[i]); - if (sq_dist > radiuses[new_centroid]) { - radiuses[new_centroid] = sq_dist; - } - if (new_centroid != belongs_to[i]) { - count[belongs_to[i]]--; - count[new_centroid]++; - belongs_to[i] = new_centroid; - converged = false; - } - } - - for (int i=0; ichilds = pool_.allocate(branching); - int start = 0; - int end = start; - for (int c=0; c(), veclen_); - variance += d; - mean_radius += static_cast( sqrt(d) ); - std::swap(indices[i],indices[end]); - std::swap(belongs_to[i],belongs_to[end]); - end++; - } - } - variance /= s; - mean_radius /= s; - variance -= distance_(centers[c], ZeroIterator(), veclen_); - - node->childs[c] = pool_.allocate(); - std::memset(node->childs[c], 0, sizeof(KMeansNode)); - node->childs[c]->radius = radiuses[c]; - node->childs[c]->pivot = centers[c]; - node->childs[c]->variance = variance; - node->childs[c]->mean_radius = mean_radius; - computeClustering(node->childs[c],indices+start, end-start, branching, level+1); - start=end; - } + Distance* dummy = NULL; + refineAndSplitClustering(node, indices, indices_length, branching, level, + centers, radiuses, belongs_to, count, dummy); delete[] centers; } - /** - * The method responsible with doing the recursive hierarchical clustering on - * binary vectors. - * As some might have heared that KMeans on binary data doesn't make sense, - * it's worth a little explanation why it actually fairly works. As - * with the Hierarchical Clustering algortihm, we seed several centers for the - * current node by picking some of its points. Then in a first pass each point - * of the node is then related to its closest center. Now let's have a look at - * the 5 central dimensions of the 9 following points: - * - * xxxxxx11100xxxxx (1) - * xxxxxx11010xxxxx (2) - * xxxxxx11001xxxxx (3) - * xxxxxx10110xxxxx (4) - * xxxxxx10101xxxxx (5) - * xxxxxx10011xxxxx (6) - * xxxxxx01110xxxxx (7) - * xxxxxx01101xxxxx (8) - * xxxxxx01011xxxxx (9) - * sum _____ - * of 1: 66555 - * - * Even if the barycenter notion doesn't apply, we can set a center - * xxxxxx11111xxxxx that will better fit the five dimensions we are focusing - * on for these points. - * - * Note that convergence isn't ensured anymore. In practice, using Gonzales - * as seeding algorithm should be fine for getting convergence ("iterations" - * value can be set to -1). But with KMeans++ seeding you should definitely - * set a maximum number of iterations (but make it higher than the "iterations" - * default value of 11). 
- * - * Params: - * node = the node to cluster - * indices = indices of the points belonging to the current node - * indices_length = number of points in the current node - * branching = the branching factor to use in the clustering - * level = 0 for the root node, it increases with the subdivision levels - */ - void computeBitfieldClustering(KMeansNodePtr node, int* indices, - int indices_length, int branching, int level) - { - node->size = indices_length; - node->level = level; - - if (indices_length < branching) { - node->indices = indices; - std::sort(node->indices,node->indices+indices_length); - node->childs = NULL; - return; - } - - cv::AutoBuffer centers_idx_buf(branching); - int* centers_idx = centers_idx_buf.data(); - int centers_length; - (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length); - - if (centers_lengthindices = indices; - std::sort(node->indices,node->indices+indices_length); - node->childs = NULL; - return; - } - - const unsigned int accumulator_veclen = static_cast( - veclen_*sizeof(ElementType)*BITS_PER_CHAR); - cv::AutoBuffer dcenters_buf(branching*accumulator_veclen); - Matrix dcenters(dcenters_buf.data(), branching, accumulator_veclen); - - CentersType** centers = new CentersType*[branching]; - - for (int i=0; i radiuses(branching); - cv::AutoBuffer count_buf(branching); - int* count = count_buf.data(); - for (int i=0; i belongs_to_buf(indices_length); - int* belongs_to = belongs_to_buf.data(); - for (int i=0; inew_dist) { - belongs_to[i] = j; - dist = new_dist; - } - } - if (dist>radiuses[belongs_to[i]]) { - radiuses[belongs_to[i]] = dist; - } - count[belongs_to[i]]++; - } - - bool converged = false; - int iteration = 0; - while (!converged && iteration>1) & 0x01; - dcenter[k+2] += (vec[l]>>2) & 0x01; - dcenter[k+3] += (vec[l]>>3) & 0x01; - dcenter[k+4] += (vec[l]>>4) & 0x01; - dcenter[k+5] += (vec[l]>>5) & 0x01; - dcenter[k+6] += (vec[l]>>6) & 0x01; - dcenter[k+7] += (vec[l]>>7) & 0x01; - } - } - for (int i=0; i(count[i]); - unsigned int* dcenter = dcenters[i]; - unsigned char* charCenter = (unsigned char*)centers[i]; - for (size_t k=0, l=0; k( - (((int)(0.5 + (double)(dcenter[k]) / cnt))) - | (((int)(0.5 + (double)(dcenter[k+1]) / cnt))<<1) - | (((int)(0.5 + (double)(dcenter[k+2]) / cnt))<<2) - | (((int)(0.5 + (double)(dcenter[k+3]) / cnt))<<3) - | (((int)(0.5 + (double)(dcenter[k+4]) / cnt))<<4) - | (((int)(0.5 + (double)(dcenter[k+5]) / cnt))<<5) - | (((int)(0.5 + (double)(dcenter[k+6]) / cnt))<<6) - | (((int)(0.5 + (double)(dcenter[k+7]) / cnt))<<7)); - } - } - - std::vector new_centroids(indices_length); - std::vector dists(indices_length); - - // reassign points to clusters - KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists); - parallel_for_(cv::Range(0, (int)indices_length), invoker); - - for (int i=0; i < indices_length; ++i) { - DistanceType dist(dists[i]); - int new_centroid(new_centroids[i]); - if (dist > radiuses[new_centroid]) { - radiuses[new_centroid] = dist; - } - if (new_centroid != belongs_to[i]) { - count[belongs_to[i]]--; - count[new_centroid]++; - belongs_to[i] = new_centroid; - converged = false; - } - } - - for (int i=0; ichilds = pool_.allocate(branching); - int start = 0; - int end = start; - for (int c=0; c(), veclen_); - variance += static_cast( ensureSquareDistance(d) ); - mean_radius += ensureSimpleDistance(d); - std::swap(indices[i],indices[end]); - std::swap(belongs_to[i],belongs_to[end]); - end++; - } - } - mean_radius = 
static_cast( - 0.5f + static_cast(mean_radius) / static_cast(s)); - variance = static_cast( - 0.5 + static_cast(variance) / static_cast(s)); - variance -= static_cast( - ensureSquareDistance( - distance_(centers[c], ZeroIterator(), veclen_))); - - node->childs[c] = pool_.allocate(); - std::memset(node->childs[c], 0, sizeof(KMeansNode)); - node->childs[c]->radius = radiuses[c]; - node->childs[c]->pivot = centers[c]; - node->childs[c]->variance = static_cast(variance); - node->childs[c]->mean_radius = mean_radius; - computeBitfieldClustering(node->childs[c],indices+start, end-start, branching, level+1); - start=end; - } - - delete[] centers; - } - - - - /** * Performs one descent in the hierarchical k-means tree. The branches not * visited are stored in a priority queue. From f3cebb3e1bac73e8bdddd3de96049a46122a0214 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 12 Aug 2020 17:32:16 +0300 Subject: [PATCH 07/12] Merge pull request #18077 from l-bat:reduce_sum * Supported ReduceSum op * Skip test --- modules/dnn/src/layers/pooling_layer.cpp | 26 +++++++++++--- modules/dnn/src/tensorflow/tf_importer.cpp | 41 +++++++++++++++++++--- modules/dnn/test/test_darknet_importer.cpp | 2 ++ modules/dnn/test/test_tf_importer.cpp | 12 +++++++ 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 3f2a0f7d03..fd08fdbeb3 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -98,6 +98,8 @@ public: type = AVE; else if (pool == "stochastic") type = STOCHASTIC; + else if (pool == "sum") + type = SUM; else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); @@ -195,7 +197,7 @@ public: return type == MAX || type == AVE; } else - return type != STOCHASTIC; + return type != STOCHASTIC && type != SUM; } #endif if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) @@ -288,7 +290,7 @@ public: maxPooling(inputs[0], outputs[0], mask); break; } - case AVE: + case AVE: case SUM: CV_Assert_N(inputs.size() == 1, outputs.size() == 1); avePooling(inputs[0], outputs[0]); break; @@ -366,7 +368,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { - CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE)) || inputs.size() == 2, nodes.size() == inputs.size()); + CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE || type == SUM)) || inputs.size() == 2, nodes.size() == inputs.size()); auto& ieInpNode = nodes[0].dynamicCast()->node; ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT; @@ -381,6 +383,19 @@ virtual Ptr initNgraph(const std::vector >& inp exclude_pad, rounding_type, pad_type); return Ptr(new InfEngineNgraphNode(ave_pool)); } + else if (type == SUM) { + ngraph::Shape inpShape = ieInpNode->get_shape(); + CV_Assert(inpShape.size() == 2 + kernel_size.size()); + std::vector axes; + for (size_t i = 0; i < kernel_size.size(); i++) + { + if (inpShape[2 + i] == kernel_size[i]) + axes.push_back(2 + i); + } + auto reduction_axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes.size()}, axes); + auto reduce_sum = std::make_shared(ieInpNode, reduction_axes, true); + return Ptr(new InfEngineNgraphNode(reduce_sum)); + } else if (type == MAX) { auto max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), @@ -739,7 +754,7 @@ virtual Ptr initNgraph(const std::vector >& inp } } } - else 
if (poolingType == AVE) + else if (poolingType == AVE || poolingType == SUM) { for( ; x0 < x1; ++x0) { @@ -750,7 +765,7 @@ virtual Ptr initNgraph(const std::vector >& inp xend = min(xend, inp_width); float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta * ddelta : ((dend - dstart) * (yend - ystart) * (xend - xstart)); - inv_kernel_area = 1.0 / inv_kernel_area; + inv_kernel_area = poolingType == AVE ? 1.0 / inv_kernel_area : 1.0; #if CV_SIMD128 if( isPool2D && xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width ) { @@ -1095,6 +1110,7 @@ private: MAX, AVE, STOCHASTIC, + SUM, ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf }; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index c005c99b58..9083a4d4f9 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -2067,7 +2067,7 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); } - else if (type == "Mean") + else if (type == "Mean" || type == "Sum") { // Computes the mean of elements across dimensions of a tensor. // If keepdims is false (default) reduces input_tensor along the dimensions given in axis, @@ -2116,7 +2116,7 @@ void TFImporter::populateNet(Net dstNet) LayerParams avgLp; std::string avgName = name + "/avg"; CV_Assert(layer_id.find(avgName) == layer_id.end()); - avgLp.set("pool", "ave"); + avgLp.set("pool", type == "Mean" ? "ave" : "sum"); // pooling kernel H x 1 avgLp.set("global_pooling_h", true); avgLp.set("kernel_w", 1); @@ -2153,11 +2153,44 @@ void TFImporter::populateNet(Net dstNet) layer_id[name] = id; connect(layer_id, dstNet, Pin(avgName), id, 0); connect(layer_id, dstNet, Pin(layerShapeName), id, 1); + } else if (indices.total() == 1) { + int axis = toNCHW(indices.at(0)); + if (axis == 2 || axis == 3) + { + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1); + layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + + if (!keepDims) + { + // To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC + LayerParams permLP; + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. 
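+                    // (the Flatten-based squeeze below uses the original TF/NHWC axis index,
+                    // hence the permute back to NHWC first)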
+ permLP.set("order", DictValue::arrayInt(order, 4)); + std::string permName = name + "/nchw"; + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, Pin(name), permId, 0); + + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(permName), squeezeId, 0); + } + } } else { if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) - CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); + CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); - layerParams.set("pool", "ave"); + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); layerParams.set("global_pooling", true); int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 45edf405ac..4986e8e399 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -755,6 +755,8 @@ TEST_P(Test_Darknet_layers, connected) TEST_P(Test_Darknet_layers, relu) { + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); testDarknetLayer("relu"); } diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index d95c46b5d3..68b720a375 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -128,6 +128,13 @@ TEST_P(Test_TensorFlow_layers, reduce_mean) runTensorFlowNet("global_pool_by_axis"); } +TEST_P(Test_TensorFlow_layers, reduce_sum) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + runTensorFlowNet("sum_pool_by_axis"); +} + TEST_P(Test_TensorFlow_layers, conv_single_conv) { runTensorFlowNet("single_conv"); @@ -340,6 +347,11 @@ TEST_P(Test_TensorFlow_layers, pooling_reduce_mean) runTensorFlowNet("reduce_mean"); // an average pooling over all spatial dimensions. } +TEST_P(Test_TensorFlow_layers, pooling_reduce_sum) +{ + runTensorFlowNet("reduce_sum"); // a SUM pooling over all spatial dimensions. 
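+    // exercises the SUM pooling type added to pooling_layer.cpp by this patch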
+} + TEST_P(Test_TensorFlow_layers, max_pool_grad) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) From 2171cae8ff3da3693f87274437c0fc7b48293ed0 Mon Sep 17 00:00:00 2001 From: Yashas Samaga B L Date: Thu, 13 Aug 2020 16:25:41 +0530 Subject: [PATCH 08/12] Merge pull request #17976 from YashasSamaga:dnn-fusion-tests-fix-ocl dnn: add exhaustive fusion tests, enable more eltwise fusions * add eltwise fusion tests, enable more eltwise fusions * merge weighted eltwise tests with eltwise tests --- modules/dnn/src/dnn.cpp | 2 +- modules/dnn/test/test_layers.cpp | 432 +++++++++++++++++++++++++++++++ 2 files changed, 433 insertions(+), 1 deletion(-) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index f650a71fc2..8947791061 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2458,7 +2458,7 @@ struct Net::Impl : public detail::NetImplBase if( nextData ) nextActivLayer = nextData->layerInstance.dynamicCast(); - if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 && + if( !nextActivLayer.empty() && (!nextData->type.compare("ReLU") || !nextData->type.compare("ChannelsPReLU") || !nextData->type.compare("Power")) && diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 0c4ce11ca5..648e0aaa16 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -2053,4 +2053,436 @@ TEST_P(Layer_Test_BatchNorm, fusion) INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_BatchNorm, dnnBackendsAndTargets()); +class TestLayerFusion : public DNNTestLayer { +public: + static void makeDefaultTestConvolutionLayer(LayerParams& convParams, int in_channels, int num_filters, bool bias_term) + { + const int kernel_h = 3, kernel_w = 3; + const int pad_h = kernel_h / 2, pad_w = kernel_w / 2; + + convParams.set("kernel_h", kernel_h); + convParams.set("kernel_w", kernel_w); + convParams.set("pad_h", pad_h); + convParams.set("pad_w", pad_w); + convParams.set("num_output", num_filters); + convParams.set("bias_term", bias_term); + convParams.type = "Convolution"; + convParams.name = "convolution"; + + float conv_init_magnitude = 1.0f / in_channels / kernel_h / kernel_w; + int weightsShape[] = {num_filters, in_channels, kernel_h, kernel_w}; + Mat weights(4, &weightsShape[0], CV_32F); + randu(weights, -conv_init_magnitude, conv_init_magnitude); + convParams.blobs.push_back(weights); + if (bias_term) + { + Mat bias(1, num_filters, CV_32F); + randu(bias, -1.0f, 1.0f); + convParams.blobs.push_back(bias); + } + } + + static void makeDefaultTestActivationLayer(LayerParams& activationParams, const std::string& type, int in_channels) + { + activationParams.type = type; + activationParams.name = "activation"; + if (activationParams.type == "ReLU") + activationParams.set("negative_slope", 0.1f); + else if (activationParams.type == "Power") + { + activationParams.set("power", 2.0f); + activationParams.set("scale", 0.5f); + activationParams.set("shift", 0.3f); + } + else if (activationParams.type == "ReLU6") + { + activationParams.set("min_value", -1.0f); + activationParams.set("max_value", 1.0f); + } + else if (activationParams.type == "ChannelsPReLU") + { + Mat scales(1, in_channels, CV_32F); + randu(scales, -1.0f, 1.0f); + activationParams.blobs.push_back(scales); + } + } + + static void makeDefaultTestEltwiseLayer(LayerParams& eltwiseParams, const std::string& op, bool withCoefficients) + { + eltwiseParams.type = "Eltwise"; + eltwiseParams.name = "eltwise"; + eltwiseParams.set("operation", op); + if (withCoefficients) + { + float coeff[] 
= {0.3f, 0.5f}; + eltwiseParams.set("coeff", DictValue::arrayReal(coeff, 2)); + } + } + + static void test(Mat& input, Net& net, Backend backendId, Target targetId, std::vector expectedFusedLayers = std::vector(), double l1 = 0.0, double lInf = 0.0) + { + DNNTestLayer::checkBackend(backendId, targetId); + + net.enableFusion(false); + net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableTarget(DNN_TARGET_CPU); + net.setInput(input); + Mat outputReference = net.forward().clone(); + std::vector refTimings; + net.getPerfProfile(refTimings); + for (int i = 0; i < refTimings.size(); i++) + { + CV_Assert(refTimings[i] != 0.0); + } + + net.enableFusion(true); + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + net.setInput(input); + Mat outputTest = net.forward().clone(); + std::vector testTimings; + net.getPerfProfile(testTimings); + for (int i = 0; i < testTimings.size(); i++) + { + if(std::find(expectedFusedLayers.begin(), expectedFusedLayers.end(), i + 1) != expectedFusedLayers.end()) + { + EXPECT_EQ(testTimings[i], 0.0); + } + else + { + EXPECT_NE(testTimings[i], 0.0); + } + } + + // double ref_max_value, ref_min_value; + // minMaxLoc(outputReference.reshape(1, 1), &ref_min_value, &ref_max_value); + // std::cout << "reference range: " << ref_min_value << ' ' << ref_max_value << std::endl; + + double default_l1, default_lInf; + DNNTestLayer::getDefaultThresholds(backendId, targetId, &default_l1, &default_lInf); + if (l1 == 0.0) + l1 = default_l1; + if (lInf == 0.0) + lInf = default_lInf; + normAssert(outputReference, outputTest, "", l1, lInf); + } + + static testing::internal::ParamGenerator eltwiseOpList() + { + // TODO: automate list generation + return Values("sum", "max", "prod", "div"); + } + + static testing::internal::ParamGenerator activationLayersList() + { + // TODO: automate list generation + return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power"); + } + + static testing::internal::ParamGenerator > dnnBackendsAndTargetsForFusionTests() + { + return dnnBackendsAndTargets(false, false, true, false); // OCV OpenCL + OCV CPU + } +}; + +typedef TestWithParam > > ConvolutionActivationFusion; +TEST_P(ConvolutionActivationFusion, Accuracy) +{ + // input + // | + // ----------------------- + // | convolution | + // ----------------------- + // | + // ----------------------- + // | activation | + // ----------------------- + // | + // output + + const int batch_size = 2, in_channels = 16; + const int in_height = 16, in_width = 16; + int inputShape[] = {batch_size, in_channels, in_height, in_width}; + Mat input(4, &inputShape[0], CV_32F); + randu(input, 1.0f, 2.0f); + + bool bias_term = get<0>(GetParam()); + LayerParams convParams; + TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term); + + std::string actType = get<1>(GetParam()); + LayerParams activationParams; + TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels); + + Backend backendId = get<0>(get<2>(GetParam())); + Target targetId = get<1>(get<2>(GetParam())); + + // bug: https://github.com/opencv/opencv/issues/17964 + if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); + + // bug: https://github.com/opencv/opencv/issues/17953 + if (actType == "ChannelsPReLU" && bias_term == false && + backendId == DNN_BACKEND_OPENCV && (targetId == 
DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) + { + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); + } + + Net net; + int convId = net.addLayer(convParams.name, convParams.type, convParams); + int activId = net.addLayerToPrev(activationParams.name, activationParams.type, activationParams); + net.connect(0, 0, convId, 0); + + std::vector expectedFusedLayers; + if (backendId == DNN_BACKEND_OPENCV) + { + if (targetId == DNN_TARGET_CPU) + expectedFusedLayers.push_back(activId); // all activations are fused + else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) + { + if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power") + expectedFusedLayers.push_back(activId); + } + } + + TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers); +} +INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine( +/* bias */ testing::Bool(), +/* activation */ TestLayerFusion::activationLayersList(), + TestLayerFusion::dnnBackendsAndTargetsForFusionTests() +)); + +typedef TestWithParam > > ConvolutionEltwiseFusion; +TEST_P(ConvolutionEltwiseFusion, Accuracy) +{ + // input + // | + // ------------------------------- + // | | + // | --------------- + // | | convolution | + // | --------------- + // | | + // | ---------------- | + // --------| eltwise op |------- + // ---------------- + // | + // output + + const int batch_size = 2, in_channels = 16; + const int in_height = 16, in_width = 16; + int inputShape[] = {batch_size, in_channels, in_height, in_width}; + Mat input(4, &inputShape[0], CV_32F); + randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div + + bool bias_term = get<0>(GetParam()); + LayerParams convParams; + TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term); + + std::string eltwiseOp = get<1>(GetParam()); + bool weightedEltwise = get<2>(GetParam()); + if (eltwiseOp != "sum" && weightedEltwise) + throw SkipTestException("weighted eltwise not supported"); + LayerParams eltwiseParams; + TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise); + + Net net; + int convId = net.addLayer(convParams.name, convParams.type, convParams); + int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams); + net.connect(0, 0, convId, 0); + net.connect(convId, 0, eltwiseId, 0); + net.connect(0, 0, eltwiseId, 1); + + Backend backendId = get<0>(get<3>(GetParam())); + Target targetId = get<1>(get<3>(GetParam())); + TestLayerFusion::test(input, net, backendId, targetId); +} +INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine( +/* bias */ testing::Bool(), +/* eltwise op */ TestLayerFusion::eltwiseOpList(), +/* eltwise weighted */ testing::Bool(), + TestLayerFusion::dnnBackendsAndTargetsForFusionTests() +)); + +typedef TestWithParam > > ConvolutionEltwiseActivationFusion; +TEST_P(ConvolutionEltwiseActivationFusion, Accuracy) +{ + // input + // | + // ------------------------------- + // | | + // | --------------- + // | | convolution | + // | --------------- + // | | + // | ---------------- | + // --------| eltwise op |------- + // ---------------- + // | + // ---------------- + // | activation | + // ---------------- + // | + // output + + const int batch_size = 2, in_channels = 16; + const int in_height = 16, in_width = 16; + int inputShape[] = {batch_size, in_channels, in_height, in_width}; + Mat input(4, &inputShape[0], CV_32F); + randu(input, 1.0f, 
+
+typedef TestWithParam<tuple<bool, std::string, bool, std::string, tuple<Backend, Target> > > ConvolutionEltwiseActivationFusion;
+TEST_P(ConvolutionEltwiseActivationFusion, Accuracy)
+{
+    //                 input
+    //                   |
+    //    -------------------------------
+    //    |                             |
+    //    |                     ---------------
+    //    |                     | convolution |
+    //    |                     ---------------
+    //    |                             |
+    //    |        ----------------     |
+    //    ---------|  eltwise op  |------
+    //             ----------------
+    //                    |
+    //             ----------------
+    //             |  activation  |
+    //             ----------------
+    //                    |
+    //                 output
+
+    const int batch_size = 2, in_channels = 16;
+    const int in_height = 16, in_width = 16;
+    int inputShape[] = {batch_size, in_channels, in_height, in_width};
+    Mat input(4, &inputShape[0], CV_32F);
+    randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
+
+    bool bias_term = get<0>(GetParam());
+    LayerParams convParams;
+    TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
+
+    std::string eltwiseOp = get<1>(GetParam());
+    bool weightedEltwise = get<2>(GetParam());
+    if (eltwiseOp != "sum" && weightedEltwise)
+        throw SkipTestException("weighted eltwise not supported");
+    LayerParams eltwiseParams;
+    TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, false);
+
+    std::string actType = get<3>(GetParam());
+    LayerParams activationParams;
+    TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
+
+    Backend backendId = get<0>(get<4>(GetParam()));
+    Target targetId = get<1>(get<4>(GetParam()));
+
+    // bug: https://github.com/opencv/opencv/issues/17945
+    if (eltwiseOp != "sum" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+
+    // bug: https://github.com/opencv/opencv/issues/17953
+    if (eltwiseOp == "sum" && actType == "ChannelsPReLU" && bias_term == false &&
+        backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+    {
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    }
+
+    // bug: https://github.com/opencv/opencv/issues/17964
+    if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+
+    Net net;
+    int convId = net.addLayer(convParams.name, convParams.type, convParams);
+    int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
+    int activId = net.addLayer(activationParams.name, activationParams.type, activationParams);
+    net.connect(0, 0, convId, 0);
+    net.connect(convId, 0, eltwiseId, 0);
+    net.connect(0, 0, eltwiseId, 1);
+    net.connect(eltwiseId, 0, activId, 0);
+
+    std::vector<int> expectedFusedLayers;
+    if (backendId == DNN_BACKEND_OPENCV)
+    {
+        if (targetId == DNN_TARGET_CPU)
+            expectedFusedLayers.push_back(activId); // activation is fused with eltwise layer
+        else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
+        {
+            if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "Power")
+            {
+                expectedFusedLayers.push_back(eltwiseId);
+                expectedFusedLayers.push_back(activId);
+            }
+        }
+    }
+
+    TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
+}
+INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine(
+/* bias */             testing::Bool(),
+/* eltwise op */       TestLayerFusion::eltwiseOpList(),
+/* eltwise weighted */ testing::Bool(),
+/* activation */       TestLayerFusion::activationLayersList(),
+                       TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
+));
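makeDefaultTestActivationLayer is defined earlier in this patch, outside this excerpt; judging from the call sites, its ChannelsPReLU branch has to attach a per-channel slope blob, roughly along these lines (an assumed reconstruction, not the patch's exact body):

    // Assumed sketch of the helper used above (real body is elsewhere in the patch).
    static void makeDefaultTestActivationLayer(LayerParams& activationParams,
                                               const std::string& type, int in_channels)
    {
        activationParams.type = type;
        activationParams.name = "activationLayer";
        if (type == "ChannelsPReLU")
        {
            // PReLU with per-channel slopes needs one learned weight per channel.
            Mat scales(1, in_channels, CV_32F);
            randu(scales, -1.0f, 1.0f);
            activationParams.blobs.push_back(scales);
        }
    }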
+
+typedef TestWithParam<tuple<bool, std::string, std::string, bool, tuple<Backend, Target> > > ConvolutionActivationEltwiseFusion;
+TEST_P(ConvolutionActivationEltwiseFusion, Accuracy)
+{
+    //                 input
+    //                   |
+    //    -------------------------------
+    //    |                             |
+    //    |                    ----------------
+    //    |                    |  convolution |
+    //    |                    ----------------
+    //    |                             |
+    //    |                    ----------------
+    //    |                    |  activation  |
+    //    |                    ----------------
+    //    |                             |
+    //    |        ----------------     |
+    //    ---------|  eltwise sum |------
+    //             ----------------
+    //                    |
+
+    const int batch_size = 2, in_channels = 16;
+    const int in_height = 16, in_width = 16;
+    int inputShape[] = {batch_size, in_channels, in_height, in_width};
+    Mat input(4, &inputShape[0], CV_32F);
+    randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
+
+    bool bias_term = get<0>(GetParam());
+    LayerParams convParams;
+    TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
+
+    std::string actType = get<1>(GetParam());
+    LayerParams activationParams;
+    TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
+
+    std::string eltwiseOp = get<2>(GetParam());
+    bool weightedEltwise = get<3>(GetParam());
+    if (eltwiseOp != "sum" && weightedEltwise)
+        throw SkipTestException("weighted eltwise not supported");
+    LayerParams eltwiseParams;
+    TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, false);
+
+    Backend backendId = get<0>(get<4>(GetParam()));
+    Target targetId = get<1>(get<4>(GetParam()));
+
+    // bug: https://github.com/opencv/opencv/issues/17964
+    if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+
+    // bug: https://github.com/opencv/opencv/issues/17953
+    if (actType == "ChannelsPReLU" && bias_term == false &&
+        backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+    {
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    }
+
+    Net net;
+    int convId = net.addLayer(convParams.name, convParams.type, convParams);
+    int activId = net.addLayer(activationParams.name, activationParams.type, activationParams);
+    int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
+    net.connect(0, 0, convId, 0);
+    net.connect(convId, 0, activId, 0);
+    net.connect(activId, 0, eltwiseId, 0);
+    net.connect(0, 0, eltwiseId, 1);
+
+    std::vector<int> expectedFusedLayers;
+    if (backendId == DNN_BACKEND_OPENCV)
+    {
+        if (targetId == DNN_TARGET_CPU)
+            expectedFusedLayers.push_back(activId); // activation fused with convolution
+        else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
+        {
+            if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power")
+                expectedFusedLayers.push_back(activId); // activation fused with convolution
+        }
+    }
+
+    TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
+}
+INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion, Combine(
+/* bias */             testing::Bool(),
+/* activation */       TestLayerFusion::activationLayersList(),
+/* eltwise op */       TestLayerFusion::eltwiseOpList(),
+/* eltwise weighted */ testing::Bool(),
+                       TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
+));
+
 }} // namespace

From 00890aecdf6a117ecf2a74632ffe9b7eed6e6606 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin
Date: Thu, 13 Aug 2020 18:33:18 +0000
Subject: [PATCH 09/12] core(ocl): fix ocl::Image2d::isFormatSupported() in case of OPENCV_OPENCL_DEVICE=disabled

---
 modules/core/src/ocl.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index c6b6e2f0f0..62de280812 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -6458,6 +6458,9 @@ struct Image2D::Impl
             CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
 
         cl_context context = (cl_context)Context::getDefault().ptr();
+        if (!context)
+            return false;
+
         // Figure out how many formats are supported by this context.
        cl_uint numFormats = 0;
        cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,

From 339b963e6b86935788e1338397e9a0d31430fc66 Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Wed, 12 Aug 2020 15:03:46 +0300
Subject: [PATCH 10/12] Fix MatMul and Add axes

---
 modules/dnn/src/onnx/onnx_importer.cpp  | 18 ++++++++++++++++++
 modules/dnn/test/test_onnx_importer.cpp |  9 +++++++++
 2 files changed, 27 insertions(+)

diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index f6dc285fad..e65c7ac3e9 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -641,6 +641,17 @@ void ONNXImporter::populateNet(Net dstNet)
             {
                 layerParams.type = "Scale";
                 layerParams.set("bias_term", true);
+                int axis = 1;
+                for (int i = 0; i < graph_proto.initializer_size(); i++)
+                {
+                    opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
+                    if (tensor_proto.name() == node_proto.input(const_blob_id))
+                    {
+                        axis = inpShape.size() - tensor_proto.dims_size();
+                        break;
+                    }
+                }
+                layerParams.set("axis", axis);
                 blob = blob.reshape(1, 1);
                 layerParams.blobs.push_back((isSub ? -1 : 1) * blob);
             }
@@ -911,13 +922,20 @@ void ONNXImporter::populateNet(Net dstNet)
             CV_Assert(node_proto.input_size() == 2);
             layerParams.type = "InnerProduct";
             layerParams.set("bias_term", false);
+            CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end());
+            int firstInpDims = outShapes[node_proto.input(0)].size();
+            int secondInpDims;
 
             if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
             {
                 Mat blob = getBlob(node_proto, constBlobs, 1);
+                secondInpDims = blob.dims;
                 layerParams.blobs.push_back(blob.t());
                 layerParams.set("num_output", layerParams.blobs[0].size[0]);
+            } else {
+                secondInpDims = outShapes[node_proto.input(1)].size();
             }
+            layerParams.set("axis", firstInpDims - secondInpDims + 1);
         }
         else if (layer_type == "Mul" || layer_type == "Div")
         {
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 6a9e68dbc5..a317be71fb 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -404,6 +404,15 @@ TEST_P(Test_ONNX_layers, MatMul)
     testONNXModels("matmul_4d");
 }
 
+TEST_P(Test_ONNX_layers, MatMulAdd)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
+    testONNXModels("matmul_add");
+}
+
 TEST_P(Test_ONNX_layers, Expand)
 {
     testONNXModels("expand_batch");
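The axis arithmetic in the MatMul change above (firstInpDims - secondInpDims + 1) lines InnerProduct up with ONNX MatMul broadcasting; a worked case with assumed shapes:

    // Worked example for layerParams.set("axis", firstInpDims - secondInpDims + 1):
    //   A: 2 x 3 x 4 x 5  -> firstInpDims  = 4
    //   B: 5 x 6          -> secondInpDims = 2
    //   axis = 4 - 2 + 1 = 3, so InnerProduct treats dims 0..2 of A as batch
    //   dimensions and multiplies over the last axis, yielding 2 x 3 x 4 x 6.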
From ad63d24dbaaa6801d9afc4afdccd84fbc5a68e89 Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Fri, 14 Aug 2020 19:49:42 +0300
Subject: [PATCH 11/12] Merge pull request #18096 from l-bat:update_onnx_importer

* Added ReduceSum to ONNX importer

* Fix comments

* Fix Mul
---
 .../dnn/src/layers/fully_connected_layer.cpp |   1 -
 .../dnn/src/onnx/onnx_graph_simplifier.cpp   |  19 +++
 modules/dnn/src/onnx/onnx_importer.cpp       | 132 +++++++++++++-----
 modules/dnn/test/test_onnx_importer.cpp      |   6 +
 4 files changed, 124 insertions(+), 34 deletions(-)

diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index 03349253c0..4746403504 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -116,7 +116,6 @@ public:
         CV_CheckEQ(inputs.size(), (size_t)2, "");
         numOutput = inputs[1].back();
         cAxis = inputs[0].size() - 1;
-        CV_CheckEQ(numOutput, inputs[0][cAxis - 1], "");
         int dims = inputs[0].size();
         CV_CheckEQ(inputs[1].size(), (size_t)dims, "");
         CV_CheckGE(dims, 2, "");
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
index 61ef8b7da6..e8b237cab4 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -262,6 +262,24 @@ public:
     }
 };
 
+class ExpandSubgraph : public Subgraph
+{
+public:
+    ExpandSubgraph()
+    {
+        int input = addNodeToMatch("");
+        int values = addNodeToMatch("");
+        int init = addNodeToMatch("ConstantOfShape", values);
+        int coeff = addNodeToMatch("Constant");
+        int mul = addNodeToMatch("Mul", init, coeff);
+        int shape = addNodeToMatch("Constant");
+        int condition = addNodeToMatch("Equal", shape, mul);
+        int where = addNodeToMatch("Where", condition, init, addNodeToMatch("Constant"));
+        addNodeToMatch("Expand", input, where);
+        setFusedNode("Expand", input, shape);
+    }
+};
+
 class MulCastSubgraph : public Subgraph
 {
 public:
@@ -459,6 +477,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
     subgraphs.push_back(makePtr<SoftMaxSubgraph>());
     subgraphs.push_back(makePtr<NormalizeSubgraph1>());
     subgraphs.push_back(makePtr<NormalizeSubgraph2>());
+    subgraphs.push_back(makePtr<ExpandSubgraph>());
 
     simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
 }
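ExpandSubgraph above follows the importer's standard pattern-folding recipe: describe the node pattern in the constructor with addNodeToMatch(), name the surviving node with setFusedNode(), and register one makePtr per pattern. A hypothetical second pattern, just to show the shape of the API (not part of this patch):

    // Hypothetical: fold Reshape(x, Shape(x)) into Identity(x).
    class IdentityReshapeSubgraph : public Subgraph
    {
    public:
        IdentityReshapeSubgraph()
        {
            int input = addNodeToMatch("");             // matches any producer
            int shape = addNodeToMatch("Shape", input); // Shape(x)
            addNodeToMatch("Reshape", input, shape);    // Reshape(x, Shape(x)) == x
            setFusedNode("Identity", input);            // keep only Identity(x)
        }
    };
    // registered like the others: subgraphs.push_back(makePtr<IdentityReshapeSubgraph>());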
"MAX" : "AVE"); + String pool; + if (layer_type == "GlobalMaxPool") + pool = "MAX"; + else if (layer_type == "ReduceSum") + pool = "SUM"; + else + pool = "AVE"; + layerParams.set("pool", pool); layerParams.set("global_pooling", layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool"); - - if (layer_type == "ReduceMean") + if (layer_type == "ReduceMean" || layer_type == "ReduceSum") { - if (layerParams.get("keepdims") == 0 || !layerParams.has("axes")) - CV_Error(Error::StsNotImplemented, "Unsupported mode of ReduceMean operation."); + if (!layerParams.has("axes")) + CV_Error(Error::StsNotImplemented, "Unsupported mode of " + layer_type + " operation."); MatShape inpShape = outShapes[node_proto.input(0)]; DictValue axes = layerParams.get("axes"); + bool keepdims = layerParams.get("keepdims"); + MatShape targetShape = inpShape; + for (int i = 0; i < axes.size(); i++) { + int axis = clamp(axes.get(i), inpShape.size()); + if (keepdims) { + targetShape[axis] = 1; + } else { + targetShape.erase(targetShape.begin() + axis); + } + } + if (inpShape.size() == 3 && axes.size() <= 2) { - int axis = axes.get(0); + int axis = clamp(axes.get(0), inpShape.size()); CV_CheckNE(axis, 0, ""); - outShapes[layerParams.name] = inpShape; - outShapes[layerParams.name][axis] = 1; LayerParams reshapeLp; reshapeLp.name = layerParams.name + "/reshape"; @@ -426,13 +442,12 @@ void ONNXImporter::populateNet(Net dstNet) avgLp.name = layerParams.name + "/avg"; avgLp.type = "Pooling"; CV_Assert(layer_id.find(avgLp.name) == layer_id.end()); - avgLp.set("pool", "ave"); + avgLp.set("pool", pool); if (axes.size() == 2) { - CV_CheckEQ(axes.get(0), 1, "Unsupported ReduceMean mode"); - CV_CheckEQ(axes.get(1), 2, "Unsupported ReduceMean mode"); + CV_CheckEQ(clamp(axes.get(0), inpShape.size()), 1, ("Unsupported " + layer_type + " mode").c_str()); + CV_CheckEQ(clamp(axes.get(1), inpShape.size()), 2, ("Unsupported " + layer_type + " mode").c_str()); avgLp.set("global_pooling", true); - outShapes[layerParams.name][axes.get(1)] = 1; } else { @@ -443,28 +458,33 @@ void ONNXImporter::populateNet(Net dstNet) node_proto.set_input(0, reshapeLp.name); node_proto.set_output(0, avgLp.name); addLayer(dstNet, avgLp, node_proto, layer_id, outShapes); - - layerParams.type = "Flatten"; - layerParams.set("axis", 0); - layerParams.set("end_axis", 1); - - node_proto.set_input(0, avgLp.name); - node_proto.set_output(0, layerParams.name); } else { if (inpShape.size() != 4 && inpShape.size() != 5) - CV_Error(Error::StsNotImplemented, "Unsupported input shape of reduce_mean operation."); + CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation."); CV_Assert(axes.size() <= inpShape.size() - 2); std::vector kernel_size(inpShape.size() - 2, 1); for (int i = 0; i < axes.size(); i++) { - int axis = axes.get(i); + int axis = clamp(axes.get(i), inpShape.size()); CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); kernel_size[axis - 2] = inpShape[axis]; } - layerParams.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); + LayerParams poolLp = layerParams; + poolLp.name = layerParams.name + "/avg"; + CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); + poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); + + node_proto.set_output(0, poolLp.name); + addLayer(dstNet, poolLp, node_proto, layer_id, outShapes); } + + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); + + node_proto.set_input(0, 
@@ -1001,15 +1021,10 @@ void ONNXImporter::populateNet(Net dstNet)
             {
                 Mat inp0 = getBlob(node_proto, constBlobs, 0);
                 Mat inp1 = getBlob(node_proto, constBlobs, 1);
-                if (inp0.size != inp1.size)
+                if (inp0.size != inp1.size && inp1.total() != 1)
                     CV_Error(Error::StsNotImplemented, "Constant multiply with different shapes");
 
-                Mat out;
-                if (isDiv)
-                    divide(inp0, inp1, out);
-                else
-                    multiply(inp0, inp1, out);
-
+                Mat out = isDiv ? inp0 / inp1 : inp0.mul(inp1);
                 out = out.reshape(1, inp0.dims, inp0.size);
                 out.dims = inp0.dims;  // to workaround dims == 1
                 addConstant(layerParams.name, out, constBlobs, outShapes);
@@ -1180,9 +1195,45 @@ void ONNXImporter::populateNet(Net dstNet)
             Mat newShapeMat = getBlob(node_proto, constBlobs, 1);
             MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());
 
-            shapeIt = outShapes.find(node_proto.input(0));
-            CV_Assert(shapeIt != outShapes.end());
-            MatShape inpShape = shapeIt->second;
+            MatShape inpShape;
+            bool haveVariables = constBlobs.find(node_proto.input(0)) == constBlobs.end();
+            if (haveVariables)
+            {
+                shapeIt = outShapes.find(node_proto.input(0));
+                CV_Assert(shapeIt != outShapes.end());
+                inpShape = shapeIt->second;
+            }
+            else
+            {
+                inpShape = shape(getBlob(node_proto, constBlobs, 0));
+            }
+
+            String srcName = node_proto.input(0);
+            // Unsqueeze and repeat along new axis
+            if (targetShape.size() == inpShape.size() + 1)
+            {
+                for (int i = 0; i < targetShape.size(); i++)
+                {
+                    if (targetShape[i] == -1 && i < inpShape.size())
+                        targetShape[i] = inpShape[i];
+                    else if (i < inpShape.size() && targetShape[i] != inpShape[i])
+                        inpShape.insert(inpShape.begin() + i, 1);
+                }
+                if (haveVariables)
+                {
+                    LayerParams reshapeLp;
+                    reshapeLp.name = layerParams.name + "/reshape";
+                    reshapeLp.type = "Reshape";
+                    CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
+                    reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
+
+                    opencv_onnx::NodeProto proto;
+                    proto.add_input(node_proto.input(0));
+                    proto.add_output(reshapeLp.name);
+                    addLayer(dstNet, reshapeLp, proto, layer_id, outShapes);
+                    srcName = reshapeLp.name;
+                }
+            }
             CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");
 
             std::vector<int> broadcast_axes;
@@ -1197,6 +1248,19 @@ void ONNXImporter::populateNet(Net dstNet)
                 }
             }
 
+            if (!haveVariables)
+            {
+                if (broadcast_axes.size() != 1)
+                    CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input");
+
+                Mat input = getBlob(node_proto, constBlobs, 0);
+                input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
+                Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
+                output = output.reshape(0, targetShape);
+                addConstant(layerParams.name, output, constBlobs, outShapes);
+                continue;
+            }
+
             if (broadcast_axes.size() == 2 &&
                 broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
             {
@@ -1231,6 +1295,7 @@ void ONNXImporter::populateNet(Net dstNet)
                     CV_Assert(layer_id.find(copyLP.name) == layer_id.end());
                     input_names.push_back(copyLP.name);
 
+                    node_proto.set_input(0, srcName);
                     node_proto.set_output(0, copyLP.name);
                     addLayer(dstNet, copyLP, node_proto, layer_id, outShapes);
                 }
@@ -1241,6 +1306,7 @@ void ONNXImporter::populateNet(Net dstNet)
                 }
                 layerParams.set("axis", broadcast_axes[0]);
                 layerParams.type = "Concat";
+                node_proto.set_output(0, layerParams.name);
             }
             else
                 CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
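The constant-input branch above folds Expand at import time: broadcasting a constant along a single axis is just reshape, repeat, reshape. The same trick in isolation (a sketch with assumed helper names):

    // Sketch: broadcast 'blob' along one axis via cv::repeat.
    // rowsBeforeAxis = product of dims before the broadcast axis (where the dim is 1).
    #include <opencv2/core.hpp>
    #include <vector>

    cv::Mat expandAlongAxis(const cv::Mat& blob, int rowsBeforeAxis, int repeats,
                            const std::vector<int>& finalShape)
    {
        cv::Mat flat  = blob.reshape(0, rowsBeforeAxis); // 2-D view: rows x (dims at/after axis)
        cv::Mat tiled = cv::repeat(flat, 1, repeats);    // tile columns 'repeats' times
        return tiled.reshape(0, (int)finalShape.size(), finalShape.data()); // back to N-d
    }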
Expand op"); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index a317be71fb..25efcbb3ca 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -257,6 +257,11 @@ TEST_P(Test_ONNX_layers, ReduceMean) testONNXModels("reduce_mean_axis2"); } +TEST_P(Test_ONNX_layers, ReduceSum) +{ + testONNXModels("reduce_sum"); +} + TEST_P(Test_ONNX_layers, ReduceMean3D) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) @@ -417,6 +422,7 @@ TEST_P(Test_ONNX_layers, Expand) { testONNXModels("expand_batch"); testONNXModels("expand_channels"); + testONNXModels("expand_neg_batch"); } TEST_P(Test_ONNX_layers, ExpandHW) From 68f527267bdeb6179d0b37665b29b4412d69794a Mon Sep 17 00:00:00 2001 From: nhlsm Date: Sat, 15 Aug 2020 02:21:23 +0900 Subject: [PATCH 12/12] Merge pull request #18080 from nhlsm:improve-mat-operator-assign-scalar * improve Mat::operator=(Scalar) * touch * remove trailing whitespace * TEST: check if old code pass test or not * remove CV_Error * remove warning * fix: is -> Scalar * 1) Mat *mat -> Mat &mat 2) return bool, add output param * add comment --- modules/core/src/copy.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 48440ef265..7f4329df78 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -414,6 +414,29 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const copymask(ptrs[0], 0, ptrs[2], 0, ptrs[1], 0, sz, &esz); } + +static bool can_apply_memset(const Mat &mat, const Scalar &s, int &fill_value) +{ + // check if depth is 1 byte. + switch (mat.depth()) + { + case CV_8U: fill_value = saturate_cast( s.val[0] ); break; + case CV_8S: fill_value = saturate_cast( s.val[0] ); break; + default: return false; + } + + // check if all element is same. + const int64* is = (const int64*)&s.val[0]; + switch (mat.channels()) + { + case 1: return true; + case 2: return (is[0] == is[1]); + case 3: return (is[0] == is[1] && is[1] == is[2]); + case 4: return (is[0] == is[1] && is[1] == is[2] && is[2] == is[3]); + default: return false; + } +} + Mat& Mat::operator = (const Scalar& s) { CV_INSTRUMENT_REGION(); @@ -434,6 +457,14 @@ Mat& Mat::operator = (const Scalar& s) } else { + int fill_value = 0; + if ( can_apply_memset(*this, s, fill_value) ) + { + for (size_t i = 0; i < it.nplanes; i++, ++it) + memset(dptr, fill_value, elsize); + return *this; + } + if( it.nplanes > 0 ) { double scalar[12];