From 2241bfb0dfa788e0b516a7da9a1f70733f5431d3 Mon Sep 17 00:00:00 2001 From: Namgoo Lee Date: Thu, 30 Jul 2020 01:03:34 +0900 Subject: [PATCH 01/12] Use "src" not "*this" for source GpuMat --- modules/core/src/cuda/gpu_mat.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu index e1b0c1b22d..f31f78a87a 100644 --- a/modules/core/src/cuda/gpu_mat.cu +++ b/modules/core/src/cuda/gpu_mat.cu @@ -561,7 +561,7 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) co {convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, 0} }; - funcs[sdepth][ddepth](reshape(1), dst.reshape(1), stream); + funcs[sdepth][ddepth](src.reshape(1), dst.reshape(1), stream); } void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& stream) const @@ -591,7 +591,7 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, doub {convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale} }; - funcs[sdepth][ddepth](reshape(1), dst.reshape(1), alpha, beta, stream); + funcs[sdepth][ddepth](src.reshape(1), dst.reshape(1), alpha, beta, stream); } void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream) From 11ac26bfb43ad6e10c2bd50bbba0489e2bc26a79 Mon Sep 17 00:00:00 2001 From: Namgoo Lee Date: Thu, 30 Jul 2020 01:24:25 +0900 Subject: [PATCH 02/12] test code --- modules/cudaarithm/test/test_gpumat.cpp | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/modules/cudaarithm/test/test_gpumat.cpp b/modules/cudaarithm/test/test_gpumat.cpp index e2fed16ad5..b4d59b1644 100644 --- a/modules/cudaarithm/test/test_gpumat.cpp +++ b/modules/cudaarithm/test/test_gpumat.cpp @@ -320,6 +320,65 @@ CUDA_TEST_P(GpuMat_ConvertTo, WithScaling) } } +CUDA_TEST_P(GpuMat_ConvertTo, InplaceWithOutScaling) +{ + cv::Mat src = randomMat(size, depth1); + + if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE)) + { + try + { + cv::cuda::GpuMat d_srcDst = loadMat(src); + d_srcDst.convertTo(d_srcDst, depth2); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::cuda::GpuMat d_srcDst = loadMat(src, useRoi); + d_srcDst.convertTo(d_srcDst, depth2); + + cv::Mat dst_gold; + src.convertTo(dst_gold, depth2); + + EXPECT_MAT_NEAR(dst_gold, d_srcDst, depth2 < CV_32F ? 1.0 : 1e-4); + } +} + + +CUDA_TEST_P(GpuMat_ConvertTo, InplaceWithScaling) +{ + cv::Mat src = randomMat(size, depth1); + double a = randomDouble(0.0, 1.0); + double b = randomDouble(-10.0, 10.0); + + if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE)) + { + try + { + cv::cuda::GpuMat d_srcDst = loadMat(src); + d_srcDst.convertTo(d_srcDst, depth2, a, b); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::cuda::GpuMat d_srcDst = loadMat(src, useRoi); + d_srcDst.convertTo(d_srcDst, depth2, a, b); + + cv::Mat dst_gold; + src.convertTo(dst_gold, depth2, a, b); + + EXPECT_MAT_NEAR(dst_gold, d_srcDst, depth2 < CV_32F ? 
1.0 : 1e-4);
+    }
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA, GpuMat_ConvertTo, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,

From 6226ea00859eb49b8b83ea80db42aa6a2f58fa9a Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Thu, 6 Aug 2020 15:47:34 +0300
Subject: [PATCH 03/12] Fix bug in ONNX Gather op

---
 modules/dnn/src/onnx/onnx_importer.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 407dcdc570..f6dc285fad 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -1395,6 +1395,7 @@ void ONNXImporter::populateNet(Net dstNet)
                 inpShape.erase(inpShape.begin() + axis);

                 layerParams.type = "Reshape";
+                layerParams.set("axis", 0);
                 layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
                 node_proto.set_input(0, sliceLp.name);
             }

From fe9ff64d641be19ce38fd3c36489615704b339c2 Mon Sep 17 00:00:00 2001
From: pemmanuelviel
Date: Mon, 10 Aug 2020 15:26:40 +0200
Subject: [PATCH 04/12] Merge pull request #17643 from
 pemmanuelviel:pev--new-flann-demo

* Add a FLANN example showing how to search a query image in a dataset

* Clean: remove warning

* Replace dependency to boost::filesystem by calls to core/utils/filesystem

* Wait for escape key to exit

* Add an example of binary descriptors support

* Add program options for saving and loading the flann structure

* Fix warnings on Win64

* Fix warnings on 3.4 branch still relying on C++03

* Add ctor to img_info structure

* Comments modification

* * Demo file of FLANN moved and renamed

* Fix distances type when using binary vectors in the FLANN example

* Rename FLANN example file

* Remove dependency of the flann example to opencv_contrib's SURF.

* Remove mention of FLANN and other descriptors that aimed at giving hint
  on the other options

* Cleaner program options management

* Make waitKey usage minimal in FLANN example

* Fix the conditions order

* Use cv::Ptr
---
 samples/cpp/flann_search_dataset.cpp | 250 +++++++++++++++++++++
 1 file changed, 250 insertions(+)
 create mode 100644 samples/cpp/flann_search_dataset.cpp

diff --git a/samples/cpp/flann_search_dataset.cpp b/samples/cpp/flann_search_dataset.cpp
new file mode 100644
index 0000000000..01ef93f821
--- /dev/null
+++ b/samples/cpp/flann_search_dataset.cpp
@@ -0,0 +1,250 @@
+// flann_search_dataset.cpp
+// Naive program to search a query picture in a dataset illustrating usage of FLANN
+
+#include <iostream>
+#include <vector>
+#include "opencv2/core.hpp"
+#include "opencv2/core/utils/filesystem.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/features2d.hpp"
+#include "opencv2/flann.hpp"
+
+using namespace cv;
+using std::cout;
+using std::endl;
+
+#define _ORB_
+
+const char* keys =
+    "{ help h | | Print help message. }"
+    "{ dataset | | Path to the images folder used as dataset. }"
+    "{ image | | Path to the image to search for in the dataset. }"
+    "{ save | | Path and filename where to save the flann structure to. }"
+    "{ load | | Path and filename where to load the flann structure from. }";
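+// Example invocation of this sample (binary name and paths are illustrative):
+//   ./example_flann_search_dataset --dataset=./images/ --image=./query.png --save=./flann_index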
+
+struct img_info {
+    int img_index;
+    unsigned int nbr_of_matches;
+
+    img_info(int _img_index, unsigned int _nbr_of_matches)
+        : img_index(_img_index)
+        , nbr_of_matches(_nbr_of_matches)
+    {}
+};
+
+
+int main( int argc, char* argv[] )
+{
+    //-- Test the program options
+    CommandLineParser parser( argc, argv, keys );
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return -1;
+    }
+
+    const cv::String img_path = parser.get<String>("image");
+    Mat img = imread( samples::findFile( img_path ), IMREAD_GRAYSCALE );
+    if (img.empty() )
+    {
+        cout << "Could not open the image "<< img_path << endl;
+        return -1;
+    }
+
+    const cv::String db_path = parser.get<String>("dataset");
+    if (!utils::fs::isDirectory(db_path))
+    {
+        cout << "Dataset folder "<< db_path.c_str() <<" doesn't exist!" << endl;
+        return -1;
+    }
+
+    const cv::String load_db_path = parser.get<String>("load");
+    if ((load_db_path != String()) && (!utils::fs::exists(load_db_path)))
+    {
+        cout << "File " << load_db_path.c_str()
+             << " where to load the flann structure from doesn't exist!" << endl;
+        return -1;
+    }
+
+    const cv::String save_db_path = parser.get<String>("save");
+
+    //-- Step 1: Detect the keypoints using a detector, compute the descriptors
+    //   in the folder containing the images of the dataset
+#ifdef _SIFT_
+    int minHessian = 400;
+    Ptr<Feature2D> detector = SIFT::create( minHessian );
+#elif defined(_ORB_)
+    Ptr<Feature2D> detector = ORB::create();
+#else
+    cout << "Missing or unknown defined descriptor. "
+            "Only SIFT and ORB are currently interfaced here" << endl;
+    return -1;
+#endif
+
+    std::vector<KeyPoint> db_keypoints;
+    Mat db_descriptors;
+    std::vector<unsigned int> db_images_indice_range; //store the range of indices per image
+    std::vector<int> db_indice_2_image_lut;           //match descriptor indice to its image
+
+    db_images_indice_range.push_back(0);
+    std::vector<cv::String> files;
+    utils::fs::glob(db_path, cv::String(), files);
+    for (std::vector<cv::String>::iterator itr = files.begin(); itr != files.end(); ++itr)
+    {
+        Mat tmp_img = imread( *itr, IMREAD_GRAYSCALE );
+        if (!tmp_img.empty())
+        {
+            std::vector<KeyPoint> kpts;
+            Mat descriptors;
+            detector->detectAndCompute( tmp_img, noArray(), kpts, descriptors );
+
+            db_keypoints.insert( db_keypoints.end(), kpts.begin(), kpts.end() );
+            db_descriptors.push_back( descriptors );
+            db_images_indice_range.push_back( db_images_indice_range.back()
+                                              + static_cast<unsigned int>(kpts.size()) );
+        }
+    }
+
+    //-- Set the LUT
+    db_indice_2_image_lut.resize( db_images_indice_range.back() );
+    const int nbr_of_imgs = static_cast<int>( db_images_indice_range.size()-1 );
+    for (int i = 0; i < nbr_of_imgs; ++i)
+    {
+        const unsigned int first_indice = db_images_indice_range[i];
+        const unsigned int last_indice = db_images_indice_range[i+1];
+        std::fill( db_indice_2_image_lut.begin() + first_indice,
+                   db_indice_2_image_lut.begin() + last_indice,
+                   i );
+    }
+
+    //-- Step 2: build the structure storing the descriptors
+#if defined(_SIFT_)
+    cv::Ptr<flann::GenericIndex<cvflann::L2<float> > > index;
+    if (load_db_path != String())
+        index = cv::makePtr<flann::GenericIndex<cvflann::L2<float> > >(db_descriptors,
+                cvflann::SavedIndexParams(load_db_path));
+    else
+        index = cv::makePtr<flann::GenericIndex<cvflann::L2<float> > >(db_descriptors,
+                cvflann::KDTreeIndexParams(4));
+
+#elif defined(_ORB_)
+    cv::Ptr<flann::GenericIndex<cvflann::Hamming<unsigned char> > > index;
+    if (load_db_path != String())
+        index = cv::makePtr<flann::GenericIndex<cvflann::Hamming<unsigned char> > >
+                (db_descriptors, cvflann::SavedIndexParams(load_db_path));
+    else
+        index = cv::makePtr<flann::GenericIndex<cvflann::Hamming<unsigned char> > >
+                (db_descriptors, cvflann::LshIndexParams());
+#else
+    cout<< "Descriptor not listed. Set the proper FLANN distance for this descriptor" <<endl;
+    return -1;
+#endif
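+
+    // KD-trees only make sense for L2 distances on float descriptors (SIFT),
+    // while LSH is the usual choice for binary descriptors compared with Hamming (ORB).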
+
+    if (save_db_path != String())
+        index->save(save_db_path);
+
+
+    // Return if no query image was set
+    if (img_path == String())
+        return 0;
+
+    //-- Detect the keypoints and compute the descriptors for the query image
+    std::vector<KeyPoint> img_keypoints;
+    Mat img_descriptors;
+    detector->detectAndCompute( img, noArray(), img_keypoints, img_descriptors );
+
+
+    //-- Step 3: retrieve the descriptors in the dataset matching the ones of the query image
+    // /!\ knnSearch doesn't follow OpenCV standards by not initialising empty Mat properties
+    const int knn = 2;
+    Mat indices(img_descriptors.rows, knn, CV_32S);
+#if defined(_SIFT_)
+#define DIST_TYPE float
+    Mat dists(img_descriptors.rows, knn, CV_32F);
+#elif defined(_ORB_)
+#define DIST_TYPE int
+    Mat dists(img_descriptors.rows, knn, CV_32S);
+#endif
+    index->knnSearch( img_descriptors, indices, dists, knn, cvflann::SearchParams(32) );
+
+    //-- Filter matches using the Lowe's ratio test
+    const float ratio_thresh = 0.7f;
+    std::vector<DMatch> good_matches; //contains
+    std::vector<unsigned int> matches_per_img_histogram( nbr_of_imgs, 0 );
+    for (int i = 0; i < dists.rows; ++i)
+    {
+        if (dists.at<DIST_TYPE>(i,0) < ratio_thresh * dists.at<DIST_TYPE>(i,1))
+        {
+            const int indice_in_db = indices.at<int>(i,0);
+            DMatch dmatch(i, indice_in_db, db_indice_2_image_lut[indice_in_db],
+                          static_cast<float>(dists.at<DIST_TYPE>(i,0)));
+            good_matches.push_back( dmatch );
+            matches_per_img_histogram[ db_indice_2_image_lut[indice_in_db] ]++;
+        }
+    }
+
+
+    //-- Step 4: find the dataset image with the highest proportion of matches
+    std::multimap<float, img_info> images_infos;
+    for (int i = 0; i < nbr_of_imgs; ++i)
+    {
+        const unsigned int nbr_of_matches = matches_per_img_histogram[i];
+        if (nbr_of_matches < 4) //we need at least 4 points for a homography
+            continue;
+
+        const unsigned int nbr_of_kpts = db_images_indice_range[i+1] - db_images_indice_range[i];
+        const float inverse_proportion_of_retrieved_kpts =
+                static_cast<float>(nbr_of_kpts) / static_cast<float>(nbr_of_matches);
+
+        img_info info(i, nbr_of_matches);
+        images_infos.insert( std::pair<float, img_info>(inverse_proportion_of_retrieved_kpts,
+                                                        info) );
+    }
+
+    if (images_infos.begin() == images_infos.end())
+    {
+        cout<<"No good match could be found."<<endl;
+        return 0;
+    }
+
+    float best_matches_proportion = images_infos.begin()->first;
+    float new_matches_proportion = best_matches_proportion;
+    img_info best_img = images_infos.begin()->second;
+
+    std::multimap<float, img_info>::iterator it = images_infos.begin();
+    ++it;
+    while ((it!=images_infos.end()) && (it->first < 1.1*best_matches_proportion))
+    {
+        const float ratio = new_matches_proportion / it->first;
+        if( it->second.nbr_of_matches * (ratio * ratio) > best_img.nbr_of_matches)
+        {
+            new_matches_proportion = it->first;
+            best_img = it->second;
+        }
+        ++it;
+    }
+
+    //-- Step 5: filter good matches that belong to the best image match of the dataset
+    std::vector<DMatch> filtered_good_matches;
+    for (std::vector<DMatch>::iterator itr(good_matches.begin()); itr != good_matches.end(); ++itr)
+    {
+        if (itr->imgIdx == best_img.img_index)
+            filtered_good_matches.push_back(*itr);
+    }
+
+    //-- Retrieve the best image match from the dataset
+    Mat db_img = imread( files[best_img.img_index], IMREAD_GRAYSCALE );
+
+    //-- Draw matches
+    Mat img_matches;
+    drawMatches( img, img_keypoints, db_img, db_keypoints, filtered_good_matches, img_matches, Scalar::all(-1),
+                 Scalar::all(-1), std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
+
+    //-- Show detected matches
+    imshow("Good Matches", img_matches );
+    waitKey();
+
+    return 0;
+}

From 7ec221e73487dde351b9add3ebf33ae607ae14ef Mon Sep 17 00:00:00 2001
From: Elizarov Ilya <48130864+ieliz@users.noreply.github.com>
Date: Tue, 11 Aug 2020 11:46:47 +0300 Subject: [PATCH 05/12] Merge pull request #18033 from ieliz:dasiamrpn Improving DaSiamRPN tracker sample * changed layerBlobs in dnn.cpp and added DaSiamRPN tracker * Improving DaSiamRPN tracker sample * Docs fix * Removed outdated changes * Trying to reinitialize tracker without reloading models. Worked with LaSOT-based benchmark with reinit rate=250 frames * Trying to reverse changes * Moving the model in the constructor * Fixing some issues with names * Variable name changed * Reverse parser arguments changes --- samples/dnn/dasiamrpn_tracker.py | 122 +++++++++++++++++-------------- 1 file changed, 66 insertions(+), 56 deletions(-) diff --git a/samples/dnn/dasiamrpn_tracker.py b/samples/dnn/dasiamrpn_tracker.py index df734645db..03e99d6dbf 100644 --- a/samples/dnn/dasiamrpn_tracker.py +++ b/samples/dnn/dasiamrpn_tracker.py @@ -14,8 +14,8 @@ import argparse import sys class DaSiamRPNTracker: - #initialization of used values, initial bounding box, used network - def __init__(self, im, target_pos, target_sz, net, kernel_r1, kernel_cls1): + # Initialization of used values, initial bounding box, used network + def __init__(self, net="dasiamrpn_model.onnx", kernel_r1="dasiamrpn_kernel_r1.onnx", kernel_cls1="dasiamrpn_kernel_cls1.onnx"): self.windowing = "cosine" self.exemplar_size = 127 self.instance_size = 271 @@ -28,42 +28,52 @@ class DaSiamRPNTracker: self.penalty_k = 0.055 self.window_influence = 0.42 self.lr = 0.295 - self.im_h = im.shape[0] - self.im_w = im.shape[1] - self.target_pos = target_pos - self.target_sz = target_sz - self.avg_chans = np.mean(im, axis=(0, 1)) - self.net = net self.score = [] - - if ((self.target_sz[0] * self.target_sz[1]) / float(self.im_h * self.im_w)) < 0.004: - raise AssertionError("Initializing BB is too small-try to restart tracker with larger BB") - - self.anchor = self.__generate_anchor() - wc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz) - hc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz) - s_z = round(np.sqrt(wc_z * hc_z)) - - z_crop = self.__get_subwindow_tracking(im, self.exemplar_size, s_z) - z_crop = z_crop.transpose(2, 0, 1).reshape(1, 3, 127, 127).astype(np.float32) - self.net.setInput(z_crop) - z_f = self.net.forward('63') - kernel_r1.setInput(z_f) - r1 = kernel_r1.forward() - kernel_cls1.setInput(z_f) - cls1 = kernel_cls1.forward() - r1 = r1.reshape(20, 256, 4, 4) - cls1 = cls1.reshape(10, 256 , 4, 4) - self.net.setParam(self.net.getLayerId('65'), 0, r1) - self.net.setParam(self.net.getLayerId('68'), 0, cls1) - if self.windowing == "cosine": self.window = np.outer(np.hanning(self.score_size), np.hanning(self.score_size)) elif self.windowing == "uniform": self.window = np.ones((self.score_size, self.score_size)) self.window = np.tile(self.window.flatten(), self.anchor_num) + # Loading network`s and kernel`s models + self.net = cv.dnn.readNet(net) + self.kernel_r1 = cv.dnn.readNet(kernel_r1) + self.kernel_cls1 = cv.dnn.readNet(kernel_cls1) - #creating anchor for tracking bounding box + def init(self, im, init_bb): + target_pos, target_sz = np.array([init_bb[0], init_bb[1]]), np.array([init_bb[2], init_bb[3]]) + self.im_h = im.shape[0] + self.im_w = im.shape[1] + self.target_pos = target_pos + self.target_sz = target_sz + self.avg_chans = np.mean(im, axis=(0, 1)) + + # When we trying to generate ONNX model from the pre-trained .pth model + # we are using only one state of the network. 
In our case used state + # with big bounding box, so we were forced to add assertion for + # too small bounding boxes - current state of the network can not + # work properly with such small bounding boxes + if ((self.target_sz[0] * self.target_sz[1]) / float(self.im_h * self.im_w)) < 0.004: + raise AssertionError( + "Initializing BB is too small-try to restart tracker with larger BB") + + self.anchor = self.__generate_anchor() + wc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz) + hc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz) + s_z = round(np.sqrt(wc_z * hc_z)) + z_crop = self.__get_subwindow_tracking(im, self.exemplar_size, s_z) + z_crop = z_crop.transpose(2, 0, 1).reshape(1, 3, 127, 127).astype(np.float32) + self.net.setInput(z_crop) + z_f = self.net.forward('63') + self.kernel_r1.setInput(z_f) + r1 = self.kernel_r1.forward() + self.kernel_cls1.setInput(z_f) + cls1 = self.kernel_cls1.forward() + r1 = r1.reshape(20, 256, 4, 4) + cls1 = cls1.reshape(10, 256 , 4, 4) + self.net.setParam(self.net.getLayerId('65'), 0, r1) + self.net.setParam(self.net.getLayerId('68'), 0, cls1) + + # Сreating anchor for tracking bounding box def __generate_anchor(self): self.anchor = np.zeros((self.anchor_num, 4), dtype = np.float32) size = self.total_stride * self.total_stride @@ -86,8 +96,8 @@ class DaSiamRPNTracker: self.anchor[:, 0], self.anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) return self.anchor - #track function - def track(self, im): + # Function for updating tracker state + def update(self, im): wc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz) hc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz) s_z = np.sqrt(wc_z * hc_z) @@ -96,7 +106,7 @@ class DaSiamRPNTracker: pad = d_search / scale_z s_x = round(s_z + 2 * pad) - #region preprocessing + # Region preprocessing part x_crop = self.__get_subwindow_tracking(im, self.instance_size, s_x) x_crop = x_crop.transpose(2, 0, 1).reshape(1, 3, 271, 271).astype(np.float32) self.score = self.__tracker_eval(x_crop, scale_z) @@ -105,7 +115,12 @@ class DaSiamRPNTracker: self.target_sz[0] = max(10, min(self.im_w, self.target_sz[0])) self.target_sz[1] = max(10, min(self.im_h, self.target_sz[1])) - #update bounding box position + cx, cy = self.target_pos + w, h = self.target_sz + updated_bb = (cx, cy, w, h) + return True, updated_bb + + # Function for updating position of the bounding box def __tracker_eval(self, x_crop, scale_z): target_size = self.target_sz * scale_z self.net.setInput(x_crop) @@ -160,7 +175,7 @@ class DaSiamRPNTracker: y = e_x / e_x.sum(axis = 0) return y - #evaluations with cropped image + # Reshaping cropped image for using in the model def __get_subwindow_tracking(self, im, model_size, original_sz): im_sz = im.shape c = (original_sz + 1) / 2 @@ -171,19 +186,20 @@ class DaSiamRPNTracker: left_pad = int(max(0., -context_xmin)) top_pad = int(max(0., -context_ymin)) right_pad = int(max(0., context_xmax - im_sz[1] + 1)) - bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) + bot_pad = int(max(0., context_ymax - im_sz[0] + 1)) context_xmin += left_pad context_xmax += left_pad context_ymin += top_pad context_ymax += top_pad r, c, k = im.shape - if any([top_pad, bottom_pad, left_pad, right_pad]): - te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8) + if any([top_pad, bot_pad, left_pad, right_pad]): + te_im = np.zeros(( + r + top_pad + bot_pad, c + left_pad + right_pad, k), np.uint8) te_im[top_pad:top_pad + r, left_pad:left_pad 
+ c, :] = im if top_pad: te_im[0:top_pad, left_pad:left_pad + c, :] = self.avg_chans - if bottom_pad: + if bot_pad: te_im[r + top_pad:, left_pad:left_pad + c, :] = self.avg_chans if left_pad: te_im[:, 0:left_pad, :] = self.avg_chans @@ -195,23 +211,22 @@ class DaSiamRPNTracker: if not np.array_equal(model_size, original_sz): im_patch_original = cv.resize(im_patch_original, (model_size, model_size)) - return im_patch_original -#function for reading paths, bounding box drawing, showing results +# Sample for using DaSiamRPN tracker def main(): parser = argparse.ArgumentParser(description="Run tracker") + parser.add_argument("--input", type=str, help="Full path to input (empty for camera)") parser.add_argument("--net", type=str, default="dasiamrpn_model.onnx", help="Full path to onnx model of net") parser.add_argument("--kernel_r1", type=str, default="dasiamrpn_kernel_r1.onnx", help="Full path to onnx model of kernel_r1") parser.add_argument("--kernel_cls1", type=str, default="dasiamrpn_kernel_cls1.onnx", help="Full path to onnx model of kernel_cls1") - parser.add_argument("--input", type=str, help="Full path to input. Do not use if input is camera") args = parser.parse_args() point1 = () point2 = () mark = True drawing = False cx, cy, w, h = 0.0, 0.0, 0, 0 - + # Fucntion for drawing during videostream def get_bb(event, x, y, flag, param): nonlocal point1, point2, cx, cy, w, h, drawing, mark @@ -233,12 +248,7 @@ def main(): h = abs(point1[1] - point2[1]) mark = False - #loading network`s and kernel`s models - net = cv.dnn.readNet(args.net) - kernel_r1 = cv.dnn.readNet(args.kernel_r1) - kernel_cls1 = cv.dnn.readNet(args.kernel_cls1) - - #initializing bounding box + # Creating window for visualization cap = cv.VideoCapture(args.input if args.input else 0) cv.namedWindow("DaSiamRPN") cv.setMouseCallback("DaSiamRPN", get_bb) @@ -257,17 +267,17 @@ def main(): cv.imshow("DaSiamRPN", twin) cv.waitKey(40) - target_pos, target_sz = np.array([cx, cy]), np.array([w, h]) - tracker = DaSiamRPNTracker(frame, target_pos, target_sz, net, kernel_r1, kernel_cls1) + init_bb = (cx, cy, w, h) + tracker = DaSiamRPNTracker(args.net, args.kernel_r1, args.kernel_cls1) + tracker.init(frame, init_bb) - #tracking loop + # Tracking loop while cap.isOpened(): has_frame, frame = cap.read() if not has_frame: sys.exit(0) - tracker.track(frame) - w, h = tracker.target_sz - cx, cy = tracker.target_pos + _, new_bb = tracker.update(frame) + cx, cy, w, h = new_bb cv.rectangle(frame, (int(cx - w // 2), int(cy - h // 2)), (int(cx - w // 2) + int(w), int(cy - h // 2) + int(h)),(0, 255, 255), 3) cv.imshow("DaSiamRPN", frame) key = cv.waitKey(1) From 98de57c6c459765a085f50c06982a1cb60b36505 Mon Sep 17 00:00:00 2001 From: Pierre-Emmanuel Viel Date: Fri, 26 Jun 2020 23:08:04 +0200 Subject: [PATCH 06/12] Refactoring to prepare for other vector types while mutualizing some methods --- .../include/opencv2/flann/kmeans_index.h | 781 ++++++++++-------- 1 file changed, 414 insertions(+), 367 deletions(-) diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h index a823986e09..98ec68a87b 100644 --- a/modules/flann/include/opencv2/flann/kmeans_index.h +++ b/modules/flann/include/opencv2/flann/kmeans_index.h @@ -463,14 +463,10 @@ public: root_[i] = pool_.allocate(); std::memset(root_[i], 0, sizeof(KMeansNode)); - if(is_kdtree_distance::val || is_vector_space_distance::val) { - computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_); - computeClustering(root_[i], indices_[i], 
(int)size_, branching_,0); - } - else { - computeBitfieldNodeStatistics(root_[i], indices_[i], (unsigned int)size_); - computeBitfieldClustering(root_[i], indices_[i], (int)size_, branching_,0); - } + Distance* dummy = NULL; + computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_, dummy); + + computeClustering(root_[i], indices_[i], (int)size_, branching_,0); } } @@ -829,6 +825,413 @@ private: } + template + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const DistType* identifier) + { + (void)identifier; + computeNodeStatistics(node, indices, indices_length); + } + + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const cvflann::HammingLUT* identifier) + { + (void)identifier; + computeBitfieldNodeStatistics(node, indices, indices_length); + } + + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const cvflann::Hamming* identifier) + { + (void)identifier; + computeBitfieldNodeStatistics(node, indices, indices_length); + } + + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const cvflann::Hamming2* identifier) + { + (void)identifier; + computeBitfieldNodeStatistics(node, indices, indices_length); + } + + + void refineClustering(int* indices, int indices_length, int branching, CentersType** centers, + std::vector& radiuses, int* belongs_to, int* count) + { + cv::AutoBuffer dcenters_buf(branching*veclen_); + Matrix dcenters(dcenters_buf.data(), branching, veclen_); + + bool converged = false; + int iteration = 0; + while (!converged && iteration new_centroids(indices_length); + std::vector sq_dists(indices_length); + + // reassign points to clusters + KMeansDistanceComputer > invoker( + distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists); + parallel_for_(cv::Range(0, (int)indices_length), invoker); + + for (int i=0; i < (int)indices_length; ++i) { + DistanceType sq_dist(sq_dists[i]); + int new_centroid(new_centroids[i]); + if (sq_dist > radiuses[new_centroid]) { + radiuses[new_centroid] = sq_dist; + } + if (new_centroid != belongs_to[i]) { + count[belongs_to[i]]--; + count[new_centroid]++; + belongs_to[i] = new_centroid; + converged = false; + } + } + + for (int i=0; i& radiuses, int* belongs_to, int* count) + { + for (int i=0; i( + veclen_*sizeof(ElementType)*BITS_PER_CHAR); + cv::AutoBuffer dcenters_buf(branching*accumulator_veclen); + Matrix dcenters(dcenters_buf.data(), branching, accumulator_veclen); + + bool converged = false; + int iteration = 0; + while (!converged && iteration>1) & 0x01; + dcenter[k+2] += (vec[l]>>2) & 0x01; + dcenter[k+3] += (vec[l]>>3) & 0x01; + dcenter[k+4] += (vec[l]>>4) & 0x01; + dcenter[k+5] += (vec[l]>>5) & 0x01; + dcenter[k+6] += (vec[l]>>6) & 0x01; + dcenter[k+7] += (vec[l]>>7) & 0x01; + } + } + for (int i=0; i(count[i]); + unsigned int* dcenter = dcenters[i]; + unsigned char* charCenter = (unsigned char*)centers[i]; + for (size_t k=0, l=0; k( + (((int)(0.5 + (double)(dcenter[k]) / cnt))) + | (((int)(0.5 + (double)(dcenter[k+1]) / cnt))<<1) + | (((int)(0.5 + (double)(dcenter[k+2]) / cnt))<<2) + | (((int)(0.5 + (double)(dcenter[k+3]) / cnt))<<3) + | (((int)(0.5 + (double)(dcenter[k+4]) / cnt))<<4) + | (((int)(0.5 + (double)(dcenter[k+5]) / cnt))<<5) + | (((int)(0.5 + (double)(dcenter[k+6]) / cnt))<<6) + | (((int)(0.5 + (double)(dcenter[k+7]) / cnt))<<7)); + } + } + + std::vector new_centroids(indices_length); + std::vector 
dists(indices_length); + + // reassign points to clusters + KMeansDistanceComputer invoker( + distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists); + parallel_for_(cv::Range(0, (int)indices_length), invoker); + + for (int i=0; i < indices_length; ++i) { + DistanceType dist(dists[i]); + int new_centroid(new_centroids[i]); + if (dist > radiuses[new_centroid]) { + radiuses[new_centroid] = dist; + } + if (new_centroid != belongs_to[i]) { + count[belongs_to[i]]--; + count[new_centroid]++; + belongs_to[i] = new_centroid; + converged = false; + } + } + + for (int i=0; i& radiuses, int* belongs_to, int* count) + { + // compute kmeans clustering for each of the resulting clusters + node->childs = pool_.allocate(branching); + int start = 0; + int end = start; + for (int c=0; c(), veclen_); + variance += d; + mean_radius += static_cast( sqrt(d) ); + std::swap(indices[i],indices[end]); + std::swap(belongs_to[i],belongs_to[end]); + end++; + } + } + variance /= s; + mean_radius /= s; + variance -= distance_(centers[c], ZeroIterator(), veclen_); + + node->childs[c] = pool_.allocate(); + std::memset(node->childs[c], 0, sizeof(KMeansNode)); + node->childs[c]->radius = radiuses[c]; + node->childs[c]->pivot = centers[c]; + node->childs[c]->variance = variance; + node->childs[c]->mean_radius = mean_radius; + computeClustering(node->childs[c],indices+start, end-start, branching, level+1); + start=end; + } + } + + + void computeAnyBitfieldSubClustering(KMeansNodePtr node, int* indices, int indices_length, + int branching, int level, CentersType** centers, + std::vector& radiuses, int* belongs_to, int* count) + { + // compute kmeans clustering for each of the resulting clusters + node->childs = pool_.allocate(branching); + int start = 0; + int end = start; + for (int c=0; c(), veclen_); + variance += static_cast( ensureSquareDistance(d) ); + mean_radius += ensureSimpleDistance(d); + std::swap(indices[i],indices[end]); + std::swap(belongs_to[i],belongs_to[end]); + end++; + } + } + mean_radius = static_cast( + 0.5f + static_cast(mean_radius) / static_cast(s)); + variance = static_cast( + 0.5 + static_cast(variance) / static_cast(s)); + variance -= static_cast( + ensureSquareDistance( + distance_(centers[c], ZeroIterator(), veclen_))); + + node->childs[c] = pool_.allocate(); + std::memset(node->childs[c], 0, sizeof(KMeansNode)); + node->childs[c]->radius = radiuses[c]; + node->childs[c]->pivot = centers[c]; + node->childs[c]->variance = static_cast(variance); + node->childs[c]->mean_radius = mean_radius; + computeClustering(node->childs[c],indices+start, end-start, branching, level+1); + start=end; + } + } + + + template + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const DistType* identifier) + { + (void)identifier; + refineClustering(indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + + + /** + * The methods responsible with doing the recursive hierarchical clustering on + * binary vectors. + * As some might have heared that KMeans on binary data doesn't make sense, + * it's worth a little explanation why it actually fairly works. As + * with the Hierarchical Clustering algortihm, we seed several centers for the + * current node by picking some of its points. 
Then in a first pass each point + * of the node is then related to its closest center. Now let's have a look at + * the 5 central dimensions of the 9 following points: + * + * xxxxxx11100xxxxx (1) + * xxxxxx11010xxxxx (2) + * xxxxxx11001xxxxx (3) + * xxxxxx10110xxxxx (4) + * xxxxxx10101xxxxx (5) + * xxxxxx10011xxxxx (6) + * xxxxxx01110xxxxx (7) + * xxxxxx01101xxxxx (8) + * xxxxxx01011xxxxx (9) + * sum _____ + * of 1: 66555 + * + * Even if the barycenter notion doesn't apply, we can set a center + * xxxxxx11111xxxxx that will better fit the five dimensions we are focusing + * on for these points. + * + * Note that convergence isn't ensured anymore. In practice, using Gonzales + * as seeding algorithm should be fine for getting convergence ("iterations" + * value can be set to -1). But with KMeans++ seeding you should definitely + * set a maximum number of iterations (but make it higher than the "iterations" + * default value of 11). + * + * Params: + * node = the node to cluster + * indices = indices of the points belonging to the current node + * indices_length = number of points in the current node + * branching = the branching factor to use in the clustering + * level = 0 for the root node, it increases with the subdivision levels + * centers = clusters centers to compute + * radiuses = radiuses of clusters + * belongs_to = LookUp Table returning, for a given indice id, the center id it belongs to + * count = array storing the number of indices for a given center id + * identifier = dummy pointer on an instance of Distance (use to branch correctly among templates) + */ + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const cvflann::HammingLUT* identifier) + { + (void)identifier; + refineBitfieldClustering( + indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeAnyBitfieldSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + + + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const cvflann::Hamming* identifier) + { + (void)identifier; + refineBitfieldClustering( + indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeAnyBitfieldSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + + + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const cvflann::Hamming2* identifier) + { + (void)identifier; + refineBitfieldClustering( + indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeAnyBitfieldSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + /** * The method responsible with actually doing the recursive hierarchical @@ -893,372 +1296,16 @@ private: count[belongs_to[i]]++; } - cv::AutoBuffer dcenters_buf(branching*veclen_); - Matrix dcenters(dcenters_buf.data(), branching, veclen_); - for (int i=0; i new_centroids(indices_length); - std::vector sq_dists(indices_length); - - // reassign points to clusters - KMeansDistanceComputer > invoker(distance_, dataset_, branching, indices, dcenters, veclen_, 
new_centroids, sq_dists); - parallel_for_(cv::Range(0, (int)indices_length), invoker); - - for (int i=0; i < (int)indices_length; ++i) { - DistanceType sq_dist(sq_dists[i]); - int new_centroid(new_centroids[i]); - if (sq_dist > radiuses[new_centroid]) { - radiuses[new_centroid] = sq_dist; - } - if (new_centroid != belongs_to[i]) { - count[belongs_to[i]]--; - count[new_centroid]++; - belongs_to[i] = new_centroid; - converged = false; - } - } - - for (int i=0; ichilds = pool_.allocate(branching); - int start = 0; - int end = start; - for (int c=0; c(), veclen_); - variance += d; - mean_radius += static_cast( sqrt(d) ); - std::swap(indices[i],indices[end]); - std::swap(belongs_to[i],belongs_to[end]); - end++; - } - } - variance /= s; - mean_radius /= s; - variance -= distance_(centers[c], ZeroIterator(), veclen_); - - node->childs[c] = pool_.allocate(); - std::memset(node->childs[c], 0, sizeof(KMeansNode)); - node->childs[c]->radius = radiuses[c]; - node->childs[c]->pivot = centers[c]; - node->childs[c]->variance = variance; - node->childs[c]->mean_radius = mean_radius; - computeClustering(node->childs[c],indices+start, end-start, branching, level+1); - start=end; - } + Distance* dummy = NULL; + refineAndSplitClustering(node, indices, indices_length, branching, level, + centers, radiuses, belongs_to, count, dummy); delete[] centers; } - /** - * The method responsible with doing the recursive hierarchical clustering on - * binary vectors. - * As some might have heared that KMeans on binary data doesn't make sense, - * it's worth a little explanation why it actually fairly works. As - * with the Hierarchical Clustering algortihm, we seed several centers for the - * current node by picking some of its points. Then in a first pass each point - * of the node is then related to its closest center. Now let's have a look at - * the 5 central dimensions of the 9 following points: - * - * xxxxxx11100xxxxx (1) - * xxxxxx11010xxxxx (2) - * xxxxxx11001xxxxx (3) - * xxxxxx10110xxxxx (4) - * xxxxxx10101xxxxx (5) - * xxxxxx10011xxxxx (6) - * xxxxxx01110xxxxx (7) - * xxxxxx01101xxxxx (8) - * xxxxxx01011xxxxx (9) - * sum _____ - * of 1: 66555 - * - * Even if the barycenter notion doesn't apply, we can set a center - * xxxxxx11111xxxxx that will better fit the five dimensions we are focusing - * on for these points. - * - * Note that convergence isn't ensured anymore. In practice, using Gonzales - * as seeding algorithm should be fine for getting convergence ("iterations" - * value can be set to -1). But with KMeans++ seeding you should definitely - * set a maximum number of iterations (but make it higher than the "iterations" - * default value of 11). 
- * - * Params: - * node = the node to cluster - * indices = indices of the points belonging to the current node - * indices_length = number of points in the current node - * branching = the branching factor to use in the clustering - * level = 0 for the root node, it increases with the subdivision levels - */ - void computeBitfieldClustering(KMeansNodePtr node, int* indices, - int indices_length, int branching, int level) - { - node->size = indices_length; - node->level = level; - - if (indices_length < branching) { - node->indices = indices; - std::sort(node->indices,node->indices+indices_length); - node->childs = NULL; - return; - } - - cv::AutoBuffer centers_idx_buf(branching); - int* centers_idx = centers_idx_buf.data(); - int centers_length; - (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length); - - if (centers_lengthindices = indices; - std::sort(node->indices,node->indices+indices_length); - node->childs = NULL; - return; - } - - const unsigned int accumulator_veclen = static_cast( - veclen_*sizeof(ElementType)*BITS_PER_CHAR); - cv::AutoBuffer dcenters_buf(branching*accumulator_veclen); - Matrix dcenters(dcenters_buf.data(), branching, accumulator_veclen); - - CentersType** centers = new CentersType*[branching]; - - for (int i=0; i radiuses(branching); - cv::AutoBuffer count_buf(branching); - int* count = count_buf.data(); - for (int i=0; i belongs_to_buf(indices_length); - int* belongs_to = belongs_to_buf.data(); - for (int i=0; inew_dist) { - belongs_to[i] = j; - dist = new_dist; - } - } - if (dist>radiuses[belongs_to[i]]) { - radiuses[belongs_to[i]] = dist; - } - count[belongs_to[i]]++; - } - - bool converged = false; - int iteration = 0; - while (!converged && iteration>1) & 0x01; - dcenter[k+2] += (vec[l]>>2) & 0x01; - dcenter[k+3] += (vec[l]>>3) & 0x01; - dcenter[k+4] += (vec[l]>>4) & 0x01; - dcenter[k+5] += (vec[l]>>5) & 0x01; - dcenter[k+6] += (vec[l]>>6) & 0x01; - dcenter[k+7] += (vec[l]>>7) & 0x01; - } - } - for (int i=0; i(count[i]); - unsigned int* dcenter = dcenters[i]; - unsigned char* charCenter = (unsigned char*)centers[i]; - for (size_t k=0, l=0; k( - (((int)(0.5 + (double)(dcenter[k]) / cnt))) - | (((int)(0.5 + (double)(dcenter[k+1]) / cnt))<<1) - | (((int)(0.5 + (double)(dcenter[k+2]) / cnt))<<2) - | (((int)(0.5 + (double)(dcenter[k+3]) / cnt))<<3) - | (((int)(0.5 + (double)(dcenter[k+4]) / cnt))<<4) - | (((int)(0.5 + (double)(dcenter[k+5]) / cnt))<<5) - | (((int)(0.5 + (double)(dcenter[k+6]) / cnt))<<6) - | (((int)(0.5 + (double)(dcenter[k+7]) / cnt))<<7)); - } - } - - std::vector new_centroids(indices_length); - std::vector dists(indices_length); - - // reassign points to clusters - KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists); - parallel_for_(cv::Range(0, (int)indices_length), invoker); - - for (int i=0; i < indices_length; ++i) { - DistanceType dist(dists[i]); - int new_centroid(new_centroids[i]); - if (dist > radiuses[new_centroid]) { - radiuses[new_centroid] = dist; - } - if (new_centroid != belongs_to[i]) { - count[belongs_to[i]]--; - count[new_centroid]++; - belongs_to[i] = new_centroid; - converged = false; - } - } - - for (int i=0; ichilds = pool_.allocate(branching); - int start = 0; - int end = start; - for (int c=0; c(), veclen_); - variance += static_cast( ensureSquareDistance(d) ); - mean_radius += ensureSimpleDistance(d); - std::swap(indices[i],indices[end]); - std::swap(belongs_to[i],belongs_to[end]); - end++; - } - } - mean_radius = 
static_cast( - 0.5f + static_cast(mean_radius) / static_cast(s)); - variance = static_cast( - 0.5 + static_cast(variance) / static_cast(s)); - variance -= static_cast( - ensureSquareDistance( - distance_(centers[c], ZeroIterator(), veclen_))); - - node->childs[c] = pool_.allocate(); - std::memset(node->childs[c], 0, sizeof(KMeansNode)); - node->childs[c]->radius = radiuses[c]; - node->childs[c]->pivot = centers[c]; - node->childs[c]->variance = static_cast(variance); - node->childs[c]->mean_radius = mean_radius; - computeBitfieldClustering(node->childs[c],indices+start, end-start, branching, level+1); - start=end; - } - - delete[] centers; - } - - - - /** * Performs one descent in the hierarchical k-means tree. The branches not * visited are stored in a priority queue. From f3cebb3e1bac73e8bdddd3de96049a46122a0214 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Wed, 12 Aug 2020 17:32:16 +0300 Subject: [PATCH 07/12] Merge pull request #18077 from l-bat:reduce_sum * Supported ReduceSum op * Skip test --- modules/dnn/src/layers/pooling_layer.cpp | 26 +++++++++++--- modules/dnn/src/tensorflow/tf_importer.cpp | 41 +++++++++++++++++++--- modules/dnn/test/test_darknet_importer.cpp | 2 ++ modules/dnn/test/test_tf_importer.cpp | 12 +++++++ 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 3f2a0f7d03..fd08fdbeb3 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -98,6 +98,8 @@ public: type = AVE; else if (pool == "stochastic") type = STOCHASTIC; + else if (pool == "sum") + type = SUM; else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); @@ -195,7 +197,7 @@ public: return type == MAX || type == AVE; } else - return type != STOCHASTIC; + return type != STOCHASTIC && type != SUM; } #endif if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) @@ -288,7 +290,7 @@ public: maxPooling(inputs[0], outputs[0], mask); break; } - case AVE: + case AVE: case SUM: CV_Assert_N(inputs.size() == 1, outputs.size() == 1); avePooling(inputs[0], outputs[0]); break; @@ -366,7 +368,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { - CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE)) || inputs.size() == 2, nodes.size() == inputs.size()); + CV_Assert_N((inputs.size() == 1 && (type == MAX || type == AVE || type == SUM)) || inputs.size() == 2, nodes.size() == inputs.size()); auto& ieInpNode = nodes[0].dynamicCast()->node; ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT; @@ -381,6 +383,19 @@ virtual Ptr initNgraph(const std::vector >& inp exclude_pad, rounding_type, pad_type); return Ptr(new InfEngineNgraphNode(ave_pool)); } + else if (type == SUM) { + ngraph::Shape inpShape = ieInpNode->get_shape(); + CV_Assert(inpShape.size() == 2 + kernel_size.size()); + std::vector axes; + for (size_t i = 0; i < kernel_size.size(); i++) + { + if (inpShape[2 + i] == kernel_size[i]) + axes.push_back(2 + i); + } + auto reduction_axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes.size()}, axes); + auto reduce_sum = std::make_shared(ieInpNode, reduction_axes, true); + return Ptr(new InfEngineNgraphNode(reduce_sum)); + } else if (type == MAX) { auto max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), @@ -739,7 +754,7 @@ virtual Ptr initNgraph(const std::vector >& inp } } } - else 
if (poolingType == AVE) + else if (poolingType == AVE || poolingType == SUM) { for( ; x0 < x1; ++x0) { @@ -750,7 +765,7 @@ virtual Ptr initNgraph(const std::vector >& inp xend = min(xend, inp_width); float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta * ddelta : ((dend - dstart) * (yend - ystart) * (xend - xstart)); - inv_kernel_area = 1.0 / inv_kernel_area; + inv_kernel_area = poolingType == AVE ? 1.0 / inv_kernel_area : 1.0; #if CV_SIMD128 if( isPool2D && xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width ) { @@ -1095,6 +1110,7 @@ private: MAX, AVE, STOCHASTIC, + SUM, ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf }; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index c005c99b58..9083a4d4f9 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -2067,7 +2067,7 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); } - else if (type == "Mean") + else if (type == "Mean" || type == "Sum") { // Computes the mean of elements across dimensions of a tensor. // If keepdims is false (default) reduces input_tensor along the dimensions given in axis, @@ -2116,7 +2116,7 @@ void TFImporter::populateNet(Net dstNet) LayerParams avgLp; std::string avgName = name + "/avg"; CV_Assert(layer_id.find(avgName) == layer_id.end()); - avgLp.set("pool", "ave"); + avgLp.set("pool", type == "Mean" ? "ave" : "sum"); // pooling kernel H x 1 avgLp.set("global_pooling_h", true); avgLp.set("kernel_w", 1); @@ -2153,11 +2153,44 @@ void TFImporter::populateNet(Net dstNet) layer_id[name] = id; connect(layer_id, dstNet, Pin(avgName), id, 0); connect(layer_id, dstNet, Pin(layerShapeName), id, 1); + } else if (indices.total() == 1) { + int axis = toNCHW(indices.at(0)); + if (axis == 2 || axis == 3) + { + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1); + layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + + if (!keepDims) + { + // To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC + LayerParams permLP; + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. 
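+                    // (the Flatten-based squeeze below uses the original TF/NHWC axis index,
+                    // hence the permute back to NHWC first)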
+ permLP.set("order", DictValue::arrayInt(order, 4)); + std::string permName = name + "/nchw"; + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, Pin(name), permId, 0); + + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(permName), squeezeId, 0); + } + } } else { if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) - CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation."); + CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); - layerParams.set("pool", "ave"); + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); layerParams.set("global_pooling", true); int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 45edf405ac..4986e8e399 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -755,6 +755,8 @@ TEST_P(Test_Darknet_layers, connected) TEST_P(Test_Darknet_layers, relu) { + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); testDarknetLayer("relu"); } diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index d95c46b5d3..68b720a375 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -128,6 +128,13 @@ TEST_P(Test_TensorFlow_layers, reduce_mean) runTensorFlowNet("global_pool_by_axis"); } +TEST_P(Test_TensorFlow_layers, reduce_sum) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + runTensorFlowNet("sum_pool_by_axis"); +} + TEST_P(Test_TensorFlow_layers, conv_single_conv) { runTensorFlowNet("single_conv"); @@ -340,6 +347,11 @@ TEST_P(Test_TensorFlow_layers, pooling_reduce_mean) runTensorFlowNet("reduce_mean"); // an average pooling over all spatial dimensions. } +TEST_P(Test_TensorFlow_layers, pooling_reduce_sum) +{ + runTensorFlowNet("reduce_sum"); // a SUM pooling over all spatial dimensions. 
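+    // exercises the SUM pooling type added to pooling_layer.cpp by this patch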
+} + TEST_P(Test_TensorFlow_layers, max_pool_grad) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) From 2171cae8ff3da3693f87274437c0fc7b48293ed0 Mon Sep 17 00:00:00 2001 From: Yashas Samaga B L Date: Thu, 13 Aug 2020 16:25:41 +0530 Subject: [PATCH 08/12] Merge pull request #17976 from YashasSamaga:dnn-fusion-tests-fix-ocl dnn: add exhaustive fusion tests, enable more eltwise fusions * add eltwise fusion tests, enable more eltwise fusions * merge weighted eltwise tests with eltwise tests --- modules/dnn/src/dnn.cpp | 2 +- modules/dnn/test/test_layers.cpp | 432 +++++++++++++++++++++++++++++++ 2 files changed, 433 insertions(+), 1 deletion(-) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index f650a71fc2..8947791061 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2458,7 +2458,7 @@ struct Net::Impl : public detail::NetImplBase if( nextData ) nextActivLayer = nextData->layerInstance.dynamicCast(); - if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 && + if( !nextActivLayer.empty() && (!nextData->type.compare("ReLU") || !nextData->type.compare("ChannelsPReLU") || !nextData->type.compare("Power")) && diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 0c4ce11ca5..648e0aaa16 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -2053,4 +2053,436 @@ TEST_P(Layer_Test_BatchNorm, fusion) INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_BatchNorm, dnnBackendsAndTargets()); +class TestLayerFusion : public DNNTestLayer { +public: + static void makeDefaultTestConvolutionLayer(LayerParams& convParams, int in_channels, int num_filters, bool bias_term) + { + const int kernel_h = 3, kernel_w = 3; + const int pad_h = kernel_h / 2, pad_w = kernel_w / 2; + + convParams.set("kernel_h", kernel_h); + convParams.set("kernel_w", kernel_w); + convParams.set("pad_h", pad_h); + convParams.set("pad_w", pad_w); + convParams.set("num_output", num_filters); + convParams.set("bias_term", bias_term); + convParams.type = "Convolution"; + convParams.name = "convolution"; + + float conv_init_magnitude = 1.0f / in_channels / kernel_h / kernel_w; + int weightsShape[] = {num_filters, in_channels, kernel_h, kernel_w}; + Mat weights(4, &weightsShape[0], CV_32F); + randu(weights, -conv_init_magnitude, conv_init_magnitude); + convParams.blobs.push_back(weights); + if (bias_term) + { + Mat bias(1, num_filters, CV_32F); + randu(bias, -1.0f, 1.0f); + convParams.blobs.push_back(bias); + } + } + + static void makeDefaultTestActivationLayer(LayerParams& activationParams, const std::string& type, int in_channels) + { + activationParams.type = type; + activationParams.name = "activation"; + if (activationParams.type == "ReLU") + activationParams.set("negative_slope", 0.1f); + else if (activationParams.type == "Power") + { + activationParams.set("power", 2.0f); + activationParams.set("scale", 0.5f); + activationParams.set("shift", 0.3f); + } + else if (activationParams.type == "ReLU6") + { + activationParams.set("min_value", -1.0f); + activationParams.set("max_value", 1.0f); + } + else if (activationParams.type == "ChannelsPReLU") + { + Mat scales(1, in_channels, CV_32F); + randu(scales, -1.0f, 1.0f); + activationParams.blobs.push_back(scales); + } + } + + static void makeDefaultTestEltwiseLayer(LayerParams& eltwiseParams, const std::string& op, bool withCoefficients) + { + eltwiseParams.type = "Eltwise"; + eltwiseParams.name = "eltwise"; + eltwiseParams.set("operation", op); + if (withCoefficients) + { + float coeff[] 
= {0.3f, 0.5f}; + eltwiseParams.set("coeff", DictValue::arrayReal(coeff, 2)); + } + } + + static void test(Mat& input, Net& net, Backend backendId, Target targetId, std::vector expectedFusedLayers = std::vector(), double l1 = 0.0, double lInf = 0.0) + { + DNNTestLayer::checkBackend(backendId, targetId); + + net.enableFusion(false); + net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableTarget(DNN_TARGET_CPU); + net.setInput(input); + Mat outputReference = net.forward().clone(); + std::vector refTimings; + net.getPerfProfile(refTimings); + for (int i = 0; i < refTimings.size(); i++) + { + CV_Assert(refTimings[i] != 0.0); + } + + net.enableFusion(true); + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + net.setInput(input); + Mat outputTest = net.forward().clone(); + std::vector testTimings; + net.getPerfProfile(testTimings); + for (int i = 0; i < testTimings.size(); i++) + { + if(std::find(expectedFusedLayers.begin(), expectedFusedLayers.end(), i + 1) != expectedFusedLayers.end()) + { + EXPECT_EQ(testTimings[i], 0.0); + } + else + { + EXPECT_NE(testTimings[i], 0.0); + } + } + + // double ref_max_value, ref_min_value; + // minMaxLoc(outputReference.reshape(1, 1), &ref_min_value, &ref_max_value); + // std::cout << "reference range: " << ref_min_value << ' ' << ref_max_value << std::endl; + + double default_l1, default_lInf; + DNNTestLayer::getDefaultThresholds(backendId, targetId, &default_l1, &default_lInf); + if (l1 == 0.0) + l1 = default_l1; + if (lInf == 0.0) + lInf = default_lInf; + normAssert(outputReference, outputTest, "", l1, lInf); + } + + static testing::internal::ParamGenerator eltwiseOpList() + { + // TODO: automate list generation + return Values("sum", "max", "prod", "div"); + } + + static testing::internal::ParamGenerator activationLayersList() + { + // TODO: automate list generation + return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power"); + } + + static testing::internal::ParamGenerator > dnnBackendsAndTargetsForFusionTests() + { + return dnnBackendsAndTargets(false, false, true, false); // OCV OpenCL + OCV CPU + } +}; + +typedef TestWithParam > > ConvolutionActivationFusion; +TEST_P(ConvolutionActivationFusion, Accuracy) +{ + // input + // | + // ----------------------- + // | convolution | + // ----------------------- + // | + // ----------------------- + // | activation | + // ----------------------- + // | + // output + + const int batch_size = 2, in_channels = 16; + const int in_height = 16, in_width = 16; + int inputShape[] = {batch_size, in_channels, in_height, in_width}; + Mat input(4, &inputShape[0], CV_32F); + randu(input, 1.0f, 2.0f); + + bool bias_term = get<0>(GetParam()); + LayerParams convParams; + TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term); + + std::string actType = get<1>(GetParam()); + LayerParams activationParams; + TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels); + + Backend backendId = get<0>(get<2>(GetParam())); + Target targetId = get<1>(get<2>(GetParam())); + + // bug: https://github.com/opencv/opencv/issues/17964 + if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); + + // bug: https://github.com/opencv/opencv/issues/17953 + if (actType == "ChannelsPReLU" && bias_term == false && + backendId == DNN_BACKEND_OPENCV && (targetId == 
DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)) + { + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); + } + + Net net; + int convId = net.addLayer(convParams.name, convParams.type, convParams); + int activId = net.addLayerToPrev(activationParams.name, activationParams.type, activationParams); + net.connect(0, 0, convId, 0); + + std::vector expectedFusedLayers; + if (backendId == DNN_BACKEND_OPENCV) + { + if (targetId == DNN_TARGET_CPU) + expectedFusedLayers.push_back(activId); // all activations are fused + else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) + { + if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power") + expectedFusedLayers.push_back(activId); + } + } + + TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers); +} +INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationFusion, Combine( +/* bias */ testing::Bool(), +/* activation */ TestLayerFusion::activationLayersList(), + TestLayerFusion::dnnBackendsAndTargetsForFusionTests() +)); + +typedef TestWithParam > > ConvolutionEltwiseFusion; +TEST_P(ConvolutionEltwiseFusion, Accuracy) +{ + // input + // | + // ------------------------------- + // | | + // | --------------- + // | | convolution | + // | --------------- + // | | + // | ---------------- | + // --------| eltwise op |------- + // ---------------- + // | + // output + + const int batch_size = 2, in_channels = 16; + const int in_height = 16, in_width = 16; + int inputShape[] = {batch_size, in_channels, in_height, in_width}; + Mat input(4, &inputShape[0], CV_32F); + randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div + + bool bias_term = get<0>(GetParam()); + LayerParams convParams; + TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term); + + std::string eltwiseOp = get<1>(GetParam()); + bool weightedEltwise = get<2>(GetParam()); + if (eltwiseOp != "sum" && weightedEltwise) + throw SkipTestException("weighted eltwise not supported"); + LayerParams eltwiseParams; + TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, weightedEltwise); + + Net net; + int convId = net.addLayer(convParams.name, convParams.type, convParams); + int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams); + net.connect(0, 0, convId, 0); + net.connect(convId, 0, eltwiseId, 0); + net.connect(0, 0, eltwiseId, 1); + + Backend backendId = get<0>(get<3>(GetParam())); + Target targetId = get<1>(get<3>(GetParam())); + TestLayerFusion::test(input, net, backendId, targetId); +} +INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseFusion, Combine( +/* bias */ testing::Bool(), +/* eltwise op */ TestLayerFusion::eltwiseOpList(), +/* eltwise weighted */ testing::Bool(), + TestLayerFusion::dnnBackendsAndTargetsForFusionTests() +)); + +typedef TestWithParam > > ConvolutionEltwiseActivationFusion; +TEST_P(ConvolutionEltwiseActivationFusion, Accuracy) +{ + // input + // | + // ------------------------------- + // | | + // | --------------- + // | | convolution | + // | --------------- + // | | + // | ---------------- | + // --------| eltwise op |------- + // ---------------- + // | + // ---------------- + // | activation | + // ---------------- + // | + // output + + const int batch_size = 2, in_channels = 16; + const int in_height = 16, in_width = 16; + int inputShape[] = {batch_size, in_channels, in_height, in_width}; + Mat input(4, &inputShape[0], CV_32F); + randu(input, 1.0f, 
+
+typedef TestWithParam<tuple<bool, std::string, bool, std::string, tuple<Backend, Target> > > ConvolutionEltwiseActivationFusion;
+TEST_P(ConvolutionEltwiseActivationFusion, Accuracy)
+{
+    //                 input
+    //                   |
+    //    -------------------------------
+    //    |                             |
+    //    |                     ---------------
+    //    |                     | convolution |
+    //    |                     ---------------
+    //    |                             |
+    //    |        ----------------     |
+    //    ---------|  eltwise op  |------
+    //             ----------------
+    //                    |
+    //             ----------------
+    //             |  activation  |
+    //             ----------------
+    //                    |
+    //                 output
+
+    const int batch_size = 2, in_channels = 16;
+    const int in_height = 16, in_width = 16;
+    int inputShape[] = {batch_size, in_channels, in_height, in_width};
+    Mat input(4, &inputShape[0], CV_32F);
+    randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
+
+    bool bias_term = get<0>(GetParam());
+    LayerParams convParams;
+    TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
+
+    std::string eltwiseOp = get<1>(GetParam());
+    bool weightedEltwise = get<2>(GetParam());
+    if (eltwiseOp != "sum" && weightedEltwise)
+        throw SkipTestException("weighted eltwise not supported");
+    LayerParams eltwiseParams;
+    TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, false);
+
+    std::string actType = get<3>(GetParam());
+    LayerParams activationParams;
+    TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
+
+    Backend backendId = get<0>(get<4>(GetParam()));
+    Target targetId = get<1>(get<4>(GetParam()));
+
+    // bug: https://github.com/opencv/opencv/issues/17945
+    if (eltwiseOp != "sum" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+
+    // bug: https://github.com/opencv/opencv/issues/17953
+    if (eltwiseOp == "sum" && actType == "ChannelsPReLU" && bias_term == false &&
+        backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+    {
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    }
+
+    // bug: https://github.com/opencv/opencv/issues/17964
+    if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+
+    Net net;
+    int convId = net.addLayer(convParams.name, convParams.type, convParams);
+    int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
+    int activId = net.addLayer(activationParams.name, activationParams.type, activationParams);
+    net.connect(0, 0, convId, 0);
+    net.connect(convId, 0, eltwiseId, 0);
+    net.connect(0, 0, eltwiseId, 1);
+    net.connect(eltwiseId, 0, activId, 0);
+
+    std::vector<int> expectedFusedLayers;
+    if (backendId == DNN_BACKEND_OPENCV)
+    {
+        if (targetId == DNN_TARGET_CPU)
+            expectedFusedLayers.push_back(activId); // activation is fused with eltwise layer
+        else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
+        {
+            if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "Power")
+            {
+                expectedFusedLayers.push_back(eltwiseId);
+                expectedFusedLayers.push_back(activId);
+            }
+        }
+    }
+
+    TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
+}
+INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionEltwiseActivationFusion, Combine(
+/* bias */             testing::Bool(),
+/* eltwise op */       TestLayerFusion::eltwiseOpList(),
+/* eltwise weighted */ testing::Bool(),
+/* activation */       TestLayerFusion::activationLayersList(),
+                       TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
+));
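makeDefaultTestActivationLayer is defined earlier in this patch, outside this excerpt; judging from the call sites, its ChannelsPReLU branch has to attach a per-channel slope blob, roughly along these lines (an assumed reconstruction, not the patch's exact body):

    // Assumed sketch of the helper used above (real body is elsewhere in the patch).
    static void makeDefaultTestActivationLayer(LayerParams& activationParams,
                                               const std::string& type, int in_channels)
    {
        activationParams.type = type;
        activationParams.name = "activationLayer";
        if (type == "ChannelsPReLU")
        {
            // PReLU with per-channel slopes needs one learned weight per channel.
            Mat scales(1, in_channels, CV_32F);
            randu(scales, -1.0f, 1.0f);
            activationParams.blobs.push_back(scales);
        }
    }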
+
+typedef TestWithParam<tuple<bool, std::string, std::string, bool, tuple<Backend, Target> > > ConvolutionActivationEltwiseFusion;
+TEST_P(ConvolutionActivationEltwiseFusion, Accuracy)
+{
+    //                 input
+    //                   |
+    //    -------------------------------
+    //    |                             |
+    //    |                    ----------------
+    //    |                    |  convolution |
+    //    |                    ----------------
+    //    |                             |
+    //    |                    ----------------
+    //    |                    |  activation  |
+    //    |                    ----------------
+    //    |                             |
+    //    |        ----------------     |
+    //    ---------|  eltwise sum |------
+    //             ----------------
+    //                    |
+
+    const int batch_size = 2, in_channels = 16;
+    const int in_height = 16, in_width = 16;
+    int inputShape[] = {batch_size, in_channels, in_height, in_width};
+    Mat input(4, &inputShape[0], CV_32F);
+    randu(input, 1.0f, 2.0f); // avoid small values to test eltwise div
+
+    bool bias_term = get<0>(GetParam());
+    LayerParams convParams;
+    TestLayerFusion::makeDefaultTestConvolutionLayer(convParams, in_channels, in_channels, bias_term);
+
+    std::string actType = get<1>(GetParam());
+    LayerParams activationParams;
+    TestLayerFusion::makeDefaultTestActivationLayer(activationParams, actType, in_channels);
+
+    std::string eltwiseOp = get<2>(GetParam());
+    bool weightedEltwise = get<3>(GetParam());
+    if (eltwiseOp != "sum" && weightedEltwise)
+        throw SkipTestException("weighted eltwise not supported");
+    LayerParams eltwiseParams;
+    TestLayerFusion::makeDefaultTestEltwiseLayer(eltwiseParams, eltwiseOp, false);
+
+    Backend backendId = get<0>(get<4>(GetParam()));
+    Target targetId = get<1>(get<4>(GetParam()));
+
+    // bug: https://github.com/opencv/opencv/issues/17964
+    if (actType == "Power" && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+
+    // bug: https://github.com/opencv/opencv/issues/17953
+    if (actType == "ChannelsPReLU" && bias_term == false &&
+        backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16))
+    {
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    }
+
+    Net net;
+    int convId = net.addLayer(convParams.name, convParams.type, convParams);
+    int activId = net.addLayer(activationParams.name, activationParams.type, activationParams);
+    int eltwiseId = net.addLayer(eltwiseParams.name, eltwiseParams.type, eltwiseParams);
+    net.connect(0, 0, convId, 0);
+    net.connect(convId, 0, activId, 0);
+    net.connect(activId, 0, eltwiseId, 0);
+    net.connect(0, 0, eltwiseId, 1);
+
+    std::vector<int> expectedFusedLayers;
+    if (backendId == DNN_BACKEND_OPENCV)
+    {
+        if (targetId == DNN_TARGET_CPU)
+            expectedFusedLayers.push_back(activId); // activation fused with convolution
+        else if (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16)
+        {
+            if (actType == "ReLU" || actType == "ChannelsPReLU" || actType == "ReLU6" || actType == "TanH" || actType == "Power")
+                expectedFusedLayers.push_back(activId); // activation fused with convolution
+        }
+    }
+
+    TestLayerFusion::test(input, net, backendId, targetId, expectedFusedLayers);
+}
+INSTANTIATE_TEST_CASE_P(TestLayerFusion, ConvolutionActivationEltwiseFusion, Combine(
+/* bias */             testing::Bool(),
+/* activation */       TestLayerFusion::activationLayersList(),
+/* eltwise op */       TestLayerFusion::eltwiseOpList(),
+/* eltwise weighted */ testing::Bool(),
+                       TestLayerFusion::dnnBackendsAndTargetsForFusionTests()
+));
+
 }} // namespace

From 00890aecdf6a117ecf2a74632ffe9b7eed6e6606 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin
Date: Thu, 13 Aug 2020 18:33:18 +0000
Subject: [PATCH 09/12] core(ocl): fix ocl::Image2d::isFormatSupported() in case of OPENCV_OPENCL_DEVICE=disabled

---
 modules/core/src/ocl.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index c6b6e2f0f0..62de280812 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -6458,6 +6458,9 @@ struct Image2D::Impl
             CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
 
         cl_context context = (cl_context)Context::getDefault().ptr();
+        if (!context)
+            return false;
+
         // Figure out how many formats are supported by this context.
        cl_uint numFormats = 0;
        cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,

From 339b963e6b86935788e1338397e9a0d31430fc66 Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Wed, 12 Aug 2020 15:03:46 +0300
Subject: [PATCH 10/12] Fix MatMul and Add axes

---
 modules/dnn/src/onnx/onnx_importer.cpp  | 18 ++++++++++++++++++
 modules/dnn/test/test_onnx_importer.cpp |  9 +++++++++
 2 files changed, 27 insertions(+)

diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index f6dc285fad..e65c7ac3e9 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -641,6 +641,17 @@ void ONNXImporter::populateNet(Net dstNet)
             {
                 layerParams.type = "Scale";
                 layerParams.set("bias_term", true);
+                int axis = 1;
+                for (int i = 0; i < graph_proto.initializer_size(); i++)
+                {
+                    opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
+                    if (tensor_proto.name() == node_proto.input(const_blob_id))
+                    {
+                        axis = inpShape.size() - tensor_proto.dims_size();
+                        break;
+                    }
+                }
+                layerParams.set("axis", axis);
                 blob = blob.reshape(1, 1);
                 layerParams.blobs.push_back((isSub ? -1 : 1) * blob);
             }
@@ -911,13 +922,20 @@ void ONNXImporter::populateNet(Net dstNet)
             CV_Assert(node_proto.input_size() == 2);
             layerParams.type = "InnerProduct";
             layerParams.set("bias_term", false);
+            CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end());
+            int firstInpDims = outShapes[node_proto.input(0)].size();
+            int secondInpDims;
 
             if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
             {
                 Mat blob = getBlob(node_proto, constBlobs, 1);
+                secondInpDims = blob.dims;
                 layerParams.blobs.push_back(blob.t());
                 layerParams.set("num_output", layerParams.blobs[0].size[0]);
+            } else {
+                secondInpDims = outShapes[node_proto.input(1)].size();
             }
+            layerParams.set("axis", firstInpDims - secondInpDims + 1);
         }
         else if (layer_type == "Mul" || layer_type == "Div")
         {
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 6a9e68dbc5..a317be71fb 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -404,6 +404,15 @@ TEST_P(Test_ONNX_layers, MatMul)
     testONNXModels("matmul_4d");
 }
 
+TEST_P(Test_ONNX_layers, MatMulAdd)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
+    testONNXModels("matmul_add");
+}
+
 TEST_P(Test_ONNX_layers, Expand)
 {
     testONNXModels("expand_batch");
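The axis arithmetic in the MatMul change above (firstInpDims - secondInpDims + 1) lines InnerProduct up with ONNX MatMul broadcasting; a worked case with assumed shapes:

    // Worked example for layerParams.set("axis", firstInpDims - secondInpDims + 1):
    //   A: 2 x 3 x 4 x 5  -> firstInpDims  = 4
    //   B: 5 x 6          -> secondInpDims = 2
    //   axis = 4 - 2 + 1 = 3, so InnerProduct treats dims 0..2 of A as batch
    //   dimensions and multiplies over the last axis, yielding 2 x 3 x 4 x 6.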
From ad63d24dbaaa6801d9afc4afdccd84fbc5a68e89 Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Fri, 14 Aug 2020 19:49:42 +0300
Subject: [PATCH 11/12] Merge pull request #18096 from l-bat:update_onnx_importer

* Added ReduceSum to ONNX importer

* Fix comments

* Fix Mul
---
 .../dnn/src/layers/fully_connected_layer.cpp |   1 -
 .../dnn/src/onnx/onnx_graph_simplifier.cpp   |  19 +++
 modules/dnn/src/onnx/onnx_importer.cpp       | 132 +++++++++++++-----
 modules/dnn/test/test_onnx_importer.cpp      |   6 +
 4 files changed, 124 insertions(+), 34 deletions(-)

diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index 03349253c0..4746403504 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -116,7 +116,6 @@ public:
         CV_CheckEQ(inputs.size(), (size_t)2, "");
         numOutput = inputs[1].back();
         cAxis = inputs[0].size() - 1;
-        CV_CheckEQ(numOutput, inputs[0][cAxis - 1], "");
         int dims = inputs[0].size();
         CV_CheckEQ(inputs[1].size(), (size_t)dims, "");
         CV_CheckGE(dims, 2, "");
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
index 61ef8b7da6..e8b237cab4 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -262,6 +262,24 @@ public:
     }
 };
 
+class ExpandSubgraph : public Subgraph
+{
+public:
+    ExpandSubgraph()
+    {
+        int input = addNodeToMatch("");
+        int values = addNodeToMatch("");
+        int init = addNodeToMatch("ConstantOfShape", values);
+        int coeff = addNodeToMatch("Constant");
+        int mul = addNodeToMatch("Mul", init, coeff);
+        int shape = addNodeToMatch("Constant");
+        int condition = addNodeToMatch("Equal", shape, mul);
+        int where = addNodeToMatch("Where", condition, init, addNodeToMatch("Constant"));
+        addNodeToMatch("Expand", input, where);
+        setFusedNode("Expand", input, shape);
+    }
+};
+
 class MulCastSubgraph : public Subgraph
 {
 public:
@@ -459,6 +477,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
     subgraphs.push_back(makePtr<SoftMaxSubgraph>());
     subgraphs.push_back(makePtr<NormalizeSubgraph1>());
     subgraphs.push_back(makePtr<NormalizeSubgraph2>());
+    subgraphs.push_back(makePtr<ExpandSubgraph>());
 
     simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
 }
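ExpandSubgraph above follows the importer's standard pattern-folding recipe: describe the node pattern in the constructor with addNodeToMatch(), name the surviving node with setFusedNode(), and register one makePtr per pattern. A hypothetical second pattern, just to show the shape of the API (not part of this patch):

    // Hypothetical: fold Reshape(x, Shape(x)) into Identity(x).
    class IdentityReshapeSubgraph : public Subgraph
    {
    public:
        IdentityReshapeSubgraph()
        {
            int input = addNodeToMatch("");             // matches any producer
            int shape = addNodeToMatch("Shape", input); // Shape(x)
            addNodeToMatch("Reshape", input, shape);    // Reshape(x, Shape(x)) == x
            setFusedNode("Identity", input);            // keep only Identity(x)
        }
    };
    // registered like the others: subgraphs.push_back(makePtr<IdentityReshapeSubgraph>());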
"MAX" : "AVE"); + String pool; + if (layer_type == "GlobalMaxPool") + pool = "MAX"; + else if (layer_type == "ReduceSum") + pool = "SUM"; + else + pool = "AVE"; + layerParams.set("pool", pool); layerParams.set("global_pooling", layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool"); - - if (layer_type == "ReduceMean") + if (layer_type == "ReduceMean" || layer_type == "ReduceSum") { - if (layerParams.get("keepdims") == 0 || !layerParams.has("axes")) - CV_Error(Error::StsNotImplemented, "Unsupported mode of ReduceMean operation."); + if (!layerParams.has("axes")) + CV_Error(Error::StsNotImplemented, "Unsupported mode of " + layer_type + " operation."); MatShape inpShape = outShapes[node_proto.input(0)]; DictValue axes = layerParams.get("axes"); + bool keepdims = layerParams.get("keepdims"); + MatShape targetShape = inpShape; + for (int i = 0; i < axes.size(); i++) { + int axis = clamp(axes.get(i), inpShape.size()); + if (keepdims) { + targetShape[axis] = 1; + } else { + targetShape.erase(targetShape.begin() + axis); + } + } + if (inpShape.size() == 3 && axes.size() <= 2) { - int axis = axes.get(0); + int axis = clamp(axes.get(0), inpShape.size()); CV_CheckNE(axis, 0, ""); - outShapes[layerParams.name] = inpShape; - outShapes[layerParams.name][axis] = 1; LayerParams reshapeLp; reshapeLp.name = layerParams.name + "/reshape"; @@ -426,13 +442,12 @@ void ONNXImporter::populateNet(Net dstNet) avgLp.name = layerParams.name + "/avg"; avgLp.type = "Pooling"; CV_Assert(layer_id.find(avgLp.name) == layer_id.end()); - avgLp.set("pool", "ave"); + avgLp.set("pool", pool); if (axes.size() == 2) { - CV_CheckEQ(axes.get(0), 1, "Unsupported ReduceMean mode"); - CV_CheckEQ(axes.get(1), 2, "Unsupported ReduceMean mode"); + CV_CheckEQ(clamp(axes.get(0), inpShape.size()), 1, ("Unsupported " + layer_type + " mode").c_str()); + CV_CheckEQ(clamp(axes.get(1), inpShape.size()), 2, ("Unsupported " + layer_type + " mode").c_str()); avgLp.set("global_pooling", true); - outShapes[layerParams.name][axes.get(1)] = 1; } else { @@ -443,28 +458,33 @@ void ONNXImporter::populateNet(Net dstNet) node_proto.set_input(0, reshapeLp.name); node_proto.set_output(0, avgLp.name); addLayer(dstNet, avgLp, node_proto, layer_id, outShapes); - - layerParams.type = "Flatten"; - layerParams.set("axis", 0); - layerParams.set("end_axis", 1); - - node_proto.set_input(0, avgLp.name); - node_proto.set_output(0, layerParams.name); } else { if (inpShape.size() != 4 && inpShape.size() != 5) - CV_Error(Error::StsNotImplemented, "Unsupported input shape of reduce_mean operation."); + CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation."); CV_Assert(axes.size() <= inpShape.size() - 2); std::vector kernel_size(inpShape.size() - 2, 1); for (int i = 0; i < axes.size(); i++) { - int axis = axes.get(i); + int axis = clamp(axes.get(i), inpShape.size()); CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); kernel_size[axis - 2] = inpShape[axis]; } - layerParams.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); + LayerParams poolLp = layerParams; + poolLp.name = layerParams.name + "/avg"; + CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); + poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); + + node_proto.set_output(0, poolLp.name); + addLayer(dstNet, poolLp, node_proto, layer_id, outShapes); } + + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); + + node_proto.set_input(0, 
@@ -1001,15 +1021,10 @@ void ONNXImporter::populateNet(Net dstNet)
             {
                 Mat inp0 = getBlob(node_proto, constBlobs, 0);
                 Mat inp1 = getBlob(node_proto, constBlobs, 1);
-                if (inp0.size != inp1.size)
+                if (inp0.size != inp1.size && inp1.total() != 1)
                     CV_Error(Error::StsNotImplemented, "Constant multiply with different shapes");
 
-                Mat out;
-                if (isDiv)
-                    divide(inp0, inp1, out);
-                else
-                    multiply(inp0, inp1, out);
-
+                Mat out = isDiv ? inp0 / inp1 : inp0.mul(inp1);
                 out = out.reshape(1, inp0.dims, inp0.size);
                 out.dims = inp0.dims;  // to workaround dims == 1
                 addConstant(layerParams.name, out, constBlobs, outShapes);
@@ -1180,9 +1195,45 @@ void ONNXImporter::populateNet(Net dstNet)
             Mat newShapeMat = getBlob(node_proto, constBlobs, 1);
             MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());
 
-            shapeIt = outShapes.find(node_proto.input(0));
-            CV_Assert(shapeIt != outShapes.end());
-            MatShape inpShape = shapeIt->second;
+            MatShape inpShape;
+            bool haveVariables = constBlobs.find(node_proto.input(0)) == constBlobs.end();
+            if (haveVariables)
+            {
+                shapeIt = outShapes.find(node_proto.input(0));
+                CV_Assert(shapeIt != outShapes.end());
+                inpShape = shapeIt->second;
+            }
+            else
+            {
+                inpShape = shape(getBlob(node_proto, constBlobs, 0));
+            }
+
+            String srcName = node_proto.input(0);
+            // Unsqueeze and repeat along new axis
+            if (targetShape.size() == inpShape.size() + 1)
+            {
+                for (int i = 0; i < targetShape.size(); i++)
+                {
+                    if (targetShape[i] == -1 && i < inpShape.size())
+                        targetShape[i] = inpShape[i];
+                    else if (i < inpShape.size() && targetShape[i] != inpShape[i])
+                        inpShape.insert(inpShape.begin() + i, 1);
+                }
+                if (haveVariables)
+                {
+                    LayerParams reshapeLp;
+                    reshapeLp.name = layerParams.name + "/reshape";
+                    reshapeLp.type = "Reshape";
+                    CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
+                    reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
+
+                    opencv_onnx::NodeProto proto;
+                    proto.add_input(node_proto.input(0));
+                    proto.add_output(reshapeLp.name);
+                    addLayer(dstNet, reshapeLp, proto, layer_id, outShapes);
+                    srcName = reshapeLp.name;
+                }
+            }
             CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");
 
             std::vector<int> broadcast_axes;
@@ -1197,6 +1248,19 @@ void ONNXImporter::populateNet(Net dstNet)
                 }
             }
 
+            if (!haveVariables)
+            {
+                if (broadcast_axes.size() != 1)
+                    CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input");
+
+                Mat input = getBlob(node_proto, constBlobs, 0);
+                input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
+                Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
+                output = output.reshape(0, targetShape);
+                addConstant(layerParams.name, output, constBlobs, outShapes);
+                continue;
+            }
+
             if (broadcast_axes.size() == 2 &&
                 broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
             {
@@ -1231,6 +1295,7 @@ void ONNXImporter::populateNet(Net dstNet)
                     CV_Assert(layer_id.find(copyLP.name) == layer_id.end());
                     input_names.push_back(copyLP.name);
 
+                    node_proto.set_input(0, srcName);
                     node_proto.set_output(0, copyLP.name);
                     addLayer(dstNet, copyLP, node_proto, layer_id, outShapes);
                 }
@@ -1241,6 +1306,7 @@ void ONNXImporter::populateNet(Net dstNet)
                 }
                 layerParams.set("axis", broadcast_axes[0]);
                 layerParams.type = "Concat";
+                node_proto.set_output(0, layerParams.name);
             }
             else
                 CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
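The constant-input branch above folds Expand at import time: broadcasting a constant along a single axis is just reshape, repeat, reshape. The same trick in isolation (a sketch with assumed helper names):

    // Sketch: broadcast 'blob' along one axis via cv::repeat.
    // rowsBeforeAxis = product of dims before the broadcast axis (where the dim is 1).
    #include <opencv2/core.hpp>
    #include <vector>

    cv::Mat expandAlongAxis(const cv::Mat& blob, int rowsBeforeAxis, int repeats,
                            const std::vector<int>& finalShape)
    {
        cv::Mat flat  = blob.reshape(0, rowsBeforeAxis); // 2-D view: rows x (dims at/after axis)
        cv::Mat tiled = cv::repeat(flat, 1, repeats);    // tile columns 'repeats' times
        return tiled.reshape(0, (int)finalShape.size(), finalShape.data()); // back to N-d
    }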
Expand op"); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index a317be71fb..25efcbb3ca 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -257,6 +257,11 @@ TEST_P(Test_ONNX_layers, ReduceMean) testONNXModels("reduce_mean_axis2"); } +TEST_P(Test_ONNX_layers, ReduceSum) +{ + testONNXModels("reduce_sum"); +} + TEST_P(Test_ONNX_layers, ReduceMean3D) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) @@ -417,6 +422,7 @@ TEST_P(Test_ONNX_layers, Expand) { testONNXModels("expand_batch"); testONNXModels("expand_channels"); + testONNXModels("expand_neg_batch"); } TEST_P(Test_ONNX_layers, ExpandHW) From 68f527267bdeb6179d0b37665b29b4412d69794a Mon Sep 17 00:00:00 2001 From: nhlsm Date: Sat, 15 Aug 2020 02:21:23 +0900 Subject: [PATCH 12/12] Merge pull request #18080 from nhlsm:improve-mat-operator-assign-scalar * improve Mat::operator=(Scalar) * touch * remove trailing whitespace * TEST: check if old code pass test or not * remove CV_Error * remove warning * fix: is -> Scalar * 1) Mat *mat -> Mat &mat 2) return bool, add output param * add comment --- modules/core/src/copy.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 48440ef265..7f4329df78 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -414,6 +414,29 @@ void Mat::copyTo( OutputArray _dst, InputArray _mask ) const copymask(ptrs[0], 0, ptrs[2], 0, ptrs[1], 0, sz, &esz); } + +static bool can_apply_memset(const Mat &mat, const Scalar &s, int &fill_value) +{ + // check if depth is 1 byte. + switch (mat.depth()) + { + case CV_8U: fill_value = saturate_cast( s.val[0] ); break; + case CV_8S: fill_value = saturate_cast( s.val[0] ); break; + default: return false; + } + + // check if all element is same. + const int64* is = (const int64*)&s.val[0]; + switch (mat.channels()) + { + case 1: return true; + case 2: return (is[0] == is[1]); + case 3: return (is[0] == is[1] && is[1] == is[2]); + case 4: return (is[0] == is[1] && is[1] == is[2] && is[2] == is[3]); + default: return false; + } +} + Mat& Mat::operator = (const Scalar& s) { CV_INSTRUMENT_REGION(); @@ -434,6 +457,14 @@ Mat& Mat::operator = (const Scalar& s) } else { + int fill_value = 0; + if ( can_apply_memset(*this, s, fill_value) ) + { + for (size_t i = 0; i < it.nplanes; i++, ++it) + memset(dptr, fill_value, elsize); + return *this; + } + if( it.nplanes > 0 ) { double scalar[12];