diff --git a/.gitignore b/.gitignore
index 2ea6d3821e..8f9ef226cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ bin/
 *.tlog
 build
 node_modules
+CMakeSettings.json
diff --git a/3rdparty/libtengine/tengine.cmake b/3rdparty/libtengine/tengine.cmake
index 7bd42d1ba2..4085afcf31 100644
--- a/3rdparty/libtengine/tengine.cmake
+++ b/3rdparty/libtengine/tengine.cmake
@@ -22,69 +22,70 @@
 #      sqfu@openailab.com
 #

-SET(TENGINE_VERSION "tengine-opencv")
-SET(OCV_TENGINE_DSTDIRECTORY ${OpenCV_BINARY_DIR}/3rdparty/libtengine)
-SET(DEFAULT_OPENCV_TENGINE_SOURCE_PATH ${OCV_TENGINE_DSTDIRECTORY}/Tengine-${TENGINE_VERSION})
+SET(TENGINE_COMMIT_VERSION "2f3cd86217f3530c8e4a82f3ed5af14c7a4e3943")
+SET(OCV_TENGINE_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtengine")
+SET(OCV_TENGINE_SOURCE_PATH "${OCV_TENGINE_DIR}/Tengine-${TENGINE_COMMIT_VERSION}")

-IF(EXISTS ${DEFAULT_OPENCV_TENGINE_SOURCE_PATH})
-    MESSAGE(STATUS "Tengine is exist already .")
+IF(EXISTS "${OCV_TENGINE_SOURCE_PATH}")
+    MESSAGE(STATUS "Tengine already exists at: ${OCV_TENGINE_SOURCE_PATH}")
     SET(Tengine_FOUND ON)
-    set(BUILD_TENGINE ON)
+    SET(BUILD_TENGINE ON)
 ELSE()
-    SET(OCV_TENGINE_FILENAME "${TENGINE_VERSION}.zip")#name2
+    SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name2
     SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url2
-    SET(tengine_md5sum 9c80d91dc8413911522ec80cde013ae2) #md5sum2
+    SET(tengine_md5sum 9124324b6e2b350012e46ae1db4bad7d) #md5sum2

-    MESSAGE(STATUS "**** TENGINE DOWNLOAD BEGIN ****")
+    #MESSAGE(STATUS "**** TENGINE DOWNLOAD BEGIN ****")
     ocv_download(FILENAME ${OCV_TENGINE_FILENAME}
                  HASH ${tengine_md5sum}
                  URL
                    "${OPENCV_TENGINE_URL}"
                    "$ENV{OPENCV_TENGINE_URL}"
                    "${OCV_TENGINE_URL}"
-                 DESTINATION_DIR ${OCV_TENGINE_DSTDIRECTORY}
+                 DESTINATION_DIR "${OCV_TENGINE_DIR}"
                  ID TENGINE
                  STATUS res
                  UNPACK RELATIVE_URL)

     if (NOT res)
-        MESSAGE(STATUS "TENGINE DOWNLOAD FAILED .Turning Tengine_FOUND off.")
+        MESSAGE(STATUS "TENGINE DOWNLOAD FAILED. Turning Tengine_FOUND off.")
         SET(Tengine_FOUND OFF)
     else ()
         MESSAGE(STATUS "TENGINE DOWNLOAD success . ")
         SET(Tengine_FOUND ON)
-        set(BUILD_TENGINE ON)
+        SET(BUILD_TENGINE ON)
     endif()
 ENDIF()

-if (BUILD_TENGINE)
-    set(HAVE_TENGINE 1)
+if(BUILD_TENGINE)
+    SET(HAVE_TENGINE 1)

     # android system
     if(ANDROID)
         if(${ANDROID_ABI} STREQUAL "armeabi-v7a")
-            set(CONFIG_ARCH_ARM32 ON)
+            SET(CONFIG_ARCH_ARM32 ON)
         elseif(${ANDROID_ABI} STREQUAL "arm64-v8a")
-            set(CONFIG_ARCH_ARM64 ON)
+            SET(CONFIG_ARCH_ARM64 ON)
         endif()
+        SET(Tengine_LIB "tengine" CACHE INTERNAL "")
+    else()
+        # linux system
+        if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm)
+            SET(CONFIG_ARCH_ARM32 ON)
+        elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64
+            SET(CONFIG_ARCH_ARM64 ON)
+        endif()
+        SET(Tengine_LIB "tengine" CACHE INTERNAL "")
     endif()

-    # linux system
-    if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm)
-        set(CONFIG_ARCH_ARM32 ON)
-    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64
-        set(CONFIG_ARCH_ARM64 ON)
-    endif()
-
-    SET(DEFAULT_OPENCV_TENGINE_SOURCE_PATH ${OCV_TENGINE_DSTDIRECTORY}/Tengine-${TENGINE_VERSION})
-    set(BUILT_IN_OPENCV ON) ## set for tengine compile discern .
-    set(Tengine_INCLUDE_DIR ${DEFAULT_OPENCV_TENGINE_SOURCE_PATH}/core/include)
-    set(Tengine_LIB ${CMAKE_BINARY_DIR}/lib/${ANDROID_ABI}/libtengine.a)
-    if ( IS_DIRECTORY ${DEFAULT_OPENCV_TENGINE_SOURCE_PATH})
-        add_subdirectory("${DEFAULT_OPENCV_TENGINE_SOURCE_PATH}" ${OCV_TENGINE_DSTDIRECTORY}/build)
+    SET(BUILT_IN_OPENCV ON) ## set so the Tengine build can detect it is compiled inside OpenCV
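[review note] The tengine.cmake change above replaces the floating "tengine-opencv" tag with a pinned commit and routes fetching through ocv_download(), which caches the archive, verifies its MD5, unpacks it, and skips all of that when the unpacked tree already exists. A minimal Python sketch of that flow, for orientation only — the commit, URL base and MD5 are the ones from the hunk, but fetch_tengine() is illustrative, not OpenCV's ocv_download() (which additionally honours the OPENCV_TENGINE_URL mirror override):

```python
import hashlib
import os
import urllib.request
import zipfile

COMMIT = "2f3cd86217f3530c8e4a82f3ed5af14c7a4e3943"   # TENGINE_COMMIT_VERSION
URL = "https://github.com/OAID/Tengine/archive/" + COMMIT + ".zip"
MD5 = "9124324b6e2b350012e46ae1db4bad7d"              # tengine_md5sum

def fetch_tengine(dst_dir):
    src = os.path.join(dst_dir, "Tengine-" + COMMIT)
    if os.path.isdir(src):                 # "Tengine already exists" fast path
        return src
    archive = os.path.join(dst_dir, COMMIT + ".zip")
    urllib.request.urlretrieve(URL, archive)
    with open(archive, "rb") as f:         # mirrors the HASH check
        if hashlib.md5(f.read()).hexdigest() != MD5:
            raise RuntimeError("TENGINE download failed: MD5 mismatch")
    with zipfile.ZipFile(archive) as z:    # mirrors UNPACK
        z.extractall(dst_dir)              # creates Tengine-<commit>/
    return src
```

In the real build all of this is handled inside ocv_download(); the sketch only makes the cache/verify/unpack behaviour concrete.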
+    SET(Tengine_INCLUDE_DIR "${OCV_TENGINE_SOURCE_PATH}/core/include" CACHE INTERNAL "")
+    if(EXISTS "${OCV_TENGINE_SOURCE_PATH}/CMakeLists.txt")
+        add_subdirectory("${OCV_TENGINE_SOURCE_PATH}" "${OCV_TENGINE_DIR}/build")
+    else()
+        message(WARNING "TENGINE: Missing 'CMakeLists.txt' in source code package: ${OCV_TENGINE_SOURCE_PATH}")
+        SET(HAVE_TENGINE 1)
     endif()
 endif()
-
-
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 41a4306e43..9a35cebb1f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -423,7 +423,7 @@ OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)"
   VISIBLE_IF ANDROID
   VERIFY HAVE_ANDROID_MEDIANDK)
 OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF
-  VISIBLE_IF (ARM OR AARCH64) AND UNIX AND NOT ANDROID AND NOT IOS
+  VISIBLE_IF (ARM OR AARCH64) AND (UNIX OR ANDROID) AND NOT IOS
  VERIFY HAVE_TENGINE)

 # OpenCV build components
diff --git a/cmake/OpenCVFindTengine.cmake b/cmake/OpenCVFindTengine.cmake
index 3f0bb7029c..2d33f5c993 100644
--- a/cmake/OpenCVFindTengine.cmake
+++ b/cmake/OpenCVFindTengine.cmake
@@ -21,25 +21,42 @@
 #
 # ----------------------------------------------------------------------------
-# Path for Tengine modules
+# Path for Tengine binaries
 # ----------------------------------------------------------------------------
-set(OPENCV_LIBTENGINE_ROOT_DIR "" CACHE PATH "Where to look for additional OpenCV modules (can be ;-separated list of paths)")
+set(OPENCV_LIBTENGINE_ROOT_DIR "" CACHE PATH "Path to TENGINE binaries installation")

-IF(OPENCV_LIBTENGINE_ROOT_DIR)
+IF(OPENCV_LIBTENGINE_ROOT_DIR AND NOT BUILD_TENGINE)

-    MESSAGE(STATUS "TENGINE:-- Set tengine lib dir by user ")
+    MESSAGE(STATUS "TENGINE:-- Use binaries at ${OPENCV_LIBTENGINE_ROOT_DIR}")
     SET(Tengine_FOUND ON)
     set(BUILD_TENGINE OFF)

-    SET(Tengine_INCLUDE_DIR ${OPENCV_LIBTENGINE_ROOT_DIR}/include)
-    SET(Tengine_LIB ${OPENCV_LIBTENGINE_ROOT_DIR}/lib/libtengine.a)
+    SET(Tengine_INCLUDE_DIR "${OPENCV_LIBTENGINE_ROOT_DIR}/include" CACHE PATH "TENGINE include dir")
+    SET(Tengine_LIB "${OPENCV_LIBTENGINE_ROOT_DIR}/lib/libtengine.a" CACHE PATH "TENGINE library dir")

 ELSE()
-
-    MESSAGE(STATUS "TENGINE:-- Auto download Tengine source code. ")
+    IF(ANDROID)
+        IF(OPENCV_TENGINE_FORCE_ANDROID)
+            # nothing, use Android
+        ELSEIF(OPENCV_TENGINE_SKIP_ANDROID)
+            set(Tengine_FOUND OFF)
+            set(HAVE_TENGINE FALSE)
+            return()
+        ELSEIF(NOT DEFINED ANDROID_NDK_REVISION)
+            MESSAGE(STATUS "Tengine: unsupported Android NDK version (ANDROID_NDK_REVISION is not defined)")
+            set(Tengine_FOUND OFF)
+            set(HAVE_TENGINE FALSE)
+            return()
+        ELSEIF(ANDROID_NDK_REVISION VERSION_LESS 14)
+            MESSAGE(STATUS "Tengine: unsupported Android NDK version (ANDROID_NDK_REVISION=${ANDROID_NDK_REVISION})")
+            set(Tengine_FOUND OFF)
+            set(HAVE_TENGINE FALSE)
+            return()
+        ENDIF()
+    ENDIF()
+    MESSAGE(STATUS "TENGINE:-- Build Tengine from source code.")
") include("${OpenCV_SOURCE_DIR}/3rdparty/libtengine/tengine.cmake") - ENDIF() IF(NOT Tengine_LIB) @@ -55,11 +72,7 @@ IF (Tengine_FOUND) set(TENGINE_INCLUDE_DIRS ${Tengine_INCLUDE_DIR}) ENDIF (Tengine_FOUND) -MESSAGE(STATUS "Tengine include is:" ${Tengine_INCLUDE_DIR}) -MESSAGE(STATUS "Tengine library is:" ${Tengine_LIB}) - MARK_AS_ADVANCED( Tengine_INCLUDE_DIR Tengine_LIB - Tengine ) diff --git a/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown b/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown index 2d0c0a205f..5cc39e3b17 100644 --- a/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown +++ b/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown @@ -31,7 +31,7 @@ that should be used to find the match. - We need two primary components: -# **Source image (I):** The image in which we expect to find a match to the template image - -# **Template image (T):** The patch image which will be compared to the template image + -# **Template image (T):** The patch image which will be compared to the source image our goal is to detect the highest matching area: @@ -61,7 +61,7 @@ that should be used to find the match. - If masking is needed for the match, three components are required: -# **Source image (I):** The image in which we expect to find a match to the template image - -# **Template image (T):** The patch image which will be compared to the template image + -# **Template image (T):** The patch image which will be compared to the source image -# **Mask image (M):** The mask, a grayscale image that masks the template diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index 614516a9c3..5066fb1b89 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -1269,6 +1269,8 @@ const _Tp& Mat::at(const Vec& idx) const template inline MatConstIterator_<_Tp> Mat::begin() const { + if (empty()) + return MatConstIterator_<_Tp>(); CV_DbgAssert( elemSize() == sizeof(_Tp) ); return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this); } @@ -1276,6 +1278,8 @@ MatConstIterator_<_Tp> Mat::begin() const template inline MatConstIterator_<_Tp> Mat::end() const { + if (empty()) + return MatConstIterator_<_Tp>(); CV_DbgAssert( elemSize() == sizeof(_Tp) ); MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this); it += total(); @@ -1285,6 +1289,8 @@ MatConstIterator_<_Tp> Mat::end() const template inline MatIterator_<_Tp> Mat::begin() { + if (empty()) + return MatIterator_<_Tp>(); CV_DbgAssert( elemSize() == sizeof(_Tp) ); return MatIterator_<_Tp>((Mat_<_Tp>*)this); } @@ -1292,6 +1298,8 @@ MatIterator_<_Tp> Mat::begin() template inline MatIterator_<_Tp> Mat::end() { + if (empty()) + return MatIterator_<_Tp>(); CV_DbgAssert( elemSize() == sizeof(_Tp) ); MatIterator_<_Tp> it((Mat_<_Tp>*)this); it += total(); @@ -2640,6 +2648,7 @@ MatConstIterator::MatConstIterator(const Mat* _m) { if( m && m->isContinuous() ) { + CV_Assert(!m->empty()); sliceStart = m->ptr(); sliceEnd = sliceStart + m->total()*elemSize; } @@ -2653,6 +2662,7 @@ MatConstIterator::MatConstIterator(const Mat* _m, int _row, int _col) CV_Assert(m && m->dims <= 2); if( m->isContinuous() ) { + CV_Assert(!m->empty()); sliceStart = m->ptr(); sliceEnd = sliceStart + m->total()*elemSize; } @@ -2667,6 +2677,7 @@ MatConstIterator::MatConstIterator(const Mat* _m, Point _pt) CV_Assert(m && m->dims <= 2); if( m->isContinuous() ) { + CV_Assert(!m->empty()); sliceStart 
         sliceEnd = sliceStart + m->total()*elemSize;
     }
diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp
index 54ba911f58..5ee56c94dc 100644
--- a/modules/core/test/test_mat.cpp
+++ b/modules/core/test/test_mat.cpp
@@ -2072,4 +2072,12 @@ TEST(Mat, regression_12943)  // memory usage: ~4.5 Gb
     cv::flip(src, dst, 0);
 }

+TEST(Mat, empty_iterator_16855)
+{
+    cv::Mat m;
+    EXPECT_NO_THROW(m.begin<uchar>());
+    EXPECT_NO_THROW(m.end<uchar>());
+    EXPECT_TRUE(m.begin<uchar>() == m.end<uchar>());
+}
+
 }} // namespace
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index a28c98483d..3b12508c74 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -1050,7 +1050,7 @@ CV__DNN_INLINE_NS_BEGIN
      * @param eta a coefficient in adaptive threshold formula: \f$nms\_threshold_{i+1}=eta\cdot nms\_threshold_i\f$.
      * @param top_k if `>0`, keep at most @p top_k picked indices.
      */
-    CV_EXPORTS_W void NMSBoxes(const std::vector<Rect2d>& bboxes, const std::vector<float>& scores,
+    CV_EXPORTS void NMSBoxes(const std::vector<Rect2d>& bboxes, const std::vector<float>& scores,
                                const float score_threshold, const float nms_threshold,
                                CV_OUT std::vector<int>& indices,
                                const float eta = 1.f, const int top_k = 0);
diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py
index d71a9c5a15..746dabf4ea 100644
--- a/modules/dnn/misc/python/test/test_dnn.py
+++ b/modules/dnn/misc/python/test/test_dnn.py
@@ -279,6 +279,12 @@ class dnn_test(NewOpenCVTests):
                 self.assertTrue(ret)
                 normAssert(self, refs[i], result, 'Index: %d' % i, 1e-10)

+    def test_nms(self):
+        confs = (1, 1)
+        rects = ((0, 0, 0.4, 0.4), (0, 0, 0.2, 0.4))  # 0.5 overlap
+
+        self.assertTrue(all(cv.dnn.NMSBoxes(rects, confs, 0, 0.6).ravel() == (0, 1)))
+
     def test_custom_layer(self):
         class CropLayer(object):
             def __init__(self, params, blobs):
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 09900556f7..2516cbf145 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -367,45 +367,97 @@ void ONNXImporter::populateNet(Net dstNet)
         }
         else if (layer_type == "Slice")
         {
-            if (layerParams.has("steps")) {
-                DictValue steps = layerParams.get("steps");
-                for (int i = 0; i < steps.size(); ++i) {
-                    if (steps.get(i) != 1)
-                        CV_Error(Error::StsNotImplemented,
-                                 "Slice layer only supports steps = 1");
-                }
-            }
-
             int axis = 0;
-            if (layerParams.has("axes")) {
-                DictValue axes = layerParams.get("axes");
-                for (int i = 1; i < axes.size(); ++i) {
-                    CV_Assert(axes.get(i - 1) == axes.get(i) - 1);
-                }
-                axis = axes.get(0);
-            }
-            layerParams.set("axis", axis);
-
-            DictValue starts = layerParams.get("starts");
-            DictValue ends = layerParams.get("ends");
-            CV_Assert(starts.size() == ends.size());
-
             std::vector<int> begin;
             std::vector<int> end;
-            if (axis > 0) {
-                begin.resize(axis, 0);
-                end.resize(axis, -1);
-            }
+            int inp_size = node_proto.input_size();

-            for (int i = 0; i < starts.size(); ++i)
+            if (inp_size == 1)
             {
-                begin.push_back(starts.get(i));
-                int finish = ends.get(i);
-                end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
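[review note] test_nms further up in this patch hinges on the two boxes overlapping at exactly IoU 0.5, so with nms_threshold=0.6 neither suppresses the other and NMSBoxes returns both indices (0, 1). The arithmetic behind the "0.5 overlap" comment, as a hedged sketch — iou() here is illustrative, not the cv::dnn implementation; boxes are in (x, y, w, h) form as the Python binding takes them:

```python
def iou(a, b):
    # Intersection-over-union for two (x, y, w, h) boxes.
    ax, ay, aw, ah = a
    bx, by, bw, bh = b
    iw = max(0.0, min(ax + aw, bx + bw) - max(ax, bx))
    ih = max(0.0, min(ay + ah, by + bh) - max(ay, by))
    inter = iw * ih
    return inter / (aw * ah + bw * bh - inter)

# The two rectangles from test_nms: intersection 0.2 * 0.4 = 0.08,
# union 0.16 + 0.08 - 0.08 = 0.16, hence IoU = 0.5 < 0.6 -> both kept.
print(iou((0, 0, 0.4, 0.4), (0, 0, 0.2, 0.4)))  # 0.5
```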
+                if (layerParams.has("steps"))
+                {
+                    DictValue steps = layerParams.get("steps");
+                    for (int i = 0; i < steps.size(); ++i)
+                    {
+                        if (steps.get(i) != 1)
+                            CV_Error(Error::StsNotImplemented,
+                                     "Slice layer only supports steps = 1");
+                    }
+                }
+                if (layerParams.has("axes")) {
+                    DictValue axes = layerParams.get("axes");
+                    for (int i = 1; i < axes.size(); ++i) {
+                        CV_Assert(axes.get(i - 1) == axes.get(i) - 1);
+                    }
+                    axis = axes.get(0);
+                }
+
+                DictValue starts = layerParams.get("starts");
+                DictValue ends = layerParams.get("ends");
+                CV_Assert(starts.size() == ends.size());
+
+                if (axis > 0) {
+                    begin.resize(axis, 0);
+                    end.resize(axis, -1);
+                }
+                for (int i = 0; i < starts.size(); ++i)
+                {
+                    begin.push_back(starts.get(i));
+                    int finish = ends.get(i);
+                    end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
+                }
+            } else {
+                CV_Assert(inp_size >= 3);
+                for (int i = 1; i < inp_size; i++) {
+                    CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end());
+                }
+                Mat start_blob = getBlob(node_proto, constBlobs, 1);
+                Mat end_blob   = getBlob(node_proto, constBlobs, 2);
+                CV_Assert(start_blob.total() == end_blob.total());
+
+                if (inp_size > 3) {
+                    Mat axes_blob = getBlob(node_proto, constBlobs, 3);
+                    const int* axes = (int*)axes_blob.data;
+                    for (int i = 1; i < axes_blob.total(); ++i) {
+                        CV_Assert(axes[i - 1] == axes[i] - 1);
+                    }
+                    axis = axes[0];
+                }
+
+                const int* starts = start_blob.ptr<int>();
+                const int* ends   = end_blob.ptr<int>();
+                if (axis > 0) {
+                    begin.resize(axis, 0);
+                    end.resize(axis, -1);
+                }
+                std::copy(starts, starts + start_blob.total(), std::back_inserter(begin));
+                for (int i = 0; i < end_blob.total(); ++i)
+                {
+                    int finish = ends[i];
+                    end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
+                }
+
+                if (inp_size == 5) {
+                    CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end());
+                    Mat step_blob = getBlob(node_proto, constBlobs, 4);
+                    CV_CheckEQ(countNonZero(step_blob != 1), 0, "Slice layer only supports steps = 1");
+                }
             }
             layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size()));
             layerParams.set("end", DictValue::arrayInt(&end[0], end.size()));
-        }
+            layerParams.set("axis", axis);
+
+            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
+            {
+                Mat inp = getBlob(node_proto, constBlobs, 0);
+                std::vector<Mat> inputs, sliced;
+                inputs.push_back(inp);
+                runLayer(layerParams, inputs, sliced);
+                CV_Assert(sliced.size() == 1);
+                constBlobs.insert(std::make_pair(layerParams.name, sliced[0]));
+                continue;
+            }
+        }
         else if (layer_type == "Split")
         {
             if (layerParams.has("split"))
@@ -444,16 +496,35 @@ void ONNXImporter::populateNet(Net dstNet)
         }
         else if (is_const_0 || is_const_1)
         {
-            Mat blob = getBlob(node_proto, constBlobs, is_const_0 ? 0 : 1);
-            blob = blob.reshape(1, 1);
-            if (blob.total() == 1) {
+            int const_blob_id = is_const_0 ? 0 : 1;
+            Mat blob = getBlob(node_proto, constBlobs, const_blob_id);
+            int blob_total = blob.total();
+            if (blob_total == 1) {
                 layerParams.type = "Power";
                 layerParams.set("shift", (isSub ? -1 : 1) * blob.at<float>(0));
             }
             else {
-                layerParams.type = "Scale";
-                layerParams.set("bias_term", true);
-                layerParams.blobs.push_back((isSub ? -1 : 1) * blob);
+                MatShape inpShape = outShapes[node_proto.input(1 - const_blob_id)];
+                if (shape(blob) == inpShape)
+                {
+                    LayerParams constParams;
+                    constParams.name = layerParams.name + "/const";
+                    constParams.type = "Const";
+                    constParams.blobs.push_back(blob);
+                    int id = dstNet.addLayer(constParams.name, constParams.type, constParams);
+                    layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
+                    outShapes[constParams.name] = shape(blob);
+
+                    layerParams.type = "Eltwise";
+                    node_proto.set_input(const_blob_id, constParams.name);
+                }
+                else
+                {
+                    layerParams.type = "Scale";
+                    layerParams.set("bias_term", true);
+                    blob = blob.reshape(1, 1);
+                    layerParams.blobs.push_back((isSub ? -1 : 1) * blob);
+                }
             }
         }
         else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
@@ -947,6 +1018,17 @@ void ONNXImporter::populateNet(Net dstNet)
             else
                 layerParams.type = "Identity";
         }
+        else if (layer_type == "ConstantOfShape")
+        {
+            float fill_value = layerParams.blobs.empty() ? 0 : layerParams.blobs[0].at<float>(0, 0);
+            MatShape inpShape = getBlob(node_proto, constBlobs, 0);
+            for (int i = 0; i < inpShape.size(); i++)
+                CV_CheckGT(inpShape[i], 0, "");
+            Mat tensor(inpShape.size(), &inpShape[0], CV_32F, Scalar(fill_value));
+            constBlobs.insert(std::make_pair(layerParams.name, tensor));
+            outShapes[node_proto.output(0)] = shape(tensor);
+            continue;
+        }
         else if (layer_type == "Gather")
         {
             CV_Assert(node_proto.input_size() == 2);
@@ -990,6 +1072,39 @@ void ONNXImporter::populateNet(Net dstNet)
                 continue;
             }
         }
+        else if (layer_type == "Resize")
+        {
+            for (int i = 1; i < node_proto.input_size(); i++)
+                CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());
+
+            String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
+            CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "asymmetric",
+                        interp_mode != "tf_half_pixel_for_nn");
+
+            layerParams.set("align_corners", interp_mode == "align_corners");
+            Mat shapes = getBlob(node_proto, constBlobs, node_proto.input_size() - 1);
+            CV_CheckEQ(shapes.size[0], 4, "");
+            CV_CheckEQ(shapes.size[1], 1, "");
+            CV_CheckTypeEQ(shapes.depth(), CV_32S, "");
+            int height = shapes.at<int>(2);
+            int width  = shapes.at<int>(3);
+            if (node_proto.input_size() == 3)
+            {
+                shapeIt = outShapes.find(node_proto.input(0));
+                CV_Assert(shapeIt != outShapes.end());
+                MatShape scales = shapeIt->second;
+                height *= scales[2];
+                width  *= scales[3];
+            }
+            layerParams.set("width", width);
+            layerParams.set("height", height);
+
+            if (layerParams.get<String>("mode") == "linear") {
+                layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
+                                        "opencv_linear" : "bilinear");
+            }
+            replaceLayerParam(layerParams, "mode", "interpolation");
+        }
         else if (layer_type == "Upsample")
         {
             layerParams.type = "Resize";
@@ -1038,10 +1153,12 @@ void ONNXImporter::populateNet(Net dstNet)
         }

         std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
+        int inpNum = 0;
         for (int j = 0; j < node_proto.input_size(); j++) {
             layerId = layer_id.find(node_proto.input(j));
             if (layerId != layer_id.end()) {
-                dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j);
+                dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum);
+                ++inpNum;
                 // Collect input shapes.
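[review note] Background for the reworked Slice branch above: from ONNX opset 10 on, starts/ends/axes/steps arrive as extra node inputs rather than attributes, which is why the importer now reads them out of constBlobs whenever the node has more than one input. A numpy sketch of the semantics being mapped onto OpenCV's Slice layer — onnx_slice() is a stand-in for the spec, not importer code; note that ONNX ends are exclusive, which is what the importer's --finish adjustment for negative ends compensates for:

```python
import numpy as np

def onnx_slice(data, starts, ends, axes=None, steps=None):
    axes = list(range(len(starts))) if axes is None else axes
    steps = [1] * len(starts) if steps is None else steps  # importer rejects steps != 1
    index = [slice(None)] * data.ndim
    for start, end, axis, step in zip(starts, ends, axes, steps):
        index[axis] = slice(start, end, step)  # ends are exclusive, numpy-style
    return data[tuple(index)]

x = np.arange(24).reshape(2, 3, 4)
print(onnx_slice(x, starts=[1], ends=[-1], axes=[1]).shape)  # (2, 1, 4)
```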
                 shapeIt = outShapes.find(node_proto.input(j));
                 CV_Assert(shapeIt != outShapes.end());
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index b2b7b5debe..19b6bfed27 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -57,8 +57,13 @@ public:
         net.setPreferableBackend(backend);
         net.setPreferableTarget(target);

+        std::vector<String> inputNames;
         for (int i = 0; i < numInps; ++i)
-            net.setInput(inps[i], numInps > 1 ? format("%d", i) : "");
+            inputNames.push_back(format("%d", i));
+        net.setInputsNames(inputNames);
+
+        for (int i = 0; i < numInps; ++i)
+            net.setInput(inps[i], inputNames[i]);
         Mat out = net.forward("");

         if (useSoftmax)
@@ -173,6 +178,11 @@ TEST_P(Test_ONNX_layers, Clip)
     testONNXModels("clip", npy);
 }

+TEST_P(Test_ONNX_layers, Shape)
+{
+    testONNXModels("shape_of_constant");
+}
+
 TEST_P(Test_ONNX_layers, ReduceMean)
 {
     testONNXModels("reduce_mean");
@@ -371,6 +381,11 @@ TEST_P(Test_ONNX_layers, Broadcast)
     testONNXModels("channel_broadcast", npy, 0, 0, false, true, 2);
 }

+TEST_P(Test_ONNX_layers, DynamicResize)
+{
+    testONNXModels("dynamic_resize", npy, 0, 0, false, true, 2);
+}
+
 TEST_P(Test_ONNX_layers, Div)
 {
     const String model = _tf("models/div.onnx");
@@ -400,10 +415,8 @@ TEST_P(Test_ONNX_layers, Div)
 TEST_P(Test_ONNX_layers, DynamicReshape)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
-    {
-        if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
-        if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
-    }
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+
     testONNXModels("dynamic_reshape");
     testONNXModels("dynamic_reshape_opset_11");
     testONNXModels("flatten_by_prod");
@@ -443,6 +456,7 @@ TEST_P(Test_ONNX_layers, Slice)
     testONNXModels("slice", npy, 0, 0, false, false);
 #else
     testONNXModels("slice");
+    testONNXModels("slice_opset_11");
 #endif
 }
diff --git a/modules/python/common.cmake b/modules/python/common.cmake
index b69d13d5ae..6a438fd1a2 100644
--- a/modules/python/common.cmake
+++ b/modules/python/common.cmake
@@ -164,7 +164,12 @@ if(OPENCV_SKIP_PYTHON_LOADER)
   endif()
 else()
   ocv_assert(DEFINED OPENCV_PYTHON_INSTALL_PATH)
-  set(__python_binary_install_path "${OPENCV_PYTHON_INSTALL_PATH}/${__python_loader_subdir}python-${${PYTHON}_VERSION_MAJOR}.${${PYTHON}_VERSION_MINOR}")
+  if(${PYTHON}_LIMITED_API)
+    set(__python_binary_subdir "python-${${PYTHON}_VERSION_MAJOR}")
+  else()
+    set(__python_binary_subdir "python-${${PYTHON}_VERSION_MAJOR}.${${PYTHON}_VERSION_MINOR}")
+  endif()
+  set(__python_binary_install_path "${OPENCV_PYTHON_INSTALL_PATH}/${__python_loader_subdir}${__python_binary_subdir}")
 endif()

 install(TARGETS ${the_module}
@@ -192,7 +197,7 @@ if(NOT OPENCV_SKIP_PYTHON_LOADER)
     set(CMAKE_PYTHON_EXTENSION_INSTALL_PATH_BASE "LOADER_DIR")
   endif()

-  if(DEFINED ${PYTHON}_VERSION_MINOR)
+  if(DEFINED ${PYTHON}_VERSION_MINOR AND NOT ${PYTHON}_LIMITED_API)
     set(__target_config "config-${${PYTHON}_VERSION_MAJOR}.${${PYTHON}_VERSION_MINOR}.py")
   else()
     set(__target_config "config-${${PYTHON}_VERSION_MAJOR}.py")
diff --git a/samples/cpp/intelligent_scissors.cpp b/samples/cpp/intelligent_scissors.cpp
index 6141c1f7b5..22f72d64e7 100644
--- a/samples/cpp/intelligent_scissors.cpp
+++ b/samples/cpp/intelligent_scissors.cpp
@@ -189,7 +189,10 @@ int main( int argc, const char** argv )
     const int EDGE_THRESHOLD_LOW = 50;
     const int EDGE_THRESHOLD_HIGH = 100;

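[review note] The test-harness hunk above switches multi-input ONNX networks to an explicit pattern: declare every input name first with setInputsNames, then bind each blob by name. The same pattern from the Python API — the model path and blob shape are placeholders, not files shipped with this PR:

```python
import cv2 as cv
import numpy as np

net = cv.dnn.readNet("two_input_model.onnx")  # hypothetical two-input model
names = ["0", "1"]                            # mirrors format("%d", i) in the C++ test
net.setInputsNames(names)
for name in names:
    net.setInput(np.zeros((1, 3, 8, 8), np.float32), name)
out = net.forward()
```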
     CommandLineParser parser(argc, argv, keys);
-    parser.about("\nThis program demonstrates implementation of 'intelligent scissors' algorithm\n"
+    parser.about("\nThis program demonstrates implementation of 'Intelligent Scissors' algorithm designed\n"
+                 "by Eric N. Mortensen and William A. Barrett, and described in the article\n"
+                 "'Intelligent Scissors for Image Composition':\n"
+                 "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.138.3811&rep=rep1&type=pdf\n"
                  "To start drawing a new contour select a pixel, click LEFT mouse button.\n"
                  "To fix a path click LEFT mouse button again.\n"
                  "To finish drawing a contour click RIGHT mouse button.\n");
diff --git a/samples/dnn/dasiamrpn_tracker.py b/samples/dnn/dasiamrpn_tracker.py
new file mode 100644
index 0000000000..df734645db
--- /dev/null
+++ b/samples/dnn/dasiamrpn_tracker.py
@@ -0,0 +1,281 @@
+"""
+DaSiamRPN tracker.
+Original paper: https://arxiv.org/abs/1808.06048
+Link to original repo: https://github.com/foolwood/DaSiamRPN
+Links to onnx models:
+network:     https://www.dropbox.com/s/rr1lk9355vzolqv/dasiamrpn_model.onnx?dl=0
+kernel_r1:   https://www.dropbox.com/s/999cqx5zrfi7w4p/dasiamrpn_kernel_r1.onnx?dl=0
+kernel_cls1: https://www.dropbox.com/s/qvmtszx5h339a0w/dasiamrpn_kernel_cls1.onnx?dl=0
+"""
+
+import numpy as np
+import cv2 as cv
+import argparse
+import sys
+
+class DaSiamRPNTracker:
+    # Initialization of used values, initial bounding box, used network
+    def __init__(self, im, target_pos, target_sz, net, kernel_r1, kernel_cls1):
+        self.windowing = "cosine"
+        self.exemplar_size = 127
+        self.instance_size = 271
+        self.total_stride = 8
+        self.score_size = (self.instance_size - self.exemplar_size) // self.total_stride + 1
+        self.context_amount = 0.5
+        self.ratios = [0.33, 0.5, 1, 2, 3]
+        self.scales = [8, ]
+        self.anchor_num = len(self.ratios) * len(self.scales)
+        self.penalty_k = 0.055
+        self.window_influence = 0.42
+        self.lr = 0.295
+        self.im_h = im.shape[0]
+        self.im_w = im.shape[1]
+        self.target_pos = target_pos
+        self.target_sz = target_sz
+        self.avg_chans = np.mean(im, axis=(0, 1))
+        self.net = net
+        self.score = []
+
+        if ((self.target_sz[0] * self.target_sz[1]) / float(self.im_h * self.im_w)) < 0.004:
+            raise AssertionError("Initializing BB is too small - try to restart tracker with a larger BB")
+
+        self.anchor = self.__generate_anchor()
+        wc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz)
+        hc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz)
+        s_z = round(np.sqrt(wc_z * hc_z))
+
+        z_crop = self.__get_subwindow_tracking(im, self.exemplar_size, s_z)
+        z_crop = z_crop.transpose(2, 0, 1).reshape(1, 3, 127, 127).astype(np.float32)
+        self.net.setInput(z_crop)
+        z_f = self.net.forward('63')
+        kernel_r1.setInput(z_f)
+        r1 = kernel_r1.forward()
+        kernel_cls1.setInput(z_f)
+        cls1 = kernel_cls1.forward()
+        r1 = r1.reshape(20, 256, 4, 4)
+        cls1 = cls1.reshape(10, 256, 4, 4)
+        self.net.setParam(self.net.getLayerId('65'), 0, r1)
+        self.net.setParam(self.net.getLayerId('68'), 0, cls1)
+
+        if self.windowing == "cosine":
+            self.window = np.outer(np.hanning(self.score_size), np.hanning(self.score_size))
+        elif self.windowing == "uniform":
+            self.window = np.ones((self.score_size, self.score_size))
+        self.window = np.tile(self.window.flatten(), self.anchor_num)
+
+    # Creating anchor for tracking bounding box
+    def __generate_anchor(self):
+        self.anchor = np.zeros((self.anchor_num, 4), dtype=np.float32)
+        size = self.total_stride * self.total_stride
+        count = 0
+
+        for ratio in self.ratios:
+            ws = int(np.sqrt(size / ratio))
+            hs = int(ws * ratio)
+            for scale in self.scales:
+                wws = ws * scale
+                hhs = hs * scale
+                self.anchor[count] = [0, 0, wws, hhs]
+                count += 1
+
+        score_sz = int(self.score_size)
+        self.anchor = np.tile(self.anchor, score_sz * score_sz).reshape((-1, 4))
+        ori = - (score_sz / 2) * self.total_stride
+        xx, yy = np.meshgrid([ori + self.total_stride * dx for dx in range(score_sz)], [ori + self.total_stride * dy for dy in range(score_sz)])
+        xx, yy = np.tile(xx.flatten(), (self.anchor_num, 1)).flatten(), np.tile(yy.flatten(), (self.anchor_num, 1)).flatten()
+        self.anchor[:, 0], self.anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32)
+        return self.anchor
+
+    # Track function
+    def track(self, im):
+        wc_z = self.target_sz[1] + self.context_amount * sum(self.target_sz)
+        hc_z = self.target_sz[0] + self.context_amount * sum(self.target_sz)
+        s_z = np.sqrt(wc_z * hc_z)
+        scale_z = self.exemplar_size / s_z
+        d_search = (self.instance_size - self.exemplar_size) / 2
+        pad = d_search / scale_z
+        s_x = round(s_z + 2 * pad)
+
+        # Region preprocessing
+        x_crop = self.__get_subwindow_tracking(im, self.instance_size, s_x)
+        x_crop = x_crop.transpose(2, 0, 1).reshape(1, 3, 271, 271).astype(np.float32)
+        self.score = self.__tracker_eval(x_crop, scale_z)
+        self.target_pos[0] = max(0, min(self.im_w, self.target_pos[0]))
+        self.target_pos[1] = max(0, min(self.im_h, self.target_pos[1]))
+        self.target_sz[0] = max(10, min(self.im_w, self.target_sz[0]))
+        self.target_sz[1] = max(10, min(self.im_h, self.target_sz[1]))
+
+    # Update bounding box position
+    def __tracker_eval(self, x_crop, scale_z):
+        target_size = self.target_sz * scale_z
+        self.net.setInput(x_crop)
+        outNames = ['66', '68']
+        delta, score = self.net.forward(outNames)
+        delta = np.transpose(delta, (1, 2, 3, 0))
+        delta = np.ascontiguousarray(delta, dtype=np.float32)
+        delta = np.reshape(delta, (4, -1))
+        score = np.transpose(score, (1, 2, 3, 0))
+        score = np.ascontiguousarray(score, dtype=np.float32)
+        score = np.reshape(score, (2, -1))
+        score = self.__softmax(score)[1, :]
+        delta[0, :] = delta[0, :] * self.anchor[:, 2] + self.anchor[:, 0]
+        delta[1, :] = delta[1, :] * self.anchor[:, 3] + self.anchor[:, 1]
+        delta[2, :] = np.exp(delta[2, :]) * self.anchor[:, 2]
+        delta[3, :] = np.exp(delta[3, :]) * self.anchor[:, 3]
+
+        def __change(r):
+            return np.maximum(r, 1. / r)
+
+        def __sz(w, h):
+            pad = (w + h) * 0.5
+            sz2 = (w + pad) * (h + pad)
+            return np.sqrt(sz2)
+
+        def __sz_wh(wh):
+            pad = (wh[0] + wh[1]) * 0.5
+            sz2 = (wh[0] + pad) * (wh[1] + pad)
+            return np.sqrt(sz2)
+
+        s_c = __change(__sz(delta[2, :], delta[3, :]) / (__sz_wh(target_size)))
+        r_c = __change((target_size[0] / target_size[1]) / (delta[2, :] / delta[3, :]))
+        penalty = np.exp(-(r_c * s_c - 1.) * self.penalty_k)
+        pscore = penalty * score
+        pscore = pscore * (1 - self.window_influence) + self.window * self.window_influence
+        best_pscore_id = np.argmax(pscore)
+        target = delta[:, best_pscore_id] / scale_z
+        target_size /= scale_z
+        lr = penalty[best_pscore_id] * score[best_pscore_id] * self.lr
+        res_x = target[0] + self.target_pos[0]
+        res_y = target[1] + self.target_pos[1]
+        res_w = target_size[0] * (1 - lr) + target[2] * lr
+        res_h = target_size[1] * (1 - lr) + target[3] * lr
+        self.target_pos = np.array([res_x, res_y])
+        self.target_sz = np.array([res_w, res_h])
+        return score[best_pscore_id]
+
+    def __softmax(self, x):
+        x_max = x.max(0)
+        e_x = np.exp(x - x_max)
+        y = e_x / e_x.sum(axis=0)
+        return y
+
+    # Evaluations with cropped image
+    def __get_subwindow_tracking(self, im, model_size, original_sz):
+        im_sz = im.shape
+        c = (original_sz + 1) / 2
+        context_xmin = round(self.target_pos[0] - c)
+        context_xmax = context_xmin + original_sz - 1
+        context_ymin = round(self.target_pos[1] - c)
+        context_ymax = context_ymin + original_sz - 1
+        left_pad = int(max(0., -context_xmin))
+        top_pad = int(max(0., -context_ymin))
+        right_pad = int(max(0., context_xmax - im_sz[1] + 1))
+        bottom_pad = int(max(0., context_ymax - im_sz[0] + 1))
+        context_xmin += left_pad
+        context_xmax += left_pad
+        context_ymin += top_pad
+        context_ymax += top_pad
+        r, c, k = im.shape
+
+        if any([top_pad, bottom_pad, left_pad, right_pad]):
+            te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8)
+            te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im
+            if top_pad:
+                te_im[0:top_pad, left_pad:left_pad + c, :] = self.avg_chans
+            if bottom_pad:
+                te_im[r + top_pad:, left_pad:left_pad + c, :] = self.avg_chans
+            if left_pad:
+                te_im[:, 0:left_pad, :] = self.avg_chans
+            if right_pad:
+                te_im[:, c + left_pad:, :] = self.avg_chans
+            im_patch_original = te_im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :]
+        else:
+            im_patch_original = im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :]
+
+        if not np.array_equal(model_size, original_sz):
+            im_patch_original = cv.resize(im_patch_original, (model_size, model_size))
+
+        return im_patch_original
+
+# Function for reading paths, bounding box drawing, showing results
+def main():
+    parser = argparse.ArgumentParser(description="Run tracker")
+    parser.add_argument("--net", type=str, default="dasiamrpn_model.onnx", help="Full path to onnx model of net")
+    parser.add_argument("--kernel_r1", type=str, default="dasiamrpn_kernel_r1.onnx", help="Full path to onnx model of kernel_r1")
+    parser.add_argument("--kernel_cls1", type=str, default="dasiamrpn_kernel_cls1.onnx", help="Full path to onnx model of kernel_cls1")
+    parser.add_argument("--input", type=str, help="Full path to input. Do not use if input is camera")
Do not use if input is camera") + args = parser.parse_args() + point1 = () + point2 = () + mark = True + drawing = False + cx, cy, w, h = 0.0, 0.0, 0, 0 + + def get_bb(event, x, y, flag, param): + nonlocal point1, point2, cx, cy, w, h, drawing, mark + + if event == cv.EVENT_LBUTTONDOWN: + if not drawing: + drawing = True + point1 = (x, y) + else: + drawing = False + + elif event == cv.EVENT_MOUSEMOVE: + if drawing: + point2 = (x, y) + + elif event == cv.EVENT_LBUTTONUP: + cx = point1[0] - (point1[0] - point2[0]) / 2 + cy = point1[1] - (point1[1] - point2[1]) / 2 + w = abs(point1[0] - point2[0]) + h = abs(point1[1] - point2[1]) + mark = False + + #loading network`s and kernel`s models + net = cv.dnn.readNet(args.net) + kernel_r1 = cv.dnn.readNet(args.kernel_r1) + kernel_cls1 = cv.dnn.readNet(args.kernel_cls1) + + #initializing bounding box + cap = cv.VideoCapture(args.input if args.input else 0) + cv.namedWindow("DaSiamRPN") + cv.setMouseCallback("DaSiamRPN", get_bb) + + whitespace_key = 32 + while cv.waitKey(40) != whitespace_key: + has_frame, frame = cap.read() + if not has_frame: + sys.exit(0) + cv.imshow("DaSiamRPN", frame) + + while mark: + twin = np.copy(frame) + if point1 and point2: + cv.rectangle(twin, point1, point2, (0, 255, 255), 3) + cv.imshow("DaSiamRPN", twin) + cv.waitKey(40) + + target_pos, target_sz = np.array([cx, cy]), np.array([w, h]) + tracker = DaSiamRPNTracker(frame, target_pos, target_sz, net, kernel_r1, kernel_cls1) + + #tracking loop + while cap.isOpened(): + has_frame, frame = cap.read() + if not has_frame: + sys.exit(0) + tracker.track(frame) + w, h = tracker.target_sz + cx, cy = tracker.target_pos + cv.rectangle(frame, (int(cx - w // 2), int(cy - h // 2)), (int(cx - w // 2) + int(w), int(cy - h // 2) + int(h)),(0, 255, 255), 3) + cv.imshow("DaSiamRPN", frame) + key = cv.waitKey(1) + if key == ord("q"): + break + + cap.release() + cv.destroyAllWindows() + +if __name__ == "__main__": + main()